After a run completes with a status of SUCCEEDED or PARTIAL_FAILED, you can access detailed metrics and validation results from the run.metadata object. This example shows how to parse that object.

To make the example runnable, we'll create a mock LumeRun object that simulates the result of a completed pipeline. In a real workflow, this run object would be the output of lume.run(...) followed by run.wait().
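For reference, a real run is obtained roughly as sketched below; the arguments to lume.run are specific to your pipeline and are left as a placeholder here, and the import line assumes the Lume SDK is installed and configured:

import lume

run = lume.run(...)  # start the pipeline; supply your pipeline's arguments in place of ...
run.wait()           # block until the run finishes

# run.status and run.metadata are now populated, just like the mock object below.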
See the Advanced Topics page for the full, detailed schema of the metadata object.
class MockLumeRun:
    """A mock LumeRun object for demonstration purposes."""
    def __init__(self, status, metadata):
        self.status = status
        self.metadata = metadata

# Create a mock run object with sample data
run = MockLumeRun(
    status="PARTIAL_FAILED",
    metadata={
        "pipeline": {
            "duration_seconds": 12.345,
            "sync_source_seconds": 4.1,
            "transform_seconds": 7.2,
            "sync_target_seconds": 1.045
        },
        "results": {
            "input_rows": 1000,
            "mapped_rows": 990,
            "rejected_rows": 10
        },
        "validation": {
            "tests_executed": 2970,
            "error_rate": 0.01,
            "top_errors": [
                {"field": "email", "error_code": "INVALID_FORMAT", "count": 8},
                {"field": "postal_code", "error_code": "VALUE_TOO_SHORT", "count": 2}
            ]
        }
    }
)
# --- Start of parsing logic ---
# You can copy-paste everything below this line to process a real run object.
if run.status in ["SUCCEEDED", "PARTIAL_FAILED"]:
    metadata = run.metadata

    # --- Performance Metrics ---
    pipeline_metrics = metadata.get("pipeline", {})
    print("\n--- Pipeline Performance ---")
    print(f"Total Duration: {pipeline_metrics.get('duration_seconds', 0):.2f}s")

    # --- Row Counts ---
    result_metrics = metadata.get("results", {})
    print("\n--- Row Counts ---")
    print(f"Input Rows: {result_metrics.get('input_rows', 'N/A')}")
    print(f"Mapped Rows: {result_metrics.get('mapped_rows', 'N/A')}")
    print(f"Rejected Rows: {result_metrics.get('rejected_rows', 'N/A')}")

    # --- Validation Summary ---
    validation_metrics = metadata.get("validation", {})
    print("\n--- Validation Summary ---")
    print(f"Total Tests Executed: {validation_metrics.get('tests_executed', 'N/A')}")
    print(f"Error Rate: {validation_metrics.get('error_rate', 0):.2%}")

    # --- Top Errors ---
    top_errors = validation_metrics.get("top_errors", [])
    if top_errors:
        print("\n--- Top 5 Validation Errors ---")
        for error in top_errors[:5]:
            print(f"- Field: {error.get('field')}, Error: {error.get('error_code')}, Count: {error.get('count')}")