Skip to main content

REST API

info

To use the Datafold REST API, you must first create a Datafold API key. The key is sent in the `Authorization` header of each request, as shown in the examples below.

Create A New Diff

| Parameter | Value |
| --- | --- |
| Endpoint | `/api/v1/datadiffs` |
| Method | `POST` |
| Request Content-Type | `application/json` |
| Response Content-Type | `application/json` |
| Auth | API key header. Example: `headers = {'Authorization': 'Key <api_key>'}` |

Example request body:

{
"data_source1_id": 1,
"data_source2_id": 1,
"table1": [
"DATABASE",
"SCHEMA",
"TABLE_1"
],
"table2": [
"DATABASE",
"SCHEMA",
"TABLE_2"
],
"query1": null,
"query2": null,
"materialize_dataset1": false,
"materialize_dataset2": false,
"materialization_destination_id": null,
"materialize_without_sampling": false,
"pk_columns": [
"PRIMARY_KEY"
],
"include_columns": null,
"exclude_columns": null,
"time_column": null,
"time_aggregate": null,
"filter1": null,
"filter2": null,
"time_travel_point1": null,
"time_travel_point2": null,
"time_interval_start": null,
"time_interval_end": null,
"sampling_tolerance": null,
"sampling_confidence": null,
"sampling_threshold": null,
"diff_tolerances_per_column": null
}

Get Diff Results

| Parameter | Value |
| --- | --- |
| Endpoint | `/api/v1/datadiffs/<id>/summary_results` |
| Method | `GET` |
| Response Content-Type | `application/json` |
| Auth | API key header. Example: `headers = {'Authorization': 'Key <api_key>'}` |

Immediately after the diff is submitted, the response body contains only a `status` field, which may be `running`:

{
"status": "running"
}

Or:

{
"status": "waiting"
}

When the diff is complete, `status` is set to `success` and the response includes additional fields containing high-level diff metadata:

{
"status": "success",
"pks": {
"total_rows": [
1509,
1761
],
"nulls": [
0,
0
],
"dupes": [
0,
303
],
"exclusives": [
0,
5
],
"distincts": [
1509,
1514
]
},
"values": {
"total_rows": 1453,
"rows_with_differences": 0,
"total_values": 10171,
"values_with_differences": 0,
"compared_columns": 7,
"columns_with_differences": 0
},
"dependencies": {
"deps": {}
},
"schema": {
"columns_mismatched": [
0,
0
],
"column_type_mismatches": 0,
"column_reorders": 0,
"column_counts": [
8,
8
]
}
}