> ## Documentation Index
> Fetch the complete documentation index at: https://docs.datafold.com/llms.txt
> Use this file to discover all available pages before exploring further.

> Retrieve details of a specific data diff by ID via the Datafold API.

# Get a data diff



## OpenAPI

````yaml get /api/v1/datadiffs/{datadiff_id}
# OpenAPI 3.1 description of the Datafold API. Only the "Get a data diff"
# operation and the schemas it references are included in this document.
openapi: 3.1.0
info:
  contact:
    email: support@datafold.com
    name: API Support
  # NOTE(review): the `:::info ... :::` block below is admonition markup inside
  # a folded scalar; confirm the renderer for this page supports it, otherwise
  # it is shown as literal text.
  description: >-
    The Datafold API reference is a guide to our available endpoints and
    authentication methods.

    If you're just getting started with Datafold, we recommend first checking
    out our [documentation](https://docs.datafold.com).


    :::info
      To use the Datafold API, you should first create a Datafold API Key,
      which should be stored as a local environment variable named DATAFOLD_API_KEY.
      This can be set in your Datafold Cloud's Settings under the Account page.
    :::
  title: Datafold API
  version: latest
servers:
  - description: Default server
    url: https://app.datafold.com
# Applies the ApiKeyAuth scheme (declared under components.securitySchemes)
# to every operation that does not override `security`.
security:
  - ApiKeyAuth: []
# The single endpoint documented here: fetch one data diff by its integer ID.
paths:
  /api/v1/datadiffs/{datadiff_id}:
    get:
      tags:
        - Data diffs
      summary: Get a data diff
      operationId: get_datadiff_api_v1_datadiffs__datadiff_id__get
      parameters:
        # Required path parameter substituted into {datadiff_id}.
        - in: path
          name: datadiff_id
          required: true
          schema:
            title: Data diff id
            type: integer
        # NOTE(review): `poll` has a title but no `type` or `description`, so
        # the schema accepts any value and its effect is undocumented. Confirm
        # the intended type and semantics against the server implementation.
        - in: query
          name: poll
          required: false
          schema:
            title: Poll
      # NOTE(review): only 200 and 422 are declared; confirm whether
      # authentication and not-found responses should be listed as well.
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ApiDataDiffWithProgressState'
          description: Successful Response
        '422':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
          description: Validation Error
components:
  schemas:
    # Body of the 200 response for GET /api/v1/datadiffs/{datadiff_id}: the
    # diff's configuration together with its run state (status, diff_progress,
    # diff_stats, result). Most optional properties are nullable, expressed as
    # `anyOf: [<type>, 'null']`.
    ApiDataDiffWithProgressState:
      properties:
        affected_columns:
          anyOf:
            - items:
                type: string
              type: array
            - type: 'null'
          title: Affected Columns
        # NOTE(review): the description names only 'join' and 'fetch_and_join',
        # while the DiffAlgorithm enum also lists 'hash' and 'hash_v2_alpha'.
        algorithm:
          anyOf:
            - $ref: '#/components/schemas/DiffAlgorithm'
            - type: 'null'
          description: >-
            Diff algorithm. 'join' for same-database diffs, 'fetch_and_join' for
            cross-database or file diffs. Auto-selected if omitted: 'join' when
            both data sources are the same, 'fetch_and_join' otherwise.
        archived:
          default: false
          title: Archived
          type: boolean
        bisection_factor:
          anyOf:
            - type: integer
            - type: 'null'
          title: Bisection Factor
        bisection_threshold:
          anyOf:
            - type: integer
            - type: 'null'
          title: Bisection Threshold
        ci_base_branch:
          anyOf:
            - type: string
            - type: 'null'
          title: Ci Base Branch
        ci_pr_branch:
          anyOf:
            - type: string
            - type: 'null'
          title: Ci Pr Branch
        ci_pr_num:
          anyOf:
            - type: integer
            - type: 'null'
          title: Ci Pr Num
        ci_pr_sha:
          anyOf:
            - type: string
            - type: 'null'
          title: Ci Pr Sha
        ci_pr_url:
          anyOf:
            - type: string
            - type: 'null'
          title: Ci Pr Url
        ci_pr_user_display_name:
          anyOf:
            - type: string
            - type: 'null'
          title: Ci Pr User Display Name
        ci_pr_user_email:
          anyOf:
            - type: string
            - type: 'null'
          title: Ci Pr User Email
        ci_pr_user_id:
          anyOf:
            - type: string
            - type: 'null'
          title: Ci Pr User Id
        ci_pr_username:
          anyOf:
            - type: string
            - type: 'null'
          title: Ci Pr Username
        ci_run_id:
          anyOf:
            - type: integer
            - type: 'null'
          title: Ci Run Id
        ci_sha_url:
          anyOf:
            - type: string
            - type: 'null'
          title: Ci Sha Url
        # Each item is a fixed-length pair of strings: prefixItems declares two
        # string positions and minItems/maxItems pin the length to 2.
        column_mapping:
          anyOf:
            - items:
                maxItems: 2
                minItems: 2
                prefixItems:
                  - type: string
                  - type: string
                type: array
              type: array
            - type: 'null'
          description: >-
            Map columns with different names between datasets. List of
            [column_in_A, column_in_B] pairs.
          title: Column Mapping
        # NOTE(review): this description and the one on `include_columns` are
        # nearly identical; confirm how the two properties interact.
        columns_to_compare:
          anyOf:
            - items:
                type: string
              type: array
            - type: 'null'
          description: >-
            Columns to compare between datasets. If set, only these columns are
            diffed (primary key columns are always included). Column names must
            match the dataset schema.
          title: Columns To Compare
        compare_duplicates:
          anyOf:
            - type: boolean
            - type: 'null'
          description: Compare rows with duplicate primary keys. Defaults to true.
          title: Compare Duplicates
        created_at:
          anyOf:
            - format: date-time
              type: string
            - type: 'null'
          title: Created At
        data_app_metadata:
          anyOf:
            - $ref: '#/components/schemas/TDataDiffDataAppMetadata'
            - type: 'null'
        data_app_type:
          anyOf:
            - type: string
            - type: 'null'
          title: Data App Type
        data_source1_id:
          description: ID of the first data source (Dataset A).
          title: Data Source1 Id
          type: integer
        data_source1_session_parameters:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          description: >-
            Snowflake session parameters for Dataset A, e.g. {"QUERY_TAG":
            "datadiff", "WAREHOUSE": "COMPUTE_WH"}.
          title: Data Source1 Session Parameters
        data_source2_id:
          description: >-
            ID of the second data source (Dataset B). Can be the same as
            data_source1_id.
          title: Data Source2 Id
          type: integer
        data_source2_session_parameters:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          description: Snowflake session parameters for Dataset B.
          title: Data Source2 Session Parameters
        # NOTE(review): the description enumerates values 0 through 6, but the
        # schema declares no minimum/maximum for the integer branch.
        datetime_tolerance:
          anyOf:
            - type: integer
            - type: 'null'
          description: >-
            Datetime precision for comparison. 0=seconds, 1=tenths,
            2=hundredths, 3=milliseconds, 4=tenth-ms, 5=hundredth-ms,
            6=microseconds.
          title: Datetime Tolerance
        diff_progress:
          anyOf:
            - $ref: '#/components/schemas/DiffProgress'
            - type: 'null'
        diff_stats:
          anyOf:
            - $ref: '#/components/schemas/DiffStats'
            - type: 'null'
        diff_tolerance:
          anyOf:
            - type: number
            - type: 'null'
          description: >-
            Default tolerance for float comparisons. In absolute mode: values
            within this distance are equal. In relative mode: fraction of
            difference allowed.
          title: Diff Tolerance
        diff_tolerances_per_column:
          anyOf:
            - items:
                $ref: '#/components/schemas/ColumnTolerance'
              type: array
            - type: 'null'
          description: >-
            Per-column tolerance overrides. Each entry: {column_name,
            tolerance_value (>= 0), tolerance_mode: 'absolute'|'relative'}.
          title: Diff Tolerances Per Column
        # Defaults to false but is also allowed to be null.
        done:
          anyOf:
            - type: boolean
            - type: 'null'
          default: false
          title: Done
        download_limit:
          anyOf:
            - type: integer
            - type: 'null'
          title: Download Limit
        exclude_columns:
          anyOf:
            - items:
                type: string
              type: array
            - type: 'null'
          description: >-
            Columns to exclude from comparison. Ignored if include_columns is
            set.
          title: Exclude Columns
        execute_as_user:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Execute As User
        file1:
          anyOf:
            - format: uri
              minLength: 1
              type: string
            - type: 'null'
          description: >-
            File URL for Dataset A (s3://, gs://, abfss://, https://). Mutually
            exclusive with table1 and query1. Requires file1_options.
          title: File1
        # Discriminated union: the `file_type` property selects which of the
        # three option schemas (csv / excel / parquet) applies.
        file1_options:
          anyOf:
            - discriminator:
                mapping:
                  csv:
                    $ref: '#/components/schemas/CSVFileOptions'
                  excel:
                    $ref: '#/components/schemas/ExcelFileOptions'
                  parquet:
                    $ref: '#/components/schemas/ParquetFileOptions'
                propertyName: file_type
              oneOf:
                - $ref: '#/components/schemas/CSVFileOptions'
                - $ref: '#/components/schemas/ExcelFileOptions'
                - $ref: '#/components/schemas/ParquetFileOptions'
            - type: 'null'
          description: >-
            File format options for file1 (file_type, delimiter, sheet, skip
            rows).
          title: File1 Options
        file2:
          anyOf:
            - format: uri
              minLength: 1
              type: string
            - type: 'null'
          description: >-
            File URL for Dataset B (s3://, gs://, abfss://, https://). Mutually
            exclusive with table2 and query2. Requires file2_options.
          title: File2
        # Same discriminated union on `file_type` as file1_options.
        file2_options:
          anyOf:
            - discriminator:
                mapping:
                  csv:
                    $ref: '#/components/schemas/CSVFileOptions'
                  excel:
                    $ref: '#/components/schemas/ExcelFileOptions'
                  parquet:
                    $ref: '#/components/schemas/ParquetFileOptions'
                propertyName: file_type
              oneOf:
                - $ref: '#/components/schemas/CSVFileOptions'
                - $ref: '#/components/schemas/ExcelFileOptions'
                - $ref: '#/components/schemas/ParquetFileOptions'
            - type: 'null'
          description: >-
            File format options for file2 (file_type, delimiter, sheet, skip
            rows).
          title: File2 Options
        filter1:
          anyOf:
            - type: string
            - type: 'null'
          description: >-
            SQL WHERE clause for Dataset A (omit the WHERE keyword), e.g.
            'status = 1'.
          title: Filter1
        filter2:
          anyOf:
            - type: string
            - type: 'null'
          description: >-
            SQL WHERE clause for Dataset B (omit the WHERE keyword), e.g.
            'status = 1'.
          title: Filter2
        finished_at:
          anyOf:
            - format: date-time
              type: string
            - type: 'null'
          title: Finished At
        id:
          anyOf:
            - type: integer
            - type: 'null'
          title: Id
        include_columns:
          anyOf:
            - items:
                type: string
              type: array
            - type: 'null'
          description: >-
            Explicit list of columns to compare. If set, only these columns are
            diffed.
          title: Include Columns
        infer_pk:
          default: false
          description: >-
            Infer primary key columns automatically before running the diff.
            When true, pk_columns may be omitted; the inferred PK is stored back
            on the diff. Supported configurations: in-db diffs (all input
            types); cross-db diffs with query inputs on both sides (PK is
            inferred inside DuckDB after fetch). Not supported: cross-db with
            table inputs, or any file-based diff — use POST
            /api/v1/data_sources/{id}/guess_pk and pass pk_columns explicitly.
            For in-db query inputs, materialization is auto-forced.
          title: Infer Pk
          type: boolean
        infer_pk_avoid_names:
          anyOf:
            - items:
                type: string
              type: array
            - type: 'null'
          description: >-
            Column names to skip during PK inference. Matched case-insensitively
            against actual column names (exact match, no glob/regex). Requires
            infer_pk=true and pk_columns empty; rejected with 422 otherwise.
          title: Infer Pk Avoid Names
        infer_pk_avoid_types:
          anyOf:
            - items:
                type: string
              type: array
            - type: 'null'
          description: >-
            Extra column types to skip during PK inference, on top of the
            built-in defaults (float, timestamp, array, boolean, binary,
            unsupported). Use canonical type names (e.g. 'integer', 'text') or
            'db:<RAW_TYPE>' to target a raw warehouse type (e.g. 'db:JSONB').
            Requires infer_pk=true and pk_columns empty; rejected with 422
            otherwise.
          title: Infer Pk Avoid Types
        kind:
          $ref: '#/components/schemas/DiffKind'
        materialization_destination_id:
          anyOf:
            - type: integer
            - type: 'null'
          description: Data source ID where materialized diff results are stored.
          title: Materialization Destination Id
        materialize_dataset1:
          anyOf:
            - type: boolean
            - type: 'null'
          description: >-
            Materialize Dataset A before diffing. Improves speed for heavy
            queries, filtered non-indexed columns, or transformed primary keys.
          title: Materialize Dataset1
        materialize_dataset2:
          anyOf:
            - type: boolean
            - type: 'null'
          description: >-
            Materialize Dataset B before diffing. Same use cases as
            materialize_dataset1.
          title: Materialize Dataset2
        materialize_without_sampling:
          anyOf:
            - type: boolean
            - type: 'null'
          default: false
          description: Skip sampling when materializing results.
          title: Materialize Without Sampling
        monitor_error:
          anyOf:
            - $ref: '#/components/schemas/QueryError'
            - type: 'null'
        monitor_id:
          anyOf:
            - type: integer
            - type: 'null'
          title: Monitor Id
        monitor_state:
          anyOf:
            - $ref: '#/components/schemas/MonitorRunState'
            - type: 'null'
        per_column_diff_limit:
          anyOf:
            - type: integer
            - type: 'null'
          title: Per Column Diff Limit
        pk_columns:
          items:
            type: string
          title: Pk Columns
          type: array
        purged:
          default: false
          title: Purged
          type: boolean
        query1:
          anyOf:
            - type: string
            - type: 'null'
          description: SQL query for Dataset A. Mutually exclusive with table1 and file1.
          title: Query1
        query2:
          anyOf:
            - type: string
            - type: 'null'
          description: SQL query for Dataset B. Mutually exclusive with table2 and file2.
          title: Query2
        result:
          anyOf:
            - enum:
                - error
                - bad-pks
                - different
                - missing-pks
                - identical
                - empty
              type: string
            - type: 'null'
          title: Result
        # Object whose values are all integers; defaults to an empty object.
        result_revisions:
          additionalProperties:
            type: integer
          default: {}
          title: Result Revisions
          type: object
        result_statuses:
          anyOf:
            - additionalProperties:
                type: string
              type: object
            - type: 'null'
          title: Result Statuses
        run_profiles:
          anyOf:
            - type: boolean
            - type: 'null'
          description: Run column profiling on diff results.
          title: Run Profiles
        runtime:
          anyOf:
            - type: number
            - type: 'null'
          title: Runtime
        # NOTE(review): sampling_confidence, sampling_ratio and
        # sampling_tolerance each state an exclusive numeric range in their
        # description, but none declares exclusiveMinimum/exclusiveMaximum.
        sampling_confidence:
          anyOf:
            - type: number
            - type: 'null'
          description: >-
            Sampling confidence level, between 0 and 100 exclusive. Common
            values: 90, 95, 99, 99.5, 99.9. Use with sampling_tolerance.
          title: Sampling Confidence
        sampling_max_rows:
          anyOf:
            - type: integer
            - type: 'null'
          description: >-
            Maximum number of rows to sample (absolute count). Alternative to
            tolerance+confidence and sampling_ratio.
          title: Sampling Max Rows
        sampling_ratio:
          anyOf:
            - type: number
            - type: 'null'
          description: >-
            Sample this fraction of rows. Value between 0 and 1 exclusive (e.g.
            0.1 = 10% of rows). Alternative to tolerance+confidence.
          title: Sampling Ratio
        sampling_threshold:
          anyOf:
            - type: integer
            - type: 'null'
          description: >-
            Minimum row count to activate sampling. Sampling is disabled if the
            largest table has fewer rows than this.
          title: Sampling Threshold
        sampling_tolerance:
          anyOf:
            - type: number
            - type: 'null'
          description: >-
            Sampling tolerance: max fraction of rows with PK errors before
            sampling is disabled. Value between 0 and 1 exclusive (e.g. 0.001 =
            0.1%). Use with sampling_confidence.
          title: Sampling Tolerance
        share_token:
          anyOf:
            - type: string
            - type: 'null'
          title: Share Token
        share_url:
          anyOf:
            - type: string
            - type: 'null'
          title: Share Url
        sort_array_columns:
          anyOf:
            - type: boolean
            - type: 'null'
          description: >-
            Sort array elements before comparison to ignore insertion order
            differences.
          title: Sort Array Columns
        source:
          anyOf:
            - $ref: '#/components/schemas/JobSource'
            - type: 'null'
        status:
          anyOf:
            - $ref: '#/components/schemas/JobStatus'
            - type: 'null'
        table1:
          anyOf:
            - items:
                type: string
              type: array
            - type: 'null'
          description: >-
            Table path for Dataset A as a list of path components, e.g.
            ['schema', 'table'] or ['database', 'schema', 'table']. Mutually
            exclusive with query1 and file1.
          title: Table1
        table2:
          anyOf:
            - items:
                type: string
              type: array
            - type: 'null'
          description: >-
            Table path for Dataset B as a list of path components, e.g.
            ['schema', 'table'] or ['database', 'schema', 'table']. Mutually
            exclusive with query2 and file2.
          title: Table2
        table_modifiers:
          anyOf:
            - items:
                $ref: '#/components/schemas/TableModifiers'
              type: array
            - type: 'null'
          description: >-
            Table-level modifiers. Allowed values: 'case_insensitive_strings'
            (ignore string case), 'null_equals_empty_string' (treat NULL and
            empty string as equal when one warehouse uses NULL and another uses
            '' for missing text).
          title: Table Modifiers
        tags:
          anyOf:
            - items:
                type: string
              type: array
            - type: 'null'
          description: Tags for organizing and filtering diffs.
          title: Tags
        temp_schema_override:
          anyOf:
            - items:
                type: string
              type: array
            - type: 'null'
          title: Temp Schema Override
        time_aggregate:
          anyOf:
            - $ref: '#/components/schemas/TimeAggregateEnum'
            - type: 'null'
          description: Time aggregation level when using time_column.
        time_column:
          anyOf:
            - type: string
            - type: 'null'
          description: Column name used for time-based filtering or aggregation.
          title: Time Column
        time_interval_end:
          anyOf:
            - format: date-time
              type: string
            - type: 'null'
          title: Time Interval End
        time_interval_start:
          anyOf:
            - format: date-time
              type: string
            - type: 'null'
          title: Time Interval Start
        # NOTE(review): the bare `type: string` branch already accepts every
        # value the `format: date-time` branch does, so the schema admits any
        # string; the distinction between timestamp and hash exists only in the
        # description. Same applies to time_travel_point2.
        time_travel_point1:
          anyOf:
            - type: integer
            - format: date-time
              type: string
            - type: string
            - type: 'null'
          description: >-
            Time travel point for Dataset A. Accepts: negative integer offset
            (e.g. -130), UTC timestamp (e.g. '2024-01-15T00:00:00'), or a time
            point hash. Only supported by Snowflake and Databricks.
          title: Time Travel Point1
        time_travel_point2:
          anyOf:
            - type: integer
            - format: date-time
              type: string
            - type: string
            - type: 'null'
          description: Time travel point for Dataset B. Same format as time_travel_point1.
          title: Time Travel Point2
        tolerance_mode:
          anyOf:
            - $ref: '#/components/schemas/ToleranceModeEnum'
            - type: 'null'
          description: 'How diff_tolerance is applied: ''absolute'' or ''relative''.'
        updated_at:
          anyOf:
            - format: date-time
              type: string
            - type: 'null'
          title: Updated At
        user_id:
          anyOf:
            - type: integer
            - type: 'null'
          title: User Id
      # Only these four properties are guaranteed to be present.
      required:
        - data_source1_id
        - data_source2_id
        - pk_columns
        - kind
      title: ApiDataDiffWithProgressState
      type: object
    # Body of the 422 response: a list of ValidationError entries under
    # `detail`. No property is marked required.
    HTTPValidationError:
      properties:
        detail:
          items:
            $ref: '#/components/schemas/ValidationError'
          title: Detail
          type: array
      title: HTTPValidationError
      type: object
    # Allowed string values for ApiDataDiffWithProgressState.algorithm.
    DiffAlgorithm:
      enum:
        - join
        - hash
        - hash_v2_alpha
        - fetch_and_join
      title: DiffAlgorithm
      type: string
    # Data-app metadata attached to a diff; referenced from
    # ApiDataDiffWithProgressState.data_app_metadata. The model id/name fields
    # are nullable strings; data_app_id, data_app_model_type and meta_data are
    # required.
    TDataDiffDataAppMetadata:
      properties:
        data_app_id:
          title: Data App Id
          type: integer
        data_app_model1_id:
          anyOf:
            - type: string
            - type: 'null'
          title: Data App Model1 Id
        data_app_model1_name:
          anyOf:
            - type: string
            - type: 'null'
          title: Data App Model1 Name
        data_app_model2_id:
          anyOf:
            - type: string
            - type: 'null'
          title: Data App Model2 Id
        data_app_model2_name:
          anyOf:
            - type: string
            - type: 'null'
          title: Data App Model2 Name
        data_app_model_type:
          title: Data App Model Type
          type: string
        # Free-form object: any additional properties are accepted.
        meta_data:
          additionalProperties: true
          title: Meta Data
          type: object
      required:
        - data_app_id
        - data_app_model_type
        - meta_data
      title: TDataDiffDataAppMetadata
      type: object
    # Step counters for a diff, referenced from
    # ApiDataDiffWithProgressState.diff_progress. Both counters are nullable;
    # only `version` is required.
    DiffProgress:
      properties:
        completed_steps:
          anyOf:
            - type: integer
            - type: 'null'
          title: Completed Steps
        total_steps:
          anyOf:
            - type: integer
            - type: 'null'
          title: Total Steps
        version:
          title: Version
          type: string
      required:
        - version
      title: DiffProgress
      type: object
    # Summary statistics of a diff, referenced from
    # ApiDataDiffWithProgressState.diff_stats. Every field except `version` is
    # nullable and optional.
    # NOTE(review): several diff_* fields are typed `number` while the
    # row-count fields are `integer`; the units of the `number` fields (count
    # vs. ratio) are not stated here - confirm and add descriptions.
    DiffStats:
      properties:
        diff_duplicate_pks:
          anyOf:
            - type: number
            - type: 'null'
          title: Diff Duplicate Pks
        diff_null_pks:
          anyOf:
            - type: number
            - type: 'null'
          title: Diff Null Pks
        diff_pks:
          anyOf:
            - type: number
            - type: 'null'
          title: Diff Pks
        diff_rows:
          anyOf:
            - type: number
            - type: 'null'
          title: Diff Rows
        diff_rows_count:
          anyOf:
            - type: integer
            - type: 'null'
          title: Diff Rows Count
        diff_rows_number:
          anyOf:
            - type: number
            - type: 'null'
          title: Diff Rows Number
        diff_schema:
          anyOf:
            - type: number
            - type: 'null'
          title: Diff Schema
        diff_values:
          anyOf:
            - type: number
            - type: 'null'
          title: Diff Values
        errors:
          anyOf:
            - type: integer
            - type: 'null'
          title: Errors
        match_ratio:
          anyOf:
            - type: number
            - type: 'null'
          title: Match Ratio
        rows_added:
          anyOf:
            - type: integer
            - type: 'null'
          title: Rows Added
        rows_removed:
          anyOf:
            - type: integer
            - type: 'null'
          title: Rows Removed
        sampled:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Sampled
        table_a_row_count:
          anyOf:
            - type: integer
            - type: 'null'
          title: Table A Row Count
        table_b_row_count:
          anyOf:
            - type: integer
            - type: 'null'
          title: Table B Row Count
        version:
          title: Version
          type: string
      required:
        - version
      title: DiffStats
      type: object
    # One per-column tolerance override; items of
    # ApiDataDiffWithProgressState.diff_tolerances_per_column. All three
    # properties are required.
    ColumnTolerance:
      properties:
        column_name:
          description: Column name to apply tolerance to.
          title: Column Name
          type: string
        tolerance_mode:
          $ref: '#/components/schemas/ToleranceModeEnum'
          description: '''absolute'' or ''relative''.'
        tolerance_value:
          description: Tolerance value. Must be >= 0.
          # Makes the ">= 0" rule stated in the description machine-checkable.
          minimum: 0
          title: Tolerance Value
          type: number
      required:
        - column_name
        - tolerance_value
        - tolerance_mode
      title: ColumnTolerance
      type: object
    # Options for a CSV input file. `file_type` is fixed to 'csv', which is the
    # discriminator value that selects this schema in file1_options and
    # file2_options. No property is required.
    CSVFileOptions:
      properties:
        delimiter:
          anyOf:
            - type: string
            - type: 'null'
          title: Delimiter
        file_type:
          const: csv
          default: csv
          title: File Type
          type: string
        skip_head_rows:
          anyOf:
            - type: integer
            - type: 'null'
          title: Skip Head Rows
        skip_tail_rows:
          anyOf:
            - type: integer
            - type: 'null'
          title: Skip Tail Rows
      title: CSVFileOptions
      type: object
    # Options for an Excel input file. `file_type` is fixed to 'excel', which
    # is the discriminator value that selects this schema in file1_options and
    # file2_options. No property is required.
    ExcelFileOptions:
      properties:
        file_type:
          const: excel
          default: excel
          title: File Type
          type: string
        sheet:
          anyOf:
            - type: string
            - type: 'null'
          title: Sheet
        skip_head_rows:
          anyOf:
            - type: integer
            - type: 'null'
          title: Skip Head Rows
        skip_tail_rows:
          anyOf:
            - type: integer
            - type: 'null'
          title: Skip Tail Rows
      title: ExcelFileOptions
      type: object
    # Options for a Parquet input file. The only property is the `file_type`
    # discriminator, fixed to 'parquet'.
    ParquetFileOptions:
      properties:
        file_type:
          const: parquet
          default: parquet
          title: File Type
          type: string
      title: ParquetFileOptions
      type: object
    # Allowed string values for the required
    # ApiDataDiffWithProgressState.kind property.
    DiffKind:
      enum:
        - in_db
        - cross_db
      title: DiffKind
      type: string
    # Error type/value pair, referenced from
    # ApiDataDiffWithProgressState.monitor_error. Both strings are required.
    QueryError:
      properties:
        error_type:
          title: Error Type
          type: string
        error_value:
          title: Error Value
          type: string
      required:
        - error_type
        - error_value
      title: QueryError
      type: object
    # Allowed string values for ApiDataDiffWithProgressState.monitor_state.
    MonitorRunState:
      enum:
        - ok
        - alert
        - error
        - learning
        - checking
        - created
        - skipped
        - cancelled
      title: MonitorRunState
      type: string
    # Allowed string values for ApiDataDiffWithProgressState.source.
    JobSource:
      enum:
        - interactive
        - demo_signup
        - manual
        - api
        - ci
        - schedule
        - auto
      title: JobSource
      type: string
    # Allowed string values for ApiDataDiffWithProgressState.status.
    JobStatus:
      enum:
        - needs_confirmation
        - needs_authentication
        - waiting
        - processing
        - done
        - failed
        - cancelled
      title: JobStatus
      type: string
    # Allowed item values for ApiDataDiffWithProgressState.table_modifiers;
    # the meaning of each value is given in that property's description.
    TableModifiers:
      enum:
        - case_insensitive_strings
        - null_equals_empty_string
      title: TableModifiers
      type: string
    # Allowed string values for ApiDataDiffWithProgressState.time_aggregate.
    TimeAggregateEnum:
      enum:
        - minute
        - hour
        - day
        - week
        - month
        - year
      title: TimeAggregateEnum
      type: string
    # Allowed string values for tolerance_mode, used by both
    # ApiDataDiffWithProgressState and ColumnTolerance.
    ToleranceModeEnum:
      enum:
        - absolute
        - relative
      title: ToleranceModeEnum
      type: string
    # A single entry of HTTPValidationError.detail. All three properties are
    # required.
    ValidationError:
      properties:
        # Array whose elements are each either a string or an integer.
        loc:
          items:
            anyOf:
              - type: string
              - type: integer
          title: Location
          type: array
        msg:
          title: Message
          type: string
        type:
          title: Error Type
          type: string
      required:
        - loc
        - msg
        - type
      title: ValidationError
      type: object
  # Declares the ApiKeyAuth scheme referenced by the top-level `security`
  # requirement: an API key carried in the `Authorization` request header.
  securitySchemes:
    ApiKeyAuth:
      description: Use the 'Authorization' header with the format 'Key <api-key>'
      in: header
      name: Authorization
      type: apiKey

````