Complete Configuration Examples Reference
This document provides complete, working TOML configuration examples for all Qarion ETL features. Use these as templates for your own configurations.
Table of Contents
- Project Configuration
- Flow Definitions
- Dataset Definitions
- Complete Pipeline Examples
- Related Documentation
Project Configuration
Basic Project Setup
# qarion-etl.toml
[app]
app = "Qarion ETL"
type = "project"
project_name = "my_project"
[engine]
name = "sqlite"
[engine.config]
# Path to the SQLite database file.
path = "data/qarion-etl.db"
# Directories holding local dataset/flow/migration definitions.
dataset_dir = "datasets"
flow_dir = "flows"
migration_dir = "migrations"
# Storage backends for schemas, datasets, and flows ("local" keeps them on disk).
# NOTE(review): these keys all sit under [engine.config]; confirm they are
# engine-level settings rather than root-level ones.
schema_storage = "local"
dataset_storage = "local"
flow_storage = "local"
Production Setup with PostgreSQL
# qarion-etl.toml
[app]
app = "Qarion ETL"
type = "project"
project_name = "production_pipeline"

[engine]
name = "postgresql"

[engine.config]
host = "db.example.com"
port = 5432
database = "production_db"
# "${credential:...}" references are resolved by Qarion ETL at load time;
# TOML itself performs no interpolation.
user = "${credential:db_user}"
password = "${credential:db_password}"

# Optional: Separate metadata engine
[metadata_engine]
name = "postgresql"

[metadata_engine.config]
host = "db.example.com"
port = 5432
database = "metadata_db"
user = "${credential:db_user}"
password = "${credential:db_password}"
dataset_storage = "database"
flow_storage = "database"
schema_storage = "database"
metadata_namespace = "xt"

[credential_store]
type = "local_keystore"
# Encryption key for the local keystore. Auto-generated, never commit.
# Declared here, under [credential_store]: a key written after the last
# [[credentials]] entry would attach to that credential instead.
fernet_key = "gAAAAABh..."

[[credentials]]
id = "db_user"
name = "Database User"
credential_type = "database"

[[credentials]]
id = "db_password"
name = "Database Password"
credential_type = "database"
Note: The PostgreSQL engine requires the psycopg2-binary driver:
pip install psycopg2-binary
Flow Definitions
Change Feed Flow
# flows/orders_change_feed.toml
id = "orders_change_feed"
name = "Orders Change Feed"
flow_type = "change_feed"
namespace = "production"

[input]
primary_key = ["order_id"]
columns = [
    {name = "order_id", schema_type = "string", required = true},
    {name = "customer_id", schema_type = "string", required = true},
    {name = "amount", schema_type = "float", required = true},
    {name = "order_date", schema_type = "date", required = true},
    {name = "status", schema_type = "string", required = true},
]

[properties]
# Only changes in these columns mark a record as updated.
change_detection_columns = ["amount", "status"]

[properties.load]
source_path = "data/orders"
file_pattern = "orders_*.csv"
format = "csv"

# Loader options are declared as a sub-table: TOML 1.0 inline tables must fit
# on a single line, so a multi-line `loader_config = { ... }` is a syntax error.
[properties.load.loader_config]
delimiter = ","
header = true
encoding = "utf-8"

[[triggers]]
id = "daily_trigger"
type = "schedule"
# Cron expression: every day at 02:00.
schedule = "0 2 * * *"
timezone = "UTC"
enabled = true
Standard Flow with All Task Types
# flows/complete_pipeline.toml
id = "complete_pipeline"
name = "Complete Pipeline"
flow_type = "standard"
namespace = "production"

[input]
primary_key = ["id"]
columns = [
    {name = "id", schema_type = "integer", required = true},
    {name = "name", schema_type = "string", required = true},
    {name = "amount", schema_type = "float", required = true},
]

# Ingestion Task
[[tasks]]
id = "ingest_data"
name = "Ingest Data"
type = "ingestion"
target_dataset_id = "data_landing"

[tasks.properties]
operation = "ingestion"
target_table_type = "landing"
processing_type = "FULL_REFRESH"

[tasks.config]
path = "data/input.csv"
format = "csv"
delimiter = ","
header = true

# Transformation Task
[[tasks]]
id = "transform_data"
name = "Transform Data"
type = "transformation"
source_dataset_id = "data_landing"
target_dataset_id = "data_staging"
dependencies = ["ingest_data"]

[tasks.properties]
operation = "landing_to_staging"
source_table_type = "landing"
target_table_type = "staging"
processing_type = "INCREMENTAL"

[tasks.config]
sql = """
SELECT
id,
UPPER(name) as name,
amount * 1.1 as adjusted_amount
FROM {{ source_dataset }}
WHERE amount > 0
"""

# Quality Check Task
[[tasks]]
id = "quality_check"
name = "Quality Check"
type = "dq_check"
source_dataset_id = "data_staging"
dependencies = ["transform_data"]

[tasks.properties]
operation = "quality_check"
table_type = "staging"

# NOTE: do not declare a bare [tasks.properties.quality_checks] header before
# the [[...]] entries below — defining the key first as a plain table and then
# as an array of tables is a TOML parse error.
[[tasks.properties.quality_checks]]
check_id = "completeness"
check_type = "completeness"
enabled = true

[tasks.properties.quality_checks.config]
columns = ["id", "name", "adjusted_amount"]
threshold = 0.95

# Export Task
[[tasks]]
id = "export_data"
name = "Export Data"
type = "export"
source_dataset_id = "data_staging"
dependencies = ["quality_check"]

[tasks.properties]
operation = "export"
export_type = "file"
destination = "exports/data.csv"
format = "csv"

[tasks.config]
include_header = true
delimiter = ","
Dataset Definitions
Basic Dataset
# datasets/orders.toml
name = "orders"
namespace = "raw"
description = "Customer orders dataset"
[columns]
[columns.id]
schema_type = "integer"
required = true
primary_key = true
description = "Order identifier"
[columns.customer_id]
schema_type = "integer"
required = true
description = "Customer identifier"
[columns.amount]
schema_type = "float"
required = true
description = "Order total amount"
[columns.created_at]
schema_type = "timestamp"
required = false
description = "Order creation timestamp"
[properties]
table_type = "landing"
# Inline table — must stay on a single line (TOML 1.0).
schema_evolution = { mode = "forward" }
Dataset with Contract and Quality Checks
# datasets/orders_staging.toml
name = "orders_staging"
namespace = "staging"
description = "Staged orders with validation"

[columns]
[columns.id]
schema_type = "integer"
required = true
primary_key = true
[columns.customer_id]
schema_type = "integer"
required = true
[columns.amount]
schema_type = "float"
required = true
[columns.status]
schema_type = "string"
required = true

[properties]
table_type = "staging"
# Quality-check behavior flags. These must appear here, directly under
# [properties] and before any sub-table header: keys written after a
# [[properties.contract.columns]] entry would attach to that column instead.
quality_stop_on_first_failure = false
quality_fail_on_error = false

# Contract validation
[properties.contract]
id = "orders_contract"
mode = "strict"
enabled = true

[[properties.contract.columns]]
name = "id"
schema_type = "integer"
required = true
nullable = false

[[properties.contract.columns]]
name = "amount"
schema_type = "float"
required = true
nullable = false
min_value = 0
max_value = 1000000

# Automatic quality checks
[[properties.quality_checks]]
check_id = "completeness"
check_type = "completeness"
enabled = true

# Attaches to the [[properties.quality_checks]] entry opened just above.
[properties.quality_checks.config]
columns = ["id", "customer_id", "amount"]
threshold = 0.95

[[properties.quality_checks]]
check_id = "uniqueness"
check_type = "uniqueness"
enabled = true

[properties.quality_checks.config]
columns = ["id"]
Complete Pipeline Examples
End-to-End Pipeline
1. Project Configuration:
# qarion-etl.toml
[app]
app = "Qarion ETL"
type = "project"
project_name = "orders_pipeline"
[engine]
name = "sqlite"
[engine.config]
path = "data/qarion-etl.db"
# NOTE(review): these storage keys sit under [engine.config]; confirm they are
# engine-level settings rather than root-level ones.
dataset_storage = "local"
flow_storage = "local"
schema_storage = "local"
2. Flow Definition:
# flows/orders_pipeline.toml
id = "orders_pipeline"
name = "Orders Processing Pipeline"
flow_type = "change_feed"
namespace = "production"

[input]
primary_key = ["order_id"]
columns = [
    {name = "order_id", schema_type = "string", required = true},
    {name = "customer_id", schema_type = "string", required = true},
    {name = "amount", schema_type = "float", required = true},
    {name = "order_date", schema_type = "date", required = true},
    {name = "status", schema_type = "string", required = true},
]

[properties]
# Only changes in these columns mark a record as updated.
change_detection_columns = ["amount", "status"]

[properties.load]
source_path = "data/orders"
file_pattern = "orders_*.csv"
format = "csv"

# Loader options are declared as a sub-table: TOML 1.0 inline tables must fit
# on a single line, so a multi-line `loader_config = { ... }` is a syntax error.
[properties.load.loader_config]
delimiter = ","
header = true
encoding = "utf-8"

[[triggers]]
id = "daily_trigger"
type = "schedule"
schedule = "0 2 * * *"
timezone = "UTC"
enabled = true
description = "Daily execution at 2 AM UTC"
3. Landing Dataset with Contract:
# datasets/orders_pipeline_landing.toml
id = "orders_pipeline_landing"
name = "Orders Landing Table"
namespace = "production"
[columns]
[columns.order_id]
schema_type = "string"
required = true
primary_key = true
[columns.customer_id]
schema_type = "string"
required = true
[columns.amount]
schema_type = "float"
required = true
[columns.order_date]
schema_type = "date"
required = true
[columns.status]
schema_type = "string"
required = true
[properties]
table_type = "landing"
[properties.contract]
id = "orders_contract"
mode = "strict"
enabled = true
# Each [[properties.contract.columns]] entry is one per-column validation rule.
[[properties.contract.columns]]
name = "order_id"
schema_type = "string"
required = true
nullable = false
[[properties.contract.columns]]
name = "amount"
schema_type = "float"
required = true
nullable = false
# Numeric range constraint for the column's values.
min_value = 0
max_value = 1000000
[[properties.contract.columns]]
name = "status"
schema_type = "string"
required = true
nullable = false
# Only these values pass contract validation.
enum_values = ["pending", "completed", "cancelled"]
4. Change Feed Dataset with Quality Checks:
# datasets/orders_pipeline_change_feed.toml
id = "orders_pipeline_change_feed"
name = "Orders Change Feed Table"
namespace = "production"
[columns]
[columns.order_id]
schema_type = "string"
required = true
[columns.customer_id]
schema_type = "string"
required = true
[columns.amount]
schema_type = "float"
required = true
[columns.order_date]
schema_type = "date"
required = true
[columns.status]
schema_type = "string"
required = true
# xt_* columns hold change-feed bookkeeping metadata.
[columns.xt_record_type]
schema_type = "string"
required = true
[columns.xt_batch_id]
schema_type = "integer"
required = true
[properties]
table_type = "change_feed"
# Automatic quality checks
# These flags must stay directly under [properties], before any sub-table
# header, or they would attach to the wrong table.
quality_stop_on_first_failure = false
quality_fail_on_error = false
[[properties.quality_checks]]
check_id = "completeness"
check_type = "completeness"
enabled = true
# Attaches to the [[properties.quality_checks]] entry opened just above.
[properties.quality_checks.config]
columns = ["order_id", "customer_id", "amount"]
threshold = 0.95
[[properties.quality_checks]]
check_id = "uniqueness"
check_type = "uniqueness"
enabled = true
[properties.quality_checks.config]
columns = ["order_id", "xt_batch_id"]
Related Documentation
For detailed information on each feature, see:
- Flows Guide - Complete flow documentation
- Configuration Guide - Configuration options
- Data Ingestion - Ingestion configuration
- Data Contracts - Contract validation
- Data Quality - Quality checks
- Tasks - Task system
- Triggers - Flow triggers