# data-modelling-core 2.0.9

# Crate identity and registry metadata. Key order follows the Rust style
# guide: name, version, the remaining keys alphabetically, description last.
[package]
name = "data-modelling-core"
version = "2.0.9"
authors = ["Mark Olliver"]
edition = "2024"
license = "MIT"
repository = "https://github.com/OffeneDatenmodellierung/data-modelling-sdk"
description = "Core SDK library for model operations across platforms"

# Library target. The library name uses underscores because the hyphenated
# package name is not a valid Rust identifier.
[lib]
name = "data_modelling_core"
# Only a Rust library (rlib) is produced; no cdylib or staticlib artifacts.
crate-type = ["rlib"]

# Inline tables below list their keys in a fixed order:
# version, default-features, features, optional.
[dependencies]
# Serialization: derive macros plus the JSON and YAML formats
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
serde_yaml = "0.9"

# Error handling
anyhow = "1.0"
thiserror = "2.0"

# Async support. tokio is optional and only enabled by features that list it.
async-trait = "0.1"
tokio = { version = "1", features = ["fs", "rt-multi-thread"], optional = true }

# UUIDs: v4 (random) and v5 (deterministic), with serde support
uuid = { version = "1.0", features = ["v4", "v5", "serde"] }

# Time
chrono = { version = "0.4", features = ["serde"] }

# HTTP client (for the API backend). Default features are switched off and
# the ones that are needed are named explicitly.
reqwest = { version = "0.12", default-features = false, features = [
    "json",
    "native-tls",
    "cookies",
    "blocking",
], optional = true }
urlencoding = { version = "2.1", optional = true }

# SQL parsing. datafusion is optional and is enabled by the
# databricks-dialect feature.
sqlparser = "0.60"
regex = "1"
once_cell = "1.19"
datafusion = { version = "45", default-features = false, features = [], optional = true }

# Graph operations (used for validation)
petgraph = "0.6"

# YAML processing
yaml-rust = "0.4"

# XML processing (for BPMN/DMN)
quick-xml = { version = "0.36", features = ["serialize"], optional = true }

# Logging
tracing = "0.1"

# Base64 encoding (for PNG export)
# NOTE(review): this crate is unconditional although PNG export itself is
# feature-gated; confirm whether it is needed outside png-export.
base64 = "0.22"

# Image processing (for PNG export)
image = { version = "0.24", optional = true }
imageproc = { version = "0.23", optional = true }

# Git operations (optional, behind the git feature)
git2 = { version = "0.19", optional = true }

# Database backends (optional, behind the *-backend features)
duckdb = { version = "1.4", features = ["bundled"], optional = true }
tokio-postgres = { version = "0.7", optional = true }
deadpool-postgres = { version = "0.14", optional = true }

# Configuration file parsing
toml = { version = "0.8", optional = true }

# Hashing for change detection
sha2 = { version = "0.10", optional = true }

# File globbing (for staging ingestion)
glob = { version = "0.3", optional = true }

# Parallel processing
rayon = { version = "1.10", optional = true }

# Progress reporting
indicatif = { version = "0.17", optional = true }

# AWS SDK for S3 ingestion
aws-config = { version = "1.5", optional = true }
aws-sdk-s3 = { version = "1.65", optional = true }
aws-credential-types = { version = "1.2", optional = true }

# Databricks Unity Catalog needs no dedicated crate: the databricks feature
# reuses reqwest and urlencoding for its REST API calls.

# Apache Iceberg support (for data lakehouse storage)
iceberg = { version = "0.7", optional = true }
iceberg-catalog-rest = { version = "0.7", optional = true }
iceberg-catalog-glue = { version = "0.7", optional = true }

# Arrow and Parquet for writing Iceberg data
# NOTE(review): presumably the arrow/parquet major version has to match the
# one the iceberg crate is built against; confirm before bumping either.
arrow = { version = "55", optional = true }
parquet = { version = "55", features = ["async"], optional = true }

# WASM support (for the browser storage backend)
wasm-bindgen = { version = "0.2", optional = true }
wasm-bindgen-futures = { version = "0.4", optional = true }
web-sys = { version = "0.3", features = [
    "IdbDatabase",
    "IdbFactory",
    "IdbObjectStore",
    "IdbRequest",
    "IdbTransaction",
    "IdbTransactionMode",
    "IdbOpenDbRequest",
    "IdbVersionChangeEvent",
    "Window",
    "Storage",
    "console",
], optional = true }
js-sys = { version = "0.3", optional = true }

# ZIP archive reading (for docx files in LLM documentation loading)
zip = { version = "2.2", default-features = false, features = ["deflate"], optional = true }

# llama.cpp bindings for offline LLM inference
llama-cpp-2 = { version = "0.1", optional = true }

# jsonschema is declared once for all targets. Its default features stay
# disabled so that it does not pull in reqwest.
[dependencies.jsonschema]
version = "0.38.1"
default-features = false
optional = true

[features]
# Only the HTTP API backend is switched on by default.
default = ["api-backend"]
api-backend = ["reqwest", "urlencoding"]
native-fs = ["tokio"]
png-export = ["image", "imageproc"]
databricks-dialect = ["datafusion"]
git = ["git2"]
schema-validation = ["jsonschema"]
odps-validation = ["schema-validation"]
# bpmn and dmn both enable the same XML crate.
bpmn = ["quick-xml"]
dmn = ["quick-xml"]
# Enables no optional dependency.
openapi = []
wasm = [
    "wasm-bindgen",
    "wasm-bindgen-futures",
    "web-sys",
    "js-sys",
]

# Database backends. `database` holds what both engines share.
database = ["toml", "sha2"]
duckdb-backend = ["database", "duckdb", "native-fs"]
postgres-backend = [
    "database",
    "tokio-postgres",
    "deadpool-postgres",
    "native-fs",
]

# Staging database for the data ingestion pipeline, one variant per backend
staging = [
    "duckdb-backend",
    "glob",
    "sha2",
    "rayon",
    "indicatif",
]
staging-postgres = [
    "postgres-backend",
    "glob",
    "sha2",
    "rayon",
    "indicatif",
]

# S3 ingestion, layered on top of staging
s3 = [
    "staging",
    "aws-config",
    "aws-sdk-s3",
    "aws-credential-types",
]

# Databricks Unity Catalog Volumes ingestion (REST API through reqwest)
databricks = ["staging", "reqwest", "urlencoding"]

# Apache Iceberg lakehouse storage. `dep:iceberg` is needed because this
# feature has the same name as the optional iceberg crate.
iceberg = [
    "dep:iceberg",
    "iceberg-catalog-rest",
    "tokio",
    "arrow",
    "parquet",
]
iceberg-glue = ["iceberg", "iceberg-catalog-glue"]

# Schema inference engine (enables no optional dependency)
inference = []

# LLM-enhanced schema refinement, with online and offline variants
llm = ["tokio", "zip"]
llm-online = ["llm", "reqwest"]
llm-offline = ["llm", "llama-cpp-2"]

# Schema mapping for target schema alignment
mapping = ["inference"]

# Full pipeline integration
pipeline = ["staging", "inference", "mapping"]

# Crates used only by tests, examples and benchmarks.
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
tempfile = "3"
# Declared with the "full" feature set here, independent of whether the
# optional tokio entry in [dependencies] is enabled.
tokio = { version = "1", features = ["full"] }

# Benchmark target that is only built when the `inference` feature is enabled.
[[bench]]
name = "inference_bench"
# Turn off the built-in libtest harness so the benchmark supplies its own
# entry point (presumably Criterion, declared in dev-dependencies; confirm).
harness = false
required-features = ["inference"]

# Benchmark target with no required-features, so it builds under any
# feature selection.
[[bench]]
name = "import_bench"
# Turn off the built-in libtest harness so the benchmark supplies its own
# entry point (presumably Criterion, declared in dev-dependencies; confirm).
harness = false

# Benchmark target that is only built when the `staging` feature is enabled.
[[bench]]
name = "staging_benchmark"
# Turn off the built-in libtest harness so the benchmark supplies its own
# entry point (presumably Criterion, declared in dev-dependencies; confirm).
harness = false
required-features = ["staging"]