{
"nbformat": 4,
"nbformat_minor": 5,
"metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}},
"cells": [
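{"cell_type": "markdown", "metadata": {}, "source": ["# ML QA (tabular)\n", "\n", "Set `CSV` to the absolute path of `tests/fixtures/people.csv` in your clone. The default relative path only resolves when the kernel's working directory is the root of the clone."]},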
{"cell_type": "code", "id": "ml-qa-tabular-checks", "metadata": {}, "execution_count": null, "outputs": [], "source": [
"import json\n",
"from pathlib import Path\n",
"\n",
"import rust_data_processing as rdp\n",
"\n",
"# Location of the fixture CSV. A relative value is resolved against the kernel's\n",
"# working directory, so replace it with an absolute path if that is not the clone root.\n",
"CSV = \"tests/fixtures/people.csv\"\n",
"\n",
"# Fail early with an actionable message instead of relying on the ingest call's error.\n",
"if not Path(CSV).exists():\n",
"    raise FileNotFoundError(f\"CSV fixture not found: {Path(CSV).resolve()}; set CSV to an absolute path\")\n",
"\n",
"# Column names and data types handed to the ingest call.\n",
"schema = [\n",
"    {\"name\": \"id\", \"data_type\": \"int64\"},\n",
"    {\"name\": \"name\", \"data_type\": \"utf8\"},\n",
"    {\"name\": \"age\", \"data_type\": \"int64\"},\n",
"]\n",
"\n",
"# Ingest the file and report how many rows were read.\n",
"ds = rdp.ingest_from_path(CSV, schema)\n",
"print(\"rows\", ds.row_count())\n",
"\n",
"# Profile the dataset; only the first five (sorted) report keys are shown to keep output short.\n",
"rep = rdp.profile_dataset(ds)\n",
"print(\"keys\", sorted(rep.keys())[:5], \"...\")\n",
"\n",
"# Run a single error-severity not_null check on the `name` column and show the failure count.\n",
"val = rdp.validate_dataset(ds, {\"checks\": [{\"kind\": \"not_null\", \"column\": \"name\", \"severity\": \"error\"}]})\n",
"print(\"failed_checks\", val[\"summary\"][\"failed_checks\"])\n"
]}
]
}