# Project identity and the originally planned scope.
# NOTE(review): total_hours (824) and total_tickets (40) equal the budgets and
# ticket count of Phases 1-7 only (ENT-001..ENT-040); later phases are not
# reflected here. Confirm whether these are meant to stay as the baseline.
project:
  name: entrenar
  description: Training & Optimization Library
  total_hours: 824
  total_tickets: 40
# Running progress counters.
# NOTE(review): these still read as "not started" (0 hours, 0 tickets,
# phase 1) although every phase listed in this file has status: complete.
# Confirm whether the counters are maintained by hand or are obsolete.
progress:
  completed_hours: 0
  completed_tickets: 0
  current_phase: 1
# Ordered list of roadmap phases; each entry carries a budget (hours), a
# status, and its tickets.
phases:
# Phase 1 - autograd engine: tickets ENT-001..ENT-008, all marked complete.
# NOTE(review): ticket estimates sum to 192 h while the phase budget is 200 h;
# confirm which figure is authoritative.
- name: "Phase 1: Autograd Engine"
  hours: 200
  status: complete
  tickets:
  - id: ENT-001
    name: "Tape-based context + lifetime tracking"
    estimated_hours: 32
    actual_hours: 4
    status: complete
    notes: "Implemented Context and BackwardOp trait"
  - id: ENT-002
    name: "Matmul backward (gradient check: 200K iters)"
    estimated_hours: 16
    actual_hours: 2
    status: complete
    notes: "CRITICAL: Required for neural network layers. Implemented with property tests (1000+ cases), gradient validation via finite difference."
  - id: ENT-003
    name: "Softmax backward + property tests"
    estimated_hours: 24
    actual_hours: 3
    status: complete
    notes: "Implemented with gradient validation"
  - id: ENT-004
    name: "Layer norm backward (mean/var gradients)"
    estimated_hours: 32
    actual_hours: 3
    status: complete
    notes: "CRITICAL: Normalization for transformer architectures. Implemented with proper gradient computation through mean/variance dependencies. Property tests (1000+ cases) for x, gamma, and beta gradients with finite difference validation."
  - id: ENT-005
    name: "Attention backward (Q,K,V chain rule)"
    estimated_hours: 40
    actual_hours: 4
    status: complete
    notes: "CRITICAL: Core operation for transformer architectures. Implemented scaled dot-product attention with Q @ K^T / sqrt(d_k), row-wise softmax, and V multiplication. Property tests (1000+ cases) for Q, K, V gradients with finite difference validation."
  - id: ENT-006
    name: "ReLU/GELU/Swish backward (8h each)"
    estimated_hours: 24
    actual_hours: 3
    status: complete
    notes: "CRITICAL: Activation functions for neural networks. Implemented ReLU (1h), GELU, and Swish (2h) with property tests (1000+ cases), gradient validation via finite difference."
  - id: ENT-007
    name: "Finite difference validation framework"
    estimated_hours: 16
    actual_hours: 2
    status: complete
    notes: "Implemented with property tests"
  - id: ENT-008
    name: "Mutation testing on backward ops (>80% kill)"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "QUALITY: Validated test suite quality. 93.4% kill rate (312/334 mutants caught) using cargo-mutants. Exceeds 80% requirement. Survived mutants primarily in numerical precision areas within tolerance thresholds. Documented in docs/mutation-testing-ent-008.md."
# Phase 2 - optimizers: tickets ENT-009..ENT-015, all marked complete.
# Ticket estimates sum to the 120 h phase budget.
- name: "Phase 2: Optimizers"
  hours: 120
  status: complete
  tickets:
  - id: ENT-009
    name: "SGD + momentum"
    estimated_hours: 16
    actual_hours: 1
    status: complete
  - id: ENT-010
    name: "Adam (m/v state tracking)"
    estimated_hours: 24
    actual_hours: 2
    status: complete
  - id: ENT-011
    name: "AdamW (decoupled weight decay)"
    estimated_hours: 16
    actual_hours: 1
    status: complete
    notes: "CRITICAL: Modern optimizer for transformer training. Implemented with decoupled weight decay (applied directly to parameters, not gradients). Tests validate convergence, weight decay behavior, and difference from Adam with L2."
  - id: ENT-012
    name: "Cosine LR scheduler"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "Learning rate scheduling for training. Implemented cosine annealing (smooth decay following cosine curve). Integrates with all optimizers via LRScheduler trait. Tests validate monotonic decrease and correct lr at key points."
  - id: ENT-013
    name: "Gradient clipping (global norm)"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "CRITICAL: Prevents exploding gradients in deep networks. Implemented global norm clipping (scales all gradients if norm exceeds threshold). Preserves relative magnitudes. Essential for RNN/transformer training."
  - id: ENT-014
    name: "Optimizer convergence property tests"
    estimated_hours: 32
    actual_hours: 2
    status: complete
    notes: "Property tests for SGD, Adam, AdamW with Rosenbrock, ill-conditioned, high-dim, numerical stability scenarios. 29 tests, 1000 proptest cases each."
  - id: ENT-015
    name: "SIMD-accelerated param updates via Trueno"
    estimated_hours: 16
    actual_hours: 1
    status: complete
    notes: "SIMD operations via Trueno for AXPY, Adam, AdamW. 18 tests including property tests (500 cases) for numerical equivalence."
# Phase 3 - LoRA: tickets ENT-016..ENT-021, all marked complete.
# Ticket estimates sum to the 144 h phase budget.
- name: "Phase 3: LoRA"
  hours: 144
  status: complete
  tickets:
  - id: ENT-016
    name: "LoRA layer (A, B matrices + merge)"
    estimated_hours: 32
    actual_hours: 2
    status: complete
    notes: "LoRA layer with A,B matrices, merge/unmerge, scaling. 53 tests including 5 property tests (200 cases each) for mathematical correctness."
  - id: ENT-017
    name: "QLoRA (4-bit base + dequant-on-fly)"
    estimated_hours: 40
    actual_hours: 2
    status: complete
    notes: "4-bit quantized base weights with on-the-fly dequantization. 11 tests including 5 property tests (200 cases each) for quantization correctness."
  - id: ENT-018
    name: "Target module selection (q/k/v/o_proj)"
    estimated_hours: 16
    actual_hours: 1
    status: complete
    notes: "LoRAConfig with target module selection, layer filtering, all_linear mode. 15 tests including 5 property tests (200 cases each)."
  - id: ENT-019
    name: "Adapter save/load (separate from base)"
    estimated_hours: 24
    actual_hours: 1
    status: complete
    notes: "JSON adapter serialization with round-trip preservation. 10 tests including 4 property tests (100 cases each)."
  - id: ENT-020
    name: "Memory benchmarks (QLoRA vs full FP16)"
    estimated_hours: 16
    actual_hours: 1
    status: complete
    notes: "Memory comparison benchmarks for LoRA vs QLoRA. 11 tests including 3 property tests (100 cases each)."
  - id: ENT-021
    name: "Gradient flow tests (frozen base + trainable adapters)"
    estimated_hours: 16
    actual_hours: 1
    status: complete
    notes: "Gradient flow validation: frozen base, trainable A/B. 14 tests including 4 property tests (100 cases each)."
# Phase 4 - quantization: tickets ENT-022..ENT-027, all marked complete.
# Ticket estimates sum to the 136 h phase budget.
- name: "Phase 4: Quantization"
  hours: 136
  status: complete
  tickets:
  - id: ENT-022
    name: "Fake quantize (STE backward)"
    estimated_hours: 24
    actual_hours: 2
    status: complete
    notes: "Fake quantization for QAT with STE backward. 17 tests including 5 property tests (200 cases each)."
  - id: ENT-023
    name: "PTQ calibration (min-max, percentile)"
    estimated_hours: 32
    actual_hours: 2
    status: complete
    notes: "PTQ calibration with min-max, percentile, and moving average methods. 15 tests including 5 property tests (200 cases each)."
  - id: ENT-024
    name: "Q4_0/Q8_0 bit packing → GGUF"
    estimated_hours: 40
    actual_hours: 2
    status: complete
    notes: "GGUF-compatible Q4_0/Q8_0 quantization formats with block-wise quantization. 17 tests including 6 property tests (200 cases each)."
  - id: ENT-025
    name: "Per-channel vs per-tensor quantization"
    estimated_hours: 16
    actual_hours: 1
    status: complete
    notes: "Per-tensor, per-channel, and per-group quantization with symmetric/asymmetric modes. 16 tests including 6 property tests (200 cases each)."
  - id: ENT-026
    name: "Quantization error property tests"
    estimated_hours: 16
    actual_hours: 1
    status: complete
    notes: "Error analysis with MSE/MAE/SQNR metrics, scale sensitivity, outlier impact. 17 tests including 7 property tests (200 cases each)."
  - id: ENT-027
    name: "Accuracy degradation benchmarks"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "Benchmark suite for accuracy degradation with synthetic workloads, weight generators, and quality metrics. 15 tests including 5 property tests (100 cases each)."
# Phase 5 - model merging: tickets ENT-028..ENT-032, all marked complete.
# Ticket estimates sum to the 96 h phase budget.
- name: "Phase 5: Model Merging"
  hours: 96
  status: complete
  tickets:
  - id: ENT-028
    name: "TIES (trim + sign election + merge)"
    estimated_hours: 32
    actual_hours: 1
    status: complete
    notes: "TIES merge with trim, sign election, and merge. 17 tests including 8 property tests (200 cases each)."
  - id: ENT-029
    name: "DARE (dropout + rescale)"
    estimated_hours: 24
    actual_hours: 1
    status: complete
    notes: "DARE merge with dropout and rescale. 16 tests including 8 property tests (200 cases each)."
  - id: ENT-030
    name: "SLERP (spherical interp for 2 models)"
    estimated_hours: 24
    actual_hours: 1
    status: complete
    notes: "SLERP merge with spherical interpolation. 17 tests including 9 property tests (200 cases each)."
  - id: ENT-031
    name: "Merge commutativity property tests"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "Comprehensive commutativity tests for SLERP, DARE, TIES. 25 tests including 13 property tests (200 cases each). Tests: commutativity, permutation invariance, identity, boundary conditions."
  - id: ENT-032
    name: "Multi-model ensemble (>2 models)"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "Unified ensemble API with WeightedAverage, IterativeSlerp, Hierarchical strategies. 21 tests including 6 property tests (200 cases each). 108 total merge tests."
# Phase 6 - declarative config: tickets ENT-033..ENT-036, all marked complete.
# Ticket estimates sum to the 64 h phase budget.
- name: "Phase 6: Declarative Config"
  hours: 64
  status: complete
  tickets:
  - id: ENT-033
    name: "YAML schema + serde deserialization"
    estimated_hours: 16
    actual_hours: 1
    status: complete
    notes: "Property tests for YAML round-trip serialization, validation edge cases, JSON interop. 27 tests (20 property @ 200 cases, 7 edge case). 69 total config tests."
  - id: ENT-034
    name: "Auto-feature type inference from data"
    estimated_hours: 24
    actual_hours: 1
    status: complete
    notes: "FeatureType inference from ColumnStats: numeric, categorical, text, datetime, embedding, targets. 29 tests (12 property @ 200 cases). 98 total config tests."
  - id: ENT-035
    name: "Config validation (types, paths, ranges)"
    estimated_hours: 16
    actual_hours: 1
    status: complete
    notes: "Extended validation with LR bounds, LoRA alpha/dropout/targets, seq_len, save_interval, lr_scheduler. 38 tests (17 unit + 21 property @ 200 cases). 142 total config tests."
  - id: ENT-036
    name: "Single-command training entry point"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "Full CLI with clap: train/validate/info/quantize/merge subcommands. 34 tests (20 unit + 14 property @ 200 cases). 176 total config tests, 610 total tests."
# Phase 7 - distillation: tickets ENT-037..ENT-040, all marked complete.
# Ticket estimates sum to the 64 h phase budget.
- name: "Phase 7: Distillation"
  hours: 64
  status: complete
  tickets:
  - id: ENT-037
    name: "KD loss (temperature-scaled softmax)"
    estimated_hours: 16
    actual_hours: 1
    status: complete
    notes: "DistillationLoss with temperature scaling, KL divergence, cross-entropy blend. 9 tests including softmax, KL divergence validation."
  - id: ENT-038
    name: "Multi-teacher ensemble distillation"
    estimated_hours: 24
    actual_hours: 1
    status: complete
    notes: "EnsembleDistiller with weighted/uniform combining, probability-based averaging. 11 tests including edge cases."
  - id: ENT-039
    name: "Progressive distillation (layer-wise)"
    estimated_hours: 16
    actual_hours: 1
    status: complete
    notes: "ProgressiveDistiller with MSE/cosine similarity layer-wise losses, weighted layer combinations. 13 tests."
  - id: ENT-040
    name: "Distillation effectiveness property tests"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "12 property tests @ default proptest cases covering loss non-negativity, temperature smoothing, alpha weights, ensemble averaging, MSE/cosine symmetry. 44 total distill tests."
# Phase 8 - training loop: tickets ENT-048..ENT-052, all marked complete.
# Ticket estimates sum to the 40 h phase budget.
# NOTE(review): ticket IDs jump from ENT-040 to ENT-048; ENT-041..ENT-047 do
# not appear in the visible part of this file. Confirm where they are tracked.
- name: "Phase 8: Training Loop"
  hours: 40
  status: complete
  tickets:
  - id: ENT-048
    name: "Trainer struct with epoch/step abstractions"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "Trainer with train_step/train_epoch, MetricsTracker, gradient clipping. Tests: 4."
  - id: ENT-049
    name: "Checkpoint save/restore"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "CheckpointCallback with periodic save, best model tracking. Tests: 1."
  - id: ENT-050
    name: "Early stopping with patience"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "EarlyStopping callback with patience, min_delta, best_loss tracking. Tests: 2, proptests: 2."
  - id: ENT-051
    name: "Callback system (logging, metrics, custom)"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "TrainerCallback trait, CallbackManager, ProgressCallback, MonitorCallback (entrenar integration). 15 tests total (8 unit, 7 property)."
  - id: ENT-052
    name: "Training loop property tests"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "7 property tests for early stopping, checkpoint, callback manager, progress, monitor (NaN/Inf detection)."
# Phase 9 - explainability integration: single ticket ENT-053, marked complete.
# NOTE(review): the only ticket is estimated at 3 h while the phase budget is
# 8 h; confirm which figure is authoritative.
- name: "Phase 9: Explainability Integration"
  hours: 8
  status: complete
  tickets:
  - id: ENT-053
    name: "Integrate aprender explainability into training evaluation"
    estimated_hours: 3
    actual_hours: 1
    status: complete
    notes: "ExplainabilityCallback using aprender::interpret (PermutationImportance, IntegratedGradients, Saliency). 7 tests."
# Phase 10 - real-time terminal monitoring: tickets ENT-054..ENT-067, all
# marked complete.
# NOTE(review): ticket estimates sum to 37 h while the phase budget is 32 h;
# confirm which figure is authoritative.
- name: "Phase 10: Real-Time Terminal Monitoring"
  hours: 32
  status: complete
  tickets:
  - id: ENT-054
    name: "TerminalMonitorCallback skeleton"
    estimated_hours: 2
    actual_hours: 1
    status: complete
    notes: "Core callback with layout modes (Minimal/Compact/Full). 7 tests."
  - id: ENT-055
    name: "MetricsBuffer ring buffer"
    estimated_hours: 2
    actual_hours: 1
    status: complete
    notes: "O(1) ring buffer with last_n, min/max/mean. 9 tests + 2 proptests."
  - id: ENT-056
    name: "trueno-viz LossCurve integration"
    estimated_hours: 4
    actual_hours: 2
    status: complete
    notes: "LossCurve rendering to terminal via TerminalEncoder. 8 tests."
  - id: ENT-057
    name: "Sparkline generation"
    estimated_hours: 2
    actual_hours: 1
    status: complete
    notes: "Unicode sparklines with subsampling. 6 tests + 2 proptests."
  - id: ENT-058
    name: "Progress bar with ETA"
    estimated_hours: 3
    actual_hours: 1
    status: complete
    notes: "Kalman-filtered ETA. 4 tests + 1 proptest."
  - id: ENT-059
    name: "Multi-panel dashboard layout"
    estimated_hours: 4
    actual_hours: 1
    status: complete
    notes: "Minimal/Compact/Full layouts integrated in callback."
  - id: ENT-060
    name: "Adaptive refresh policy"
    estimated_hours: 2
    actual_hours: 1
    status: complete
    notes: "Step/time-based refresh with force_refresh. 2 tests."
  - id: ENT-061
    name: "Terminal capability detection"
    estimated_hours: 2
    actual_hours: 1
    status: complete
    notes: "TerminalCapabilities with TERM env detection. 2 tests."
  - id: ENT-062
    name: "YAML configuration support"
    estimated_hours: 2
    actual_hours: 1
    status: complete
    notes: "MonitorConfig with serde YAML roundtrip. 3 tests."
  - id: ENT-063
    name: "Property tests and documentation"
    estimated_hours: 3
    actual_hours: 1
    status: complete
    notes: "6 property tests covering all core components."
  - id: ENT-064
    name: "Real-time feature importance display"
    estimated_hours: 3
    actual_hours: 1
    status: complete
    notes: "FeatureImportanceChart with bar rendering. 3 tests."
  - id: ENT-065
    name: "Gradient flow heatmap"
    estimated_hours: 2
    actual_hours: 1
    status: complete
    notes: "GradientFlowHeatmap with per-layer gradients. 3 tests."
  - id: ENT-066
    name: "Health monitoring (Andon)"
    estimated_hours: 3
    actual_hours: 1
    status: complete
    notes: "NaN/Inf/divergence/stall detection. 5 tests + 1 proptest."
  - id: ENT-067
    name: "Reference curve overlay"
    estimated_hours: 3
    actual_hours: 1
    status: complete
    notes: "ReferenceCurve with JSON loading and deviation calc. 4 tests."
# Phase 11 - HuggingFace distillation pipeline: tickets ENT-068..ENT-083, all
# marked complete. Ticket estimates sum to the 128 h phase budget.
# `spec` and `review` are repository-relative document paths.
- name: "Phase 11: HuggingFace Distillation Pipeline"
  hours: 128
  status: complete
  description: "HuggingFace model fetching, distillation, and fine-tuning"
  spec: "docs/specifications/hugging-face-distill-learn-pipeline-spec.md"
  review: "docs/reviews/hugging-face-distill-learn-pipeline-review.md"
  tickets:
  - id: ENT-068
    name: "HfModelFetcher with authentication"
    estimated_hours: 8
    actual_hours: 2
    status: complete
    notes: "Token resolution from HF_TOKEN, ~/.huggingface/token"
  - id: ENT-069
    name: "FetchError enum with retry logic"
    estimated_hours: 4
    actual_hours: 1
    status: complete
    notes: "NetworkTimeout, RateLimited, CorruptFile, OOM variants"
  - id: ENT-070
    name: "SafeTensors model loading"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "Via safetensors crate, tensor extraction (mock)"
  - id: ENT-071
    name: "TeacherModel trait + memory estimation"
    estimated_hours: 8
    actual_hours: 2
    status: complete
    notes: "forward(), hidden_states(), estimate_memory()"
  - id: ENT-072
    name: "DistillationLoss with temperature"
    estimated_hours: 8
    actual_hours: 2
    status: complete
    notes: "KL divergence, Hinton et al. 2015"
  - id: ENT-073
    name: "ProgressiveDistillation layer matching"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "Sun et al. 2019, hidden state MSE"
  - id: ENT-074
    name: "AttentionTransfer loss"
    estimated_hours: 6
    actual_hours: 1
    status: complete
    notes: "Zagoruyko & Komodakis 2017"
  - id: ENT-075
    name: "LoRA adapter implementation"
    estimated_hours: 12
    actual_hours: 2
    status: complete
    notes: "FineTuneConfig with LoRA/QLoRA methods"
  - id: ENT-076
    name: "QLoRA 4-bit quantization"
    estimated_hours: 12
    # Zero hours: per the notes, the work already existed before this ticket.
    actual_hours: 0
    status: complete
    notes: "Already implemented in src/lora/qlora.rs"
  - id: ENT-077
    name: "HfDatasetFetcher with streaming"
    estimated_hours: 8
    actual_hours: 2
    status: complete
    notes: "Dataset, Example, DatasetOptions in dataset.rs"
  - id: ENT-078
    name: "DistillationCollator for batching"
    estimated_hours: 6
    actual_hours: 1
    status: complete
    notes: "Dynamic padding, TeacherCache in dataset.rs"
  - id: ENT-079
    name: "DistillationTrainer integration"
    estimated_hours: 12
    actual_hours: 2
    status: complete
    notes: "TrainerConfig, TrainingState, compute_loss"
  - id: ENT-080
    name: "YAML config for distillation"
    estimated_hours: 6
    actual_hours: 2
    status: complete
    notes: "DistillationYamlConfig with full schema"
  - id: ENT-081
    name: "Export formats (SafeTensors, APR)"
    estimated_hours: 8
    actual_hours: 2
    status: complete
    notes: "ExportFormat enum, Exporter with save_safetensors/save_apr/save_gguf"
  - id: ENT-082
    name: "Integration tests"
    estimated_hours: 4
    actual_hours: 1
    status: complete
    notes: "6 integration tests covering pipeline flow, dataset, config, export"
  - id: ENT-083
    name: "Property tests"
    estimated_hours: 10
    actual_hours: 1
    status: complete
    notes: "200K+ proptest iterations"
# Phase 12 - monitor WASM dashboard: tickets ENT-084..ENT-087, all marked
# complete. Ticket estimates sum to the 24 h phase budget.
- name: "Phase 12: Monitor WASM Dashboard"
  hours: 24
  status: complete
  tickets:
  - id: ENT-084
    name: "WASM module structure with wasm-bindgen"
    estimated_hours: 4
    actual_hours: 1
    status: complete
    notes: "WasmMetricsCollector, WasmDashboardOptions, 13 tests"
  - id: ENT-085
    name: "MetricsCollector WASM bindings"
    estimated_hours: 6
    actual_hours: 1
    status: complete
    notes: "20 tests, TypeScript .d.ts, loss/accuracy arrays, NaN/Inf detection"
  - id: ENT-086
    name: "Canvas dashboard rendering"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "WasmDashboard, sparklines, normalized coords, JSON state. 22 new tests."
  - id: ENT-087
    name: "WASM property tests + e2e"
    estimated_hours: 6
    actual_hours: 1
    status: complete
    notes: "11 proptest properties: bounds, normalization, sparklines, JSON"
# Phase 13 - compiler-in-the-loop training: tickets ENT-088 and ENT-089, both
# marked complete. Ticket estimates sum to the 16 h phase budget.
# `spec` holds two issue URLs in one comma-separated string.
- name: "Phase 13: Compiler-in-the-Loop Training (CITL)"
  hours: 16
  status: complete
  description: "RAG-based fix pattern storage and error-fix correlation"
  spec: "https://github.com/paiml/entrenar/issues/28, https://github.com/paiml/entrenar/issues/29"
  tickets:
  - id: ENT-088
    name: "DecisionPatternStore with trueno-rag hybrid retrieval"
    estimated_hours: 8
    actual_hours: 2
    status: complete
    # Literal block scalar: line breaks are kept, so each sentence stays on
    # its own line in the parsed value.
    notes: |
      FixPattern struct (error_code, decision_sequence, fix_diff, counts).
      DecisionPatternStore with trueno-rag RagPipeline.
      BM25 + dense embedding hybrid retrieval with RRF fusion.
      suggest_fix() with weighted scoring (retrieval * success_rate).
      JSON import/export. 46 tests including 4 property tests (200 cases each).
  - id: ENT-089
    name: "DecisionCITL trainer for error-fix correlation"
    estimated_hours: 8
    actual_hours: 2
    status: complete
    notes: |
      DecisionTrace with span, timestamp, dependencies.
      CompilationOutcome (success/failure with error codes).
      Tarantula fault localization (suspiciousness scoring).
      ingest_session() for trace processing with pattern indexing.
      correlate_error() for fault localization.
      Dependency graph building and root cause analysis.
      33 tests including 4 property tests (200 cases each).
      Total: 79 CITL tests.
# Phase 14 - MCTS and GAN for program synthesis: tickets ENT-090..ENT-101, all
# marked complete.
# NOTE(review): ticket estimates sum to 100 h while the phase budget is 96 h;
# confirm which figure is authoritative.
- name: "Phase 14: MCTS & GAN for Program Synthesis"
  hours: 96
  status: complete
  description: "Monte Carlo Tree Search and GANs for code translation (Issue #76)"
  spec: "https://github.com/paiml/entrenar/issues/76"
  tickets:
  - id: ENT-090
    name: "MCTS core types (State, Action, Node, Tree)"
    estimated_hours: 8
    actual_hours: 2
    status: complete
    notes: "State/Action/Node/Tree traits + SearchTree. 34 tests including 9 property tests."
  - id: ENT-091
    name: "UCB1/UCT selection policy"
    estimated_hours: 6
    actual_hours: 1
    status: complete
    notes: "UCB1 and PUCT selection with configurable exploration constant."
  - id: ENT-092
    name: "Expansion and simulation for partial ASTs"
    estimated_hours: 12
    actual_hours: 2
    status: complete
    notes: "State: Partial AST, Action: Transform Rule, Simulation: Random playout with configurable depth."
  - id: ENT-093
    name: "Backpropagation and reward propagation"
    estimated_hours: 6
    actual_hours: 1
    status: complete
    notes: "Reward propagation up tree, statistics tracking."
  - id: ENT-094
    name: "Policy network integration via aprender"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "PolicyNetwork trait with predict/value methods, PUCT integration."
  - id: ENT-095
    name: "MCTS convergence property tests"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "9 property tests covering UCB convergence, tree consistency, visit counts."
  - id: ENT-096
    name: "GAN core types (Generator, Discriminator)"
    estimated_hours: 8
    actual_hours: 2
    status: complete
    notes: "LatentCode, Generator, Discriminator, CodeGan with configurable architectures."
  - id: ENT-097
    name: "Generator network for Rust AST candidates"
    estimated_hours: 12
    actual_hours: 2
    status: complete
    notes: "MLP Generator with Xavier init, generates AST token sequences from latent vectors."
  - id: ENT-098
    name: "Discriminator for syntax/semantics validation"
    estimated_hours: 10
    actual_hours: 2
    status: complete
    notes: "Embedding + MLP Discriminator with Leaky ReLU, sigmoid output for real/fake classification."
  - id: ENT-099
    name: "Latent space interpolation"
    estimated_hours: 6
    actual_hours: 1
    status: complete
    notes: "SLERP and LERP interpolation, normalize/norm operations."
  - id: ENT-100
    name: "GAN training loop with Trueno integration"
    estimated_hours: 8
    actual_hours: 2
    status: complete
    notes: "discriminator_loss, generator_loss, detect_mode_collapse, training stats tracking."
  - id: ENT-101
    name: "GAN property tests"
    estimated_hours: 8
    actual_hours: 1
    status: complete
    notes: "8 property tests covering loss bounds, interpolation, normalization, mode collapse."
# Phase 15 - real-time audit log and explainability: tickets ENT-102..ENT-113,
# all marked complete.
# NOTE(review): ticket estimates sum to 92 h while the phase budget is 80 h;
# confirm which figure is authoritative.
- name: "Phase 15: Real-Time Audit Log & Explainability"
  hours: 80
  status: complete
  description: "Real-time explainability and deep audit logging for APR format models (Issue #APR-EXPLAINABILITY)"
  spec: "docs/specifications/real-time-audit-log-explainability-apr-format-models.md"
  tickets:
  - id: ENT-102
    name: "DecisionPath trait + core path types (LinearPath, TreePath, ForestPath, KNNPath, NeuralPath)"
    estimated_hours: 12
    actual_hours: 10
    status: complete
    notes: "5 path types with binary serialization, explain() methods, and feature contributions."
  - id: ENT-103
    name: "Explainable trait for APR models"
    estimated_hours: 8
    actual_hours: 4
    status: complete
    notes: "predict_explained() and explain_one() methods for traced inference."
  - id: ENT-104
    name: "DecisionTrace and Counterfactual structs"
    estimated_hours: 6
    actual_hours: 6
    status: complete
    notes: "DecisionTrace<P> with FNV-1a input hashing, Counterfactual with L1/L2 distance."
  - id: ENT-105
    # NOTE(review): the name promises a stack-allocated, zero-heap collector,
    # but the notes describe a Vec-based ring buffer; confirm which is accurate.
    name: "RingCollector (stack-allocated, zero-heap for real-time)"
    estimated_hours: 8
    actual_hours: 6
    status: complete
    notes: "Vec-based ring buffer (safe, no unsafe code). O(1) record/latest."
  - id: ENT-106
    name: "StreamCollector (write-through for persistent logging)"
    estimated_hours: 6
    actual_hours: 4
    status: complete
    notes: "Buffered write-through with configurable flush threshold."
  - id: ENT-107
    name: "HashChainCollector (safety-critical with SHA-256)"
    estimated_hours: 10
    actual_hours: 8
    status: complete
    notes: "SHA-256 hash chain with verify_chain(), ChainVerification result."
  - id: ENT-108
    name: "Binary and JSON serialization for traces"
    estimated_hours: 6
    actual_hours: 4
    status: complete
    notes: "APRT binary format (magic 0x41505254), JSON, and JSON Lines."
  - id: ENT-109
    name: "InferenceMonitor wrapper with latency tracking"
    estimated_hours: 8
    actual_hours: 6
    status: complete
    notes: "Generic InferenceMonitor<M, C> with predict() and get_traces()."
  - id: ENT-110
    name: "SafetyAndon integration for inference"
    estimated_hours: 6
    actual_hours: 5
    status: complete
    notes: "SafetyIntegrityLevel (QM-SIL4), EmergencyCondition enum, check_trace()."
  - id: ENT-111
    name: "ProvenanceGraph for incident reconstruction"
    estimated_hours: 10
    actual_hours: 8
    status: complete
    notes: "DAG with Input/Transform/Inference/Fusion/Action nodes, IncidentReconstructor."
  - id: ENT-112
    name: "Property tests (200K+ iterations)"
    estimated_hours: 8
    actual_hours: 6
    status: complete
    notes: "119 inference tests, proptest for ring collector bounds and ordering."
  - id: ENT-113
    name: "Benchmarks (<100ns ring, <10µs hash chain)"
    estimated_hours: 4
    actual_hours: 2
    status: complete
    notes: "Benchmark infrastructure ready, performance targets met."
# Phase 16 - model evaluation and drift detection: tickets APR-073-1..APR-073-6
# (a different ID scheme from the ENT-* tickets), all marked complete.
# Ticket estimates sum to the 76 h phase budget; every ticket records
# actual_hours equal to its estimate.
- name: "Phase 16: Model Evaluation & Drift Detection"
  hours: 76
  status: complete
  description: "Standardized metrics, drift detection, and retraining hooks (APR-073)"
  spec: "docs/specifications/model-eval-framework-spec.md"
  tickets:
  - id: APR-073-1
    name: "Classification metrics (Accuracy, F1, Matrix)"
    estimated_hours: 8
    actual_hours: 8
    status: complete
  - id: APR-073-2
    name: "ModelEvaluator + Leaderboard + Renacer Trace"
    estimated_hours: 16
    actual_hours: 16
    status: complete
  - id: APR-073-3
    name: "Cross-validation integration"
    estimated_hours: 8
    actual_hours: 8
    status: complete
  - id: APR-073-4
    name: "Drift detection (KS, Chi-sq, PSI)"
    estimated_hours: 16
    actual_hours: 16
    status: complete
  - id: APR-073-5
    name: "Entrenar integration (Andon loop)"
    estimated_hours: 16
    actual_hours: 16
    status: complete
  - id: APR-073-6
    name: "Property tests (100k iters) + Documentation"
    estimated_hours: 12
    actual_hours: 12
    status: complete
# Roll-up totals for the whole roadmap.
# NOTE(review): the tickets listed in the visible part of this file appear to
# total 112 tickets, 1332 estimated hours and 276 actual hours, which differs
# from the figures below. Confirm whether the summary also counts tickets
# tracked elsewhere (for example the unlisted ENT-041..ENT-047) or is stale.
summary:
  total_estimated_hours: 1376
  total_actual_hours: 272
  completion_percentage: 100
  tickets_complete: 119
  tickets_in_progress: 0
  tickets_pending: 0