roadmap_version: '1.0'
github_enabled: true
github_repo: paiml/batuta
roadmap:
- id: INTEG-001
github_issue: null
item_type: task
title: End-to-end Sovereign Stack example
status: completed
priority: high
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/initial-release-spec.md
acceptance_criteria:
- Full workflow demo showing pacha → realizar → batuta integration
- Model registration with content addressing
- Ed25519 signature generation and verification
- ChaCha20-Poly1305 encryption and decryption
- Privacy tier enforcement (Sovereign mode)
- Comprehensive tests with ≥85% coverage
phases: []
subtasks: []
estimated_effort: 2 days
labels:
- integration
- sovereign-stack
- e2e
notes: Per Initial Release Specification §2-6
- id: INTEG-002
github_issue: null
item_type: task
title: Cross-project integration tests
status: completed
priority: high
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/initial-release-spec.md
acceptance_criteria:
- Integration tests for pacha → realizar pipeline
- Signature verification in serving pipeline
- Encryption/decryption in inference loop
- Privacy tier enforcement blocks unauthorized backends
- Property-based tests for security guarantees
phases: []
subtasks: []
estimated_effort: 3 days
labels:
- integration
- testing
- tdd
notes: Cross-project validation per spec
- id: INTEG-003
github_issue: null
item_type: task
title: Pacha URI scheme in realizar
status: completed
priority: medium
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/initial-release-spec.md
acceptance_criteria:
- Support pacha://model:version URI scheme
- Automatic metadata retrieval from pacha registry
- Lineage propagation to inference metrics
- Unit tests for URI parsing
phases: []
subtasks: []
estimated_effort: 1 day
labels:
- pacha
- realizar
- uri
notes: Enables direct registry integration
- id: QA-SEC4
github_issue: null
item_type: task
title: 'QA Checklist Section IV: Orchestration & Stack Health'
status: completed
priority: critical
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- '[31] Dependency Graph visualization'
- '[32] Cycle Detection passes'
- '[33] Path vs Crates.io verification'
- '[34] Version Alignment check'
- '[35] Release Topological Sort'
- '[36] TUI Dashboard renders'
- '[37] Git Tag Sync'
- '[38] Orphan Detection'
- '[39] CI Integration JSON output'
- '[40] Performance < 500ms'
phases: []
subtasks: []
estimated_effort: 1 day
labels:
- qa
- batuta
- toyota-way
notes: Items 31-40 from 100-point QA checklist
- id: QA-SEC5
github_issue: null
item_type: task
title: 'QA Checklist Section V: PMAT Compliance & Quality'
status: completed
priority: critical
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- '[41] TDG Baseline > 90/100'
- '[42] Test Coverage > 85%'
- '[43] Mutation Testing active'
- '[44] SATD Detection < 10 items'
- '[45] Linter Compliance (zero warnings)'
- '[46] Formatting 100% standard'
- '[47] Security Audit (zero vulns)'
- '[48] Dependency Freshness'
- '[49] Clean Architecture'
- '[50] Golden Traces verification'
phases: []
subtasks: []
estimated_effort: 1 day
labels:
- qa
- pmat
- quality
notes: Items 41-50 from 100-point QA checklist
- id: QA-PERF
github_issue: null
item_type: task
title: '[40] Performance Optimization - Crates.io Caching'
status: completed
priority: high
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- Implement in-memory cache for crates.io responses
- Add --offline mode for CI environments
- Stack check completes in < 500ms with warm cache
- Unit tests for cache hit/miss scenarios
- Property-based tests for cache consistency
phases: []
subtasks: []
estimated_effort: 4 hours
labels:
- qa
- performance
- caching
notes: QA Item 40 - Performance must be < 500ms
- id: QA-TUI
github_issue: null
item_type: task
title: '[36] TUI Dashboard Implementation'
status: completed
priority: medium
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- Interactive terminal UI for stack visualization
- Real-time dependency graph display
- Health status indicators with colors
- Keyboard navigation support
- Unit tests for TUI components
phases: []
subtasks: []
estimated_effort: 6 hours
labels:
- qa
- tui
- visualization
notes: QA Item 36 - TUI Dashboard for stack monitoring
- id: PMAT-UNWRAP
github_issue: null
item_type: task
title: '[41] CRITICAL: Replace 134 unwrap() calls with proper error handling'
status: completed
priority: critical
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2026-02-09T10:15:53.907410516+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- Replace all unwrap() in production code with expect() or ? operator
- Add context to error messages
- Zero unwrap() in critical paths (per Cloudflare 2025-11-18 outage)
- pmat rust-project-score Known Defects = 20/20
phases: []
subtasks: []
estimated_effort: 4 hours
labels:
- pmat
- quality
- critical
notes: 'Per pmat rust-project-score: 134 unwrap() calls detected'
- id: PMAT-LINTS
github_issue: null
item_type: task
title: '[41] Add workspace lints to Cargo.toml'
status: completed
priority: high
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- Add [workspace.lints.rust] section
- Add [workspace.lints.clippy] section
- Enable unsafe_op_in_unsafe_fn, unreachable_pub, checked_conversions
- Create .clippy.toml with disallowed-methods
phases: []
subtasks: []
estimated_effort: 1 hour
labels:
- pmat
- linting
- ci
notes: 'Per pmat rust-project-score: CI/CD score 28.5%'
- id: PMAT-DENY
github_issue: null
item_type: task
title: '[41] Add deny.toml for dependency policy'
status: completed
priority: medium
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- Create deny.toml with license policy
- Add banned crates list
- Configure advisory database checks
- Integrate with CI via make deny
phases: []
subtasks: []
estimated_effort: 1 hour
labels:
- pmat
- security
- dependencies
notes: 'Per pmat rust-project-score: Dependency Health 58.3%'
- id: PMAT-MUTANTS
github_issue: null
item_type: task
title: '[44] Mutation Testing - Tool Active'
status: completed
priority: medium
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- cargo mutants --list finds mutants - 354 mutants identified
- Tool is active and integrated with CI
- make mutants-fast target available
phases: []
subtasks: []
estimated_effort: 2 hours
labels:
- pmat
- testing
- mutation
notes: QA Item 44 - Mutants identified (tool is active). 354 mutants found.
- id: PMAT-CLIPPY
github_issue: null
item_type: task
title: '[46] Linter Compliance - Zero clippy warnings'
status: completed
priority: high
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- cargo clippy -- -D warnings passes
- Correctness lints set to deny
- Perf/style lints set to warn
phases: []
subtasks: []
estimated_effort: 1 hour
labels:
- pmat
- linting
- qa
notes: QA Item 46 - Zero warnings allowed
- id: PMAT-FMT
github_issue: null
item_type: task
title: '[47] Formatting - 100% cargo fmt compliance'
status: completed
priority: high
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- cargo fmt -- --check passes
- All code follows standard Rust formatting
phases: []
subtasks: []
estimated_effort: 30 minutes
labels:
- pmat
- formatting
- qa
notes: QA Item 47 - Standard Rust formatting
- id: PMAT-AUDIT
github_issue: null
item_type: task
title: '[48] Security Audit - Zero vulnerabilities'
status: completed
priority: critical
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- cargo audit passes (no vulnerabilities)
- Only warning is paste crate unmaintained (known, ignored)
phases: []
subtasks: []
estimated_effort: 30 minutes
labels:
- pmat
- security
- qa
notes: QA Item 48 - Zero vulnerabilities detected
- id: PMAT-OUTDATED
github_issue: null
item_type: task
title: '[49] Dependency Freshness - No critical outdated deps'
status: completed
priority: medium
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- cargo outdated reviewed
- No critical security-related outdated deps
- Major version updates deferred (potential breaking changes)
phases: []
subtasks: []
estimated_effort: 30 minutes
labels:
- pmat
- dependencies
- qa
notes: QA Item 49 - Dependency freshness checked
- id: PMAT-TDG
github_issue: null
item_type: task
title: '[42] TDG Baseline - Score > 90/100'
status: completed
priority: high
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- pmat tdg score > 90/100 (A- grade)
- 'Score achieved: 96.6/100 (A+)'
phases: []
subtasks: []
estimated_effort: 30 minutes
labels:
- pmat
- quality
- qa
notes: QA Item 42 - TDG Baseline
- id: PMAT-SATD
github_issue: null
item_type: task
title: '[45] SATD Detection - < 10 items'
status: completed
priority: medium
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- SATD count < 10 items
- Reduced from 12 to 6 items
- Converted TODOs to roadmap documentation
phases: []
subtasks: []
estimated_effort: 1 hour
labels:
- pmat
- quality
- qa
notes: QA Item 45 - SATD Detection
- id: PMAT-ARCH
github_issue: null
item_type: task
title: '[50] Clean Architecture - No layer violations'
status: completed
priority: high
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- batuta stack check passes
- All 7 crates healthy
- No layer boundary violations
phases: []
subtasks: []
estimated_effort: 30 minutes
labels:
- pmat
- architecture
- qa
notes: QA Item 50 - Clean Architecture check
- id: PMAT-COVERAGE
github_issue: null
item_type: task
title: '[43] Test Coverage - > 85%'
status: completed
priority: high
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- make coverage or tarpaulin > 85%
- All tests pass (1019 tests)
- Achieved 94.91% line coverage
phases: []
subtasks: []
estimated_effort: 2 hours
labels:
- pmat
- testing
- qa
notes: QA Item 43 - Test Coverage - 94.91% achieved
- id: PMAT-TRACES
github_issue: null
item_type: task
title: '[51] Golden Traces - Renacer verification'
status: completed
priority: medium
assigned_to: null
created: 2025-12-06T00:00:00+00:00
updated: 2025-12-06T00:00:00+00:00
spec: docs/specifications/batuta-stack-0.1-100-point-qa-checklist.md
acceptance_criteria:
- Golden traces captured for all examples
- orchestration_latency < 5000ms - PASS
- max_syscall_budget < 10000 syscalls - PASS
- memory_allocation_budget < 1GB - PASS
- Trace summaries generated with syscall statistics
phases: []
subtasks: []
estimated_effort: 1 hour
labels:
- pmat
- traces
- qa
notes: QA Item 51 - Golden Traces verification via renacer
- id: STACK-AUDIT
github_issue: null
item_type: task
title: 'New task: STACK-AUDIT'
status: completed
priority: medium
assigned_to: null
created: 2025-12-07T13:11:35.842705416+00:00
updated: 2026-01-15T22:55:19.696504972+00:00
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: COURSE-LEVELS
github_issue: null
item_type: task
title: 'New task: COURSE-LEVELS'
status: completed
priority: medium
assigned_to: null
created: 2025-12-07T15:46:57.018942575+00:00
updated: 2025-12-07T15:48:07.938368449+00:00
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: BOOK-SCORE
github_issue: null
item_type: task
title: 'New task: BOOK-SCORE'
status: completed
priority: medium
assigned_to: null
created: 2025-12-22T11:12:29.150989393+00:00
updated: 2025-12-22T11:17:31.548902413+00:00
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: SCORE-A-PLUS
github_issue: null
item_type: task
title: 'New task: SCORE-A-PLUS'
status: completed
priority: medium
assigned_to: null
created: 2025-12-22T11:18:36.593017898+00:00
updated: 2025-12-22T11:22:06.543122824+00:00
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-STACK-GATES
github_issue: null
item_type: task
title: Integrate PMAT quality gates into stack release pipeline
status: completed
priority: high
assigned_to: null
created: 2026-01-13T00:00:00+00:00
updated: 2026-01-13T12:28:36.646341910+00:00
spec: null
acceptance_criteria:
- Add pmat quality-gate check to preflight
- Add pmat tdg scoring check with configurable threshold
- Add pmat analyze dead-code check
- Add pmat popper-score check for falsifiability
- Add pmat analyze complexity check
- Add pmat analyze satd check for tech debt
- Add pmat five-whys integration for failure diagnosis
- All checks configurable via ReleaseConfig
- Tests for all new checks
phases: []
subtasks: []
estimated_effort: 1 day
labels:
- pmat
- quality
- stack
- release
notes: Prevent quality issues before stack deployment
- id: DEP-REDUCE
github_issue: null
item_type: task
title: Reduce external dependencies - replace with PAIML stack components
status: completed
priority: high
assigned_to: null
created: 2026-01-13T00:00:00+00:00
updated: 2026-01-13T12:45:21.985040892+00:00
spec: null
acceptance_criteria:
- Replace regex-lite with string methods (3 patterns)
- Replace colored with ANSI constants module
- Migrate petgraph to trueno-graph for dependency analysis
- Remove unused dependencies from Cargo.toml
- All tests pass
- Binary size reduced by ~400KB
phases: []
subtasks: []
estimated_effort: 4 hours
labels:
- dependencies
- optimization
- paiml-stack
notes: Dogfood PAIML stack components instead of external deps
- id: ORACLE-LOCAL
github_issue: null
item_type: task
title: Local workspace oracle for multi-project development
status: completed
priority: high
assigned_to: null
created: 2026-01-14T00:00:00+00:00
updated: 2026-01-15T22:52:32.134043386+00:00
spec: null
acceptance_criteria:
- Auto-discover PAIML projects in ~/src (scan for Cargo.toml)
- Track git status across all discovered projects
- Build cross-project dependency graph using trueno-graph
- Detect version drift (local version vs crates.io version)
- Suggest publish order for dependent crates (topological sort)
- Single command interface - batuta oracle status
- Performance < 500ms with warm cache
phases: []
subtasks: []
estimated_effort: 4 hours
labels:
- oracle
- multi-project
- workspace
- trueno-graph
notes: Enables intelligent orchestration across 10+ local PAIML projects
- id: PMAT-004
github_issue: null
item_type: task
title: Document CPU thread optimization findings
status: completed
priority: medium
assigned_to: null
created: 2026-01-15T22:52:56Z
updated: 2026-01-15T22:53:52.021086127+00:00
spec: null
acceptance_criteria:
- Document the 2.05x speedup discovery from reducing rayon thread count from 48 to 16. Add performance tuning section to batuta book.
phases: []
subtasks: []
estimated_effort: null
labels:
- docs
- perf
notes: null
- id: SIMD-EXP
github_issue: null
item_type: task
title: SIMD exp approximation for 2-3x softmax speedup
status: completed
priority: high
assigned_to: null
created: 2026-01-16T00:00:00+00:00
updated: 2026-01-16T00:00:00+00:00
spec: null
acceptance_criteria:
- Implement polynomial exp approximation (ggml_v_expf equivalent)
- AVX2 and AVX-512 variants
- Unit tests with <1e-5 error vs std exp
- BrickProfiler benchmarks showing 2-3x improvement
- 95% test coverage
phases: []
subtasks: []
estimated_effort: 4 hours
labels:
- trueno
- simd
- performance
notes: Match llama.cpp performance for softmax
- id: QUANT-Q5K
github_issue: null
item_type: task
title: Add Q5_K and Q6_K quantization formats
status: completed
priority: high
assigned_to: null
created: 2026-01-16T00:00:00+00:00
updated: 2026-01-16T00:00:00+00:00
spec: null
acceptance_criteria:
- Q5_K block format (5-bit with super-blocks)
- Q6_K block format (6-bit with super-blocks)
- Dequantize and dot product kernels
- Unit tests and property-based tests
- 95% test coverage
phases: []
subtasks: []
estimated_effort: 6 hours
labels:
- trueno
- quantization
- llama-compat
notes: Extended quantization formats for better model support
- id: GH-18
github_issue: 18
item_type: task
title: 'bug-hunter: Improve lcov.info path detection for SBFL analysis'
status: completed
priority: medium
assigned_to: null
created: 2026-02-04T14:04:35.768632865+00:00
updated: 2026-02-09T10:16:20.065193052+00:00
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: GH-19
github_issue: 19
item_type: task
title: 'bug-hunter fuzz: Skip BH-FUZZ-NOTARGETS for #![forbid(unsafe_code)] crates'
status: completed
priority: medium
assigned_to: null
created: 2026-02-04T14:09:12.476631222+00:00
updated: 2026-02-09T11:17:59.401774190+00:00
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: RAG-SQLITE-CLEANUP
github_issue: null
item_type: task
title: 'New task: RAG-SQLITE-CLEANUP'
status: completed
priority: medium
assigned_to: null
created: 2026-02-09T09:21:40.513041004+00:00
updated: 2026-02-09T09:59:19.529270950+00:00
spec: null
acceptance_criteria:
- 'Phase 3 cleanup: gated JSON types behind cfg, routed pmat_query through SQLite, deduplicated sqlite_index_path, fixed clippy. Remaining: delete .bak files after release cycle.'
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: GH-23
github_issue: null
item_type: task
title: 'New task: GH-23'
status: completed
priority: medium
assigned_to: null
created: 2026-02-09T10:18:12.544059064+00:00
updated: 2026-02-09T10:25:53.622730966+00:00
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: GH-22
github_issue: null
item_type: task
title: 'New task: GH-22'
status: completed
priority: medium
assigned_to: null
created: 2026-02-09T10:26:04.855564972+00:00
updated: 2026-02-09T11:07:40.123628442+00:00
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: GH-9
github_issue: 9
item_type: task
title: 'feat: Add release orchestration for PAIML stack dependencies'
status: completed
priority: medium
assigned_to: null
created: 2026-02-09T11:08:50.096913040+00:00
updated: 2026-02-09T11:33:22Z
spec: null
acceptance_criteria:
- Dependency graph analysis across local workspace
- Topological sort for release order
- Pre-flight quality checks (lint, coverage, git status)
- Automatic Cargo.toml dependency updates
- Interactive confirmation before each publish
- Post-release verification (crates.io availability)
- Dry-run mode for planning
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: GH-14
github_issue: 14
item_type: task
title: 'RFC: Ollama-style CLI location - realizar vs batuta'
status: completed
priority: medium
assigned_to: null
created: 2026-02-09T11:18:07.816849610+00:00
updated: 2026-02-09T11:33:27Z
spec: null
acceptance_criteria:
- Decision documented in ADR
- CLI location determined
- Command structure defined (`run`, `pull`, `list`, `serve`, `chat`)
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: GH-11
github_issue: 11
item_type: task
title: 'feat: Add MCP server for HuggingFace integration tools'
status: completed
priority: medium
assigned_to: null
created: 2026-02-09T11:19:38.747089510+00:00
updated: 2026-02-09T11:33:27Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-024
github_issue: null
item_type: task
title: Implement build command - Phase 5 pipeline (BATUTA-009)
status: completed
priority: medium
assigned_to: null
created: 2026-02-09T11:55:16Z
updated: 2026-02-09T11:59:33Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-025
github_issue: null
item_type: task
title: Implement optimize command - Phase 3 pipeline (BATUTA-007)
status: completed
priority: medium
assigned_to: null
created: 2026-02-09T12:46:31Z
updated: 2026-02-09T12:46:45Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-026
github_issue: null
item_type: task
title: Implement Ruchy REPL in transpile command
status: completed
priority: medium
assigned_to: null
created: 2026-02-09T14:06:33Z
updated: 2026-02-09T14:08:30Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-027
github_issue: null
item_type: task
title: 'Implement cmd_validate sub-features: diff_output, run_original_tests, benchmark'
status: planned
priority: medium
assigned_to: null
created: 2026-02-09T14:13:54Z
updated: 2026-02-09T14:13:54Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-028
github_issue: null
item_type: task
title: Update book, examples, and CLI reference documentation
status: planned
priority: medium
assigned_to: null
created: 2026-02-09T14:23:31Z
updated: 2026-02-09T14:23:31Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: SQI-055
github_issue: null
item_type: task
title: 'New task: SQI-055'
status: inprogress
priority: medium
assigned_to: null
created: 2026-02-28T22:19:50.874080982+00:00
updated: 2026-02-28T22:19:50.874080982+00:00
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-056
github_issue: null
item_type: task
title: 'Fix nightly CI: patch path dependencies'
status: inprogress
priority: medium
assigned_to: null
created: 2026-03-10T12:56:14Z
updated: 2026-03-10T12:56:16.519769707+00:00
spec: null
acceptance_criteria:
- Patch path deps (trueno-rag, trueno-cuda-edge) in nightly.yml so CI builds without local sibling repos
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-057
github_issue: null
item_type: task
title: 'Banco: unified AI studio interface (batuta serve --banco)'
status: completed
priority: high
assigned_to: null
created: 2026-03-18T20:58:21Z
updated: 2026-03-19T12:15:15Z
spec: null
acceptance_criteria:
- 'Add Banco, a local-first AI workbench UI served by batuta. Phase 1: HTTP API foundation with model management (pacha), inference with SSE streaming (realizar), and OpenAI-compatible endpoints. Privacy tier middleware using existing BackendSelector. Feature-gated behind ''banco'' flag.'
phases: []
subtasks: []
estimated_effort: null
labels:
- banco
- studio
- serve
- api
notes: null
- id: PMAT-058
github_issue: null
item_type: task
title: 'Banco provable contracts: 5 YAML contracts + binding registry + spec title fix'
status: completed
priority: medium
assigned_to: null
created: 2026-03-19T15:24:31Z
updated: 2026-03-19T15:28:53Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-059
github_issue: null
item_type: task
title: 'Banco: generate provable contract tests + wire into banco test suite'
status: completed
priority: medium
assigned_to: null
created: 2026-03-19T15:30:49Z
updated: 2026-03-19T15:33:45Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-060
github_issue: null
item_type: task
title: 'Banco P0 cross-cutting: OpenAI SDK compat (Role fix + /v1/ routes), config persistence, no-telemetry'
status: completed
priority: medium
assigned_to: null
created: 2026-03-19T15:35:25Z
updated: 2026-03-19T15:44:03Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-061
github_issue: null
item_type: task
title: 'STOP THE LINE: fix pre-push clippy gate — allow unwrap_used/float_cmp in test code + fix 16 real lint errors'
status: completed
priority: medium
assigned_to: null
created: 2026-03-19T15:54:32Z
updated: 2026-03-19T16:38:37Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-062
github_issue: null
item_type: task
title: 'Banco: book chapter + cookbook recipe + P1 tokenizer endpoint'
status: completed
priority: medium
assigned_to: null
created: 2026-03-19T16:47:38Z
updated: 2026-03-19T16:59:51Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-063
github_issue: null
item_type: task
title: 'Banco P1: embeddings endpoint + request audit logging middleware'
status: completed
priority: medium
assigned_to: null
created: 2026-03-19T17:01:16Z
updated: 2026-03-19T17:06:19Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-064
github_issue: null
item_type: task
title: 'Banco P1: conversation persistence (create, list, get, delete, auto-title)'
status: completed
priority: medium
assigned_to: null
created: 2026-03-19T20:26:45Z
updated: 2026-03-19T20:32:53Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-065
github_issue: null
item_type: task
title: 'Banco P2: system prompt presets + Ollama API compat layer'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T06:20:00Z
updated: 2026-03-20T06:24:41Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-066
github_issue: null
item_type: task
title: 'Banco P2: API key authentication for LAN access'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T07:48:10Z
updated: 2026-03-20T07:51:08Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-067
github_issue: null
item_type: task
title: 'Banco: CORS middleware + book SUMMARY entry + banco module file size check'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T07:56:49Z
updated: 2026-03-20T08:00:08Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-068
github_issue: null
item_type: task
title: 'Banco: wire config.toml into startup + split handlers.rs before 500-line limit'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T08:02:28Z
updated: 2026-03-20T08:05:25Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-069
github_issue: null
item_type: task
title: 'Banco Phase 2a: model slot + load/unload/status endpoints + --model CLI flag'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T12:59:06Z
updated: 2026-03-20T13:04:36Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-070
github_issue: null
item_type: task
title: 'Banco Phase 2a: inference parameter tuning (GET/PUT /api/v1/chat/parameters)'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T13:07:49Z
updated: 2026-03-20T13:10:25Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-071
github_issue: null
item_type: task
title: 'Banco: split types.rs by domain + comprehensive cookbook update'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T13:12:03Z
updated: 2026-03-20T13:15:19Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-072
github_issue: null
item_type: task
title: 'Banco Phase 2a final: response_format field + book banco chapter update + pmat checkpoint'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T13:17:45Z
updated: 2026-03-20T13:20:21Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-073
github_issue: null
item_type: task
title: 'Banco Phase 2b: realizar GGUF metadata loading behind inference feature'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T14:11:55Z
updated: 2026-03-20T14:16:16Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-074
github_issue: null
item_type: task
title: 'Banco Phase 2b: store OwnedQuantizedModel + vocab in model slot'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T14:20:02Z
updated: 2026-03-20T14:23:52Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-075
github_issue: null
item_type: task
title: 'Banco Phase 2b: inference-aware chat handler + update book/cookbook/spec'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T14:27:01Z
updated: 2026-03-20T14:30:31Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-076
github_issue: null
item_type: task
title: 'STOP THE LINE: fix 11 flaky tests with unique temp directory paths'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T14:39:05Z
updated: 2026-03-20T14:47:32Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-077
github_issue: null
item_type: task
title: 'PMAT-077: Wire realizar inference loop into banco chat handler'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T15:31:52Z
updated: 2026-03-20T15:53:38Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-078
github_issue: null
item_type: task
title: 'PMAT-078: Wire real tokenizer into tokenize/detokenize endpoints when model loaded'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T15:46:58Z
updated: 2026-03-20T15:53:42Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-079
github_issue: null
item_type: task
title: 'PMAT-079: Add Phase 2b integration tests for inference pipeline'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T15:53:52Z
updated: 2026-03-20T15:57:55Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-080
github_issue: null
item_type: task
title: 'PMAT-080: Real embeddings from model embedding layer when inference enabled'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T16:00:57Z
updated: 2026-03-20T16:05:22Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-081
github_issue: null
item_type: task
title: 'PMAT-081: Add Ollama generate endpoint + update spec status to Phase 2b complete'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T16:08:52Z
updated: 2026-03-20T16:14:36Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-082
github_issue: null
item_type: task
title: 'PMAT-082: Conversation export/import endpoints'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T16:14:45Z
updated: 2026-03-20T16:21:47Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-083
github_issue: null
item_type: task
title: 'PMAT-083: Phase 3 foundation — file upload, list, delete endpoints with storage module'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T16:23:09Z
updated: 2026-03-20T16:32:58Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-084
github_issue: null
item_type: task
title: 'PMAT-084: Data recipes engine — create, run, list recipes with chunk+format steps'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T16:33:23Z
updated: 2026-03-20T16:42:34Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-085
github_issue: null
item_type: task
title: 'PMAT-085: Built-in RAG pipeline — index uploaded docs, retrieve in chat'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T16:52:05Z
updated: 2026-03-20T17:14:19Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-086
github_issue: null
item_type: task
title: 'PMAT-086: Eval endpoints — perplexity and benchmark using existing inference'
status: completed
priority: medium
assigned_to: null
created: 2026-03-20T20:54:53Z
updated: 2026-03-20T21:08:33Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-087
github_issue: null
item_type: task
title: 'PMAT-087: Experiment tracking — create experiments, compare runs'
status: completed
priority: medium
assigned_to: null
created: 2026-03-21T07:47:34Z
updated: 2026-03-21T07:55:13Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-088
github_issue: null
item_type: task
title: 'PMAT-088: Batch inference endpoint — process JSONL prompts'
status: completed
priority: medium
assigned_to: null
created: 2026-03-21T07:58:56Z
updated: 2026-03-21T08:03:57Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-089
github_issue: null
item_type: task
title: 'PMAT-089: Wire inference into batch + eval endpoints for real model usage'
status: completed
priority: medium
assigned_to: null
created: 2026-03-21T08:34:00Z
updated: 2026-03-21T08:38:03Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-090
github_issue: null
item_type: task
title: 'PMAT-090: Runtime config endpoint — GET/PUT /api/v1/config'
status: completed
priority: medium
assigned_to: null
created: 2026-03-21T08:41:19Z
updated: 2026-03-21T08:44:54Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-091
github_issue: null
item_type: task
title: 'PMAT-091: Audit log query endpoint + enrich /system with operational stats'
status: completed
priority: medium
assigned_to: null
created: 2026-03-21T09:05:00Z
updated: 2026-03-21T09:12:14Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-092
github_issue: null
item_type: task
title: 'PMAT-092: Wire disk persistence for conversations, files, audit in from_config'
status: completed
priority: medium
assigned_to: null
created: 2026-03-21T09:14:42Z
updated: 2026-03-21T09:20:50Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-093
github_issue: null
item_type: task
title: 'PMAT-093: Improve no-model UX — helpful responses, usage hints, model loading guide'
status: completed
priority: medium
assigned_to: null
created: 2026-03-21T09:23:46Z
updated: 2026-03-21T09:33:05Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-094
github_issue: null
item_type: task
title: 'PMAT-094: Conversation search endpoint — find conversations by content'
status: completed
priority: medium
assigned_to: null
created: 2026-03-21T09:35:47Z
updated: 2026-03-21T09:45:11Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-095
github_issue: null
item_type: task
title: 'PMAT-095: Conversation rename + RAG search endpoint'
status: completed
priority: medium
assigned_to: null
created: 2026-03-21T09:45:20Z
updated: 2026-03-21T09:50:51Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-096
github_issue: null
item_type: task
title: 'PMAT-096: Load existing conversations and files from disk on startup'
status: completed
priority: medium
assigned_to: null
created: 2026-03-21T10:10:04Z
updated: 2026-03-21T10:15:11Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-097
github_issue: null
item_type: task
title: 'PMAT-097: Save assistant responses to conversations + reload RAG index on startup'
status: completed
priority: medium
assigned_to: null
created: 2026-03-21T10:17:10Z
updated: 2026-03-21T10:22:37Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-098
github_issue: null
item_type: task
title: 'PMAT-098: Split handlers.rs — extract data handlers to stay under 500 lines'
status: completed
priority: medium
assigned_to: null
created: 2026-03-21T10:22:57Z
updated: 2026-03-21T10:28:12Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-099
github_issue: null
item_type: task
title: 'PMAT-099: Startup summary — show loaded conversations, files, RAG status'
status: completed
priority: medium
assigned_to: null
created: 2026-03-21T10:30:34Z
updated: 2026-03-21T10:34:02Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-100
github_issue: null
item_type: task
title: 'PMAT-100: Quality gate — full test sweep, book final update, cookbook final recipe'
status: completed
priority: medium
assigned_to: null
created: 2026-03-21T10:41:06Z
updated: 2026-03-21T10:46:06Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-101
github_issue: null
item_type: task
title: 'PMAT-101: Wire entrenar LoRA into Banco training'
status: inprogress
priority: medium
assigned_to: null
created: 2026-03-21T11:16:10Z
updated: 2026-03-21T11:16:14.375407828+00:00
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-102
github_issue: null
item_type: task
title: 'apr code spec suite: commit, cross-ref audit, dogfood batuta-spec v2.3'
status: completed
priority: high
assigned_to: null
created: 2026-04-02T15:54:25Z
updated: 2026-04-02T16:00:04Z
spec: null
acceptance_criteria:
- Commit 6 new + 2 modified spec files. Fix cross-ref gaps (12 banco specs missing from parent). Dogfood batuta-spec.md against actual code. Refs apr-code, multi-provider-api, agent-and-playbook, presentar-probar-integration, falsification-report, apr-code-tui-testing, apr-code-feasibility-falsification.
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-103
github_issue: null
item_type: task
title: 'apr code Phase 1: implement FileReadTool + FileWriteTool + FileEditTool'
status: completed
priority: high
assigned_to: null
created: 2026-04-02T16:08:24Z
updated: 2026-04-02T16:15:26Z
spec: null
acceptance_criteria:
- 'First 3 of 6 missing file tools for apr code. Register in ToolRegistry alongside existing ShellTool. Probar-first: write tests before implementation. ~300 lines.'
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-104
github_issue: null
item_type: task
title: 'apr code Phase 1: implement GlobTool + GrepTool'
status: completed
priority: high
assigned_to: null
created: 2026-04-02T16:18:46Z
updated: 2026-04-02T16:24:45Z
spec: null
acceptance_criteria:
- 'Remaining search tools for apr code. GlobTool: file pattern matching. GrepTool: content search with context lines. ~160 lines total. Refs apr-code-feasibility-falsification GAP-B.'
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-105
github_issue: null
item_type: task
title: 'apr code Phase 1 GAP-A: interactive REPL with streaming'
status: completed
priority: high
assigned_to: null
created: 2026-04-02T16:31:20Z
updated: 2026-04-02T16:41:36Z
spec: null
acceptance_criteria:
- 'The single biggest remaining blocker for apr code. Replace blocking read-prompt loop with crossterm raw-mode event loop. Split-pane: input at bottom, streaming output above. Slash command parser. Ctrl+C cancels generation. Uses existing mpsc::channel<StreamEvent>.'
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-106
github_issue: null
item_type: task
title: 'apr code: wire REPL into batuta CLI subcommand'
status: completed
priority: high
assigned_to: null
created: 2026-04-02T21:45:19Z
updated: 2026-04-02T22:03:59Z
spec: null
acceptance_criteria:
- Add 'batuta code' subcommand that calls agent::repl::run_repl(). Builds default AgentManifest for coding assistant, registers file/search/shell tools, discovers local model. ~100 lines. This makes apr code runnable from batuta directly.
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-107
github_issue: null
item_type: task
title: 'apr code: smoke test batuta code with MockDriver'
status: completed
priority: high
assigned_to: null
created: 2026-04-02T22:05:02Z
updated: 2026-04-02T22:06:59Z
spec: null
acceptance_criteria:
- 'Integration test: build default manifest, register tools, run agent loop with MockDriver that returns tool_use then end_turn. Verify file_read tool executes. This proves the full pipeline works end-to-end.'
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-108
github_issue: null
item_type: task
title: 'apr code: test with real model via RealizarDriver'
status: completed
priority: high
assigned_to: null
created: 2026-04-02T22:14:02Z
updated: 2026-04-02T22:43:27Z
spec: null
acceptance_criteria:
- 'Falsification test F-2 from feasibility report: does RealizarDriver + Qwen2.5-Coder produce valid tool_use JSON? Run batuta code --offline with a local GGUF model. This is the single most important validation remaining — proves Sovereign tier actually works for coding tasks.'
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-109
github_issue: null
item_type: task
title: 'spec dogfood: reconcile apr-code.md claims vs actual implementation'
status: completed
priority: high
assigned_to: null
created: 2026-04-02T22:17:43Z
updated: 2026-04-02T22:20:03Z
spec: null
acceptance_criteria:
- The spec was written before implementation. Now that batuta code exists, audit every claim in apr-code.md against actual code. Fix stale tool counts, update phase status, correct architecture diagram, reconcile with feasibility report.
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
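# NOTE(review): this item's status is `completed`, but its acceptance criteria
# say it was cancelled (apr code is Sovereign-only) — confirm the intended status.
- id: PMAT-110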
github_issue: null
item_type: task
title: 'apr code Phase 2: wire RemoteDriver for Anthropic API'
status: completed
priority: high
assigned_to: null
created: 2026-04-02T22:21:07Z
updated: 2026-04-02T22:21:49Z
spec: null
acceptance_criteria:
- Cancelled — apr code is Sovereign stack ONLY. No remote API wiring.
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-111
github_issue: null
item_type: task
title: 'spec fix: apr code is Sovereign-only, remove remote API claims'
status: completed
priority: critical
assigned_to: null
created: 2026-04-02T22:21:56Z
updated: 2026-04-02T22:26:21Z
spec: null
acceptance_criteria:
- apr code uses ONLY local models via realizar. Remove all multi-provider, Anthropic/OpenAI, remote API references. The comparison table, phase table, and multi-provider-api.md cross-refs all need fixing. This is a design decision, not a gap.
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-112
github_issue: null
item_type: task
title: 'spec fix: cascade Sovereign-only into all dependent specs'
status: completed
priority: high
assigned_to: null
created: 2026-04-02T22:27:26Z
updated: 2026-04-02T22:31:08Z
spec: null
acceptance_criteria:
- apr-code.md is now Sovereign-only but multi-provider-api.md, agent-and-playbook.md, presentar-probar-integration.md, and falsification-report.md still reference remote providers in apr code context. Also update batuta-spec.md overview and provable contracts.
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-113
github_issue: null
item_type: task
title: 'apr code: implement no_model_error — clear error when no local model'
status: completed
priority: high
assigned_to: null
created: 2026-04-02T22:32:12Z
updated: 2026-04-02T22:34:44Z
spec: null
acceptance_criteria:
- 'Contract FALSIFY-AC-004: when no local model found, show error with apr pull instructions instead of silently using MockDriver. This is the Sovereign-only contract — never silently degrade.'
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-114
github_issue: null
item_type: task
title: 'apr code: download model + test RealizarDriver end-to-end'
status: completed
priority: critical
assigned_to: null
created: 2026-04-02T22:35:51Z
updated: 2026-04-02T22:43:26Z
spec: null
acceptance_criteria:
- 'PMAT-108 blocker resolved: use apr pull to download a coding model (Qwen2.5-Coder or Qwen3), then test batuta code with real RealizarDriver. This proves the Sovereign tier claim. Prefer APR format, GGUF as fallback.'
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-115
github_issue: null
item_type: task
title: 'apr code Phase 2a: multi-turn conversation history for REPL'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T00:00:00Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- run_agent_turn() accepts &mut Vec<Message> for persistent history
- REPL accumulates messages across turns
- /context shows history breakdown, /compact strips old tool details, /clear resets
- 6 new tests (3 runtime multi-turn, 3 repl compact)
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- multi-turn
notes: 'Dogfood finding: REPL called run_agent_loop fresh each turn, losing all context. Critical gap for a coding assistant.'
- id: PMAT-116
github_issue: null
item_type: task
title: 'apr code Phase 2a: model discovery — auto-detect local APR/GGUF'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T00:00:00Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- ModelConfig::discover_model() scans ~/.apr/models/, ~/.cache/huggingface/, ./models/
- APR files preferred over GGUF (stack native format)
- Sorted by mtime (newest first)
- resolve_model_path() falls back to discovery when no explicit path/repo
- Welcome banner shows discovered model name and format
- 4 new tests
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- model-discovery
notes: 'Dogfood finding: user had to manually specify --model every time. APR-preferred aligns with stack native format policy.'
- id: PMAT-117
github_issue: null
item_type: task
title: 'apr code Phase 2a: always-Sovereign fix + chat template detection'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T00:00:00Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- build_default_manifest() always returns Sovereign tier (spec §5.4)
- 'ChatTemplate enum: ChatMl, Llama3, Generic — auto-detected from model filename'
- RealizarDriver stores and uses detected template
- 6 new tests (template detection + format verification)
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- sovereignty
- chat-templates
notes: 'Dogfood finding: build_default_manifest(false) returned Standard tier, violating spec. Generic prompt template was not standard for any model family.'
- id: PMAT-118
github_issue: null
item_type: task
title: 'PMAT-115: apr code Phase 2a — multi-turn conversation history for REPL'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T06:54:22Z
updated: 2026-04-03T06:54:28Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-119
github_issue: null
item_type: task
title: 'PMAT-116: apr code Phase 2a — model discovery, auto-detect local APR/GGUF'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T06:54:22Z
updated: 2026-04-03T06:54:28Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-120
github_issue: null
item_type: task
title: 'PMAT-117: apr code Phase 2a — always-Sovereign fix + chat template detection'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T06:54:23Z
updated: 2026-04-03T06:54:28Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-121
github_issue: null
item_type: task
title: 'apr code Phase 2b: inject tool definitions into prompt for local models'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T00:00:00Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- build_enriched_system() appends tool definitions + JSON schemas to system prompt
- compact_schema() formats tool parameters for readability
- Tool call format (<tool_call> blocks) taught in prompt
- 4 new tests (injection, no-tools, schema compact, tool messages)
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- tool-use
notes: 'CRITICAL dogfood finding: chat_template.rs ignored request.tools entirely. Local models had NO way to know about available tools — the entire agentic system was non-functional.'
- id: PMAT-122
github_issue: null
item_type: task
title: 'apr code Phase 2b: expand system prompt with tool call format + APR preference'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T00:00:00Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- CODE_SYSTEM_PROMPT includes <tool_call> format examples
- APR format (.apr) preference documented in prompt
- 'Sovereign identity: model identifies as apr code'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- system-prompt
notes: Previous prompt was minimal (6 lines). Now includes tool call format, APR preference, and coding guidelines.
- id: PMAT-123
github_issue: null
item_type: task
title: 'apr code Phase 2b: session persistence (JSONL)'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T00:00:00Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- SessionStore writes to ~/.apr/sessions/{id}/manifest.json + messages.jsonl
- REPL persists messages after each turn via persist_messages()
- SessionStore::resume() reloads from disk
- SessionStore::find_recent_for_cwd() finds sessions for current directory
- 6 new tests with temp dir isolation
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- session-persistence
notes: 'Phase 2b: conversations now survive restarts. Each session gets manifest.json (metadata) + messages.jsonl (append-only log).'
- id: PMAT-124
github_issue: null
item_type: task
title: 'apr code Phase 2b: load APR.md/CLAUDE.md project instructions'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T00:00:00Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- load_project_instructions() discovers APR.md then CLAUDE.md in cwd
- Instructions merged into system prompt under Project Instructions header
- Truncated to 4KB to protect context window
- 2 new tests
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- project-instructions
notes: 'Spec §3.5: project-level instructions flow into agent behavior. APR.md preferred over CLAUDE.md.'
- id: PMAT-125
github_issue: null
item_type: task
title: 'PMAT-121: apr code Phase 2b — inject tool definitions into prompt for local models'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T07:33:37Z
updated: 2026-04-03T07:33:43Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-126
github_issue: null
item_type: task
title: 'PMAT-122: apr code Phase 2b — expand system prompt with tool call format + APR preference'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T07:33:37Z
updated: 2026-04-03T07:33:43Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-127
github_issue: null
item_type: task
title: 'PMAT-123: apr code Phase 2b — session persistence (JSONL)'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T07:33:38Z
updated: 2026-04-03T07:33:43Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-128
github_issue: null
item_type: task
title: 'PMAT-124: apr code Phase 2b — load APR.md/CLAUDE.md project instructions'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T07:33:38Z
updated: 2026-04-03T07:33:43Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-129
github_issue: null
item_type: task
title: 'apr code Phase 2c: --resume/--project CLI flags, /session slash commands'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T00:00:00Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- --resume flag wires SessionStore::find_recent_for_cwd()
- --project changes cwd for APR.md discovery
- /session and /sessions slash commands with list_recent_sessions()
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- cli
notes: Session persistence infrastructure was dead code. This wires it end-to-end.
- id: PMAT-130
github_issue: null
item_type: task
title: 'apr code Phase 2c: integration tests for session, tool injection, multi-turn'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T00:00:00Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- test_session_roundtrip, test_tool_definitions_in_prompt, test_multi_turn_session_integration
- Fixed 2 pre-existing tests broken by model discovery fallback
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- testing
notes: Previous tests only covered individual tools. Now validates full session lifecycle.
- id: PMAT-131
github_issue: null
item_type: task
title: 'PMAT-129: apr code Phase 2c — --resume/--project CLI flags, /session commands'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T07:59:29Z
updated: 2026-04-03T07:59:29Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-132
github_issue: null
item_type: task
title: 'PMAT-130: apr code Phase 2c — integration tests (session, tool injection, multi-turn)'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T07:59:29Z
updated: 2026-04-03T07:59:34Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-133
github_issue: null
item_type: task
title: 'apr code Phase 3a: auto-compaction at 80% context + token tracking'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T00:00:00Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- AUTO_COMPACT_THRESHOLD at 80% — auto_compact_if_needed() after each turn
- /context shows ~tokens / window_size (N%) with warning at 80%+
- ReplSession tracks context_window from driver
- 4 new tests (context_usage, auto_compact, slash commands)
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- context-management
notes: Spec §7.3 promised auto-compact at 80%. Now implemented. Long sessions no longer silently truncate.
- id: PMAT-134
github_issue: null
item_type: task
title: 'apr code Phase 3a: /test and /quality shortcut commands'
status: completed
priority: medium
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T00:00:00Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- /test runs cargo test --lib via shell subprocess
- /quality runs clippy + test via shell subprocess
- run_shell_shortcut() helper with exit code display
- Updated /help to show all 10 commands
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- ux
notes: Most common coding session actions now available as single slash commands.
- id: PMAT-135
github_issue: null
item_type: task
title: 'PMAT-133: apr code Phase 3a — auto-compaction at 80% context + token tracking'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T08:25:44Z
updated: 2026-04-03T08:25:50Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-136
github_issue: null
item_type: task
title: 'PMAT-134: apr code Phase 3a — /test and /quality shortcut commands'
status: completed
priority: medium
assigned_to: null
created: 2026-04-03T08:25:44Z
updated: 2026-04-03T08:25:50Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-137
github_issue: null
item_type: task
title: 'apr code Phase 3b: agents in default features + APR format awareness'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T00:00:00Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- agents feature added to Cargo.toml default — batuta code exists in standard binary
- Release binary 17MB (well under 50MB spec threshold)
- GGUF models show APR conversion tip in welcome banner
- No-model error mentions auto-discovery from ~/.apr/models/
- APR format labeled as native in welcome
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- binary
- apr-format
notes: 'CRITICAL dogfood finding: batuta code did not exist in the default binary. cargo install batuta gave a binary with no code subcommand.'
- id: PMAT-138
github_issue: null
item_type: task
title: 'PMAT-137: agents in default features — batuta code in standard binary'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T08:44:25Z
updated: 2026-04-03T08:44:30Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
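# NOTE(review): duplicate id — PMAT-139 is also the id of the
# 'Dogfood batuta-spec.md' item that follows, and the later title
# 'PMAT-139: Dogfood batuta-spec.md ...' refers to that item.
# One of the two entries needs a unique id.
- id: PMAT-139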
github_issue: null
item_type: task
title: 'PMAT-137: APR format awareness — GGUF conversion tip, native label'
status: completed
priority: medium
assigned_to: null
created: 2026-04-03T08:44:25Z
updated: 2026-04-03T08:44:30Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-139
github_issue: null
item_type: task
title: Dogfood batuta-spec.md — reconcile with implementation, bump to v2.4.0
status: completed
priority: high
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T00:00:00Z
spec: docs/specifications/batuta-spec.md
acceptance_criteria:
- Version 2.3.0→2.4.0, date updated to 2026-04-03
- Agent §3.6 split into Implemented vs Planned sections
- Feature flags table adds agents (default), rag (default), agents-inference, agents-rag
- 'Key Commands §7 updated: batuta code with --resume/--project, slash commands listed'
- 'Component spec table updated: apr-code description reflects Phases 1-3b DONE'
phases: []
subtasks: []
estimated_effort: null
labels:
- spec
- dogfood
notes: Top-level spec was frozen at 2.3.0 from before apr code implementation. Now reflects actual state.
- id: PMAT-140
github_issue: null
item_type: task
title: 'apr code: project context enrichment — git info, file stats, language at session start'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T09:17:28Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- gather_project_context() collects git branch, dirty files, language, build system
- Injected into system prompt under Project Context header
- Helps local model understand the codebase before first turn
- 2 new tests (content, manifest integration)
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- context
notes: Spec §6.2 promised project context capture at session start. Now implemented.
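# NOTE(review): duplicate id — PMAT-141 is also the id of the
# 'apr code Phase 3d' item further down, and the later title
# 'PMAT-141: inference in default features ...' refers to that item.
# One of the two entries needs a unique id.
- id: PMAT-141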
github_issue: null
item_type: task
title: 'PMAT-139: Dogfood batuta-spec.md — reconcile with impl, bump v2.4.0'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T09:17:28Z
updated: 2026-04-03T09:17:28Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-142
github_issue: null
item_type: task
title: 'PMAT-140: Project context enrichment — git, language, files at session start'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T09:17:28Z
updated: 2026-04-03T09:17:34Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-141
github_issue: null
item_type: task
title: 'apr code Phase 3d: inference in default features + context-aware prompt budget'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T00:00:00Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- inference feature in Cargo.toml default — RealizarDriver compiles in standard binary
- resolve_model_path() no longer calls discover_model() (moved to cmd_code only)
- instruction_budget() scales CLAUDE.md truncation to 25% of context window, max 4KB
- Models <4K context skip project instructions entirely
- UTF-8 safe truncation for multi-byte chars
- 4 new tests, 11,020 total pass
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- inference
- context-budget
notes: 'Real-model dogfood: batuta code --model silently fell back to MockDriver. System prompt consumed 67% of context for small models.'
- id: PMAT-143
github_issue: null
item_type: task
title: 'PMAT-141: inference in default features + context-aware prompt budget'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T09:42:23Z
updated: 2026-04-03T09:42:23Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-144
github_issue: null
item_type: task
title: 'apr code Phase 3e: CONTRACT apr_model_validity — APR tokenizer validation at load boundary'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T10:05:15Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- Contract equation apr_model_validity added to apr-code-v1.yaml
- 'FALSIFY-AC-008: APR without tokenizer rejected at load time'
- 'KANI-AC-005: Formal verification harness for model validity'
- validate_model_file() enforces Jidoka at RealizarDriver::new() boundary
- APR magic check, tokenizer scan, actionable apr convert error
- GGUF magic validation
- '5 new tests: apr_without_tokenizer, apr_with_tokenizer, gguf_valid, gguf_invalid, empty_file'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- contract
- jidoka
- apr-format
notes: 'CATASTROPHIC dogfood finding: APR model without embedded tokenizer crashed at inference with opaque error. Now caught at the load boundary with actionable fix instructions. Design by contract, not ad-hoc validation.'
- id: PMAT-145
github_issue: null
item_type: task
title: 'PMAT-144: CONTRACT apr_model_validity — APR tokenizer validation at load boundary (Jidoka)'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T10:05:15Z
updated: 2026-04-03T10:05:21Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-146
github_issue: null
item_type: task
title: 'apr code Phase 3f: output sanitization + model size warning'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T10:18:30Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- sanitize_output() strips echoed system prompt from small model responses
- Strips leaked chat template markers (im_start, im_end, eot_id, etc.)
- Welcome banner warns when context_window <= 2048 tokens
- 'Spec §5.2 updated: 3B+ minimum, 7B+ recommended for tool-use'
- 4 new tests for sanitization
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- output-quality
notes: 'Dogfood: TinyLlama 1.1B echoed the entire system prompt as its response. sanitize_output() detects and strips this. Model size warning added.'
- id: PMAT-147
github_issue: null
item_type: task
title: 'PMAT-146: Output sanitization — strip echoed system prompt + model size warning'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T10:18:30Z
updated: 2026-04-03T10:18:36Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-148
github_issue: null
item_type: task
title: 'Spec hygiene: fix stale refs, contradictions, duplicate phases in apr-code.md'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T00:00:00Z
updated: 2026-04-03T10:47:49Z
spec: docs/specifications/components/apr-code.md
acceptance_criteria:
- 'Fixed: --features agents stale → now default'
- 'Fixed: --offline described as feature → always sovereign, no flag'
- 'Fixed: comparison table outdated → reflects 7 tools + 10 commands + sessions + APR'
- 'Fixed: duplicate Phase 4 entries → renumbered 4-7'
- 'Fixed: Phase 3 "planned" labels → shell fallback documented'
- 'Fixed: slash commands table → split into Implemented (10) vs Planned (2)'
- Qwen2.5-Coder 1.5B as default model throughout spec
phases: []
subtasks: []
estimated_effort: null
labels:
- spec
- hygiene
notes: 12 stale/contradictory references fixed. Spec now accurately reflects implementation state after 12 phases of work.
- id: PMAT-149
github_issue: null
item_type: task
title: 'PMAT-148: Spec hygiene — fix 12 stale refs and contradictions in apr-code.md'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T10:47:49Z
updated: 2026-04-03T10:47:55Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-150
github_issue: null
item_type: task
title: 'apr code: model discovery validates APR tokenizer before selection'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T11:27:52Z
updated: 2026-04-03T11:43:30Z
spec: null
acceptance_criteria:
- 'discover_model() prefers .apr by extension alone. When APR lacks embedded tokenizer, user hits dead end. Fix: validate APR at discovery, fall through to GGUF if invalid.'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- dogfood
notes: null
- id: PMAT-151
github_issue: null
item_type: task
title: 'apr code: spec dogfood — fix stale refs, document tokenizer gap, exit codes'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T11:27:56Z
updated: 2026-04-03T11:43:30Z
spec: null
acceptance_criteria:
- 'Update apr-code.md with dogfooding findings: APR tokenizer requirement at discovery, GGUF fallback behavior, exit code 2/3/4 gaps, compact_history location fix.'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- dogfood
- spec
notes: null
- id: PMAT-152
github_issue: null
item_type: task
title: 'apr code: implement exit codes 2/3/4 for non-interactive mode'
status: completed
priority: medium
assigned_to: null
created: 2026-04-03T11:27:57Z
updated: 2026-04-03T11:43:30Z
spec: null
acceptance_criteria:
- Spec says exit 2=budget, 3=max turns, 4=sandbox violation. Only 0/1/5 implemented. Wire missing codes.
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- dogfood
notes: null
- id: PMAT-153
github_issue: null
item_type: task
title: 'apr code Phase 4a: wire RagTool into build_code_tools()'
status: completed
priority: medium
assigned_to: null
created: 2026-04-03T11:27:58Z
updated: 2026-04-03T11:43:30Z
spec: null
acceptance_criteria:
- RagTool exists at src/agent/tool/rag.rs but not registered in build_code_tools(). Wire with Rag capability for semantic code search.
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- phase4
notes: null
- id: PMAT-154
github_issue: null
item_type: task
title: 'P0: APR Q4K converter missing tokenizer embedding — save_model_tensors_q4k never calls insert_tokenizer_metadata'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T12:04:18Z
updated: 2026-04-03T12:29:49Z
spec: null
acceptance_criteria:
- 'save_model_tensors_q4k() in aprender infer_q4k_config.rs builds metadata without tokenizer. All other APR paths embed it. Causes ''Tokenizer encode failed'' in realizar. Fix: extract GGUF tokenizer and pass to insert_tokenizer_metadata().'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr
- p0
- aprender
notes: null
- id: PMAT-155
github_issue: null
item_type: task
title: 'Dogfood: re-convert qwen2.5-coder-1.5b to APR with tokenizer, verify batuta code works'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T12:04:24Z
updated: 2026-04-03T12:29:49Z
spec: null
acceptance_criteria:
- 'After PMAT-154 fix, re-convert the default model and verify end-to-end: batuta code -p with APR model produces coherent tool-using output.'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- dogfood
notes: null
- id: PMAT-156
github_issue: null
item_type: task
title: 'P0: APR Q4K passthrough produces garbage inference — tensor data misalignment between GGUF raw bytes and APR loader'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T12:35:43Z
updated: 2026-04-03T12:53:43Z
spec: null
acceptance_criteria:
- 'GGUF works fine, same model converted via Q4K passthrough to APR produces garbage. Tokenizer loads correctly (PMAT-154 fixed). Issue is tensor data layout: write_apr_file_raw copies raw GGUF Q4K bytes but APR loader may expect different alignment/naming.'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr
- p0
- realizar
- aprender
notes: null
- id: PMAT-157
github_issue: null
item_type: task
title: Publish realizar 0.8.4 with has_quantized_tensors_apr fix for APR Q4K inference
status: inprogress
priority: critical
assigned_to: null
created: 2026-04-03T13:45:48Z
updated: 2026-04-04T10:22:19.560067419+00:00
spec: null
acceptance_criteria:
- PMAT-156 fix committed locally but not published. batuta uses crates.io realizar which lacks the fix. APR Q4K inference broken without it.
phases: []
subtasks: []
estimated_effort: null
labels:
- realizar
- publish
- apr
notes: null
- id: PMAT-158
github_issue: null
item_type: task
title: 'apr code: lenient tool_call parser — accept unclosed tags at end of output'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T13:48:28Z
updated: 2026-04-03T14:10:00Z
spec: null
acceptance_criteria:
- 'Small models (1.5B) emit <tool_call> JSON without </tool_call>. Parser misses them, prints raw text. Fix: try parsing to end-of-string when closing tag missing.'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- dogfood
notes: null
- id: PMAT-159
github_issue: null
item_type: task
title: 'apr code: enable realizar/cuda feature for GGUF GPU inference'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T14:09:55Z
updated: 2026-04-05T07:34:53Z
spec: null
acceptance_criteria:
- Without cuda feature, GGUF inference is CPU-only (minutes per response). Need cuda feature or apr serve HTTP backend for interactive use.
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- performance
notes: null
- id: PMAT-160
github_issue: null
item_type: task
title: 'apr code: use apr serve as first-class inference backend (auto-launch + HTTP)'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T14:56:58Z
updated: 2026-04-03T15:04:17Z
spec: null
acceptance_criteria:
- Instead of embedded realizar (no cuda feature), auto-launch 'apr serve run <model> --port 0' as subprocess and connect via RemoteDriver/HTTP. apr-cli has full CUDA+GPU. Solves PMAT-157 (no publish needed) and PMAT-159 (GPU inference) in one shot.
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- architecture
notes: null
- id: PMAT-161
github_issue: null
item_type: task
title: 'apr code: compact system prompt + zombie cleanup + GPU flag for apr serve'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T16:03:31Z
updated: 2026-04-03T16:03:56Z
spec: null
acceptance_criteria:
- 'PMAT-161: Strip verbose tool schemas for HTTP path, cap max_tokens at 512, add --gpu flag, fix zombie processes (return exit code instead of process::exit), request timeout.'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- dogfood
notes: null
- id: PMAT-162
github_issue: null
item_type: task
title: 'apr code Phase 6: wire Code subcommand in apr-cli commands_enum.rs and dispatch'
status: completed
priority: critical
assigned_to: null
created: 2026-04-03T20:05:18Z
updated: 2026-04-03T20:23:54Z
spec: null
acceptance_criteria:
- Add Code variant to apr-cli commands_enum.rs (model, project, resume, prompt, print, max_turns, manifest). Dispatch to batuta::cli::code::cmd_code. Add batuta dep to apr-cli Cargo.toml behind code feature flag.
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- phase-6
- apr-cli
notes: null
- id: PMAT-163
github_issue: null
item_type: task
title: 'apr code: dedicated PmatQueryTool — structured pmat query output instead of shell fallback'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T20:05:21Z
updated: 2026-04-03T20:23:54Z
spec: null
acceptance_criteria:
- 'Replace shell: pmat query with a dedicated PmatQueryTool in build_code_tools(). Executes pmat query as subprocess, parses structured output (function name, file, line, grade, complexity). Returns structured results to agent instead of raw shell output.'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- phase-4
- tools
notes: null
- id: PMAT-164
github_issue: null
item_type: task
title: 'apr code: clean AprServeDriver — remove debug eprintln, conditional no_gpu for APR vs GGUF'
status: completed
priority: high
assigned_to: null
created: 2026-04-03T20:05:23Z
updated: 2026-04-03T20:23:54Z
spec: null
acceptance_criteria:
- 'Remove [PMAT-160] debug output. Make --gpu flag conditional: GGUF gets --gpu (needs CUDA), APR gets no --gpu flag (wgpu shader bug with -inf). Currently always passes --gpu.'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- cleanup
- dogfood
notes: null
- id: PMAT-165
github_issue: null
item_type: task
title: 'apr code: interactive auto-resume prompt for recent sessions (spec §6.3)'
status: completed
priority: high
assigned_to: null
created: 2026-04-04T06:29:28Z
updated: 2026-04-04T06:43:39Z
spec: null
acceptance_criteria:
- When no --resume flag given, check for recent session (<24h) for cwd. Show interactive Y/n prompt. Currently silent — UX contract violation.
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- ux
- dogfood
notes: null
- id: PMAT-166
github_issue: null
item_type: task
title: 'apr code: AprServeDriver graceful shutdown — SIGTERM before SIGKILL'
status: completed
priority: medium
assigned_to: null
created: 2026-04-04T06:29:29Z
updated: 2026-04-04T06:43:39Z
spec: null
acceptance_criteria:
- Drop impl sends SIGKILL immediately. Should SIGTERM → 2s timeout → SIGKILL for clean model unload.
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- robustness
- dogfood
notes: null
- id: PMAT-167
github_issue: null
item_type: task
title: 'apr code: unblock apr-cli build — align trueno version in entrenar or disable training-gpu'
status: completed
priority: critical
assigned_to: null
created: 2026-04-04T06:29:32Z
updated: 2026-04-04T06:43:39Z
spec: null
acceptance_criteria:
- 'apr-cli build fails due to trueno 0.16 vs 0.17 version mismatch between entrenar and aprender. Fix: update entrenar to trueno 0.17 or disable training-gpu default feature in apr-cli.'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- phase-6
- apr-cli
- blocker
notes: null
- id: PMAT-168
github_issue: null
item_type: task
title: 'apr code: optimize system prompt for small models — enumerate tools, add pmat_query example'
status: completed
priority: high
assigned_to: null
created: 2026-04-04T06:54:07Z
updated: 2026-04-04T07:01:48Z
spec: null
acceptance_criteria:
- System prompt says 'tools listed below' but never lists them. Add explicit tool names + pmat_query usage example for 1.5B-7B models.
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- prompt
- dogfood
notes: null
- id: PMAT-169
github_issue: null
item_type: task
title: 'apr code: fix /cost display for local inference — show tokens not dollars'
status: completed
priority: high
assigned_to: null
created: 2026-04-04T06:54:09Z
updated: 2026-04-04T07:01:48Z
spec: null
acceptance_criteria:
- /cost shows misleading dollar amounts for free local inference. Show 'free (local)' + token counts instead.
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- ux
- dogfood
notes: null
- id: PMAT-170
github_issue: null
item_type: task
title: 'apr code: raise AprServeDriver max_tokens from 512 to 1024 — tool calls truncated'
status: completed
priority: high
assigned_to: null
created: 2026-04-04T06:54:11Z
updated: 2026-04-04T07:01:48Z
spec: null
acceptance_criteria:
- 512 token cap on HTTP responses truncates long file edits and multi-tool responses. Raise to 1024 with comment explaining rationale.
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- inference
- dogfood
notes: null
- id: PMAT-171
github_issue: null
item_type: task
title: 'apr code: capture apr serve stderr on startup failure — actionable debug output'
status: completed
priority: medium
assigned_to: null
created: 2026-04-04T06:54:13Z
updated: 2026-04-04T07:01:48Z
spec: null
acceptance_criteria:
- When apr serve fails to start, user sees generic error. Capture subprocess stderr, log last lines, suggest manual debug command.
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- ux
- dogfood
notes: null
- id: PMAT-172
github_issue: null
item_type: task
title: 'apr code: -p mode exhausts 50 iterations without output — agent loop stuck'
status: completed
priority: critical
assigned_to: null
created: 2026-04-04T07:14:57Z
updated: 2026-04-04T07:23:01Z
spec: null
acceptance_criteria:
- 'batuta code -p ''What files are in src/agent/?'' hits MaxIterationsReached(50) with no output. Model likely stuck in tool loop or producing unparseable tool calls. Need: (1) lower -p max_iterations, (2) detect stuck loops, (3) output partial response on budget exhaustion.'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- dogfood
- inference
notes: null
- id: PMAT-173
github_issue: null
item_type: task
title: 'apr code: fix tool format mismatch — system prompt teaches <tool_call> but HTTP driver teaches raw JSON'
status: completed
priority: critical
assigned_to: null
created: 2026-04-04T09:04:00Z
updated: 2026-04-04T09:09:53Z
spec: null
acceptance_criteria:
- 'CODE_SYSTEM_PROMPT teaches <tool_call> blocks but AprServeDriver build_openai_body appends conflicting ''respond with JSON object'' instruction. Strip logic only looks for ''## Available Tools'' header. Fix: align format instructions, ensure HTTP path strips tool section correctly.'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- inference
- dogfood
notes: null
- id: PMAT-174
github_issue: null
item_type: task
title: 'apr code: offer_auto_resume consumes piped stdin — breaks -p with piped input'
status: completed
priority: high
assigned_to: null
created: 2026-04-04T09:04:02Z
updated: 2026-04-04T09:09:53Z
spec: null
acceptance_criteria:
- offer_auto_resume() calls stdin().read_line() which steals piped input. Skip auto-resume when stdin is not a TTY (atty check) or when -p flag is set.
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- ux
- dogfood
notes: null
- id: PMAT-175
github_issue: null
item_type: task
title: 'apr code: shell injection filter blocks pipes and chained commands'
status: completed
priority: high
assigned_to: null
created: 2026-04-04T09:04:03Z
updated: 2026-04-04T09:09:53Z
spec: null
acceptance_criteria:
- 'Shell tool blocks |, &&, backticks even with allowed_commands: [*]. Common coding patterns (cargo test | head, git diff && git log) fail. Relax filter for wildcard mode or use proper parsing.'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- tools
- dogfood
notes: null
- id: PMAT-176
github_issue: null
item_type: task
title: 'apr code: preserve CODE_SYSTEM_PROMPT tool table in AprServeDriver — only strip build_enriched_system section'
status: completed
priority: critical
assigned_to: null
created: 2026-04-04T09:15:30Z
updated: 2026-04-04T09:25:39Z
spec: null
acceptance_criteria:
- AprServeDriver strips '## Tools' from CODE_SYSTEM_PROMPT (the compact 9-tool table with examples designed for 1.5B models). Only the verbose '## Available Tools' from build_enriched_system() should be stripped. Dogfood shows model outputs 'Hello, World!' without tool context.
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- inference
- dogfood
notes: null
- id: PMAT-177
github_issue: null
item_type: task
title: 'apr code: single-turn retry when model ignores tools on first iteration'
status: completed
priority: high
assigned_to: null
created: 2026-04-04T09:15:32Z
updated: 2026-04-04T09:25:39Z
spec: null
acceptance_criteria:
- 'When model returns EndTurn with no tool calls on iteration 1, re-prompt with ''Use a tool to answer. Which tool helps for: {query}?'' Max 1 retry. Helps small models that need nudging.'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- inference
- dogfood
notes: null
- id: PMAT-178
github_issue: null
item_type: task
title: 'apr code: main.rs was replaced with ''Hello, World!'' stub — all previous -p dogfood was running stub'
status: completed
priority: critical
assigned_to: null
created: 2026-04-04T09:35:53Z
updated: 2026-04-04T09:36:19Z
spec: null
acceptance_criteria:
- src/main.rs had uncommitted local modification replacing the full CLI with println!("Hello, World!"). All dogfood runs since PMAT-172 were testing the stub, not the real agent. Restored from git. Previous 'Hello, World!' dogfood findings (PMAT-176/177) were misdiagnosed — the model wasn't even being loaded.
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- dogfood
- regression
notes: null
- id: PMAT-179
github_issue: null
item_type: task
title: 'apr code: switch default model from Qwen2.5-Coder 1.5B to Qwen3 1.7B — 0.960 tool-calling score'
status: completed
priority: critical
assigned_to: null
created: 2026-04-04T09:45:27Z
updated: 2026-04-04T09:50:53Z
spec: null
acceptance_criteria:
- Qwen2.5-Coder 1.5B cannot do tool use (dogfood confirmed). Qwen3 1.7B scores 0.960 on tool-calling benchmark, native <tool_call> format, 1.2GB Q4K. Update spec, model discovery preferences, system prompt examples, and recommended model table.
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- model
- dogfood
notes: null
- id: PMAT-180
github_issue: null
item_type: task
title: 'apr code: disable default --gpu for GGUF in AprServeDriver — Qwen3 GGUF produces garbage with GPU'
status: completed
priority: critical
assigned_to: null
created: 2026-04-04T09:58:03Z
updated: 2026-04-04T10:12:05Z
spec: null
acceptance_criteria:
- 'Qwen3-1.7B-Q4_K_M.gguf produces mojibake with --gpu via apr serve, works fine with --no-gpu. Current logic always passes --gpu for GGUF. Fix: don''t pass --gpu by default, let apr serve auto-detect. CPU inference works correctly.'
phases: []
subtasks: []
estimated_effort: null
labels:
- apr-code
- inference
- dogfood
notes: null
- id: PMAT-181
github_issue: null
item_type: task
title: 'realizar: apr serve needs enable_thinking=false for Qwen3 — model loops on </think> tokens'
status: completed
priority: critical
assigned_to: null
created: 2026-04-04T10:09:59Z
updated: 2026-04-05T08:50:44Z
spec: null
acceptance_criteria:
- 'Qwen3 1.7B GGUF through apr serve produces only </think> tokens. apr run --no-gpu --chat works fine (thinking completes then answers). apr serve applies chat template but doesn''t disable thinking mode. Need: (1) chat_template_kwargs support in /v1/chat/completions, or (2) auto-detect Qwen3 and suppress thinking tokens.'
phases: []
subtasks: []
estimated_effort: null
labels:
- realizar
- inference
- qwen3
- blocker
notes: null
- id: PMAT-182
github_issue: null
item_type: task
title: 'PMAT-182: Wire apr code into apr-cli — Code variant + dispatch + batuta dep'
status: completed
priority: critical
assigned_to: null
created: 2026-04-04T10:28:14Z
updated: 2026-04-05T06:47:23Z
spec: null
acceptance_criteria:
- 'apr-cli has no Code variant. Spec claims PMAT-162 done but reality: zero wiring. Need Code in commands_enum.rs, dispatch to batuta::agent::code::cmd_code(), batuta dep with code feature flag.'
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-183
github_issue: null
item_type: task
title: 'PMAT-183: Dogfood spec update — apr-cli wiring NOT done, model family warning, test gaps'
status: completed
priority: high
assigned_to: null
created: 2026-04-04T10:28:19Z
updated: 2026-04-04T14:40:26Z
spec: null
acceptance_criteria:
- 'Update apr-code.md: Phase 6 NOT done (apr-cli has no Code), add model family detection for broken Qwen2.5-Coder, add missing tests for discovery + entrypoint'
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-184
github_issue: null
item_type: task
title: 'PMAT-185: Model discovery mtime-first + Qwen3 tool-use confirmed'
status: completed
priority: high
assigned_to: null
created: 2026-04-04T11:15:08Z
updated: 2026-04-05T06:50:26Z
spec: null
acceptance_criteria:
- discover_model() sorts valid>mtime>APR. Qwen3 1.7B GGUF tool-use confirmed. AppState caches architecture for Qwen3NoThinkTemplate auto-selection.
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-185
github_issue: null
item_type: task
title: 'PMAT-186: Popperian falsification of apr-code.md and batuta-spec.md'
status: completed
priority: high
assigned_to: null
created: 2026-04-04T11:25:08Z
updated: 2026-04-05T06:02:35Z
spec: null
acceptance_criteria:
- '10 findings: single-binary claim contradicted, APR.md 4-level discovery is 2-level, permission model 6→4 active, stale default model, feature flags 8→29, welcome banner mismatch, duplicate resume field, config not implemented, tool counts stale, pipeline.rs path wrong'
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-186
github_issue: null
item_type: task
title: 'PMAT-187: chat-template-v1 provable contract — 6 equations, 10 FALSIFY tests, 1 bug found'
status: completed
priority: critical
assigned_to: null
created: 2026-04-04T11:48:50Z
updated: 2026-04-04T14:40:20Z
spec: null
acceptance_criteria:
- Contract for chat template correctness. Found Qwen3NoThinkTemplate::format() returning wrong TemplateFormat. 10 enforcement tests across realizar + batuta.
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-187
github_issue: null
item_type: task
title: 'PMAT-188: provable-contracts for all apr-cli components — dispatch, serve, discovery'
status: completed
priority: critical
assigned_to: null
created: 2026-04-04T12:16:22Z
updated: 2026-04-04T14:40:26Z
spec: null
acceptance_criteria:
- Updated cli-dispatch-v1 (code subcommand + feature gate), apr-serve-v1 (chat template dispatch + format detection), NEW apr-model-discovery-v1 (search order, mtime-first, Jidoka, architecture extraction, no-model UX). 25+ falsification tests across 6 contracts.
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-188
github_issue: null
item_type: task
title: 'PMAT-189: contract enforcement tests for http-api, session, tokenizer'
status: completed
priority: high
assigned_to: null
created: 2026-04-04T12:40:41Z
updated: 2026-04-04T14:40:26Z
spec: null
acceptance_criteria:
- '9 new enforcement tests: 5 FALSIFY-HTTP-001 (OpenAI body schema, max_tokens cap, tool format), 4 FALSIFY-SESSION (JSONL roundtrip, resume, manifest serde, 24h filter), 5 tokenizer F-TOK (deterministic encode, empty input, vocab size, thread safety). Total: 46 passing in batuta + 19 in realizar.'
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-189
github_issue: null
item_type: task
title: 'PMAT-190: -p mode blank output with thinking models + JSON false alarm'
status: completed
priority: high
assigned_to: null
created: 2026-04-04T14:11:05Z
updated: 2026-04-04T14:40:26Z
spec: null
acceptance_criteria:
- 'batuta code -p produces empty stdout when Qwen3 responds with only thinking tokens. Added diagnostic message. JSON parse error was bash test script bug (echo unescaping), not realizar. Root fix: publish realizar with Qwen3NoThinkTemplate (PMAT-181).'
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-190
github_issue: null
item_type: task
title: 'apr-cli contract enforcement: close 9 FALSIFY gaps across chat/serve/tokenizer/data'
status: completed
priority: high
assigned_to: null
created: 2026-04-04T20:11:10Z
updated: 2026-04-04T20:24:33Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-191
github_issue: null
item_type: task
title: 'apr-cli: tokenizer-loading-v1 FALSIFY enforcement tests (7 tests defined, 0 wired)'
status: completed
priority: high
assigned_to: null
created: 2026-04-04T20:24:40Z
updated: 2026-04-05T06:02:35Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-192
github_issue: null
item_type: task
title: 'apr-cli: apr-data-pipeline-v1 FALSIFY enforcement tests (5 tests defined, 0 wired)'
status: completed
priority: medium
assigned_to: null
created: 2026-04-04T20:24:41Z
updated: 2026-04-05T06:02:35Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-193
github_issue: null
item_type: task
title: 'apr-cli: finetune/prune/distill provable-contracts (model-ops coverage gap)'
status: completed
priority: medium
assigned_to: null
created: 2026-04-04T20:24:42Z
updated: 2026-04-05T07:03:04Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-194
github_issue: null
item_type: task
title: cgp roofline profiling for realizar inference kernels (Q4K/Q6K matvec, attention, softmax)
status: completed
priority: high
assigned_to: null
created: 2026-04-05T06:20:06Z
updated: 2026-04-05T09:09:44Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-195
github_issue: null
item_type: task
title: probar LLM load testing for apr serve — TTFT/TPOT/P99 SLO baselines at concurrency 1/4/8
status: completed
priority: high
assigned_to: null
created: 2026-04-05T06:20:07Z
updated: 2026-04-05T08:58:05Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-196
github_issue: null
item_type: task
title: Wire apr serve loadtest + bench subcommands into apr-cli (probar llm)
status: completed
priority: high
assigned_to: null
created: 2026-04-05T06:24:43Z
updated: 2026-04-05T06:43:27Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-197
github_issue: null
item_type: task
title: 'batuta code -p: compact prompt + 32K context window (fixes thinking loops)'
status: completed
priority: critical
assigned_to: null
created: 2026-04-05T07:49:46Z
updated: 2026-04-05T07:49:51Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-198
github_issue: null
item_type: task
title: Prompt scaling by model size — auto-detect params from filename (Refs PMAT-198)
status: completed
priority: high
assigned_to: null
created: 2026-04-05T08:09:51Z
updated: 2026-04-05T08:09:51Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null
- id: PMAT-199
github_issue: null
item_type: task
title: model-format-conversion-v1 + apr-model-lifecycle-v1 FALSIFY enforcement (17 tests)
status: completed
priority: high
assigned_to: null
created: 2026-04-05T08:17:51Z
updated: 2026-04-05T08:17:51Z
spec: null
acceptance_criteria: []
phases: []
subtasks: []
estimated_effort: null
labels: []
notes: null