aprender-orchestrate 0.31.2

Sovereign AI orchestration: autonomous agents, ML serving, code analysis, and transpilation pipelines
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
# Agent Loop Design-by-Contract Specification v1
#
# Defines formal invariants for the perceive-reason-act agent loop.
# Used by provable-contracts for compile-time binding audit and
# Kani verification harness generation.
#
# Ref: docs/specifications/batuta-agent.md Section 13.2
# arXiv: 2512.10350 (Tacheny — agentic loop dynamics)
# arXiv: 2503.14470 (Wang — formal verification of LLM agents)

metadata:
  version: "1.0.0"
  created: "2026-04-05"
  description: "Agent loop design-by-contract invariants"
  kind: pattern
  references:
    - "docs/specifications/batuta-agent.md"
    - "arXiv: 2512.10350"

contract:
  name: agent-loop-v1
  version: "1.0.0"
  module: batuta::agent
  description: >
    Formal invariants for the sovereign agent perceive-reason-act loop.
    Guarantees termination, capability enforcement, guard monotonicity,
    and ping-pong detection.

# ============================================================================
# INVARIANT 1: Loop Termination Guarantee (Jidoka)
# ============================================================================
# The agent loop MUST terminate within max_iterations.
# No infinite loops are possible when the guard is properly wired.

invariants:
  - id: INV-001
    name: loop-terminates
    description: >
      The agent loop terminates within max_iterations + 1 LLM calls.
      After max_iterations, the guard returns CircuitBreak, and the
      loop exits with AgentError::CircuitBreak.
    preconditions:
      - guard.max_iterations > 0
      - guard.current_iteration == 0
    postconditions:
      - guard.current_iteration <= guard.max_iterations + 1
      - result.is_ok() || matches!(result.unwrap_err(), AgentError::CircuitBreak(_))
    equation: |
      ∀ n ∈ ℕ, n > max_iterations ⟹ check_iteration(n) = CircuitBreak
    module_path: batuta::agent::guard::LoopGuard::check_iteration
    test_binding: agent::guard::tests::test_iteration_limit

  # ==========================================================================
  # INVARIANT 2: Guard Monotonicity
  # ==========================================================================
  # The iteration counter strictly increases on each check_iteration() call.

  - id: INV-002
    name: guard-monotonic
    description: >
      current_iteration strictly increases by 1 on each check_iteration() call.
      The counter never decreases or stays the same.
    preconditions:
      - guard.current_iteration == n
    postconditions:
      - guard.current_iteration == n + 1
    equation: |
      check_iteration(guard) ⟹ guard.current_iteration' = guard.current_iteration + 1
    module_path: batuta::agent::guard::LoopGuard::check_iteration
    test_binding: agent::guard::tests::test_counters

  # ==========================================================================
  # INVARIANT 3: Capability Enforcement (Poka-Yoke)
  # ==========================================================================
  # A tool MUST NOT execute if the manifest does not grant its capability.

  - id: INV-003
    name: capability-poka-yoke
    description: >
      Tool execution is blocked when the agent manifest does not include
      the tool's required_capability in its capabilities list.
      The loop pushes an error ToolResult instead of executing.
    preconditions:
      - tool.required_capability() ∉ manifest.capabilities
    postconditions:
      - tool.execute() is NOT called
      - error message contains "capability"
    equation: |
      ¬capability_matches(manifest.capabilities, tool.required_capability())
        ⟹ tool_result.is_error = true
    module_path: batuta::agent::runtime::handle_tool_calls
    test_binding: agent::runtime::tests::test_capability_denied_handled

  # ==========================================================================
  # INVARIANT 4: Ping-Pong Detection (Tacheny Oscillatory Regime)
  # ==========================================================================
  # After PINGPONG_THRESHOLD (3) identical tool calls, the guard blocks.

  - id: INV-004
    name: pingpong-halting
    description: >
      When the same (tool_name, input) pair is observed PINGPONG_THRESHOLD
      times, the guard returns Block to prevent oscillatory loops.
      Uses FxHash for efficient collision-resistant detection.
    preconditions:
      - tool_call_counts[hash(tool_name, input)] >= PINGPONG_THRESHOLD
    postconditions:
      - check_tool_call() returns Block(msg)
    equation: |
      count(hash(name, input)) ≥ 3 ⟹ check_tool_call(name, input) = Block
    module_path: batuta::agent::guard::LoopGuard::check_tool_call
    test_binding: agent::guard::tests::test_pingpong_detection

  # ==========================================================================
  # INVARIANT 5: Cost Budget Enforcement (Muda Elimination)
  # ==========================================================================
  # Accumulated cost exceeding budget triggers CircuitBreak.

  - id: INV-005
    name: cost-budget
    description: >
      When accumulated_cost_usd exceeds max_cost_usd (and max_cost_usd > 0),
      the guard returns CircuitBreak to prevent runaway spend.
    preconditions:
      - max_cost_usd > 0.0
      - accumulated_cost_usd + new_cost > max_cost_usd
    postconditions:
      - record_cost() returns CircuitBreak(msg)
    equation: |
      max_cost > 0 ∧ Σcost > max_cost ⟹ record_cost(c) = CircuitBreak
    module_path: batuta::agent::guard::LoopGuard::record_cost
    test_binding: agent::guard::tests::test_cost_budget

  # ==========================================================================
  # INVARIANT 6: MaxTokens Truncation Recovery
  # ==========================================================================
  # Consecutive MaxTokens responses are tracked; circuit-break at threshold.

  - id: INV-006
    name: truncation-circuit-break
    description: >
      After MAX_CONSECUTIVE_TRUNCATION (5) consecutive MaxTokens stop
      reasons without any EndTurn or ToolUse, the guard circuit-breaks.
      Any non-MaxTokens response resets the counter.
    preconditions:
      - consecutive_max_tokens >= MAX_CONSECUTIVE_TRUNCATION
    postconditions:
      - record_max_tokens() returns CircuitBreak(msg)
    equation: |
      consecutive_max_tokens ≥ 5 ⟹ record_max_tokens() = CircuitBreak
    module_path: batuta::agent::guard::LoopGuard::record_max_tokens
    test_binding: agent::guard::tests::test_consecutive_max_tokens

  # ==========================================================================
  # INVARIANT 7: Memory Store Guarantee
  # ==========================================================================
  # After a successful agent loop, the conversation is stored in memory.

  - id: INV-007
    name: memory-store
    description: >
      On successful loop completion (EndTurn), the query and response
      are stored in the memory substrate for future recall.
    preconditions:
      - result.is_ok()
    postconditions:
      - memory.recall(query) returns non-empty results
    equation: |
      run_agent_loop(q) = Ok(r) ⟹ memory.recall(q) ≠ ∅
    module_path: batuta::agent::runtime::run_agent_loop
    test_binding: agent::runtime::tests::test_conversation_stored_in_memory

  # ==========================================================================
  # INVARIANT 8: Pool Capacity Enforcement (Muda)
  # ==========================================================================
  # AgentPool MUST NOT spawn beyond max_concurrent.

  - id: INV-008
    name: pool-capacity
    description: >
      When the number of active agents equals max_concurrent, spawn()
      returns CircuitBreak error. No unbounded resource consumption.
    preconditions:
      - pool.active_count() >= pool.max_concurrent()
    postconditions:
      - pool.spawn(config).is_err()
      - matches!(err, AgentError::CircuitBreak(_))
    equation: |
      active_count ≥ max_concurrent ⟹ spawn() = Err(CircuitBreak)
    module_path: batuta::agent::pool::AgentPool::spawn
    test_binding: agent::pool::tests::test_pool_capacity_limit

  # ==========================================================================
  # INVARIANT 9: Fan-Out Count Preservation
  # ==========================================================================
  # fan_out(N configs) spawns exactly N agents.

  - id: INV-009
    name: fanout-count
    description: >
      fan_out() with N configs spawns exactly N agents and returns
      N AgentIds, provided pool has sufficient capacity.
    preconditions:
      - pool.active_count() + configs.len() <= pool.max_concurrent()
    postconditions:
      - ids.len() == configs.len()
      - pool.active_count() == initial_count + configs.len()
    equation: |
      |configs| ≤ capacity_remaining ⟹ |fan_out(configs)| = |configs|
    module_path: batuta::agent::pool::AgentPool::fan_out
    test_binding: agent::pool::tests::test_pool_fan_out_fan_in

  # ==========================================================================
  # INVARIANT 10: Fan-In Completeness
  # ==========================================================================
  # join_all() collects exactly one result per spawned agent.

  - id: INV-010
    name: fanin-complete
    description: >
      join_all() returns one Result per spawned agent. No results
      are lost. The returned HashMap has as many entries as were
      spawned (barring panics).
    preconditions:
      - pool.active_count() == N
    postconditions:
      - results.len() == N (if no panics)
    equation: |
      join_all() → HashMap with |results| = |spawned_agents|
    module_path: batuta::agent::pool::AgentPool::join_all
    test_binding: agent::pool::tests::test_pool_join_all

  # ==========================================================================
  # INVARIANT 11: Tool Output Sanitization (Poka-Yoke)
  # ==========================================================================
  # All tool results are sanitized before entering conversation history.

  - id: INV-011
    name: output-sanitization
    description: >
      Known prompt injection patterns are stripped from tool results
      before they enter the conversation history. Case-insensitive
      matching for ChatML, LLaMA, and instruction-override markers.
    preconditions:
      - tool_result.content contains INJECTION_MARKERS pattern
    postconditions:
      - sanitized.content does NOT contain the pattern
      - sanitized.content contains "[SANITIZED]"
    equation: |
      content ∋ marker ⟹ sanitized(content) ∌ marker ∧ sanitized(content) ∋ "[SANITIZED]"
    module_path: batuta::agent::tool::sanitize_output
    test_binding: agent::tool::tests::test_sanitize_output_system_injection

  # ==========================================================================
  # INVARIANT 12: Spawn Depth Bound (Jidoka)
  # ==========================================================================
  # SpawnTool MUST NOT spawn beyond max_depth.

  - id: INV-012
    name: spawn-depth-bound
    description: >
      When current_depth >= max_depth, SpawnTool.execute() returns an
      error result without spawning a child agent. Prevents unbounded
      recursion in multi-agent hierarchies.
    preconditions:
      - spawn_tool.current_depth >= spawn_tool.max_depth
    postconditions:
      - result.is_error == true
      - result.content contains "depth limit"
    equation: |
      current_depth ≥ max_depth ⟹ execute() = ToolResult::error("depth limit")
    module_path: batuta::agent::tool::spawn::SpawnTool::execute
    test_binding: agent::tool::spawn::tests::test_spawn_tool_depth_limit

  # ==========================================================================
  # INVARIANT 13: Network Host Allowlist (Poka-Yoke)
  # ==========================================================================
  # NetworkTool MUST reject requests to non-allowed hosts.

  - id: INV-013
    name: network-host-allowlist
    description: >
      When the target host is not in allowed_hosts (and no wildcard "*"),
      NetworkTool.execute() returns an error. No data exfiltration to
      unauthorized endpoints is possible.
    preconditions:
      - "host(url) ∉ allowed_hosts"
      - "'*' ∉ allowed_hosts"
    postconditions:
      - result.is_error == true
      - "result.content contains 'not in allowed_hosts'"
    equation: |
      host ∉ allowed ∧ '*' ∉ allowed ⟹ execute(url) = ToolResult::error
    module_path: batuta::agent::tool::network::NetworkTool::execute
    test_binding: agent::tool::network::tests::test_blocked_host

  # ==========================================================================
  # INVARIANT 14: Inference Timeout Bound
  # ==========================================================================
  # InferenceTool has a 300s timeout, longer than default 120s.

  - id: INV-014
    name: inference-timeout
    description: >
      InferenceTool declares a 300-second timeout to allow for complex
      chain-of-thought reasoning. The tool MUST complete or timeout
      within this bound.
    preconditions:
      - tool.name() == "inference"
    postconditions:
      - tool.timeout() == Duration::from_secs(300)
    equation: |
      name = "inference" ⟹ timeout = 300s
    module_path: batuta::agent::tool::inference::InferenceTool::timeout
    test_binding: agent::tool::inference::tests::test_inference_tool_timeout

  # ==========================================================================
  # INVARIANT 15: Sovereign Privacy Blocks Network (Poka-Yoke)
  # ==========================================================================
  # Sovereign tier MUST NOT permit network egress tools.

  - id: INV-015
    name: sovereign-blocks-network
    description: >
      When the agent manifest has privacy=Sovereign, NetworkTool execute()
      is blocked by the runtime even if Capability::Network is granted.
      Defense-in-depth: capability granting does not override privacy tier.
    preconditions:
      - manifest.privacy == PrivacyTier::Sovereign
      - manifest.capabilities contains Capability::Network
    postconditions:
      - tool_result.is_error == true
      - tool_result.content contains "sovereign"
    equation: |
      privacy = Sovereign ∧ tool.capability = Network ⟹ tool_denied
    module_path: batuta::agent::runtime::handle_tool_calls
    test_binding: agent::runtime::tests_advanced::test_sovereign_privacy_blocks_network

  # ==========================================================================
  # INVARIANT 16: Token Budget Enforcement
  # ==========================================================================
  # When max_tokens_budget is set, cumulative usage must not exceed it.

  - id: INV-016
    name: token-budget-enforcement
    description: >
      When ResourceQuota.max_tokens_budget is Some(budget), the LoopGuard
      MUST circuit-break once cumulative input+output tokens exceed budget.
      Warn at 80% threshold. None = unlimited (sovereign local inference).
    preconditions:
      - guard.max_tokens_budget.is_some()
    postconditions:
      - total_tokens > budget ⟹ CircuitBreak
      - total_tokens >= 0.8*budget ⟹ Warn
      - total_tokens < 0.8*budget ⟹ Allow
    equation: |
      budget > 0 ∧ (input + output) > budget ⟹ CircuitBreak
    module_path: batuta::agent::guard::LoopGuard::check_token_budget
    test_binding: agent::guard::tests::test_token_budget_exhausted

# ============================================================================
# VERIFICATION TARGETS
# ============================================================================

verification:
  unit_tests:
    - agent::guard::tests::test_iteration_limit
    - agent::guard::tests::test_warn_at_80_percent
    - agent::guard::tests::test_tool_call_limit
    - agent::guard::tests::test_pingpong_detection
    - agent::guard::tests::test_different_inputs_no_pingpong
    - agent::guard::tests::test_consecutive_max_tokens
    - agent::guard::tests::test_max_tokens_reset
    - agent::guard::tests::test_cost_budget
    - agent::guard::tests::test_zero_cost_budget_unlimited
    - agent::guard::tests::test_usage_tracking
    - agent::guard::tests::test_fx_hash_deterministic
    - agent::guard::tests::test_fx_hash_different_tools
    - agent::guard::tests::test_counters
    - agent::guard::tests::test_token_budget_unlimited
    - agent::guard::tests::test_token_budget_allow
    - agent::guard::tests::test_token_budget_warn
    - agent::guard::tests::test_token_budget_exhausted
    - agent::runtime::tests::test_single_turn_response
    - agent::runtime::tests::test_tool_call_and_response
    - agent::runtime::tests::test_max_iterations_reached
    - agent::runtime::tests::test_unknown_tool_handled
    - agent::runtime::tests::test_capability_denied_handled
    - agent::runtime::tests::test_stream_events_emitted
    - agent::runtime::tests::test_memories_recalled_into_system
    - agent::runtime::tests::test_conversation_stored_in_memory
    - agent::pool::tests::test_pool_capacity_limit
    - agent::pool::tests::test_pool_fan_out_fan_in
    - agent::pool::tests::test_pool_join_all
    - agent::tool::tests::test_sanitize_output_system_injection
    - agent::tool::tests::test_sanitize_output_case_insensitive
    - agent::tool::tests::test_sanitize_preserves_non_injection
    - agent::tool::spawn::tests::test_spawn_tool_depth_limit
    - agent::tool::network::tests::test_blocked_host
    - agent::tool::inference::tests::test_inference_tool_timeout
    - agent::runtime::tests_advanced::test_sovereign_privacy_blocks_network

  coverage_target: 95
  mutation_target: 80
  complexity_max_cyclomatic: 30
  complexity_max_cognitive: 25