# reasonkit-core 0.1.8
#
# The Reasoning Engine — Auditable Reasoning for Production AI | Rust-Native | Turn Prompts into Protocols

# ═══════════════════════════════════════════════════════════════════════════════
#                    PROOFGUARD DEEP RESEARCH PROTOCOL
#                    Enhanced Verification & Synthesis Standard
# ═══════════════════════════════════════════════════════════════════════════════
#
# PURPOSE: Standardize deep research workflows with rigorous verification,
#          structured synthesis, and explicit confidence intervals.
#
# VALIDATED: This protocol was derived from successful application on
#            "BGE-M3 & RAPTOR Hierarchical Chunking" research (2025-12-11)
#            which produced high-quality, actionable outputs.
#
# LICENSE: Apache 2.0 (Open Source)
#
# ═══════════════════════════════════════════════════════════════════════════════

# Document identity: protocol version, schema identifier, dates, and license.
# The version and both dates stay quoted so they load as strings rather than
# being retyped by the parser.
version: '1.0.0'
schema: 'reasonkit-proofguard-deep-research-v1'
created: '2025-12-11'
last_updated: '2025-12-11'
license: 'Apache-2.0'

# ─────────────────────────────────────────────────────────────────────────────
# PROTOCOL METADATA
# ─────────────────────────────────────────────────────────────────────────────

# Identity of the protocol plus the conditions under which it is invoked.
metadata:
  id: 'PROT-PG-DEEP-001'
  name: 'ProofGuard Deep Research Protocol'
  shortcode: 'pg-deep'
  thinktool: 'ProofGuard'
  priority: 1
  enforcement: 'on_demand'

  triggers:
    # Phrases that ask for the protocol outright.
    explicit:
      - 'deep research'
      - 'ProofGuard analysis'
      - 'verify claims'
      - 'triangulate sources'
    # Situations described in prose; how they are detected is not defined here.
    implicit:
      - 'uncertainty > 0.5 on technical claim'
      - 'high-stakes decision pending'
      - 'multiple conflicting sources'

  # Literal block scalar: line breaks in the text below are part of the value.
  description: |
    A rigorous research protocol that combines multi-source triangulation,
    structured synthesis, and explicit confidence intervals to produce
    high-quality, auditable technical assessments.

# ─────────────────────────────────────────────────────────────────────────────
# PHASE 1: CONTEXT LOADING
# ─────────────────────────────────────────────────────────────────────────────

# Phase 1 — collect what is already known before any new searching starts.
phase_1_context_loading:
  name: 'Context Loading'
  objective: 'Establish baseline understanding before research'
  duration_estimate: '2-5 minutes'

  # Ordered steps; ids are quoted so "1.1" stays a string, not a float.
  steps:
    # Optional step: the fallback allows skipping it when the tool is missing.
    - id: '1.1'
      action: 'Search episodic memory for prior work on topic'
      tool: 'mcp__plugin_episodic-memory_episodic-memory__search'
      fallback: 'Proceed without if unavailable'

    - id: '1.2'
      action: 'Read relevant project documentation'
      tools:
        - 'Read'
        - 'Glob'
      targets:
        - 'CLAUDE.md / ORCHESTRATOR.md'
        - 'ARCHITECTURE.md'
        - 'Existing implementation files'

    - id: '1.3'
      action: 'Identify existing decisions and constraints'
      output: 'Context summary with known constraints'

  # Checkbox text is quoted because a leading "[" would otherwise open a
  # flow sequence.
  checklist:
    - '[ ] Prior conversations searched'
    - '[ ] Project docs reviewed'
    - '[ ] Existing code examined'
    - '[ ] Constraints identified'

# ─────────────────────────────────────────────────────────────────────────────
# PHASE 2: PARALLEL SEARCH INITIATION
# ─────────────────────────────────────────────────────────────────────────────

# Phase 2 — run several differently-worded searches at once and spread them
# across the three source tiers defined below.
phase_2_parallel_search:
  name: 'Parallel Search Initiation'
  objective: 'Cast wide net across multiple source types'
  duration_estimate: '3-8 minutes'

  search_strategy:
    minimum_queries: 3
    parallel_execution: true
    query_diversity_required: true

  # Query skeletons with {placeholder} slots. They must stay quoted: a value
  # starting with "{" would otherwise be read as a flow mapping.
  query_templates:
    technical_specs: '{topic} technical specifications benchmarks {year}'
    academic_papers: '{topic} research paper arxiv {year}'
    comparisons: '{topic_a} vs {topic_b} benchmark comparison {year}'
    implementations: '{topic} implementation {language} integration'

  # Source categories, each with a numeric weight (1.0 down to 0.6).
  source_tiers:
    tier_1_authoritative:
      description: 'Official docs, GitHub repos, peer-reviewed papers'
      examples:
        - 'HuggingFace model cards'
        - 'arXiv papers'
        - 'Official documentation'
        - 'Nature/Science publications'
      weight: 1.0

    tier_2_secondary:
      description: 'Reputable tech blogs, industry analysis'
      examples:
        - 'NVIDIA technical docs'
        - 'Major framework docs (LangChain, LlamaIndex)'
        - 'Research lab blogs'
      weight: 0.8

    tier_3_independent:
      description: 'Independent validation, community implementations'
      examples:
        - 'Academic course projects'
        - 'Independent benchmarks'
        - 'Community tutorials'
      weight: 0.6

  checklist:
    - '[ ] Minimum 3 parallel searches executed'
    - '[ ] Multiple source tiers targeted'
    - '[ ] Current year included in queries'

# ─────────────────────────────────────────────────────────────────────────────
# PHASE 3: SOURCE VERIFICATION (PROOFGUARD CORE)
# ─────────────────────────────────────────────────────────────────────────────

# Phase 3 — the ProofGuard core: each major claim is cross-checked against
# three sources and labelled with one of four verification levels.
phase_3_source_verification:
  name: "Source Verification (ProofGuard Core)"
  objective: "Triangulate every major claim with 3+ sources"
  duration_estimate: "5-15 minutes"

  # The three-source rule. Sources A/B/C each fill a distinct role so that
  # agreement is not just one source repeated three times.
  triangulation_rule:
    mandatory: true
    minimum_sources: 3
    requirement: |
      EVERY factual claim MUST be verified by:
      - Source A (Primary): Authoritative/official source
      - Source B (Secondary): Different domain, same conclusion
      - Source C (Independent): Different author, same timeframe

  # Status labels for a triangulated claim. Every level carries its own
  # non-empty symbol so the four statuses remain distinguishable when only
  # the symbol is shown. Keys and percentage strings are quoted on purpose.
  verification_levels:
    "VERIFIED":
      # Same mark as the "VERIFIED ✓" cell in example_output_structure.
      symbol: "✓"
      criteria: "3+ sources agree, all URLs accessed"
      confidence_boost: "+15%"
    "LIKELY":
      symbol: "~"
      criteria: "2 sources agree, 1 indirect"
      confidence_boost: "+5%"
    "UNVERIFIED":
      symbol: "?"
      # NOTE(review): "conflicting info" overlaps with CONTRADICTED below
      # ("Sources disagree"); confirm which level applies in that case.
      criteria: "Single source or conflicting info"
      confidence_boost: "0%"
      action: "MUST disclose in output"
    "CONTRADICTED":
      # Counterpart of the VERIFIED check mark.
      symbol: "✗"
      criteria: "Sources disagree"
      confidence_boost: "-20%"
      action: "MUST investigate further"

  # Markdown table skeleton; the literal block keeps the rows line-for-line.
  output_format:
    triangulation_table:
      required: true
      template: |
        | Finding | Source A (Primary) | Source B (Secondary) | Source C (Independent) | Consensus |
        |---------|-------------------|---------------------|----------------------|-----------|
        | [Claim] | [URL/Citation]    | [URL/Citation]      | [URL/Citation]       | [Status]  |

  # Behaviours that invalidate a verification claim.
  anti_patterns:
    - "Claiming verification without 3 sources"
    - "Using same domain for all 3 sources"
    - "Not actually accessing URLs"
    - "Ignoring contradicting evidence"

  checklist:
    - "[ ] Each major claim has 3+ sources"
    - "[ ] Sources are from different domains/authors"
    - "[ ] All URLs were actually accessed"
    - "[ ] Contradictions investigated"
    - "[ ] Triangulation table created"

# ─────────────────────────────────────────────────────────────────────────────
# PHASE 4: STRUCTURED SYNTHESIS
# ─────────────────────────────────────────────────────────────────────────────

# Phase 4 — turn verified findings into tables, a diagram, and a roadmap,
# each recommendation carrying an explicit confidence figure.
phase_4_structured_synthesis:
  name: 'Structured Synthesis'
  objective: 'Transform verified data into structured, actionable output'
  duration_estimate: '5-10 minutes'

  # Output building blocks. "required" is either the boolean true or a
  # free-text condition string, exactly as written for each element.
  required_output_elements:
    technical_specifications_table:
      required: true
      purpose: 'Consolidate verified specs'
      template: |
        | Property | Value | Source |
        |----------|-------|--------|
        | [Spec]   | [Val] | [Cite] |

    benchmark_comparison_table:
      required: 'when benchmarks exist'
      purpose: 'Quantitative performance comparison'
      template: |
        | Benchmark | Subject Score | Baseline | Improvement |
        |-----------|---------------|----------|-------------|
        | [Name]    | [Score]       | [Base]   | [Delta]     |

    fit_analysis_matrix:
      required: 'when evaluating adoption'
      purpose: 'Structured decision support'
      template: |
        | Criterion | Score | Rationale |
        |-----------|-------|-----------|
        | [Factor]  | [✅/⚠️/❌] | [Why] |

    architecture_diagram:
      required: 'when systems involved'
      purpose: 'Visual spatial reasoning'
      format: 'ASCII art within code blocks'

    implementation_roadmap:
      required: 'when action items exist'
      purpose: 'Actionable next steps'
      # The blank line between the two priority groups is part of the value.
      template: |
        PRIORITY 1 (Immediate):
        ├── [ ] Task 1
        └── [ ] Task 2

        PRIORITY 2 (Next):
        └── [ ] Task 3

  # How confidence must be stated and what each band is meant to signal.
  confidence_requirements:
    every_recommendation: true
    format: 'Confidence: XX%'
    calibration_guidance: |
      95%+ = Multiple sources agree, tested personally
      85-94% = Strong triangulation, no contradictions
      70-84% = Good evidence, some gaps
      50-69% = Partial evidence, significant uncertainty
      <50% = Should not recommend without disclaimer

  checklist:
    - '[ ] Technical specs table created'
    - '[ ] Benchmarks compared (if applicable)'
    - '[ ] Fit analysis matrix built'
    - '[ ] Architecture diagram included'
    - '[ ] Implementation roadmap provided'
    - '[ ] All recommendations have confidence %'

# ─────────────────────────────────────────────────────────────────────────────
# PHASE 5: OUTPUT WITH EXPLICIT CONFIDENCE
# ─────────────────────────────────────────────────────────────────────────────

# Phase 5 — layout of the final report and the gates it has to pass.
phase_5_output:
  name: 'Output with Explicit Confidence'
  objective: 'Deliver auditable, actionable report'
  duration_estimate: '3-5 minutes'

  # Report sections, each with a free-text position hint.
  # NOTE(review): executive_summary and sources_section both say "end";
  # example_output_structure lists sources first — confirm intended order.
  required_sections:
    proofguard_verification_summary:
      position: 'top'
      required: true
      purpose: 'Immediate audit trail'

    detailed_analysis:
      position: 'middle'
      required: true
      subsections:
        - 'Technical Specifications'
        - 'Benchmark Results'
        - 'Integration Assessment'
        - 'Production Considerations'

    decision_matrix:
      position: 'before conclusion'
      required: true
      purpose: 'At-a-glance decision support'
      template: |
        | Component | Project A | Project B | Confidence |
        |-----------|-----------|-----------|------------|
        | [Item]    | [Y/N/Defer] | [Y/N/Defer] | [%] |

    executive_summary:
      position: 'end'
      required: true
      purpose: 'Quick reference for decisions'
      max_length: '5-7 rows in table format'

    sources_section:
      position: 'end'
      required: true
      tiers:
        - 'Primary Sources (Tier 1)'
        - 'Secondary Sources (Tier 2)'
        - 'Independent Validation (Tier 3)'

  # Pass/fail checks; every gate listed here is marked required.
  quality_gates:
    - gate: 'All claims triangulated'
      required: true
    - gate: 'All recommendations have confidence'
      required: true
    - gate: 'All sources hyperlinked'
      required: true
    - gate: 'Executive summary present'
      required: true
    - gate: 'Actionable roadmap included'
      required: true

# ─────────────────────────────────────────────────────────────────────────────
# OBJECTIVE MEASURES FOR TESTING
# ─────────────────────────────────────────────────────────────────────────────

# Measures for scoring a report produced under this protocol. Formulas and
# thresholds are descriptive strings; nothing in this file evaluates them.
objective_measures:
  # Ratio-style measures computed from counts of claims/sources.
  completeness_metrics:
    - metric: 'Triangulation Coverage'
      formula: '(claims_with_3_sources / total_claims) * 100'
      target: '>= 90%'

    - metric: 'Source Diversity'
      formula: 'unique_domains / total_sources'
      target: '>= 0.7'

    - metric: 'Confidence Explicitness'
      formula: '(recommendations_with_confidence / total_recommendations) * 100'
      target: '100%'

  # Judgement-based measures, each rated on a 0-10 scale.
  quality_metrics:
    - metric: 'Actionability Score'
      criteria: 'Does output include implementation roadmap with priorities?'
      scoring: '0-10 scale'
      target: '>= 8'

    - metric: 'Auditability Score'
      criteria: 'Can every claim be traced to source?'
      scoring: '0-10 scale'
      target: '>= 9'

    - metric: 'Decision Support Score'
      criteria: 'Does output enable yes/no decision?'
      scoring: '0-10 scale'
      target: '>= 8'

  efficiency_metrics:
    # NOTE(review): this entry uses "benchmark" where every other metric
    # uses "target" — confirm whether the different key is intentional.
    - metric: 'Research Depth vs Time'
      formula: 'sources_consulted / minutes_elapsed'
      benchmark: '>= 2 sources/minute'

    - metric: 'Output Density'
      formula: 'structured_tables / total_output_sections'
      target: '>= 0.5'

# ─────────────────────────────────────────────────────────────────────────────
# INTEGRATION WITH THINKTOOL
# ─────────────────────────────────────────────────────────────────────────────

# How this protocol plugs into ThinkTool: activation handles, per-profile
# module lists, and what gets injected into the output template.
thinktool_integration:
  activation:
    shortcut: 'pg-deep'
    # Long-form flags listed alongside the shortcut.
    full_command: '--profile paranoid --thinktool ProofGuard'
    automatic_triggers:
      - 'Research request with technical claims'
      - 'Evaluation of new technology'
      - 'High-stakes architectural decision'

  # One entry per profile: its modules, its confidence target (a percent
  # string), and whether this protocol is REQUIRED or RECOMMENDED for it.
  profile_mapping:
    paranoid:
      modules:
        - 'GigaThink'
        - 'LaserLogic'
        - 'BedRock'
        - 'ProofGuard'
        - 'BrutalHonesty'
      confidence_target: '95%'
      this_protocol: 'REQUIRED'

    scientific:
      # Core modules (Pro adds: SciEngine, AtomicBreak)
      modules:
        - 'GigaThink'
        - 'LaserLogic'
        - 'BedRock'
        - 'ProofGuard'
      confidence_target: '85%'
      this_protocol: 'RECOMMENDED'

    deep:
      # Core modules (Pro adds: HighReflect)
      modules:
        - 'GigaThink'
        - 'LaserLogic'
        - 'BedRock'
        - 'ProofGuard'
        - 'BrutalHonesty'
      confidence_target: '85%'
      this_protocol: 'RECOMMENDED'

  output_template_injection:
    when: 'ProofGuard Deep Research activated'
    inject:
      - 'Triangulation table at top'
      - 'Decision matrix before conclusion'
      - 'Executive summary at end'
      - 'Sources section with tiers'

# ─────────────────────────────────────────────────────────────────────────────
# EXAMPLE OUTPUT STRUCTURE (REFERENCE)
# ─────────────────────────────────────────────────────────────────────────────

# Reference skeleton of a finished report, stored as a single literal block
# scalar holding Markdown. Everything indented under the key is string
# content: lines starting with "#" are Markdown headings, not YAML comments,
# and the ``` fences and bracketed placeholders are part of the value.
example_output_structure: |
  # DEEP RESEARCH REPORT: [Topic]

  ## ProofGuard Verification Summary
  | Finding | Source A (Primary) | Source B (Secondary) | Source C (Independent) | Consensus |
  |---------|-------------------|---------------------|----------------------|-----------|
  | [Claim] | [URL]             | [URL]               | [URL]                | VERIFIED ✓|

  ---
  ## 1. [TOPIC A] ANALYSIS

  ### Technical Specifications (VERIFIED)
  | Property | Value | Source |
  |----------|-------|--------|

  ### Benchmark Performance (VERIFIED)
  | Benchmark | Score | Baseline | Improvement |
  |-----------|-------|----------|-------------|

  ---
  ## 2. [TOPIC B] ANALYSIS
  [Same structure...]

  ---
  ## 3. FIT ANALYSIS FOR [PROJECT]

  ### [Project Name] Assessment
  | Criterion | Score | Rationale |
  |-----------|-------|-----------|

  **RECOMMENDATION:** [HIGH/MEDIUM/LOW PRIORITY]
  ```
  Confidence: XX%
  Implementation Path:
  1. Step 1
  2. Step 2

  Estimated Effort: [Low/Medium/High]
  Risk: [Low/Medium/High]
  ```

  ---
  ## 4. SYNTHESIS & RECOMMENDATIONS

  ### Decision Matrix
  | Component | Project A | Project B | Confidence |
  |-----------|-----------|-----------|------------|

  ### Implementation Priority
  ```
  PRIORITY 1 (Immediate):
  ├── [ ] Task 1
  └── [ ] Task 2
  ```

  ---
  ## 5. SOURCES

  ### Primary Sources (Tier 1)
  - [URL 1]
  - [URL 2]

  ### Secondary Sources (Tier 2)
  - [URL 3]

  ### Independent Validation (Tier 3)
  - [URL 4]

  ---
  ## EXECUTIVE SUMMARY
  | Topic | Recommendation | Project Fit | Confidence |
  |-------|----------------|-------------|------------|

# ─────────────────────────────────────────────────────────────────────────────
# CHANGELOG
# ─────────────────────────────────────────────────────────────────────────────

# Release history; one entry per protocol version. Version and date are
# quoted so they remain strings.
changelog:
  - version: '1.0.0'
    date: '2025-12-11'
    changes:
      - 'Initial protocol derived from BGE-M3/RAPTOR deep research session'
      - 'Defined 5-phase workflow'
      - 'Established triangulation requirements'
      - 'Created structured output templates'
      - 'Added objective measures for testing'
      - 'Integrated with ThinkTool profiles'