1use std::cmp::Ordering;
16use std::collections::{HashMap, HashSet};
17use std::fmt;
18use std::sync::{Arc, Mutex};
19
20use converge_pack::{AgentEffect, Context, ContextKey, Fact, FactId, ProposedFact, Suggestor};
21use serde::{Deserialize, Serialize};
22use uuid::Uuid;
23
24use crate::suggestor::SharedBudget;
25
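/// Errors surfaced by due-diligence search and LLM providers. Infrastructure
/// failures (credits, rate limits, outages) are kept distinct from content
/// failures so callers can decide whether to fail over, retry, or give up.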
26#[derive(Debug, Clone, Serialize, Deserialize)]
32pub enum DdError {
33 CreditsExhausted { provider: String, detail: String },
35 RateLimited {
37 provider: String,
38 retry_after_ms: Option<u64>,
39 },
40 ProviderUnavailable { provider: String, detail: String },
42 BadResponse { provider: String, detail: String },
44 PromptTooLarge {
46 provider: String,
47 tokens: Option<usize>,
48 },
49 ParseFailed { provider: String, detail: String },
51}
52
53impl fmt::Display for DdError {
54 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
55 match self {
56 Self::CreditsExhausted { provider, detail } => {
57 write!(f, "[{provider}] credits exhausted: {detail}")
58 }
            Self::RateLimited {
                provider,
                retry_after_ms,
            } => match retry_after_ms {
                Some(ms) => write!(f, "[{provider}] rate limited (retry after {ms} ms)"),
                None => write!(f, "[{provider}] rate limited"),
            },
60 Self::ProviderUnavailable { provider, detail } => {
61 write!(f, "[{provider}] unavailable: {detail}")
62 }
63 Self::BadResponse { provider, detail } => {
64 write!(f, "[{provider}] bad response: {detail}")
65 }
66 Self::PromptTooLarge { provider, tokens } => write!(
67 f,
68 "[{provider}] prompt too large ({})",
69 tokens.map_or("unknown".into(), |t| format!("{t} tokens"))
70 ),
71 Self::ParseFailed { provider, detail } => {
72 write!(f, "[{provider}] parse failed: {detail}")
73 }
74 }
75 }
76}
77
78impl DdError {
79 pub fn is_infra_failure(&self) -> bool {
81 matches!(
82 self,
83 Self::CreditsExhausted { .. }
84 | Self::RateLimited { .. }
85 | Self::ProviderUnavailable { .. }
86 )
87 }
88
89 pub fn is_fatal(&self) -> bool {
91 matches!(self, Self::CreditsExhausted { .. })
92 }
93
94 fn constraint_id(&self, suggestor: &str) -> String {
96 let kind = match self {
97 Self::CreditsExhausted { .. } => "credits-exhausted",
98 Self::RateLimited { .. } => "rate-limited",
99 Self::ProviderUnavailable { .. } => "provider-unavailable",
100 Self::BadResponse { .. } => "bad-response",
101 Self::PromptTooLarge { .. } => "prompt-too-large",
102 Self::ParseFailed { .. } => "parse-failed",
103 };
104 format!("dd:constraint:{suggestor}:{kind}")
105 }
106}
107
108fn error_to_constraint(error: &DdError, suggestor: &str) -> ProposedFact {
110 let id = error.constraint_id(suggestor);
111 let content = serde_json::json!({
112 "type": "error",
113 "error": serde_json::to_value(error).unwrap_or_default(),
114 "is_infra_failure": error.is_infra_failure(),
115 "is_fatal": error.is_fatal(),
116 "message": error.to_string(),
117 })
118 .to_string();
119 ProposedFact::new(ContextKey::Constraints, id, content, suggestor).with_confidence(1.0)
120}
121
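/// Web-search backend used by the research suggestors; implementations map
/// provider-specific failures onto [`DdError`] so failover and budgeting can
/// treat them uniformly.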
122#[async_trait::async_trait]
130pub trait DdSearch: Send + Sync {
131 async fn search(&self, query: &str) -> Result<Vec<SearchHit>, DdError>;
132}
133
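/// Minimal LLM completion interface: one prompt in, raw completion text out.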
134#[async_trait::async_trait]
140pub trait DdLlm: Send + Sync {
141 async fn complete(&self, prompt: &str) -> Result<String, DdError>;
142}
143
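/// A single search result along with the provider that returned it.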
144#[derive(Debug, Clone)]
146pub struct SearchHit {
147 pub title: String,
148 pub url: String,
149 pub content: String,
150 pub provider: String,
151}
152
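/// Tries each backend in order, failing over to the next one only on
/// infrastructure errors (credits exhausted, rate limited, provider
/// unavailable); other errors are returned immediately.
///
/// A minimal construction sketch; `primary` and `fallback` stand in for any
/// concrete [`DdLlm`] implementations you have available:
///
/// ```ignore
/// let llm = FailoverDdLlm::new(vec![
///     Arc::new(primary) as Arc<dyn DdLlm>,
///     Arc::new(fallback),
/// ]);
/// ```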
153pub struct FailoverDdLlm {
160 backends: Vec<Arc<dyn DdLlm>>,
161}
162
163impl FailoverDdLlm {
164 pub fn new(backends: Vec<Arc<dyn DdLlm>>) -> Self {
165 Self { backends }
166 }
167}
168
169#[async_trait::async_trait]
170impl DdLlm for FailoverDdLlm {
171 async fn complete(&self, prompt: &str) -> Result<String, DdError> {
172 let mut last_error = None;
173 for backend in &self.backends {
174 match backend.complete(prompt).await {
175 Ok(result) => return Ok(result),
176 Err(e) => {
177 let should_failover = e.is_infra_failure();
178 eprintln!(
179 "[failover] {} — {}",
180 e,
181 if should_failover {
182 "trying next"
183 } else {
184 "not retryable"
185 }
186 );
187 if !should_failover {
188 return Err(e);
189 }
190 last_error = Some(e);
191 }
192 }
193 }
194 Err(last_error.unwrap_or_else(|| DdError::ProviderUnavailable {
195 provider: "failover".into(),
196 detail: "no backends configured".into(),
197 }))
198 }
199}
200
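/// Search counterpart of [`FailoverDdLlm`]: walks the configured backends in
/// order and only fails over on infrastructure errors.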
201pub struct FailoverDdSearch {
203 backends: Vec<Arc<dyn DdSearch>>,
204}
205
206impl FailoverDdSearch {
207 pub fn new(backends: Vec<Arc<dyn DdSearch>>) -> Self {
208 Self { backends }
209 }
210}
211
212#[async_trait::async_trait]
213impl DdSearch for FailoverDdSearch {
214 async fn search(&self, query: &str) -> Result<Vec<SearchHit>, DdError> {
215 let mut last_error = None;
216 for backend in &self.backends {
217 match backend.search(query).await {
218 Ok(result) => return Ok(result),
219 Err(e) => {
220 let should_failover = e.is_infra_failure();
221 eprintln!(
222 "[failover] {} — {}",
223 e,
224 if should_failover {
225 "trying next"
226 } else {
227 "not retryable"
228 }
229 );
230 if !should_failover {
231 return Err(e);
232 }
233 last_error = Some(e);
234 }
235 }
236 }
237 Err(last_error.unwrap_or_else(|| DdError::ProviderUnavailable {
238 provider: "failover".into(),
239 detail: "no backends configured".into(),
240 }))
241 }
242}
243
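/// Turns each strategy fact containing its tag (default `"breadth"`) into a
/// web search, filters the hits for relevance to the subject, and proposes
/// them as signal facts. Every search consumes one unit of the shared
/// `"searches"` budget and each strategy is handled at most once.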
244pub struct BreadthResearchSuggestor {
249 subject: String,
250 budget: Arc<SharedBudget>,
251 search: Arc<dyn DdSearch>,
252 tag: String,
253 processed: Mutex<HashSet<FactId>>,
254}
255
256impl BreadthResearchSuggestor {
257 pub fn new(
258 subject: impl Into<String>,
259 budget: Arc<SharedBudget>,
260 search: Arc<dyn DdSearch>,
261 ) -> Self {
262 Self {
263 subject: subject.into(),
264 budget,
265 search,
266 tag: "breadth".into(),
267 processed: Mutex::new(HashSet::new()),
268 }
269 }
270
271 pub fn with_tag(mut self, tag: impl Into<String>) -> Self {
272 self.tag = tag.into();
273 self
274 }
275
276 fn unprocessed_strategies(&self, ctx: &dyn Context) -> Vec<String> {
277 let processed = self.processed.lock().unwrap();
278 ctx.get(ContextKey::Strategies)
279 .iter()
280 .filter(|f| f.content.contains(&self.tag))
281 .filter(|f| !processed.contains(&f.id))
282 .map(|f| f.content.clone())
283 .collect()
284 }
285}
286
287#[async_trait::async_trait]
288impl Suggestor for BreadthResearchSuggestor {
289 fn name(&self) -> &'static str {
290 "dd-breadth-research"
291 }
292
293 fn dependencies(&self) -> &[ContextKey] {
294 &[ContextKey::Strategies]
295 }
296
297 fn accepts(&self, ctx: &dyn Context) -> bool {
298 self.budget.remaining("searches") > 0 && !self.unprocessed_strategies(ctx).is_empty()
299 }
300
301 async fn execute(&self, ctx: &dyn Context) -> AgentEffect {
302 let strategies = self.unprocessed_strategies(ctx);
303 let mut proposals = Vec::new();
304
305 for strategy in strategies {
306 if !self.budget.try_use("searches") {
307 break;
308 }
309
310 let query = format!("{} {strategy}", self.subject);
311 match self.search.search(&query).await {
312 Ok(hits) => {
313 for hit in &hits {
314 if !is_relevant(&hit.title, &hit.content, &hit.url, &self.subject) {
315 continue;
316 }
317 let id = format!("signal-breadth-{}", Uuid::new_v4());
318 let content = serde_json::json!({
319 "title": hit.title,
320 "url": hit.url,
321 "content": hit.content,
322 "provider": hit.provider,
323 "query": query,
324 })
325 .to_string();
326 proposals.push(
327 ProposedFact::new(
328 ContextKey::Signals,
329 id,
330 content,
331 "dd-breadth-research",
332 )
333 .with_confidence(1.0),
334 );
335 }
336 }
337 Err(e) => {
338 proposals.push(error_to_constraint(&e, "dd-breadth-research"));
339 if e.is_fatal() {
340 break;
341 }
342 }
343 }
344
            // Mark the matching strategy fact as processed; if it has vanished
            // from the context there is nothing to record.
            if let Some(fact) = ctx
                .get(ContextKey::Strategies)
                .iter()
                .find(|f| f.content == strategy)
            {
                self.processed.lock().unwrap().insert(fact.id.clone());
            }
351 }
352
353 AgentEffect::with_proposals(proposals)
354 }
355}
356
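/// Depth-oriented twin of [`BreadthResearchSuggestor`]: same mechanics, but it
/// consumes strategies tagged `"depth"` and labels its signals accordingly.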
357pub struct DepthResearchSuggestor {
362 subject: String,
363 budget: Arc<SharedBudget>,
364 search: Arc<dyn DdSearch>,
365 tag: String,
366 processed: Mutex<HashSet<FactId>>,
367}
368
369impl DepthResearchSuggestor {
370 pub fn new(
371 subject: impl Into<String>,
372 budget: Arc<SharedBudget>,
373 search: Arc<dyn DdSearch>,
374 ) -> Self {
375 Self {
376 subject: subject.into(),
377 budget,
378 search,
379 tag: "depth".into(),
380 processed: Mutex::new(HashSet::new()),
381 }
382 }
383
384 pub fn with_tag(mut self, tag: impl Into<String>) -> Self {
385 self.tag = tag.into();
386 self
387 }
388
389 fn unprocessed_strategies(&self, ctx: &dyn Context) -> Vec<String> {
390 let processed = self.processed.lock().unwrap();
391 ctx.get(ContextKey::Strategies)
392 .iter()
393 .filter(|f| f.content.contains(&self.tag))
394 .filter(|f| !processed.contains(&f.id))
395 .map(|f| f.content.clone())
396 .collect()
397 }
398}
399
400#[async_trait::async_trait]
401impl Suggestor for DepthResearchSuggestor {
402 fn name(&self) -> &'static str {
403 "dd-depth-research"
404 }
405
406 fn dependencies(&self) -> &[ContextKey] {
407 &[ContextKey::Strategies]
408 }
409
410 fn accepts(&self, ctx: &dyn Context) -> bool {
411 self.budget.remaining("searches") > 0 && !self.unprocessed_strategies(ctx).is_empty()
412 }
413
414 async fn execute(&self, ctx: &dyn Context) -> AgentEffect {
415 let strategies = self.unprocessed_strategies(ctx);
416 let mut proposals = Vec::new();
417
418 for strategy in strategies {
419 if !self.budget.try_use("searches") {
420 break;
421 }
422
423 let query = format!("{} {strategy}", self.subject);
424 match self.search.search(&query).await {
425 Ok(hits) => {
426 for hit in &hits {
427 if !is_relevant(&hit.title, &hit.content, &hit.url, &self.subject) {
428 continue;
429 }
430 let id = format!("signal-depth-{}", Uuid::new_v4());
431 let content = serde_json::json!({
432 "title": hit.title,
433 "url": hit.url,
434 "content": hit.content,
435 "provider": hit.provider,
436 "query": query,
437 })
438 .to_string();
439 proposals.push(
440 ProposedFact::new(
441 ContextKey::Signals,
442 id,
443 content,
444 "dd-depth-research",
445 )
446 .with_confidence(1.0),
447 );
448 }
449 }
450 Err(e) => {
451 proposals.push(error_to_constraint(&e, "dd-depth-research"));
452 if e.is_fatal() {
453 break;
454 }
455 }
456 }
457
            // Mark the matching strategy fact as processed; if it has vanished
            // from the context there is nothing to record.
            if let Some(fact) = ctx
                .get(ContextKey::Strategies)
                .iter()
                .find(|f| f.content == strategy)
            {
                self.processed.lock().unwrap().insert(fact.id.clone());
            }
464 }
465
466 AgentEffect::with_proposals(proposals)
467 }
468}
469
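/// Feeds unprocessed signal facts to the LLM in batches of 15 and proposes
/// the extracted, normalized claims as hypothesis facts, de-duplicating
/// against hypotheses already in the context.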
470pub struct FactExtractorSuggestor {
475 subject: String,
476 budget: Arc<SharedBudget>,
477 llm: Arc<dyn DdLlm>,
478 processed_signal_count: Mutex<usize>,
479}
480
481impl FactExtractorSuggestor {
482 pub fn new(subject: impl Into<String>, budget: Arc<SharedBudget>, llm: Arc<dyn DdLlm>) -> Self {
483 Self {
484 subject: subject.into(),
485 budget,
486 llm,
487 processed_signal_count: Mutex::new(0),
488 }
489 }
490}
491
492#[async_trait::async_trait]
493impl Suggestor for FactExtractorSuggestor {
494 fn name(&self) -> &'static str {
495 "dd-fact-extractor"
496 }
497
498 fn dependencies(&self) -> &[ContextKey] {
499 &[ContextKey::Signals]
500 }
501
502 fn accepts(&self, ctx: &dyn Context) -> bool {
503 let current = ctx.count(ContextKey::Signals);
504 let processed = *self.processed_signal_count.lock().unwrap();
505 self.budget.remaining("llm") > 0 && current > processed
506 }
507
508 async fn execute(&self, ctx: &dyn Context) -> AgentEffect {
509 let all_signals = ctx.get(ContextKey::Signals);
510 let (start, end) = next_batch_bounds(
511 all_signals.len(),
512 *self.processed_signal_count.lock().unwrap(),
513 15,
514 );
515 let signals: Vec<_> = all_signals
516 .iter()
517 .skip(start)
518 .take(end - start)
519 .cloned()
520 .collect();
521
522 if signals.is_empty() || !self.budget.try_use("llm") {
523 return AgentEffect::empty();
524 }
525
526 *self.processed_signal_count.lock().unwrap() = end;
527 let prompt = prompts::fact_extraction(&self.subject, &signals);
528 let mut seen_fact_keys: HashSet<String> = ctx
529 .get(ContextKey::Hypotheses)
530 .iter()
531 .filter_map(|fact| existing_fact_signature(&fact.content))
532 .collect();
533
534 let mut proposals = Vec::new();
535 match self.llm.complete(&prompt).await {
536 Ok(raw) => match parse_json_array_response(&raw, "facts") {
537 Ok(facts) => {
538 for (i, fact) in facts.iter().enumerate() {
539 let Some(normalized_fact) = normalize_dd_fact(fact) else {
540 continue;
541 };
542 let signature = dd_fact_signature(&normalized_fact);
543 if !seen_fact_keys.insert(signature) {
544 continue;
545 }
546 let id = format!("hypothesis-{}-{i}", Uuid::new_v4());
547 proposals.push(
548 ProposedFact::new(
549 ContextKey::Hypotheses,
550 id,
551 normalized_fact.to_string(),
552 "dd-fact-extractor",
553 )
554 .with_confidence(normalized_fact["confidence"].as_f64().unwrap_or(0.5)),
555 );
556 }
557 }
558 Err(detail) => {
559 let parse_err = DdError::ParseFailed {
560 provider: "llm".into(),
                    // Take characters rather than bytes so the preview never
                    // splits a multi-byte UTF-8 sequence (which would panic).
                    detail: format!(
                        "{detail} (first 200 chars: {})",
                        raw.chars().take(200).collect::<String>()
                    ),
565 };
566 proposals.push(error_to_constraint(&parse_err, "dd-fact-extractor"));
567 }
568 },
569 Err(e) => {
570 proposals.push(error_to_constraint(&e, "dd-fact-extractor"));
571 }
572 }
573
574 AgentEffect::with_proposals(proposals)
575 }
576}
577
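/// Asks the LLM which due-diligence categories still have critical gaps and
/// proposes follow-up search strategies for them. Bounded by
/// `max_generations` passes, and only triggered once at least
/// `min_hypotheses` hypotheses exist and the count is still growing.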
578pub struct GapDetectorSuggestor {
583 subject: String,
584 budget: Arc<SharedBudget>,
585 llm: Arc<dyn DdLlm>,
586 last_hypothesis_count: Mutex<usize>,
587 generation_count: Mutex<usize>,
588 max_generations: usize,
589 min_hypotheses: usize,
590}
591
592impl GapDetectorSuggestor {
593 pub fn new(subject: impl Into<String>, budget: Arc<SharedBudget>, llm: Arc<dyn DdLlm>) -> Self {
594 Self {
595 subject: subject.into(),
596 budget,
597 llm,
598 last_hypothesis_count: Mutex::new(0),
599 generation_count: Mutex::new(0),
600 max_generations: 3,
601 min_hypotheses: 5,
602 }
603 }
604
605 pub fn with_max_generations(mut self, max: usize) -> Self {
606 self.max_generations = max;
607 self
608 }
609
610 pub fn with_min_hypotheses(mut self, min: usize) -> Self {
611 self.min_hypotheses = min;
612 self
613 }
614}
615
616#[async_trait::async_trait]
617impl Suggestor for GapDetectorSuggestor {
618 fn name(&self) -> &'static str {
619 "dd-gap-detector"
620 }
621
622 fn dependencies(&self) -> &[ContextKey] {
623 &[ContextKey::Hypotheses]
624 }
625
626 fn accepts(&self, ctx: &dyn Context) -> bool {
627 let current = ctx.count(ContextKey::Hypotheses);
628 let last = *self.last_hypothesis_count.lock().unwrap();
629 let gens = *self.generation_count.lock().unwrap();
630
631 current >= self.min_hypotheses
632 && current > last
633 && gens < self.max_generations
634 && self.budget.remaining("llm") > 0
635 }
636
637 async fn execute(&self, ctx: &dyn Context) -> AgentEffect {
638 if !self.budget.try_use("llm") {
639 return AgentEffect::empty();
640 }
641
642 let hypotheses = ctx.get(ContextKey::Hypotheses);
643 *self.last_hypothesis_count.lock().unwrap() = hypotheses.len();
644 let generation = {
645 let mut g = self.generation_count.lock().unwrap();
646 *g += 1;
647 *g
648 };
649
650 let prompt =
651 prompts::gap_detection(&self.subject, hypotheses, generation, self.max_generations);
652 let mut proposals = Vec::new();
653 let mut seen_strategy_contents: HashSet<String> = ctx
654 .get(ContextKey::Strategies)
655 .iter()
656 .map(|fact| fact.content.clone())
657 .collect();
658
659 match self.llm.complete(&prompt).await {
660 Ok(raw) => match parse_json_array_response(&raw, "strategies") {
661 Ok(strategies) => {
662 for (i, s) in strategies.iter().enumerate() {
663 let mode = s["mode"].as_str().unwrap_or("breadth");
664 let query = s["query"].as_str().unwrap_or("");
665 let reason = s["reason"].as_str().unwrap_or("");
666 let content = format!("[{mode}] {query} -- {reason}");
667 if query.trim().is_empty()
668 || !seen_strategy_contents.insert(content.clone())
669 {
670 continue;
671 }
672 let id = format!("strategy-gap-{i}-{}", Uuid::new_v4());
673
674 proposals.push(ProposedFact::new(
675 ContextKey::Strategies,
676 id,
677 content,
678 "dd-gap-detector",
679 ));
680 }
681 }
682 Err(detail) => {
683 let parse_err = DdError::ParseFailed {
684 provider: "llm".into(),
                    // Take characters rather than bytes so the preview never
                    // splits a multi-byte UTF-8 sequence (which would panic).
                    detail: format!(
                        "{detail} (first 200 chars: {})",
                        raw.chars().take(200).collect::<String>()
                    ),
689 };
690 proposals.push(error_to_constraint(&parse_err, "dd-gap-detector"));
691 }
692 },
693 Err(e) => {
694 proposals.push(error_to_constraint(&e, "dd-gap-detector"));
695 }
696 }
697
698 AgentEffect::with_proposals(proposals)
699 }
700}
701
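/// LLM-free heuristic pass: groups hypothesis claims by category and, when a
/// category's claims mention contradictions or disagreement, proposes an
/// evaluation fact flagged for human review.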
702pub struct ContradictionFinderSuggestor {
707 last_hypothesis_count: Mutex<usize>,
708}
709
710impl ContradictionFinderSuggestor {
711 pub fn new() -> Self {
712 Self {
713 last_hypothesis_count: Mutex::new(0),
714 }
715 }
716}
717
718#[async_trait::async_trait]
719impl Suggestor for ContradictionFinderSuggestor {
720 fn name(&self) -> &'static str {
721 "dd-contradiction-finder"
722 }
723
724 fn dependencies(&self) -> &[ContextKey] {
725 &[ContextKey::Hypotheses]
726 }
727
728 fn accepts(&self, ctx: &dyn Context) -> bool {
729 let current = ctx.count(ContextKey::Hypotheses);
730 let last = *self.last_hypothesis_count.lock().unwrap();
731 current > last && current >= 3
732 }
733
734 async fn execute(&self, ctx: &dyn Context) -> AgentEffect {
735 let hypotheses = ctx.get(ContextKey::Hypotheses);
736 *self.last_hypothesis_count.lock().unwrap() = hypotheses.len();
737
738 let mut by_category: HashMap<String, Vec<(FactId, String)>> = HashMap::new();
740 for fact in hypotheses {
741 if let Ok(v) = serde_json::from_str::<serde_json::Value>(&fact.content) {
742 let category = v["category"].as_str().unwrap_or("unknown").to_string();
743 let claim = v["claim"].as_str().unwrap_or("").to_string();
744 if !claim.is_empty() {
745 by_category
746 .entry(category)
747 .or_default()
748 .push((fact.id.clone(), claim));
749 }
750 }
751 }
752
753 let mut proposals = Vec::new();
754 let existing_evaluations: HashSet<FactId> = ctx
755 .get(ContextKey::Evaluations)
756 .iter()
757 .map(|f| f.id.clone())
758 .collect();
759
760 for (category, claims) in &by_category {
762 if claims.len() < 2 {
763 continue;
764 }
765
766 let has_contradiction = claims.iter().any(|(_, c)| {
768 c.to_lowercase().contains("contradiction")
769 || c.to_lowercase().contains("disagree")
770 || c.to_lowercase().contains("conflict")
771 });
772
773 if has_contradiction {
                // A deterministic id per category lets the existing-evaluations
                // check below suppress repeat proposals; a fresh UUID here would
                // never match anything already stored in the context.
                let id = format!("contradiction-{category}");
                if existing_evaluations.contains(id.as_str()) {
                    continue;
                }
778
779 let claim_ids: Vec<&str> = claims.iter().map(|(id, _)| id.as_str()).collect();
780 let content = serde_json::json!({
781 "category": category,
782 "type": "contradiction",
783 "claim_count": claims.len(),
784 "claim_ids": claim_ids,
785 "description": format!("Contradictory claims detected in {category} — sources disagree"),
786 "needs_human_review": true,
787 })
788 .to_string();
789
790 proposals.push(
791 ProposedFact::new(
792 ContextKey::Evaluations,
793 id,
794 content,
795 "dd-contradiction-finder",
796 )
797 .with_confidence(0.9),
798 );
799 }
800 }
801
802 AgentEffect::with_proposals(proposals)
803 }
804}
805
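/// Waits until the hypothesis count has been stable for
/// `required_stable_cycles` consecutive scheduling checks, then asks the LLM
/// for a single final synthesis proposal. Only runs while no proposal fact
/// exists yet.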
806pub struct SynthesisSuggestor {
811 subject: String,
812 budget: Arc<SharedBudget>,
813 llm: Arc<dyn DdLlm>,
814 last_hypothesis_count: Mutex<usize>,
815 stable_cycles: Mutex<usize>,
816 required_stable_cycles: usize,
817}
818
819impl SynthesisSuggestor {
820 pub fn new(subject: impl Into<String>, budget: Arc<SharedBudget>, llm: Arc<dyn DdLlm>) -> Self {
821 Self {
822 subject: subject.into(),
823 budget,
824 llm,
825 last_hypothesis_count: Mutex::new(0),
826 stable_cycles: Mutex::new(0),
827 required_stable_cycles: 2,
828 }
829 }
830
831 pub fn with_required_stable_cycles(mut self, n: usize) -> Self {
832 self.required_stable_cycles = n;
833 self
834 }
835}
836
837#[async_trait::async_trait]
838impl Suggestor for SynthesisSuggestor {
839 fn name(&self) -> &'static str {
840 "dd-synthesis"
841 }
842
843 fn dependencies(&self) -> &[ContextKey] {
844 &[]
847 }
848
849 fn accepts(&self, ctx: &dyn Context) -> bool {
850 let current = ctx.count(ContextKey::Hypotheses);
851 let mut last = self.last_hypothesis_count.lock().unwrap();
852 let mut stable = self.stable_cycles.lock().unwrap();
853
854 if current == *last && current > 0 {
855 *stable += 1;
856 } else {
857 *stable = 0;
858 *last = current;
859 }
860
861 *stable >= self.required_stable_cycles
862 && !ctx.has(ContextKey::Proposals)
863 && self.budget.remaining("llm") > 0
864 }
865
866 async fn execute(&self, ctx: &dyn Context) -> AgentEffect {
867 if !self.budget.try_use("llm") {
868 return AgentEffect::empty();
869 }
870
871 let hypotheses = ctx.get(ContextKey::Hypotheses);
872 let consolidated = consolidate_dd_hypotheses(hypotheses);
873 let prompt = prompts::synthesis(&self.subject, &consolidated);
874
875 match self.llm.complete(&prompt).await {
876 Ok(raw) => {
877 let id = format!("synthesis-{}", Uuid::new_v4());
878 AgentEffect::with_proposal(
879 ProposedFact::new(ContextKey::Proposals, id, raw, "dd-synthesis")
880 .with_confidence(0.8),
881 )
882 }
883 Err(e) => AgentEffect::with_proposal(error_to_constraint(&e, "dd-synthesis")),
884 }
885 }
886}
887
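/// A consolidated fact ready for synthesis. `support_count` is how many raw
/// hypotheses merged into this claim; `evidence_count` is how many source
/// citations back it.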
888#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
889pub struct DdFactSummary {
890 pub category: String,
891 pub claim: String,
892 pub confidence: f64,
893 pub support_count: usize,
894 pub evidence_count: usize,
895}
896
897#[derive(Debug, Clone)]
898struct ConsolidationCandidate {
899 summary: DdFactSummary,
900 distinctive_tokens: HashSet<String>,
901 topic_tokens: HashSet<String>,
902 numeric_tokens: Vec<String>,
903 approximate: bool,
904 priority_score: f64,
905}
906
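/// Prompt builders for the LLM-backed suggestors: fact extraction, gap
/// detection, and final synthesis. Each asks the model for bare JSON with no
/// markdown fences.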
907pub mod prompts {
910 use super::{DdFactSummary, covered_dd_categories, missing_expected_dd_categories};
911 use converge_pack::Fact;
912
913 pub fn fact_extraction(subject: &str, signals: &[Fact]) -> String {
914 let sources_text: String = signals
915 .iter()
916 .enumerate()
917 .filter_map(|(i, f)| {
918 let v: serde_json::Value = serde_json::from_str(&f.content).ok()?;
919 Some(format!(
920 "[Source {i}] ({}) {}\n URL: {}\n {}",
921 v["provider"].as_str().unwrap_or("?"),
922 v["title"].as_str().unwrap_or(""),
923 v["url"].as_str().unwrap_or(""),
924 v["content"].as_str().unwrap_or("")
925 ))
926 })
927 .collect::<Vec<_>>()
928 .join("\n\n");
929
930 format!(
931 r#"You are an analyst extracting facts about {subject} from research sources.
932
933{sources_text}
934
Extract key facts as a JSON object with a top-level "facts" array. ONLY valid JSON, no fences:
936{{
937 "facts": [
938 {{
939 "claim": "specific factual claim",
940 "category": "product|customers|technology|competition|market|financials|team|risk|governance",
941 "source_indices": [0, 3],
942 "confidence": 0.9
943 }}
944 ]
945}}
946
947Rules:
948- Return an object with a top-level "facts" array
949- Return at most 20 facts, prioritized by investment relevance
950- Return DISTINCT facts only. Do not restate the same claim with cosmetic wording changes.
951- Aim to cover these DD categories when evidence exists: product, customers, technology, competition, market, financials
952- Use category "technology" for platform, architecture, integrations, attack-surface management mechanics, threat-intelligence infrastructure, APIs, or technical moat
953- Do NOT label a clearly technical platform claim as "product" just because it mentions a product name
954- Every fact MUST cite source_indices
955- 0.9+ for primary sources, 0.7 for secondary, 0.5 for inferred
956- Flag contradictions between sources as separate facts with category "risk"
957- If no reliable facts can be extracted, return {{"facts":[]}}"#
958 )
959 }
960
961 pub fn gap_detection(
962 subject: &str,
963 hypotheses: &[Fact],
964 generation: usize,
965 max_generations: usize,
966 ) -> String {
967 let facts_text: String = hypotheses
968 .iter()
969 .map(|f| f.content.as_str())
970 .collect::<Vec<_>>()
971 .join("\n");
972 let covered_categories = covered_dd_categories(hypotheses);
973 let missing_categories = missing_expected_dd_categories(hypotheses);
974 let covered_text = if covered_categories.is_empty() {
975 "none yet".to_string()
976 } else {
977 covered_categories.join(", ")
978 };
979 let missing_text = if missing_categories.is_empty() {
980 "none".to_string()
981 } else {
982 missing_categories.join(", ")
983 };
984
985 format!(
986 r#"You are a PE analyst reviewing extracted facts about {subject}.
987
988Current facts:
989{facts_text}
990
991Covered categories:
992{covered_text}
993
994Missing expected categories:
995{missing_text}
996
997What critical gaps remain? Focus on:
998- Missing financials (ARR, growth, margins)
999- Unknown ownership/investors
1000- Unclear competitive positioning
1001- Missing tech stack details
1002- Unknown customer concentration
1003
1004Return JSON object:
1005{{
1006 "strategies": [
1007 {{"query": "search terms", "mode": "breadth|depth", "reason": "why this matters"}}
1008 ]
1009}}
1010
1011This is research pass {generation} of {max_generations}. Only propose searches for gaps that are CRITICAL for investment decision-making.
1012Pass 1: broad gaps (max 4). Pass 2+: only truly unresolved items (max 2).
1013ONLY valid JSON, no markdown fences. If no critical gaps remain, return {{"strategies":[]}}."#
1014 )
1015 }
1016
1017 pub fn synthesis(subject: &str, hypotheses: &[DdFactSummary]) -> String {
1018 let facts_text = if hypotheses.is_empty() {
1019 "No consolidated facts were available.".to_string()
1020 } else {
1021 hypotheses
1022 .iter()
1023 .map(|fact| {
1024 format!(
1025 "- [{} | confidence {:.2} | support {} | evidence {}] {}",
1026 fact.category,
1027 fact.confidence,
1028 fact.support_count,
1029 fact.evidence_count,
1030 fact.claim
1031 )
1032 })
1033 .collect::<Vec<_>>()
1034 .join("\n")
1035 };
1036
1037 format!(
1038 r#"You are a senior PE analyst producing a final due diligence synthesis for {subject}.
1039
1040Consolidated facts:
1041{facts_text}
1042
1043Produce a final analysis as JSON:
1044{{
1045 "summary": "2-3 paragraph executive summary",
1046 "market_analysis": "market analysis",
1047 "competitive_landscape": "competitive analysis",
1048 "technology_assessment": "tech assessment",
1049 "risk_factors": ["risk 1", "risk 2"],
1050 "growth_opportunities": ["opp 1", "opp 2"],
1051 "recommendation": "investment recommendation"
1052}}
1053
1054ONLY valid JSON, no markdown fences. All values plain strings."#
1055 )
1056 }
1057}
1058
1059fn strip_fences(raw: &str) -> &str {
1062 let s = raw.trim();
1063 let s = s
1064 .strip_prefix("```json")
1065 .or_else(|| s.strip_prefix("```"))
1066 .unwrap_or(s);
1067 s.strip_suffix("```").unwrap_or(s).trim()
1068}
1069
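/// Consolidates raw hypothesis facts into a ranked, de-duplicated list of
/// [`DdFactSummary`] values: exact duplicates are merged by signature, then
/// near-duplicates and vague restatements are dropped per category, keeping
/// the highest-priority claims first.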
1070pub fn consolidate_dd_hypotheses(hypotheses: &[Fact]) -> Vec<DdFactSummary> {
1071 consolidate_dd_fact_values(
1072 hypotheses
1073 .iter()
1074 .filter_map(|fact| serde_json::from_str::<serde_json::Value>(&fact.content).ok())
1075 .collect::<Vec<_>>(),
1076 )
1077}
1078
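/// Lists of investors, business areas, regions, and competitors mentioned in
/// the consolidated facts, as extracted by [`extract_hooks_from_facts`].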
1079#[derive(Debug, Clone, Default)]
1083pub struct DdHooks {
1084 pub investors: Vec<String>,
1085 pub business_areas: Vec<String>,
1086 pub regions: Vec<String>,
1087 pub competitors: Vec<String>,
1088}
1089
1090#[derive(Debug, Clone)]
1092pub struct HookPatterns {
1093 pub business_areas: Vec<(String, String)>,
1094 pub regions: Vec<(String, String)>,
1095 pub entity_triggers: Vec<String>,
1096}
1097
1098impl Default for HookPatterns {
1099 fn default() -> Self {
1100 Self {
1101 business_areas: vec![
1102 ("saas".into(), "SaaS".into()),
1103 (" grc".into(), "Governance, Risk & Compliance (GRC)".into()),
1104 (",grc".into(), "Governance, Risk & Compliance (GRC)".into()),
1105 (
1106 "governance, risk".into(),
1107 "Governance, Risk & Compliance (GRC)".into(),
1108 ),
1109 (" esg".into(), "ESG Reporting".into()),
1110 ("sustainability".into(), "ESG Reporting".into()),
1111 ("compliance".into(), "Compliance Management".into()),
1112 ("strategic planning".into(), "Strategic Planning".into()),
1113 ("quality management".into(), "Quality Management".into()),
1114 ("risk management".into(), "Risk Management".into()),
1115 ("edc".into(), "Electronic Data Capture (EDC)".into()),
1116 ("clinical trial".into(), "Clinical Trial Management".into()),
1117 ("eclinical".into(), "eClinical Solutions".into()),
1118 ("cybersecurity".into(), "Cybersecurity".into()),
1119 ("vulnerability".into(), "Vulnerability Management".into()),
1120 ("threat intelligence".into(), "Threat Intelligence".into()),
1121 ("penetration testing".into(), "Penetration Testing".into()),
1122 ("attack surface".into(), "Attack Surface Management".into()),
1123 ("workforce management".into(), "Workforce Management".into()),
1124 (
1125 "scheduling".into(),
1126 "Scheduling & Resource Management".into(),
1127 ),
1128 ("timetabling".into(), "Timetabling".into()),
1129 (
1130 "higher education".into(),
1131 "Higher Education Software".into(),
1132 ),
1133 ("edtech".into(), "EdTech".into()),
1134 (
1135 "business intelligence".into(),
1136 "Business Intelligence".into(),
1137 ),
1138 ("analytics".into(), "Analytics".into()),
1139 ("fintech".into(), "FinTech".into()),
1140 ("payment".into(), "Payment Solutions".into()),
1141 ("information security".into(), "Information Security".into()),
1142 ("regulatory".into(), "Regulatory Technology".into()),
1143 ("crm".into(), "CRM".into()),
1144 ("marketing automation".into(), "Marketing Automation".into()),
1145 ("sales automation".into(), "Sales Automation".into()),
1146 ],
1147 regions: vec![
1148 ("nordic".into(), "Nordics".into()),
1149 ("scandinav".into(), "Nordics".into()),
1150 ("sweden".into(), "Nordics".into()),
1151 ("norway".into(), "Nordics".into()),
1152 ("denmark".into(), "Nordics".into()),
1153 ("finland".into(), "Nordics".into()),
1154 ("europe".into(), "Europe".into()),
1155 ("north america".into(), "North America".into()),
1156 ("united states".into(), "North America".into()),
1157 (" us ".into(), "North America".into()),
1158 ("apac".into(), "APAC".into()),
1159 ("asia".into(), "APAC".into()),
1160 ("japan".into(), "Japan".into()),
1161 ("united kingdom".into(), "United Kingdom".into()),
1162 (" uk ".into(), "United Kingdom".into()),
1163 ("germany".into(), "Europe".into()),
1164 ("france".into(), "France".into()),
1165 ("global".into(), "Global".into()),
1166 ],
1167 entity_triggers: vec![
1168 "acquired by ".into(),
1169 "acquired ".into(),
1170 "investment from ".into(),
1171 "invested by ".into(),
1172 "backed by ".into(),
1173 "funded by ".into(),
1174 "partnership with ".into(),
1175 "partner ".into(),
1176 "competes with ".into(),
1177 "competitor ".into(),
1178 "competitors like ".into(),
1179 "competitors such as ".into(),
1180 "alternatives include ".into(),
1181 "compared to ".into(),
1182 "compared against ".into(),
1183 "compared against competitors like ".into(),
1184 ],
1185 }
1186 }
1187}
1188
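/// Scans consolidated fact claims against the configured [`HookPatterns`] and
/// collects matching business areas, regions, and named entities (competitors
/// from "competition" facts, investors from "financials" facts).
///
/// A small sketch; the subject and fact below are made-up illustrations:
///
/// ```ignore
/// let hooks = extract_hooks_from_facts(
///     "Acme",
///     &[DdFactSummary {
///         category: "competition".into(),
///         claim: "Acme competes with Globex in the Nordics.".into(),
///         confidence: 0.8,
///         support_count: 1,
///         evidence_count: 1,
///     }],
///     &HookPatterns::default(),
/// );
/// assert!(hooks.regions.contains(&"Nordics".to_string()));
/// ```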
1189pub fn extract_hooks_from_facts(
1191 subject: &str,
1192 facts: &[DdFactSummary],
1193 patterns: &HookPatterns,
1194) -> DdHooks {
1195 let mut business_areas = std::collections::BTreeSet::new();
1196 let mut competitors = std::collections::BTreeSet::new();
1197 let mut investors = std::collections::BTreeSet::new();
1198 let mut regions = std::collections::BTreeSet::new();
1199
1200 let subject_lower = subject.to_lowercase();
1201
1202 for fact in facts {
1203 let claim = &fact.claim;
1204 let claim_lower = claim.to_lowercase();
1205
1206 for (pattern, label) in &patterns.business_areas {
1208 if claim_lower.contains(pattern.as_str()) {
1209 business_areas.insert(label.clone());
1210 }
1211 }
1212
1213 match fact.category.as_str() {
1215 "competition" | "competitors" => {
1216 for name in extract_named_entities(claim, &subject_lower, &patterns.entity_triggers)
1217 {
1218 competitors.insert(name);
1219 }
1220 }
1221 "financials" => {
1222 for name in extract_named_entities(claim, &subject_lower, &patterns.entity_triggers)
1223 {
1224 investors.insert(name);
1225 }
1226 }
1227 _ => {}
1228 }
1229
        // `regions` is a BTreeSet, so inserting the same label twice is already
        // a no-op; no per-fact "seen" set is needed.
        for (pattern, label) in &patterns.regions {
            if claim_lower.contains(pattern.as_str()) {
                regions.insert(label.clone());
            }
        }
1237 }
1238
1239 DdHooks {
1240 investors: investors.into_iter().collect(),
1241 business_areas: business_areas.into_iter().collect(),
1242 regions: regions.into_iter().collect(),
1243 competitors: competitors.into_iter().collect(),
1244 }
1245}
1246
1247fn extract_named_entities(claim: &str, exclude_lower: &str, triggers: &[String]) -> Vec<String> {
1248 let mut entities = Vec::new();
1249 let claim_lower = claim.to_lowercase();
1250
1251 for trigger in triggers {
1252 if let Some(pos) = claim_lower.find(trigger.as_str()) {
1253 let after = &claim[pos + trigger.len()..];
1254 let entity = after
1255 .split([',', '.', ';', '(', ')'])
1256 .next()
1257 .unwrap_or("")
1258 .trim();
1259 if !entity.is_empty() && entity.len() < 60 && entity.to_lowercase() != exclude_lower {
1260 entities.push(entity.to_string());
1261 }
1262 }
1263 }
1264
1265 entities
1266}
1267
1268fn next_batch_bounds(
1269 total_items: usize,
1270 processed_items: usize,
1271 max_batch: usize,
1272) -> (usize, usize) {
1273 let start = processed_items.min(total_items);
1274 let end = (start + max_batch).min(total_items);
1275 (start, end)
1276}
1277
1278fn consolidate_dd_fact_values<I>(values: I) -> Vec<DdFactSummary>
1279where
1280 I: IntoIterator<Item = serde_json::Value>,
1281{
1282 let mut by_signature: HashMap<String, DdFactSummary> = HashMap::new();
1283 for value in values {
1284 let Some(normalized) = normalize_dd_fact(&value) else {
1285 continue;
1286 };
1287 let Some(summary) = summary_from_normalized_fact(&normalized) else {
1288 continue;
1289 };
1290 let signature = dd_fact_signature(&normalized);
1291 if let Some(existing) = by_signature.get_mut(&signature) {
1292 merge_exact_summary(existing, summary);
1293 } else {
1294 by_signature.insert(signature, summary);
1295 }
1296 }
1297
1298 let summaries: Vec<DdFactSummary> = by_signature.into_values().collect();
1299 if summaries.is_empty() {
1300 return Vec::new();
1301 }
1302
1303 let token_frequencies = token_document_frequency(&summaries);
1304 let total_summaries = summaries.len();
1305 let mut candidates: Vec<ConsolidationCandidate> = summaries
1306 .into_iter()
1307 .map(|summary| build_consolidation_candidate(summary, &token_frequencies, total_summaries))
1308 .collect();
1309 candidates.sort_by(compare_candidates);
1310
1311 let mut kept = Vec::new();
1312 let mut counts_by_category: HashMap<String, usize> = HashMap::new();
1313 for candidate in candidates {
1314 if kept
1315 .iter()
1316 .any(|existing| should_skip_candidate(&candidate, existing))
1317 {
1318 continue;
1319 }
1320
1321 let count = counts_by_category
1322 .get(candidate.summary.category.as_str())
1323 .copied()
1324 .unwrap_or(0);
1325 if count >= category_fact_cap(candidate.summary.category.as_str()) {
1326 continue;
1327 }
1328
1329 counts_by_category
1330 .entry(candidate.summary.category.clone())
1331 .and_modify(|value| *value += 1)
1332 .or_insert(1);
1333 kept.push(candidate);
1334 }
1335
1336 kept.sort_by(compare_candidates);
1337 kept.into_iter()
1338 .map(|candidate| candidate.summary)
1339 .collect()
1340}
1341
1342fn summary_from_normalized_fact(fact: &serde_json::Value) -> Option<DdFactSummary> {
1343 let category = fact.get("category")?.as_str()?.to_string();
1344 let claim = fact.get("claim")?.as_str()?.trim().to_string();
1345 if claim.is_empty() {
1346 return None;
1347 }
1348
1349 let confidence = fact
1350 .get("confidence")
1351 .and_then(serde_json::Value::as_f64)
1352 .unwrap_or(0.5)
1353 .clamp(0.0, 1.0);
1354 let evidence_count = fact
1355 .get("source_indices")
1356 .and_then(serde_json::Value::as_array)
1357 .map_or(1, |values| values.len().max(1));
1358
1359 Some(DdFactSummary {
1360 category,
1361 claim,
1362 confidence,
1363 support_count: 1,
1364 evidence_count,
1365 })
1366}
1367
1368#[allow(clippy::float_cmp)]
1369fn merge_exact_summary(existing: &mut DdFactSummary, candidate: DdFactSummary) {
1370 existing.support_count += candidate.support_count;
1371 existing.evidence_count += candidate.evidence_count;
1372 if candidate.confidence > existing.confidence
1373 || (candidate.confidence == existing.confidence
1374 && candidate.claim.len() > existing.claim.len())
1375 {
1376 existing.claim = candidate.claim;
1377 }
1378 existing.confidence = existing.confidence.max(candidate.confidence);
1379}
1380
1381fn build_consolidation_candidate(
1382 summary: DdFactSummary,
1383 token_frequencies: &HashMap<String, usize>,
1384 total_summaries: usize,
1385) -> ConsolidationCandidate {
1386 let claim_tokens = informative_claim_tokens(&summary.claim);
1387 let topic_tokens: HashSet<String> = claim_tokens
1388 .iter()
1389 .filter(|token| !token.chars().any(|ch| ch.is_ascii_digit()))
1390 .cloned()
1391 .collect();
1392 let distinctive_tokens: HashSet<String> = claim_tokens
1393 .iter()
1394 .filter(|token| {
1395 token_frequencies.get(*token).copied().unwrap_or_default() * 2 <= total_summaries + 1
1396 })
1397 .cloned()
1398 .collect();
1399 let approximate = claim_is_approximate(&summary.claim);
1400 let numeric_tokens = numeric_claim_tokens(&summary.claim);
1401 let priority_score = fact_priority_score(&summary, approximate, numeric_tokens.len());
1402
1403 ConsolidationCandidate {
1404 summary,
1405 distinctive_tokens: if distinctive_tokens.is_empty() {
1406 claim_tokens.iter().cloned().collect()
1407 } else {
1408 distinctive_tokens
1409 },
1410 topic_tokens: if topic_tokens.is_empty() {
1411 claim_tokens.iter().cloned().collect()
1412 } else {
1413 topic_tokens
1414 },
1415 numeric_tokens,
1416 approximate,
1417 priority_score,
1418 }
1419}
1420
1421fn token_document_frequency(summaries: &[DdFactSummary]) -> HashMap<String, usize> {
1422 let mut frequencies = HashMap::new();
1423 for summary in summaries {
1424 let mut seen = HashSet::new();
1425 for token in informative_claim_tokens(&summary.claim) {
1426 if seen.insert(token.clone()) {
1427 frequencies
1428 .entry(token)
1429 .and_modify(|count| *count += 1)
1430 .or_insert(1);
1431 }
1432 }
1433 }
1434 frequencies
1435}
1436
1437fn informative_claim_tokens(claim: &str) -> Vec<String> {
1438 canonicalize_claim(claim)
1439 .split_whitespace()
1440 .filter(|token| token.len() > 2 && !dd_stopwords().contains(token))
1441 .map(ToOwned::to_owned)
1442 .collect()
1443}
1444
1445fn numeric_claim_tokens(claim: &str) -> Vec<String> {
1446 canonicalize_claim(claim)
1447 .split_whitespace()
1448 .filter(|token| token.chars().any(|ch| ch.is_ascii_digit()))
1449 .map(ToOwned::to_owned)
1450 .collect()
1451}
1452
1453fn claim_is_approximate(claim: &str) -> bool {
1454 let normalized = claim.to_ascii_lowercase();
1455 [
1456 "estimated",
1457 "estimate",
1458 "approximately",
1459 "approx",
1460 "about ",
1461 "over ",
1462 "under ",
1463 "close to",
1464 "around ",
1465 "range of",
1466 "between ",
1467 "currently",
1468 "historically",
1469 "more than",
1470 "less than",
1471 ]
1472 .iter()
1473 .any(|needle| normalized.contains(needle))
1474}
1475
1476#[allow(clippy::cast_precision_loss)]
1477fn fact_priority_score(
1478 summary: &DdFactSummary,
1479 approximate: bool,
1480 numeric_token_count: usize,
1481) -> f64 {
1482 let confidence_score = summary.confidence * 100.0;
1483 let support_bonus = summary.support_count as f64 * 6.0;
1484 let evidence_bonus = summary.evidence_count as f64 * 2.0;
1485 let exactness_bonus = if approximate { 0.0 } else { 5.0 };
1486 let numeric_bonus = if numeric_token_count > 0 { 2.0 } else { 0.0 };
1487 confidence_score + support_bonus + evidence_bonus + exactness_bonus + numeric_bonus
1488}
1489
1490fn compare_candidates(left: &ConsolidationCandidate, right: &ConsolidationCandidate) -> Ordering {
1491 category_sort_order(left.summary.category.as_str())
1492 .cmp(&category_sort_order(right.summary.category.as_str()))
1493 .then_with(|| {
1494 right
1495 .priority_score
1496 .partial_cmp(&left.priority_score)
1497 .unwrap_or(Ordering::Equal)
1498 })
1499 .then_with(|| right.summary.claim.len().cmp(&left.summary.claim.len()))
1500}
1501
1502fn category_sort_order(category: &str) -> usize {
1503 match category {
1504 "product" => 0,
1505 "customers" => 1,
1506 "technology" => 2,
1507 "competition" => 3,
1508 "market" => 4,
1509 "financials" => 5,
1510 "team" => 6,
1511 "governance" => 7,
1512 "risk" => 8,
1513 _ => 9,
1514 }
1515}
1516
1517fn category_fact_cap(category: &str) -> usize {
1518 match category {
1519 "technology" => 5,
1520 "financials" => 4,
1521 "customers" | "competition" => 3,
1522 _ => 2,
1523 }
1524}
1525
1526fn should_skip_candidate(
1527 candidate: &ConsolidationCandidate,
1528 existing: &ConsolidationCandidate,
1529) -> bool {
1530 if candidate.summary.category != existing.summary.category {
1531 return false;
1532 }
1533
1534 let similarity = token_similarity(&candidate.distinctive_tokens, &existing.distinctive_tokens);
1535 let topic_similarity = token_similarity(&candidate.topic_tokens, &existing.topic_tokens);
1536 if similarity >= 0.86 {
1537 return true;
1538 }
1539
1540 if !candidate.numeric_tokens.is_empty()
1541 && candidate.numeric_tokens == existing.numeric_tokens
1542 && similarity >= 0.55
1543 {
1544 return true;
1545 }
1546
1547 candidate.approximate
1548 && topic_similarity >= 0.5
1549 && (!existing.approximate || candidate.numeric_tokens == existing.numeric_tokens)
1550}
1551
1552#[allow(clippy::cast_precision_loss)]
1553fn token_similarity(left: &HashSet<String>, right: &HashSet<String>) -> f64 {
1554 if left.is_empty() || right.is_empty() {
1555 return 0.0;
1556 }
1557
1558 let intersection = left.intersection(right).count() as f64;
1559 let union = left.union(right).count() as f64;
1560 if union == 0.0 {
1561 0.0
1562 } else {
1563 intersection / union
1564 }
1565}
1566
1567fn dd_stopwords() -> &'static [&'static str] {
1568 &[
1569 "and",
1570 "for",
1571 "the",
1572 "with",
1573 "into",
1574 "that",
1575 "from",
1576 "their",
1577 "this",
1578 "those",
1579 "these",
1580 "across",
1581 "through",
1582 "using",
1583 "used",
1584 "helps",
1585 "help",
1586 "offer",
1587 "offers",
1588 "provides",
1589 "provide",
1590 "company",
1591 "companies",
1592 "solution",
1593 "solutions",
1594 ]
1595}
1596
1597fn existing_fact_signature(content: &str) -> Option<String> {
1598 let value = serde_json::from_str::<serde_json::Value>(content).ok()?;
1599 let normalized = normalize_dd_fact(&value)?;
1600 Some(dd_fact_signature(&normalized))
1601}
1602
1603fn normalize_dd_fact(fact: &serde_json::Value) -> Option<serde_json::Value> {
1604 let claim = fact.get("claim")?.as_str()?.trim();
1605 if claim.is_empty() {
1606 return None;
1607 }
1608
1609 let category = normalize_dd_category(
1610 fact.get("category").and_then(serde_json::Value::as_str),
1611 claim,
1612 );
1613 let confidence = fact
1614 .get("confidence")
1615 .and_then(serde_json::Value::as_f64)
1616 .unwrap_or(0.5)
1617 .clamp(0.0, 1.0);
1618 let source_indices = fact
1619 .get("source_indices")
1620 .and_then(serde_json::Value::as_array)
1621 .map(|values| {
1622 values
1623 .iter()
1624 .filter(|value| value.is_i64() || value.is_u64())
1625 .cloned()
1626 .collect::<Vec<_>>()
1627 })
1628 .unwrap_or_default();
1629
1630 Some(serde_json::json!({
1631 "claim": claim,
1632 "category": category,
1633 "source_indices": source_indices,
1634 "confidence": confidence,
1635 }))
1636}
1637
1638fn normalize_dd_category(raw_category: Option<&str>, claim: &str) -> &'static str {
1639 match raw_category
1640 .unwrap_or_default()
1641 .trim()
1642 .to_ascii_lowercase()
1643 .as_str()
1644 {
1645 "product" => {
1646 if claim_looks_technical(claim) {
1647 "technology"
1648 } else {
1649 "product"
1650 }
1651 }
1652 "customer" | "customers" => "customers",
1653 "technology" | "tech" | "platform" | "architecture" | "engineering" | "integrations"
1654 | "integration" | "stack" => "technology",
1655 "competition" | "competitor" | "competitors" => "competition",
1656 "market" | "positioning" => "market",
1657 "financial" | "financials" | "finance" | "funding" | "ownership" | "investors" => {
1658 "financials"
1659 }
1660 "team" | "leadership" | "management" => "team",
1661 "risk" => "risk",
1662 "governance" => "governance",
1663 _ => infer_dd_category_from_claim(claim),
1664 }
1665}
1666
1667fn infer_dd_category_from_claim(claim: &str) -> &'static str {
1668 let claim = claim.to_ascii_lowercase();
1669 if claim_looks_technical(&claim) {
1670 "technology"
1671 } else if claim.contains("customer")
1672 || claim.contains("clients")
1673 || claim.contains("serves ")
1674 || claim.contains("countries")
1675 {
1676 "customers"
1677 } else if claim.contains("funding")
1678 || claim.contains("raised")
1679 || claim.contains("investor")
1680 || claim.contains("acquired")
1681 || claim.contains("revenue")
1682 || claim.contains("arr")
1683 {
1684 "financials"
1685 } else if claim.contains("competitor") || claim.contains("competes") {
1686 "competition"
1687 } else if claim.contains("market") || claim.contains("major player") || claim.contains("idc") {
1688 "market"
1689 } else if claim.contains("chief ")
1690 || claim.contains("officer")
1691 || claim.contains("executive")
1692 || claim.contains("leadership")
1693 {
1694 "team"
1695 } else {
1696 "product"
1697 }
1698}
1699
1700fn claim_looks_technical(claim: &str) -> bool {
1701 let claim = claim.to_ascii_lowercase();
1702 [
1703 "technology",
1704 "architecture",
1705 "platform",
1706 "integration",
1707 "integrations",
1708 "api",
1709 "apis",
1710 "cloud",
1711 "threat intelligence",
1712 "attack surface",
1713 "monitor",
1714 "monitoring",
1715 "ctem",
1716 "exposure management",
1717 "internet-facing",
1718 "dark web",
1719 "open web",
1720 "deep web",
1721 "technical moat",
1722 ]
1723 .iter()
1724 .any(|needle| claim.contains(needle))
1725}
1726
1727fn dd_fact_signature(fact: &serde_json::Value) -> String {
1728 let category = fact
1729 .get("category")
1730 .and_then(serde_json::Value::as_str)
1731 .unwrap_or("product");
1732 let claim = fact
1733 .get("claim")
1734 .and_then(serde_json::Value::as_str)
1735 .unwrap_or_default();
1736 format!("{category}:{}", canonicalize_claim(claim))
1737}
1738
1739fn canonicalize_claim(claim: &str) -> String {
1740 claim
1741 .chars()
1742 .map(|ch| {
1743 if ch.is_ascii_alphanumeric() {
1744 ch.to_ascii_lowercase()
1745 } else {
1746 ' '
1747 }
1748 })
1749 .collect::<String>()
1750 .split_whitespace()
1751 .collect::<Vec<_>>()
1752 .join(" ")
1753}
1754
1755fn covered_dd_categories(hypotheses: &[Fact]) -> Vec<String> {
1756 let mut categories: Vec<String> = hypotheses
1757 .iter()
1758 .filter_map(|fact| {
1759 let value = serde_json::from_str::<serde_json::Value>(&fact.content).ok()?;
1760 let normalized = normalize_dd_fact(&value)?;
1761 normalized["category"].as_str().map(ToOwned::to_owned)
1762 })
1763 .collect::<HashSet<_>>()
1764 .into_iter()
1765 .collect();
1766 categories.sort();
1767 categories
1768}
1769
1770fn missing_expected_dd_categories(hypotheses: &[Fact]) -> Vec<&'static str> {
1771 let covered: HashSet<String> = covered_dd_categories(hypotheses).into_iter().collect();
1772 expected_dd_categories()
1773 .into_iter()
1774 .filter(|category| !covered.contains(*category))
1775 .collect()
1776}
1777
1778fn expected_dd_categories() -> [&'static str; 6] {
1779 [
1780 "product",
1781 "customers",
1782 "technology",
1783 "competition",
1784 "market",
1785 "financials",
1786 ]
1787}
1788
1789fn parse_json_array_response(
1790 raw: &str,
1791 field_name: &str,
1792) -> Result<Vec<serde_json::Value>, String> {
1793 let cleaned = strip_fences(raw);
1794 try_parse_json_array(cleaned, field_name).or_else(|first_error| {
1795 extract_first_json_value(cleaned)
1796 .filter(|candidate| *candidate != cleaned)
1797 .ok_or(first_error.clone())
1798 .and_then(|candidate| {
1799 try_parse_json_array(candidate, field_name).map_err(|second_error| {
1800 format!("{first_error}; recovered JSON failed: {second_error}")
1801 })
1802 })
1803 })
1804}
1805
1806fn try_parse_json_array(raw: &str, field_name: &str) -> Result<Vec<serde_json::Value>, String> {
1807 match serde_json::from_str::<serde_json::Value>(raw) {
1808 Ok(serde_json::Value::Array(values)) => Ok(values),
1809 Ok(serde_json::Value::Object(map)) => map
1810 .get(field_name)
1811 .and_then(serde_json::Value::as_array)
1812 .cloned()
1813 .ok_or_else(|| format!("expected object field `{field_name}` containing an array")),
1814 Ok(_) => Err(format!(
1815 "expected top-level JSON array or object with `{field_name}`"
1816 )),
1817 Err(error) => Err(error.to_string()),
1818 }
1819}
1820
1821fn extract_first_json_value(raw: &str) -> Option<&str> {
1822 let (start, _) = raw.char_indices().find(|(_, ch)| matches!(ch, '{' | '['))?;
1823 let mut stack = Vec::new();
1824 let mut in_string = false;
1825 let mut escaped = false;
1826
1827 for (offset, ch) in raw[start..].char_indices() {
1828 if in_string {
1829 if escaped {
1830 escaped = false;
1831 continue;
1832 }
1833 match ch {
1834 '\\' => escaped = true,
1835 '"' => in_string = false,
1836 _ => {}
1837 }
1838 continue;
1839 }
1840
1841 match ch {
1842 '"' => in_string = true,
1843 '{' => stack.push('}'),
1844 '[' => stack.push(']'),
1845 '}' | ']' => {
1846 if stack.pop() != Some(ch) {
1847 return None;
1848 }
1849 if stack.is_empty() {
1850 let end = start + offset + ch.len_utf8();
1851 return Some(&raw[start..end]);
1852 }
1853 }
1854 _ => {}
1855 }
1856 }
1857
1858 None
1859}
1860
1861fn is_relevant(title: &str, content: &str, url: &str, subject: &str) -> bool {
1862 let s = subject.to_lowercase();
1863 let t = title.to_lowercase();
1864 let b = content.to_lowercase();
1865 let u = url.to_lowercase();
1866 t.contains(&s)
1867 || b.contains(&s)
1868 || u.contains(&s.replace(' ', ""))
1869 || u.contains(&s.replace(' ', "-"))
1870}
1871
1872#[cfg(test)]
1873mod tests {
1874 use std::sync::Arc;
1875
1876 use converge_pack::{Context, ContextKey, Fact, ProposedFact, Suggestor};
1877
1878 use super::{
1879 DdError, DdLlm, SharedBudget, SynthesisSuggestor, canonicalize_claim,
1880 consolidate_dd_fact_values, extract_first_json_value, next_batch_bounds, normalize_dd_fact,
1881 parse_json_array_response,
1882 };
1883
1884 struct StubLlm;
1885
1886 #[async_trait::async_trait]
1887 impl DdLlm for StubLlm {
1888 async fn complete(&self, prompt: &str) -> Result<String, DdError> {
1889 let _ = prompt;
1890 Ok("{}".to_string())
1891 }
1892 }
1893
1894 struct StubContext {
1895 hypothesis_count: usize,
1896 has_proposals: bool,
1897 }
1898
1899 impl Context for StubContext {
1900 fn has(&self, key: ContextKey) -> bool {
1901 match key {
1902 ContextKey::Hypotheses => self.hypothesis_count > 0,
1903 ContextKey::Proposals => self.has_proposals,
1904 _ => false,
1905 }
1906 }
1907
1908 fn get(&self, _key: ContextKey) -> &[Fact] {
1909 &[]
1910 }
1911
1912 fn get_proposals(&self, _key: ContextKey) -> &[ProposedFact] {
1913 &[]
1914 }
1915
1916 fn count(&self, key: ContextKey) -> usize {
1917 match key {
1918 ContextKey::Hypotheses => self.hypothesis_count,
1919 _ => 0,
1920 }
1921 }
1922 }
1923
1924 #[test]
1925 fn synthesis_suggestor_is_always_schedulable() {
1926 let budget = Arc::new(SharedBudget::new().with_limit("llm", 1));
1927 let suggestor = SynthesisSuggestor::new("Acme", budget, Arc::new(StubLlm));
1928
1929 assert!(suggestor.dependencies().is_empty());
1930 }
1931
1932 #[test]
1933 fn synthesis_accepts_after_hypotheses_stabilize() {
1934 let budget = Arc::new(SharedBudget::new().with_limit("llm", 1));
1935 let suggestor = SynthesisSuggestor::new("Acme", budget, Arc::new(StubLlm))
1936 .with_required_stable_cycles(2);
1937
1938 let first_fact_wave = StubContext {
1939 hypothesis_count: 5,
1940 has_proposals: false,
1941 };
1942 let first_stable_cycle = StubContext {
1943 hypothesis_count: 5,
1944 has_proposals: false,
1945 };
1946 let second_stable_cycle = StubContext {
1947 hypothesis_count: 5,
1948 has_proposals: false,
1949 };
1950
1951 assert!(!suggestor.accepts(&first_fact_wave));
1952 assert!(!suggestor.accepts(&first_stable_cycle));
1953 assert!(suggestor.accepts(&second_stable_cycle));
1954 }
1955
1956 #[test]
1957 fn parse_json_array_response_accepts_wrapped_object() {
1958 let parsed = parse_json_array_response(
1959 r#"{"facts":[{"claim":"Acme sells software","confidence":0.9}]}"#,
1960 "facts",
1961 )
1962 .expect("wrapped array should parse");
1963
1964 assert_eq!(parsed.len(), 1);
1965 assert_eq!(parsed[0]["claim"], "Acme sells software");
1966 }
1967
1968 #[test]
1969 fn parse_json_array_response_accepts_legacy_array_shape() {
1970 let parsed = parse_json_array_response(
1971 r#"[{"query":"Acme competitors","mode":"breadth","reason":"market"}]"#,
1972 "strategies",
1973 )
1974 .expect("legacy array should parse");
1975
1976 assert_eq!(parsed.len(), 1);
1977 assert_eq!(parsed[0]["query"], "Acme competitors");
1978 }
1979
1980 #[test]
1981 fn parse_json_array_response_recovers_json_from_prose() {
1982 let parsed = parse_json_array_response(
1983 "Here is the JSON you requested:\n```json\n{\"facts\":[{\"claim\":\"Acme grows\",\"confidence\":0.7}]}\n```\nThanks.",
1984 "facts",
1985 )
1986 .expect("embedded JSON should parse");
1987
1988 assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0]["claim"], "Acme grows");
    }

    #[test]
    fn extract_first_json_value_handles_nested_arrays_and_objects() {
        let extracted = extract_first_json_value(
            "prefix {\"facts\":[{\"claim\":\"A\",\"source_indices\":[0,1]}]} suffix",
        )
        .expect("should find first JSON value");

        assert_eq!(
            extracted,
            r#"{"facts":[{"claim":"A","source_indices":[0,1]}]}"#
        );
    }

    #[test]
    fn next_batch_bounds_advances_through_unprocessed_signals() {
        assert_eq!(next_batch_bounds(37, 0, 15), (0, 15));
        assert_eq!(next_batch_bounds(37, 15, 15), (15, 30));
        assert_eq!(next_batch_bounds(37, 30, 15), (30, 37));
        assert_eq!(next_batch_bounds(37, 37, 15), (37, 37));
    }

    #[test]
    fn normalize_dd_fact_reclassifies_technical_product_claims() {
        let normalized = normalize_dd_fact(&serde_json::json!({
            "claim": "Outpost24's Sweepatic Platform monitors internet-facing assets for attack surface management.",
            "category": "product",
            "source_indices": [0],
            "confidence": 0.9,
        }))
        .expect("fact should normalize");

        assert_eq!(normalized["category"], "technology");
    }

    #[test]
    fn canonicalize_claim_ignores_case_and_punctuation() {
        assert_eq!(
            canonicalize_claim("Outpost24 raised $23.8M!"),
            canonicalize_claim("outpost24 raised 23 8m")
        );
    }

    #[test]
    fn consolidate_dd_fact_values_merges_exact_duplicates() {
        let summaries = consolidate_dd_fact_values(vec![
            serde_json::json!({
                "claim": "Outpost24 offers a 100% open API for easy integration into security operations.",
                "category": "technology",
                "source_indices": [0],
                "confidence": 0.9,
            }),
            serde_json::json!({
                "claim": "Outpost24 offers a 100% open API for easy integration into security operations.",
                "category": "technology",
                "source_indices": [1],
                "confidence": 0.8,
            }),
        ]);

        assert_eq!(summaries.len(), 1);
        assert_eq!(summaries[0].support_count, 2);
        assert_eq!(summaries[0].evidence_count, 2);
    }

    #[test]
    fn consolidate_dd_fact_values_drops_vague_same_topic_repeats() {
        let summaries = consolidate_dd_fact_values(vec![
            serde_json::json!({
                "claim": "Outpost24 has 195 employees.",
                "category": "team",
                "source_indices": [0],
                "confidence": 0.9,
            }),
            serde_json::json!({
                "claim": "Outpost24 has over 200 employees.",
                "category": "team",
                "source_indices": [1],
                "confidence": 0.7,
            }),
            serde_json::json!({
                "claim": "Outpost24 offers a 100% open API for easy integration into security operations.",
                "category": "technology",
                "source_indices": [2],
                "confidence": 0.9,
            }),
        ]);

        let team_facts: Vec<_> = summaries
            .iter()
            .filter(|summary| summary.category == "team")
            .collect();
        assert_eq!(team_facts.len(), 1);
        assert_eq!(team_facts[0].claim, "Outpost24 has 195 employees.");
    }

    #[test]
    fn consolidate_dd_fact_values_preserves_conflicting_exact_financials() {
        let summaries = consolidate_dd_fact_values(vec![
            serde_json::json!({
                "claim": "Outpost24's 2023 revenue was $42.19M.",
                "category": "financials",
                "source_indices": [0],
                "confidence": 0.9,
            }),
            serde_json::json!({
                "claim": "Outpost24 generates $67.5 million in revenue.",
                "category": "financials",
                "source_indices": [1],
                "confidence": 0.9,
            }),
        ]);

        assert_eq!(summaries.len(), 2);
    }

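    // Validation edge cases: normalize_dd_fact is expected to drop facts that
    // lack a usable claim and to keep confidence values within [0.0, 1.0],
    // defaulting to 0.5 when the field is absent.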
    #[test]
    fn normalize_dd_fact_rejects_empty_claim() {
        assert!(
            normalize_dd_fact(&serde_json::json!({
                "claim": "",
                "category": "product",
            }))
            .is_none()
        );
    }

    #[test]
    fn normalize_dd_fact_rejects_whitespace_only_claim() {
        assert!(
            normalize_dd_fact(&serde_json::json!({
                "claim": " ",
                "category": "product",
            }))
            .is_none()
        );
    }

    #[test]
    fn normalize_dd_fact_rejects_missing_claim() {
        assert!(
            normalize_dd_fact(&serde_json::json!({
                "category": "product",
            }))
            .is_none()
        );
    }

    #[test]
    fn normalize_dd_fact_clamps_confidence() {
        let normalized = normalize_dd_fact(&serde_json::json!({
            "claim": "test",
            "category": "product",
            "confidence": 5.0,
        }))
        .unwrap();
        assert_eq!(normalized["confidence"], 1.0);

        let normalized = normalize_dd_fact(&serde_json::json!({
            "claim": "test",
            "category": "product",
            "confidence": -1.0,
        }))
        .unwrap();
        assert_eq!(normalized["confidence"], 0.0);
    }

    #[test]
    fn normalize_dd_fact_defaults_missing_confidence() {
        let normalized = normalize_dd_fact(&serde_json::json!({
            "claim": "test claim",
            "category": "product",
        }))
        .unwrap();
        assert_eq!(normalized["confidence"], 0.5);
    }

    #[test]
    fn normalize_dd_fact_filters_non_integer_source_indices() {
        let normalized = normalize_dd_fact(&serde_json::json!({
            "claim": "test",
            "category": "product",
            "source_indices": [0, "bad", 2, null, 3],
        }))
        .unwrap();
        let indices = normalized["source_indices"].as_array().unwrap();
        assert_eq!(indices.len(), 3);
    }

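    // Rejection paths: a response that is not a JSON array (or an object
    // carrying the array under the expected field) must surface an error, and
    // extract_first_json_value returns None rather than guessing at malformed
    // input.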
    #[test]
    fn parse_json_array_response_rejects_plain_text() {
        assert!(parse_json_array_response("just some text", "facts").is_err());
    }

    #[test]
    fn parse_json_array_response_rejects_object_with_wrong_field() {
        assert!(parse_json_array_response(r#"{"results":[{"claim":"X"}]}"#, "facts").is_err());
    }

    #[test]
    fn parse_json_array_response_rejects_scalar() {
        assert!(parse_json_array_response("42", "facts").is_err());
        assert!(parse_json_array_response("true", "facts").is_err());
        assert!(parse_json_array_response(r#""string""#, "facts").is_err());
    }

    #[test]
    fn extract_first_json_value_returns_none_for_no_json() {
        assert!(extract_first_json_value("no json here").is_none());
    }

    #[test]
    fn extract_first_json_value_returns_none_for_mismatched_braces() {
        assert!(extract_first_json_value("{unclosed").is_none());
        assert!(extract_first_json_value("[}").is_none());
    }

    #[test]
    fn extract_first_json_value_handles_escaped_quotes_in_strings() {
        let result = extract_first_json_value(r#"prefix {"key":"val\"ue"} suffix"#);
        assert!(result.is_some());
        let parsed: serde_json::Value = serde_json::from_str(result.unwrap()).unwrap();
        assert_eq!(parsed["key"], r#"val"ue"#);
    }

    #[test]
    fn consolidate_dd_fact_values_handles_empty_input() {
        assert!(consolidate_dd_fact_values(vec![]).is_empty());
    }

    #[test]
    fn consolidate_dd_fact_values_handles_all_invalid_facts() {
        let summaries = consolidate_dd_fact_values(vec![
            serde_json::json!({"claim": "", "category": "product"}),
            serde_json::json!({"no_claim": true}),
            serde_json::json!(null),
        ]);
        assert!(summaries.is_empty());
    }

    #[test]
    fn next_batch_bounds_zero_total() {
        assert_eq!(next_batch_bounds(0, 0, 15), (0, 0));
    }

    #[test]
    fn next_batch_bounds_processed_exceeds_total() {
        assert_eq!(next_batch_bounds(5, 100, 15), (5, 5));
    }

    #[test]
    fn canonicalize_claim_handles_empty_string() {
        assert_eq!(canonicalize_claim(""), "");
    }

    #[test]
    fn canonicalize_claim_handles_only_punctuation() {
        assert_eq!(canonicalize_claim("!!!...???"), "");
    }

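    // Gating behaviour: SynthesisSuggestor::accepts should decline both when
    // other proposals are still pending and when no hypotheses exist yet.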
    #[test]
    fn synthesis_does_not_accept_when_proposals_exist() {
        let budget = Arc::new(SharedBudget::new().with_limit("llm", 1));
        let suggestor = SynthesisSuggestor::new("Acme", budget, Arc::new(StubLlm));

        let ctx_with_proposals = StubContext {
            hypothesis_count: 10,
            has_proposals: true,
        };
        assert!(!suggestor.accepts(&ctx_with_proposals));
    }

    #[test]
    fn synthesis_does_not_accept_without_hypotheses() {
        let budget = Arc::new(SharedBudget::new().with_limit("llm", 1));
        let suggestor = SynthesisSuggestor::new("Acme", budget, Arc::new(StubLlm));

        let empty_ctx = StubContext {
            hypothesis_count: 0,
            has_proposals: false,
        };
        assert!(!suggestor.accepts(&empty_ctx));
    }

    #[test]
    fn dd_error_infra_vs_non_infra() {
        assert!(
            DdError::CreditsExhausted {
                provider: "x".into(),
                detail: "y".into()
            }
            .is_infra_failure()
        );
        assert!(
            DdError::RateLimited {
                provider: "x".into(),
                retry_after_ms: None
            }
            .is_infra_failure()
        );
        assert!(
            DdError::ProviderUnavailable {
                provider: "x".into(),
                detail: "y".into()
            }
            .is_infra_failure()
        );

        assert!(
            !DdError::BadResponse {
                provider: "x".into(),
                detail: "y".into()
            }
            .is_infra_failure()
        );
        assert!(
            !DdError::ParseFailed {
                provider: "x".into(),
                detail: "y".into()
            }
            .is_infra_failure()
        );
        assert!(
            !DdError::PromptTooLarge {
                provider: "x".into(),
                tokens: None
            }
            .is_infra_failure()
        );
    }

    #[test]
    fn dd_error_only_credits_exhausted_is_fatal() {
        assert!(
            DdError::CreditsExhausted {
                provider: "x".into(),
                detail: "y".into()
            }
            .is_fatal()
        );
        assert!(
            !DdError::RateLimited {
                provider: "x".into(),
                retry_after_ms: None
            }
            .is_fatal()
        );
        assert!(
            !DdError::ProviderUnavailable {
                provider: "x".into(),
                detail: "y".into()
            }
            .is_fatal()
        );
    }

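    // Property-based checks: the same helpers are exercised with arbitrary
    // inputs, asserting invariants (idempotence, no panics, valid batch
    // bounds) rather than exact outputs.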
    #[allow(clippy::cast_precision_loss)]
    mod proptests {
        use super::*;
        use proptest::prelude::*;

        proptest! {
            #[test]
            fn canonicalize_is_idempotent(claim in ".*") {
                let first = canonicalize_claim(&claim);
                let second = canonicalize_claim(&first);
                prop_assert_eq!(first, second);
            }

            #[test]
            fn canonicalize_is_case_insensitive(claim in "[a-zA-Z0-9 ]{1,100}") {
                prop_assert_eq!(
                    canonicalize_claim(&claim),
                    canonicalize_claim(&claim.to_uppercase())
                );
            }

            #[test]
            fn normalize_dd_fact_never_panics(
                claim in ".*",
                category in ".*",
                confidence in proptest::num::f64::ANY,
            ) {
                let _ = normalize_dd_fact(&serde_json::json!({
                    "claim": claim,
                    "category": category,
                    "confidence": confidence,
                }));
            }

            #[test]
            fn normalize_preserves_non_empty_claims(
                claim in "[a-zA-Z]{1,50}",
                category in prop_oneof![
                    Just("product"), Just("technology"), Just("financials"),
                    Just("customers"), Just("competition"), Just("market"),
                ],
            ) {
                let normalized = normalize_dd_fact(&serde_json::json!({
                    "claim": claim,
                    "category": category,
                    "confidence": 0.8,
                }));
                prop_assert!(normalized.is_some());
                let n = normalized.unwrap();
                prop_assert!(!n["claim"].as_str().unwrap().is_empty());
            }

            #[test]
            fn consolidate_never_panics(
                n in 0_usize..20,
            ) {
                let categories = ["product", "technology", "financials"];
                let facts: Vec<serde_json::Value> = (0..n).map(|i| {
                    serde_json::json!({
                        "claim": format!("Fact number {i} about the company"),
                        "category": categories[i % 3],
                        "source_indices": [i],
                        "confidence": 0.5 + (i as f64 * 0.02),
                    })
                }).collect();
                let result = consolidate_dd_fact_values(facts);
                prop_assert!(result.len() <= n);
            }

            #[test]
            fn next_batch_bounds_always_valid(
                total in 0_usize..1000,
                processed in 0_usize..1000,
                max_batch in 1_usize..100,
            ) {
                let (start, end) = next_batch_bounds(total, processed, max_batch);
                prop_assert!(start <= total);
                prop_assert!(end <= total);
                prop_assert!(start <= end);
                prop_assert!(end - start <= max_batch);
            }

            #[test]
            fn extract_first_json_value_never_panics(input in ".*") {
                let _ = extract_first_json_value(&input);
            }

            #[test]
            fn parse_json_array_response_never_panics(
                input in ".*",
                field in "[a-z]{1,10}",
            ) {
                let _ = parse_json_array_response(&input, &field);
            }
        }
    }
}