Skip to main content

llmtxt_core/
semantic.rs

1//! Semantic similarity primitives for embedding-based document comparison.
2//!
3//! This module operates on **pre-computed** embeddings supplied by the caller.
4//! It never calls external APIs — all network I/O is the backend's responsibility.
5//! Functions accept JSON strings for WASM compatibility; native callers can use
6//! the struct-based helpers directly.
7
8use serde::{Deserialize, Serialize};
9
10#[cfg(feature = "wasm")]
11use wasm_bindgen::prelude::*;
12
13// ── Vector math ──────────────────────────────────────────────────
14
/// Cosine similarity between two embedding vectors.
///
/// Returns a value in `[-1.0, 1.0]`:
/// - `1.0`  — identical direction
/// - `0.0`  — orthogonal (unrelated)
/// - `-1.0` — opposite direction
///
/// Returns `0.0` for empty input, mismatched lengths, zero-magnitude vectors,
/// and whenever the quotient is not a finite number (a `NaN`/infinite
/// component, or overflow/underflow of an intermediate sum), so callers never
/// have to rank a `NaN` score.
pub fn cosine_similarity(a: &[f64], b: &[f64]) -> f64 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }
    let dot: f64 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let mag_a = a.iter().map(|x| x * x).sum::<f64>().sqrt();
    let mag_b = b.iter().map(|x| x * x).sum::<f64>().sqrt();
    if mag_a == 0.0 || mag_b == 0.0 {
        return 0.0;
    }
    let cosine = dot / (mag_a * mag_b);
    if cosine.is_finite() {
        // Rounding can push the quotient marginally outside the mathematical
        // range; clamp so the documented bounds always hold.
        cosine.clamp(-1.0, 1.0)
    } else {
        0.0
    }
}
35
36// ── Data types ───────────────────────────────────────────────────
37
38/// A document section paired with its pre-computed embedding (deserialized from caller-supplied JSON).
39#[derive(Debug, Clone, Deserialize)]
40pub struct EmbeddedSection {
41    /// Section heading title; the diff compares titles trimmed and case-insensitively.
42    pub title: String,
43    /// Raw text content (excluding the heading line itself).
44    pub content: String,
45    /// Embedding vector produced by the backend embedding provider; sections are paired by its cosine similarity.
46    pub embedding: Vec<f64>,
47}
48
49/// How a section from version A maps to version B (serialized in camelCase, e.g. `"matched"`).
50#[derive(Debug, Clone, Serialize, Deserialize)]
51#[serde(rename_all = "camelCase")]
52pub enum SectionAlignment {
53    /// Best match in B has the same title (trimmed, case-insensitive), or a different title with similarity below 0.85.
54    Matched,
55    /// Different title but high content similarity (≥ 0.85).
56    Renamed,
57    /// Section of version B that no section of version A selected as its best match.
58    Added,
59    /// Section of version A whose best similarity against version B is below 0.40 (or B is empty).
60    Removed,
61}
62
63/// Per-section similarity record: one row per version-A section plus one row per `Added` version-B section.
64#[derive(Debug, Clone, Serialize, Deserialize)]
65#[serde(rename_all = "camelCase")]
66pub struct SectionSimilarity {
67    /// Section heading from version A (empty string for `Added` sections).
68    pub section_a: String,
69    /// Matched section heading from version B (empty string for `Removed` sections).
70    pub section_b: String,
71    /// Cosine similarity of the paired section embeddings; always `0.0` for `Added` and `Removed` rows.
72    pub similarity: f64,
73    /// How the section maps between versions.
74    pub alignment: SectionAlignment,
75}
76
77/// A semantic change annotation for a paired (matched or renamed) section.
78#[derive(Debug, Clone, Serialize, Deserialize)]
79#[serde(rename_all = "camelCase")]
80pub struct SemanticChange {
81    /// One of: `"unchanged"` (≥ 0.95), `"rephrased"` (≥ 0.85), `"modified"` (≥ 0.70), `"rewritten"` (otherwise).
82    pub change_type: String,
83    /// Section title this change refers to (the diff passes the version-A title).
84    pub section: String,
85    /// Cosine similarity score for this section pair.
86    pub similarity: f64,
87    /// Human-readable summary of what changed, including the score rounded to two decimals.
88    pub description: String,
89}
90
91/// Full result of a semantic diff between two document versions.
92#[derive(Debug, Clone, Serialize, Deserialize)]
93#[serde(rename_all = "camelCase")]
94pub struct SemanticDiffResult {
95    /// Unweighted mean of the per-section similarities that are greater than `0.0` (the paired sections).
96    /// `Added`/`Removed` rows carry `0.0` and are left out of the mean; `0.0` when nothing was paired.
97    pub overall_similarity: f64,
98    /// Per-section comparison details: version-A sections in input order, then `Added` version-B sections.
99    pub section_similarities: Vec<SectionSimilarity>,
100    /// Change annotations for matched/renamed sections.
101    pub semantic_changes: Vec<SemanticChange>,
102}
103
104/// Compute cosine similarity between two embedding vectors supplied as JSON arrays.
105///
106/// WASM entry point for [`cosine_similarity`].
107///
108/// Both arguments must be JSON arrays of numbers, e.g. `[0.1, 0.2, 0.3]`.
109/// Returns a value in `[-1.0, 1.0]`, or `0.0` on parse error.
110///
111/// # Examples (TypeScript)
112/// ```ts
113/// import { cosineSimilarity } from 'llmtxt';
114/// const sim = cosineSimilarity('[1.0, 0.0]', '[0.0, 1.0]'); // 0.0 — orthogonal
115/// ```
116#[cfg_attr(feature = "wasm", wasm_bindgen)]
117pub fn cosine_similarity_wasm(a_json: &str, b_json: &str) -> f64 {
118    let a: Vec<f64> = match serde_json::from_str(a_json) {
119        Ok(v) => v,
120        Err(_) => return 0.0,
121    };
122    let b: Vec<f64> = match serde_json::from_str(b_json) {
123        Ok(v) => v,
124        Err(_) => return 0.0,
125    };
126    cosine_similarity(&a, &b)
127}
128
129// ── Semantic diff ─────────────────────────────────────────────────
130
131/// Classify the type of semantic change for a matched section pair.
132fn classify_change(title: &str, similarity: f64) -> SemanticChange {
133    let (change_type, description) = if similarity >= 0.95 {
134        (
135            "unchanged",
136            format!("Section '{title}' is semantically identical (similarity {similarity:.2})"),
137        )
138    } else if similarity >= 0.85 {
139        (
140            "rephrased",
141            format!(
142                "Section '{title}' expresses the same meaning with different wording (similarity {similarity:.2})"
143            ),
144        )
145    } else if similarity >= 0.70 {
146        (
147            "modified",
148            format!("Section '{title}' has been partially changed (similarity {similarity:.2})"),
149        )
150    } else {
151        (
152            "rewritten",
153            format!(
154                "Section '{title}' has been substantially rewritten (similarity {similarity:.2})"
155            ),
156        )
157    };
158    SemanticChange {
159        change_type: change_type.to_string(),
160        section: title.to_string(),
161        similarity,
162        description,
163    }
164}
165
166/// Compute a semantic diff between two sets of pre-embedded sections (native API).
167///
168/// For each section in `a`, finds the best-matching section in `b` by cosine
169/// similarity, then classifies the alignment and change type.
170pub fn semantic_diff_native(
171    sections_a: &[EmbeddedSection],
172    sections_b: &[EmbeddedSection],
173) -> SemanticDiffResult {
174    // Track which sections in B have already been matched.
175    let mut matched_b: Vec<bool> = vec![false; sections_b.len()];
176    let mut section_similarities: Vec<SectionSimilarity> = Vec::new();
177    let mut semantic_changes: Vec<SemanticChange> = Vec::new();
178
179    // ── Pass 1: match sections in A to the best section in B ─────
180    for sec_a in sections_a {
181        if sections_b.is_empty() {
182            section_similarities.push(SectionSimilarity {
183                section_a: sec_a.title.clone(),
184                section_b: String::new(),
185                similarity: 0.0,
186                alignment: SectionAlignment::Removed,
187            });
188            continue;
189        }
190
191        // Find the most similar section in B.
192        let (best_idx, best_sim) = sections_b
193            .iter()
194            .enumerate()
195            .map(|(i, sec_b)| (i, cosine_similarity(&sec_a.embedding, &sec_b.embedding)))
196            .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
197            .unwrap_or((0, 0.0));
198
199        // Determine alignment.
200        if best_sim < 0.40 {
201            // No meaningful match — treat as removed.
202            section_similarities.push(SectionSimilarity {
203                section_a: sec_a.title.clone(),
204                section_b: String::new(),
205                similarity: 0.0,
206                alignment: SectionAlignment::Removed,
207            });
208        } else {
209            let sec_b = &sections_b[best_idx];
210            matched_b[best_idx] = true;
211
212            let alignment =
213                if sec_a.title.trim().to_lowercase() == sec_b.title.trim().to_lowercase() {
214                    SectionAlignment::Matched
215                } else if best_sim >= 0.85 {
216                    SectionAlignment::Renamed
217                } else {
218                    SectionAlignment::Matched
219                };
220
221            section_similarities.push(SectionSimilarity {
222                section_a: sec_a.title.clone(),
223                section_b: sec_b.title.clone(),
224                similarity: best_sim,
225                alignment,
226            });
227
228            semantic_changes.push(classify_change(&sec_a.title, best_sim));
229        }
230    }
231
232    // ── Pass 2: unmatched sections in B are Added ─────────────────
233    for (i, sec_b) in sections_b.iter().enumerate() {
234        if !matched_b[i] {
235            section_similarities.push(SectionSimilarity {
236                section_a: String::new(),
237                section_b: sec_b.title.clone(),
238                similarity: 0.0,
239                alignment: SectionAlignment::Added,
240            });
241        }
242    }
243
244    // ── Overall similarity: mean of non-zero similarities ─────────
245    let non_zero: Vec<f64> = section_similarities
246        .iter()
247        .filter(|s| s.similarity > 0.0)
248        .map(|s| s.similarity)
249        .collect();
250
251    let overall_similarity = if non_zero.is_empty() {
252        0.0
253    } else {
254        let sum: f64 = non_zero.iter().sum();
255        sum / non_zero.len() as f64
256    };
257
258    SemanticDiffResult {
259        overall_similarity,
260        section_similarities,
261        semantic_changes,
262    }
263}
264
265/// Compute semantic diff from JSON strings (WASM / backend entry point).
266///
267/// `sections_a_json` and `sections_b_json` must each be a JSON array of objects
268/// with the shape `{ title: string, content: string, embedding: number[] }`.
269///
270/// Returns a JSON-serialised [`SemanticDiffResult`], or `{"error":"..."}` on failure.
271#[cfg_attr(feature = "wasm", wasm_bindgen)]
272pub fn semantic_diff(sections_a_json: &str, sections_b_json: &str) -> String {
273    let sections_a: Vec<EmbeddedSection> = match serde_json::from_str(sections_a_json) {
274        Ok(v) => v,
275        Err(e) => return format!(r#"{{"error":"Invalid sections_a JSON: {e}"}}"#),
276    };
277    let sections_b: Vec<EmbeddedSection> = match serde_json::from_str(sections_b_json) {
278        Ok(v) => v,
279        Err(e) => return format!(r#"{{"error":"Invalid sections_b JSON: {e}"}}"#),
280    };
281
282    let result = semantic_diff_native(&sections_a, &sections_b);
283    serde_json::to_string(&result)
284        .unwrap_or_else(|e| format!(r#"{{"error":"Serialization: {e}"}}"#))
285}
286
287// ── Semantic consensus ────────────────────────────────────────────
288
289/// A single agent review with a pre-computed embedding of its content (JSON keys are camelCase, e.g. `reviewerId`).
290#[derive(Debug, Clone, Deserialize)]
291#[serde(rename_all = "camelCase")]
292pub struct EmbeddedReview {
293    /// Agent/reviewer identifier; echoed back in cluster member and outlier lists.
294    pub reviewer_id: String,
295    /// Raw review text content.
296    pub content: String,
297    /// Embedding vector of the review content; reviews are compared by its cosine similarity.
298    pub embedding: Vec<f64>,
299}
300
301/// A cluster of reviewers whose embeddings are mutually similar (every pair meets the threshold).
302#[derive(Debug, Clone, Serialize, Deserialize)]
303#[serde(rename_all = "camelCase")]
304pub struct ReviewCluster {
305    /// Reviewer IDs that belong to this cluster.
306    pub members: Vec<String>,
307    /// Average pairwise cosine similarity within the cluster (`1.0` for a single-member cluster).
308    pub avg_similarity: f64,
309}
310
311/// Result of semantic consensus evaluation across a set of reviews.
312#[derive(Debug, Clone, Serialize, Deserialize)]
313#[serde(rename_all = "camelCase")]
314pub struct SemanticConsensusResult {
315    /// `true` when the largest cluster contains > 50% of reviewers (always `false` for an empty review set).
316    pub consensus: bool,
317    /// Mean pairwise cosine similarity across all review pairs (`1.0` for a single review, `0.0` for none).
318    pub agreement_score: f64,
319    /// Agreement clusters ordered by size (largest first).
320    pub clusters: Vec<ReviewCluster>,
321    /// Reviewer IDs whose embeddings fall outside the majority (largest) cluster.
322    pub outliers: Vec<String>,
323}
324
325/// Evaluate semantic consensus across a set of reviews (native API).
326///
327/// `threshold` — minimum cosine similarity for two reviews to be considered
328/// in agreement (recommended: 0.80).
329pub fn semantic_consensus_native(
330    reviews: &[EmbeddedReview],
331    threshold: f64,
332) -> SemanticConsensusResult {
333    if reviews.is_empty() {
334        return SemanticConsensusResult {
335            consensus: false,
336            agreement_score: 0.0,
337            clusters: vec![],
338            outliers: vec![],
339        };
340    }
341
342    let n = reviews.len();
343
344    // ── Pairwise similarity matrix ────────────────────────────────
345    // `sims[i][j]` = cosine_similarity(reviews[i].embedding, reviews[j].embedding)
346    let mut sims = vec![vec![0.0f64; n]; n];
347    for (i, review_i) in reviews.iter().enumerate() {
348        sims[i][i] = 1.0;
349        for j in (i + 1)..n {
350            let s = cosine_similarity(&review_i.embedding, &reviews[j].embedding);
351            sims[i][j] = s;
352            sims[j][i] = s;
353        }
354    }
355
356    // ── Overall agreement score: mean of off-diagonal upper triangle ─
357    let pair_count = n * (n - 1) / 2;
358    let agreement_score = if pair_count == 0 {
359        1.0 // single reviewer — trivially unanimous
360    } else {
361        // Sum all upper-triangle pairs without range-index loops (Clippy-clean).
362        let total: f64 = sims
363            .iter()
364            .enumerate()
365            .flat_map(|(i, row)| row.iter().enumerate().skip(i + 1).map(|(_, &v)| v))
366            .sum();
367        total / pair_count as f64
368    };
369
370    // ── Greedy clustering: assign each reviewer to the first cluster
371    // whose centroid (represented by the founding member) is similar
372    // enough, otherwise start a new cluster. ─────────────────────────
373    let mut clusters: Vec<Vec<usize>> = Vec::new(); // indices into `reviews`
374
375    'outer: for (i, _) in reviews.iter().enumerate() {
376        for cluster in &mut clusters {
377            // Check similarity against all current cluster members (complete-linkage).
378            if cluster.iter().all(|&j| sims[i][j] >= threshold) {
379                cluster.push(i);
380                continue 'outer;
381            }
382        }
383        clusters.push(vec![i]);
384    }
385
386    // Sort clusters by size (largest first).
387    clusters.sort_by_key(|c| std::cmp::Reverse(c.len()));
388
389    // Build result clusters with named members.
390    let result_clusters: Vec<ReviewCluster> = clusters
391        .iter()
392        .map(|members| {
393            let ids: Vec<String> = members
394                .iter()
395                .map(|&i| reviews[i].reviewer_id.clone())
396                .collect();
397            let avg_sim = if members.len() == 1 {
398                1.0
399            } else {
400                let mut total = 0.0;
401                let mut count = 0;
402                for a in 0..members.len() {
403                    for b in (a + 1)..members.len() {
404                        total += sims[members[a]][members[b]];
405                        count += 1;
406                    }
407                }
408                if count > 0 { total / count as f64 } else { 1.0 }
409            };
410            ReviewCluster {
411                members: ids,
412                avg_similarity: avg_sim,
413            }
414        })
415        .collect();
416
417    // ── Consensus: majority cluster covers > 50% of reviewers ─────
418    let majority_size = result_clusters
419        .first()
420        .map(|c| c.members.len())
421        .unwrap_or(0);
422    let consensus = majority_size * 2 > n; // strict majority
423
424    // Outliers: everyone not in the majority cluster.
425    let majority_members: std::collections::HashSet<&str> = result_clusters
426        .first()
427        .map(|c| c.members.iter().map(String::as_str).collect())
428        .unwrap_or_default();
429
430    let outliers: Vec<String> = reviews
431        .iter()
432        .filter(|r| !majority_members.contains(r.reviewer_id.as_str()))
433        .map(|r| r.reviewer_id.clone())
434        .collect();
435
436    SemanticConsensusResult {
437        consensus,
438        agreement_score,
439        clusters: result_clusters,
440        outliers,
441    }
442}
443
444/// Evaluate semantic consensus from a JSON array of reviews (WASM / backend entry point).
445///
446/// `reviews_json` must be a JSON array of objects with the shape
447/// `{ reviewerId: string, content: string, embedding: number[] }`.
448///
449/// Returns a JSON-serialised [`SemanticConsensusResult`], or `{"error":"..."}` on failure.
450#[cfg_attr(feature = "wasm", wasm_bindgen)]
451pub fn semantic_consensus(reviews_json: &str, threshold: f64) -> String {
452    let reviews: Vec<EmbeddedReview> = match serde_json::from_str(reviews_json) {
453        Ok(v) => v,
454        Err(e) => return format!(r#"{{"error":"Invalid reviews JSON: {e}"}}"#),
455    };
456
457    let result = semantic_consensus_native(&reviews, threshold);
458    serde_json::to_string(&result)
459        .unwrap_or_else(|e| format!(r#"{{"error":"Serialization: {e}"}}"#))
460}
461
462// ── Tests ─────────────────────────────────────────────────────────
463
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance for floating-point comparisons. Machine epsilon is
    /// the spacing of floats near 1.0, not a safe bound on accumulated
    /// rounding error, so a looser shared tolerance is used throughout.
    const TOLERANCE: f64 = 1e-9;

    // ── cosine_similarity ─────────────────────────────────────────

    #[test]
    fn cosine_identical_vectors() {
        let v = vec![1.0, 2.0, 3.0];
        let sim = cosine_similarity(&v, &v);
        assert!(
            (sim - 1.0).abs() < TOLERANCE,
            "identical vectors → 1.0, got {sim}"
        );
    }

    #[test]
    fn cosine_orthogonal_vectors() {
        let a = vec![1.0, 0.0, 0.0];
        let b = vec![0.0, 1.0, 0.0];
        let sim = cosine_similarity(&a, &b);
        assert!(sim.abs() < TOLERANCE, "orthogonal vectors → 0.0, got {sim}");
    }

    #[test]
    fn cosine_opposite_vectors() {
        let a = vec![1.0, 0.0];
        let b = vec![-1.0, 0.0];
        let sim = cosine_similarity(&a, &b);
        assert!(
            (sim - (-1.0)).abs() < TOLERANCE,
            "opposite vectors → -1.0, got {sim}"
        );
    }

    #[test]
    fn cosine_different_lengths_returns_zero() {
        let a = vec![1.0, 2.0];
        let b = vec![1.0, 2.0, 3.0];
        assert_eq!(cosine_similarity(&a, &b), 0.0, "mismatched lengths → 0.0");
    }

    #[test]
    fn cosine_zero_vector_returns_zero() {
        let a = vec![0.0, 0.0, 0.0];
        let b = vec![1.0, 2.0, 3.0];
        assert_eq!(cosine_similarity(&a, &b), 0.0, "zero vector → 0.0");
    }

    #[test]
    fn cosine_empty_vectors_returns_zero() {
        assert_eq!(cosine_similarity(&[], &[]), 0.0, "empty vectors → 0.0");
    }

    // ── semantic_diff ─────────────────────────────────────────────

    /// Build a section whose content simply repeats its title.
    fn make_section(title: &str, vec: Vec<f64>) -> EmbeddedSection {
        EmbeddedSection {
            title: title.to_string(),
            content: title.to_string(),
            embedding: vec,
        }
    }

    #[test]
    fn semantic_diff_identical_sections() {
        let sections = vec![make_section("Intro", vec![1.0, 0.0, 0.0])];
        let result = semantic_diff_native(&sections, &sections);
        assert!(
            (result.overall_similarity - 1.0).abs() < TOLERANCE,
            "identical sections → overall_similarity ≈ 1.0, got {}",
            result.overall_similarity
        );
        assert_eq!(result.section_similarities.len(), 1);
        assert_eq!(result.semantic_changes.len(), 1);
        assert_eq!(result.semantic_changes[0].change_type, "unchanged");
    }

    #[test]
    fn semantic_diff_detects_added_section() {
        let sections_a = vec![make_section("Overview", vec![1.0, 0.0])];
        let sections_b = vec![
            make_section("Overview", vec![1.0, 0.0]),
            make_section("NewSection", vec![0.0, 1.0]),
        ];
        let result = semantic_diff_native(&sections_a, &sections_b);
        let added = result
            .section_similarities
            .iter()
            .find(|s| matches!(s.alignment, SectionAlignment::Added));
        assert!(added.is_some(), "should detect Added section");
        assert_eq!(added.unwrap().section_b, "NewSection");
    }

    #[test]
    fn semantic_diff_detects_removed_section() {
        let sections_a = vec![
            make_section("Overview", vec![1.0, 0.0]),
            make_section("OldSection", vec![0.0, 1.0]),
        ];
        let sections_b = vec![make_section("Overview", vec![1.0, 0.0])];
        let result = semantic_diff_native(&sections_a, &sections_b);
        let removed = result
            .section_similarities
            .iter()
            .find(|s| matches!(s.alignment, SectionAlignment::Removed));
        assert!(removed.is_some(), "should detect Removed section");
        assert_eq!(removed.unwrap().section_a, "OldSection");
    }

    #[test]
    fn semantic_diff_detects_renamed_section() {
        // High similarity embedding, different title.
        let sections_a = vec![make_section("Old Title", vec![0.9, 0.1])];
        let sections_b = vec![make_section("New Title", vec![0.91, 0.09])];
        let result = semantic_diff_native(&sections_a, &sections_b);
        let renamed = result
            .section_similarities
            .iter()
            .find(|s| matches!(s.alignment, SectionAlignment::Renamed));
        assert!(renamed.is_some(), "should detect Renamed section");
    }

    #[test]
    fn semantic_diff_json_roundtrip() {
        let sections_a = r#"[{"title":"A","content":"A text","embedding":[1.0,0.0]}]"#;
        let sections_b = r#"[{"title":"A","content":"A text","embedding":[1.0,0.0]}]"#;
        let out = semantic_diff(sections_a, sections_b);
        let parsed: serde_json::Value = serde_json::from_str(&out).expect("valid JSON");
        assert!(parsed.get("error").is_none(), "should not have error field");
        assert!(parsed.get("overallSimilarity").is_some());
    }

    // ── semantic_consensus ────────────────────────────────────────

    /// Build a review whose content simply repeats the reviewer ID.
    fn make_review(id: &str, vec: Vec<f64>) -> EmbeddedReview {
        EmbeddedReview {
            reviewer_id: id.to_string(),
            content: id.to_string(),
            embedding: vec,
        }
    }

    #[test]
    fn consensus_unanimous_single_cluster() {
        let reviews = vec![
            make_review("a", vec![1.0, 0.0]),
            make_review("b", vec![0.98, 0.02]),
            make_review("c", vec![0.99, 0.01]),
        ];
        let result = semantic_consensus_native(&reviews, 0.80);
        assert!(result.consensus, "3 similar reviews should reach consensus");
        assert!(result.outliers.is_empty(), "no outliers expected");
        assert_eq!(result.clusters.len(), 1, "should form one cluster");
    }

    #[test]
    fn consensus_divergent_outlier() {
        // Two reviewers agree; one is completely orthogonal.
        let reviews = vec![
            make_review("a", vec![1.0, 0.0]),
            make_review("b", vec![0.99, 0.01]),
            make_review("c", vec![0.0, 1.0]), // orthogonal outlier
        ];
        let result = semantic_consensus_native(&reviews, 0.80);
        // Majority cluster has 2/3 > 50% → consensus
        assert!(result.consensus, "2/3 agreement should reach consensus");
        assert!(
            result.outliers.contains(&"c".to_string()),
            "'c' should be an outlier"
        );
    }

    #[test]
    fn consensus_no_consensus_split() {
        // Two orthogonal reviewers form two single-member clusters.
        let reviews = vec![
            make_review("a", vec![1.0, 0.0]),
            make_review("b", vec![0.0, 1.0]),
        ];
        let result = semantic_consensus_native(&reviews, 0.80);
        // 1/2 is NOT > 50% strict majority.
        assert!(!result.consensus, "50/50 split should not reach consensus");
    }

    #[test]
    fn consensus_empty_reviews() {
        let result = semantic_consensus_native(&[], 0.80);
        assert!(!result.consensus);
        assert_eq!(result.agreement_score, 0.0);
        assert!(result.clusters.is_empty());
        assert!(result.outliers.is_empty());
    }

    #[test]
    fn consensus_json_roundtrip() {
        let reviews_json = r#"[
            {"reviewerId":"a","content":"test","embedding":[1.0,0.0]},
            {"reviewerId":"b","content":"test","embedding":[0.99,0.01]}
        ]"#;
        let out = semantic_consensus(reviews_json, 0.80);
        let parsed: serde_json::Value = serde_json::from_str(&out).expect("valid JSON");
        assert!(parsed.get("error").is_none(), "should not have error field");
        assert!(parsed.get("consensus").is_some());
    }
}