Skip to main content

crw_search/
structured.rs

//! Structured facts from SearXNG's `infoboxes[]` / `answers[]` arrays.
//!
//! SearXNG's `format=json` envelope returns five arrays; the Wikidata/Wikipedia
//! engines emit their knowledge-panel data (entity attributes like
//! `religion → X`, `capital → Y`) into `infoboxes[]` / `answers[]`, NOT into
//! `results[]`. The normal transform path reads only `results[]`, so these
//! already-retrieved structured facts were silently discarded (W0). This module
//! parses them so the answer path can pin them as a high-trust source. They are
//! still UNTRUSTED-wrapped by the synthesizer — this only widens the evidence,
//! it does not bypass the safety wrapper.
11
12use crate::client::SearxngResponse;
13use serde_json::Value;
14
/// A structured fact extracted from an infobox or a direct answer.
///
/// `attributes` are the infobox key/value rows
/// (e.g. `("religion", "Sunni Islam")`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StructuredFact {
    /// Heading shown for this fact.
    pub title: String,
    /// Link associated with the fact; may be empty.
    pub url: String,
    /// Free-text summary; may be empty when only `attributes` are present.
    pub content: String,
    /// Ordered `(label, value)` rows.
    pub attributes: Vec<(String, String)>,
    /// Always true — marks this as a pinned structured source so a later
    /// rerank-bypass (W1) can key off the flag, not the domain.
    pub is_structured_source: bool,
}

impl StructuredFact {
    /// Compact markdown body for the answer-path source (title is carried
    /// separately in the `Source` tuple).
    ///
    /// Layout: the `content` line (when non-empty) followed by one
    /// `- label: value` bullet per attribute, in stored order. Trailing
    /// whitespace is stripped, so a fact with no content and no attributes
    /// renders as the empty string.
    pub fn to_markdown(&self) -> String {
        let mut s = String::new();
        if !self.content.is_empty() {
            s.push_str(&self.content);
            s.push('\n');
        }
        for (k, v) in &self.attributes {
            s.push_str("- ");
            s.push_str(k);
            s.push_str(": ");
            s.push_str(v);
            s.push('\n');
        }
        // Trim in place: cutting the buffer at the trimmed length avoids
        // copying the whole body into a second allocation.
        let kept = s.trim_end().len();
        s.truncate(kept);
        s
    }
}
47
48fn str_field(v: &Value, key: &str) -> Option<String> {
49    v.get(key)
50        .and_then(|x| x.as_str())
51        .map(|x| x.trim().to_string())
52        .filter(|x| !x.is_empty())
53}
54
55/// Parse `infoboxes[]` + `answers[]` into structured facts. Defensive: every
56/// field is optional, malformed/empty entries are skipped (degrade to nothing).
57pub fn structured_facts(resp: &SearxngResponse) -> Vec<StructuredFact> {
58    let mut out = Vec::new();
59
60    for ib in &resp.infoboxes {
61        let title = str_field(ib, "infobox").unwrap_or_default();
62        let url = str_field(ib, "id").unwrap_or_default();
63        let content = str_field(ib, "content").unwrap_or_default();
64        let mut attributes = Vec::new();
65        if let Some(arr) = ib.get("attributes").and_then(|x| x.as_array()) {
66            for a in arr {
67                if let (Some(label), Some(value)) = (str_field(a, "label"), str_field(a, "value")) {
68                    attributes.push((label, value));
69                }
70            }
71        }
72        // Nothing useful to feed the synthesizer.
73        if content.is_empty() && attributes.is_empty() {
74            continue;
75        }
76        out.push(StructuredFact {
77            title: if title.is_empty() {
78                "Structured fact".to_string()
79            } else {
80                title
81            },
82            url,
83            content,
84            attributes,
85            is_structured_source: true,
86        });
87    }
88
89    // `answers[]` entries are either a bare string or `{answer, url}`.
90    for ans in &resp.answers {
91        let (content, url) = match ans {
92            Value::String(t) => (t.trim().to_string(), String::new()),
93            Value::Object(_) => (
94                str_field(ans, "answer").unwrap_or_default(),
95                str_field(ans, "url").unwrap_or_default(),
96            ),
97            _ => continue,
98        };
99        if content.is_empty() {
100            continue;
101        }
102        out.push(StructuredFact {
103            title: "Direct answer".to_string(),
104            url,
105            content,
106            attributes: Vec::new(),
107            is_structured_source: true,
108        });
109    }
110
111    out
112}
113
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Builds a response that carries only the given `infoboxes` and
    /// `answers`; every other field keeps its `Default` value.
    fn resp_with(infoboxes: Vec<Value>, answers: Vec<Value>) -> SearxngResponse {
        SearxngResponse {
            infoboxes,
            answers,
            ..Default::default()
        }
    }

    /// A fully populated infobox becomes one fact whose markdown contains
    /// both the summary and the attribute rows.
    #[test]
    fn parses_infobox_attributes() {
        let infobox = json!({
            "infobox": "Abdullah of Pahang",
            "id": "https://en.wikipedia.org/wiki/Abdullah_of_Pahang",
            "content": "Sultan of Pahang",
            "attributes": [
                {"label": "Religion", "value": "Sunni Islam"},
                {"label": "Born", "value": "1959"}
            ]
        });

        let facts = structured_facts(&resp_with(vec![infobox], Vec::new()));
        assert_eq!(facts.len(), 1);

        let fact = &facts[0];
        assert_eq!(fact.title, "Abdullah of Pahang");
        assert!(fact.is_structured_source);
        assert_eq!(fact.attributes.len(), 2);

        let rendered = fact.to_markdown();
        assert!(rendered.contains("Religion: Sunni Islam"));
        assert!(rendered.contains("Sultan of Pahang"));
    }

    /// Both answer shapes — bare string and `{answer, url}` object — parse.
    #[test]
    fn parses_string_and_object_answers() {
        let bare = json!("42 is the answer");
        let object = json!({"answer": "Tokyo", "url": "https://x"});

        let facts = structured_facts(&resp_with(Vec::new(), vec![bare, object]));
        assert_eq!(facts.len(), 2);

        let (first, second) = (&facts[0], &facts[1]);
        assert_eq!(first.content, "42 is the answer");
        assert_eq!(second.content, "Tokyo");
        assert_eq!(second.url, "https://x");
    }

    /// Entries with no usable text, or with the wrong JSON shape, yield nothing.
    #[test]
    fn skips_empty_and_malformed() {
        let infoboxes = vec![
            json!({"infobox": "Empty"}),
            json!({"attributes": []}),
            json!(123),
        ];
        let answers = vec![json!(""), json!({"no_answer": "x"}), json!(true)];

        let facts = structured_facts(&resp_with(infoboxes, answers));
        assert_eq!(facts.len(), 0);
    }
}
179}