Skip to main content

sqlite_graphrag/parsers/
mod.rs

//! Input format parsers and validators (timestamps, `k` range, flexible booleans, relation names).
2
3use chrono::DateTime;
4
5/// Accepts a Unix epoch (integer >= 0) or RFC 3339 timestamp and returns the Unix epoch.
6pub fn parse_expected_updated_at(s: &str) -> Result<i64, String> {
7    if let Ok(secs) = s.parse::<i64>() {
8        if secs >= 0 {
9            return Ok(secs);
10        }
11    }
12    DateTime::parse_from_rfc3339(s)
13        .map(|dt| dt.timestamp())
14        .map_err(|e| {
15            format!(
16                "value must be a Unix epoch (integer >= 0) or RFC 3339 (e.g. 2026-04-19T12:00:00Z): {e}"
17            )
18        })
19}
20
/// Parses the `-k`/`--k` argument of `recall` and `hybrid-search`, accepting only `1..=4096`.
///
/// The ceiling mirrors the `sqlite-vec` knn limit. Without this check an oversized value
/// would only fail later with an engine-level message such as
/// `k value in knn query too large, provided 10000 and the limit is 4096`; rejecting it
/// here yields a clean Clap error before any database work starts.
///
/// # Errors
///
/// * The input is not parseable as `usize` (this includes negative numbers).
/// * The parsed value lies outside the inclusive range `1..=4096`.
pub fn parse_k_range(s: &str) -> Result<usize, String> {
    match s.parse::<usize>() {
        Ok(value) if (1..=4096).contains(&value) => Ok(value),
        Ok(value) => Err(format!(
            "k must be between 1 and 4096 (inclusive); got {value}"
        )),
        Err(_) => Err(format!("'{s}' is not a valid non-negative integer")),
    }
}
37
/// Lenient boolean parser intended for Clap arguments backed by environment variables.
///
/// The input is lowercased and then compared against two fixed word lists:
///
/// * truthy: `1`, `true`, `yes`, `on`
/// * falsy: `0`, `false`, `no`, `off`, and the empty string
///
/// No trimming is performed, so surrounding whitespace makes a value invalid.
///
/// # Errors
///
/// Returns a message quoting the original (un-lowercased) input when it matches
/// neither list.
pub fn parse_bool_flexible(s: &str) -> Result<bool, String> {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];
    const FALSY: [&str; 5] = ["0", "false", "no", "off", ""];

    let lowered = s.to_lowercase();
    let token = lowered.as_str();

    if TRUTHY.contains(&token) {
        return Ok(true);
    }
    if FALSY.contains(&token) {
        return Ok(false);
    }
    Err(format!(
        "invalid boolean value '{s}': expected true/false/1/0/yes/no/on/off"
    ))
}
52
/// Unit tests for the timestamp, `k` range and flexible boolean parsers.
#[cfg(test)]
mod tests {
    use super::*;

    // ----- parse_expected_updated_at ------------------------------------

    #[test]
    fn accepts_unix_epoch() {
        assert_eq!(parse_expected_updated_at("1700000000"), Ok(1_700_000_000));
    }

    #[test]
    fn accepts_zero() {
        // Zero is the lower bound of the accepted integer range.
        assert_eq!(parse_expected_updated_at("0"), Ok(0));
    }

    #[test]
    fn accepts_rfc_3339_utc() {
        assert_eq!(
            parse_expected_updated_at("2020-01-01T00:00:00Z"),
            Ok(1_577_836_800)
        );
    }

    #[test]
    fn accepts_rfc_3339_with_offset() {
        // 2026-04-19T12:00:00Z expressed as epoch seconds.
        const NOON_UTC: i64 = 1_776_600_000;

        // Explicit zero offset is equivalent to `Z`.
        assert_eq!(
            parse_expected_updated_at("2026-04-19T12:00:00+00:00"),
            Ok(NOON_UTC)
        );
        // A non-zero offset must be converted to the same UTC instant,
        // not interpreted as local wall-clock seconds.
        assert_eq!(
            parse_expected_updated_at("2026-04-19T14:00:00+02:00"),
            Ok(NOON_UTC)
        );
    }

    #[test]
    fn rejects_invalid_string() {
        assert!(parse_expected_updated_at("bananas").is_err());
    }

    #[test]
    fn rejects_negative() {
        // A negative integer fails the `>= 0` check and is not valid RFC 3339 either.
        let result = parse_expected_updated_at("-1");
        assert!(result.is_err());
    }

    #[test]
    fn error_message_mentions_format() {
        // The hint must name BOTH accepted formats so users know their options.
        let msg = parse_expected_updated_at("invalid").unwrap_err();
        assert!(msg.contains("Unix epoch"), "missing epoch hint: {msg}");
        assert!(msg.contains("RFC 3339"), "missing RFC 3339 hint: {msg}");
    }

    // ----- parse_k_range --------------------------------------------------

    #[test]
    fn k_accepts_valid_range_endpoints() {
        assert_eq!(parse_k_range("1"), Ok(1));
        assert_eq!(parse_k_range("4096"), Ok(4096));
        assert_eq!(parse_k_range("10"), Ok(10));
    }

    #[test]
    fn k_rejects_zero() {
        let msg = parse_k_range("0").unwrap_err();
        assert!(msg.contains("between 1 and 4096"));
    }

    #[test]
    fn k_rejects_above_limit() {
        let msg = parse_k_range("10000").unwrap_err();
        assert!(msg.contains("between 1 and 4096"));
    }

    #[test]
    fn k_rejects_non_integer() {
        let msg = parse_k_range("abc").unwrap_err();
        assert!(msg.contains("not a valid"));
    }

    #[test]
    fn k_rejects_negative() {
        // The usize parser fails on negatives before the range check runs,
        // so the "not a valid" message (not the range message) is expected.
        let msg = parse_k_range("-5").unwrap_err();
        assert!(msg.contains("not a valid"));
    }

    // ----- parse_bool_flexible ---------------------------------------------

    #[test]
    fn bool_flexible_truthy() {
        for v in &["1", "true", "True", "TRUE", "yes", "Yes", "on", "ON"] {
            assert!(parse_bool_flexible(v).unwrap(), "should be true: {v}");
        }
    }

    #[test]
    fn bool_flexible_falsy() {
        for v in &["0", "false", "False", "FALSE", "no", "No", "off", "OFF", ""] {
            assert!(!parse_bool_flexible(v).unwrap(), "should be false: {v}");
        }
    }

    #[test]
    fn bool_flexible_rejects_invalid() {
        assert!(parse_bool_flexible("banana").is_err());
        assert!(parse_bool_flexible("2").is_err());
        assert!(parse_bool_flexible("nope").is_err());
    }
}
149
/// The 12 well-known relation types from v1.0.0.
///
/// Every entry is lowercase snake_case. [`is_canonical_relation`] tests
/// membership in this list with an exact, case-sensitive string comparison.
///
/// Non-canonical relations are accepted but emit a `tracing::warn!`
/// (see [`warn_if_non_canonical`]).
pub const CANONICAL_RELATIONS: &[&str] = &[
    "applies_to",
    "uses",
    "depends_on",
    "causes",
    "fixes",
    "contradicts",
    "supports",
    "follows",
    "related",
    "mentions",
    "replaces",
    "tracked_in",
];
167
168/// Returns `true` when the relation is one of the 12 canonical types.
169pub fn is_canonical_relation(s: &str) -> bool {
170    CANONICAL_RELATIONS.contains(&s)
171}
172
/// Produces the normalized spelling of a relation name.
///
/// The input is lowercased first, then every `-` is replaced with `_`, so
/// `Depends-On` becomes `depends_on`. No validation is performed here.
pub fn normalize_relation(s: &str) -> String {
    let lowered = s.to_lowercase();
    lowered.replace('-', "_")
}
177
/// Checks that a (normalized) relation name matches `^[a-z][a-z0-9_]*$`.
///
/// The checks run in a fixed order and the first failure wins:
///
/// 1. the string must not be empty;
/// 2. its first byte must be an ASCII lowercase letter;
/// 3. every byte must be an ASCII lowercase letter, an ASCII digit, or `_`.
///
/// # Errors
///
/// Returns a message describing the first rule that was violated; rules 2 and 3
/// quote the offending input.
pub fn validate_relation_format(s: &str) -> Result<(), String> {
    match s.bytes().next() {
        None => return Err("relation must not be empty".to_string()),
        Some(first) if !first.is_ascii_lowercase() => {
            return Err(format!(
                "relation must start with a lowercase letter, got '{s}'"
            ));
        }
        Some(_) => {}
    }

    let is_allowed = |b: u8| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'_');
    if s.bytes().all(is_allowed) {
        Ok(())
    } else {
        Err(format!(
            "relation must contain only lowercase letters, digits and underscores, got '{s}'"
        ))
    }
}
198
199/// Emits a `tracing::warn!` when the relation is not in [`CANONICAL_RELATIONS`].
200pub fn warn_if_non_canonical(relation: &str) {
201    if !is_canonical_relation(relation) {
202        tracing::warn!(
203            relation,
204            "non-canonical relation accepted; consider using a well-known value"
205        );
206    }
207}
208
209/// Clap `value_parser` for `--relation`: normalizes and validates format.
210///
211/// Accepts any kebab-case or snake_case string. Non-canonical values are
212/// accepted at parse time; the warning is emitted at command execution.
213pub fn parse_relation(s: &str) -> Result<String, String> {
214    let normalized = normalize_relation(s);
215    validate_relation_format(&normalized)?;
216    Ok(normalized)
217}
218
/// Unit tests for relation normalization, validation and canonical lookup.
#[cfg(test)]
mod relation_tests {
    use super::*;

    #[test]
    fn canonical_relations_all_valid() {
        // Every built-in relation must satisfy the format rule it is held to.
        CANONICAL_RELATIONS.iter().for_each(|r| {
            assert!(
                validate_relation_format(r).is_ok(),
                "canonical relation '{r}' should be valid"
            )
        });
    }

    #[test]
    fn normalize_converts_hyphens_and_uppercase() {
        let cases = [
            ("Depends-On", "depends_on"),
            ("TESTED-BY", "tested_by"),
            ("uses", "uses"),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(normalize_relation(input), *expected);
        }
    }

    #[test]
    fn validate_rejects_empty() {
        let outcome = validate_relation_format("");
        assert!(outcome.is_err());
    }

    #[test]
    fn validate_rejects_digit_start() {
        let outcome = validate_relation_format("123abc");
        assert!(outcome.is_err());
    }

    #[test]
    fn validate_rejects_spaces() {
        let outcome = validate_relation_format("has spaces");
        assert!(outcome.is_err());
    }

    #[test]
    fn validate_accepts_custom_relations() {
        // Well-formed names outside the canonical list are still valid.
        for custom in ["implements", "tested_by", "part_of", "blocks"].iter() {
            assert!(validate_relation_format(custom).is_ok());
        }
    }

    #[test]
    fn parse_relation_normalizes_and_validates() {
        assert_eq!(parse_relation("Tested-By"), Ok("tested_by".to_string()));
        assert_eq!(parse_relation("uses"), Ok("uses".to_string()));
        assert!(parse_relation("").is_err());
    }

    #[test]
    fn is_canonical_detects_known() {
        for known in ["uses", "applies_to"].iter() {
            assert!(is_canonical_relation(known));
        }
        for unknown in ["implements", "blocks"].iter() {
            assert!(!is_canonical_relation(unknown));
        }
    }
}