1use chrono::DateTime;
4use unicode_normalization::UnicodeNormalization;
5
6pub fn parse_expected_updated_at(s: &str) -> Result<i64, String> {
8 if let Ok(secs) = s.parse::<i64>() {
9 if secs >= 0 {
10 return Ok(secs);
11 }
12 }
13 DateTime::parse_from_rfc3339(s)
14 .map(|dt| dt.timestamp())
15 .map_err(|e| {
16 format!(
17 "value must be a Unix epoch (integer >= 0) or RFC 3339 (e.g. 2026-04-19T12:00:00Z): {e}"
18 )
19 })
20}
21
/// Parses `k` from its textual form and checks it lies in `1..=4096`.
///
/// # Errors
///
/// * The text does not parse as a `usize` (this includes negative numbers):
///   the message quotes the offending text.
/// * The number parses but is outside `1..=4096`: the message states the
///   allowed range and the value received.
pub fn parse_k_range(s: &str) -> Result<usize, String> {
    match s.parse::<usize>() {
        Ok(value @ 1..=4096) => Ok(value),
        Ok(value) => Err(format!(
            "k must be between 1 and 4096 (inclusive); got {value}"
        )),
        Err(_) => Err(format!("'{s}' is not a valid non-negative integer")),
    }
}
38
/// Parses a boolean written in any of several common spellings.
///
/// Matching is case-insensitive (the input is lowercased first).
/// `1`, `true`, `yes` and `on` yield `true`; `0`, `false`, `no`, `off` and the
/// empty string yield `false`.
///
/// # Errors
///
/// Any other input produces a message quoting the original text and listing
/// the accepted spellings.
pub fn parse_bool_flexible(s: &str) -> Result<bool, String> {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];
    // The empty string is deliberately treated as "false".
    const FALSY: [&str; 5] = ["0", "false", "no", "off", ""];

    let folded = s.to_lowercase();
    if TRUTHY.contains(&folded.as_str()) {
        Ok(true)
    } else if FALSY.contains(&folded.as_str()) {
        Ok(false)
    } else {
        Err(format!(
            "invalid boolean value '{s}': expected true/false/1/0/yes/no/on/off"
        ))
    }
}
53
#[cfg(test)]
mod tests {
    use super::*;

    // ---- parse_expected_updated_at ----

    /// A non-negative integer is returned as-is.
    #[test]
    fn accepts_unix_epoch() {
        assert_eq!(parse_expected_updated_at("1700000000"), Ok(1_700_000_000));
    }

    /// Zero is the lowest accepted integer.
    #[test]
    fn accepts_zero() {
        assert_eq!(parse_expected_updated_at("0"), Ok(0));
    }

    /// A `Z`-suffixed RFC 3339 value converts to the matching epoch second.
    #[test]
    fn accepts_rfc_3339_utc() {
        let parsed = parse_expected_updated_at("2020-01-01T00:00:00Z");
        assert_eq!(parsed, Ok(1_577_836_800));
    }

    /// An explicit numeric offset is accepted as well.
    #[test]
    fn accepts_rfc_3339_with_offset() {
        assert!(parse_expected_updated_at("2026-04-19T12:00:00+00:00").is_ok());
    }

    #[test]
    fn rejects_invalid_string() {
        let outcome = parse_expected_updated_at("bananas");
        assert!(outcome.is_err());
    }

    #[test]
    fn rejects_negative() {
        assert!(parse_expected_updated_at("-1").is_err());
    }

    /// The error text names at least one of the accepted formats.
    #[test]
    fn error_message_mentions_format() {
        let message = parse_expected_updated_at("invalid").unwrap_err();
        let names_a_format = ["RFC 3339", "Unix epoch"]
            .iter()
            .any(|&needle| message.contains(needle));
        assert!(names_a_format);
    }

    // ---- parse_k_range ----

    #[test]
    fn k_accepts_valid_range_endpoints() {
        for &(input, expected) in &[("1", 1usize), ("4096", 4096), ("10", 10)] {
            assert_eq!(parse_k_range(input), Ok(expected));
        }
    }

    #[test]
    fn k_rejects_zero() {
        let message = parse_k_range("0").unwrap_err();
        assert!(message.contains("between 1 and 4096"));
    }

    #[test]
    fn k_rejects_above_limit() {
        let message = parse_k_range("10000").unwrap_err();
        assert!(message.contains("between 1 and 4096"));
    }

    #[test]
    fn k_rejects_non_integer() {
        let message = parse_k_range("abc").unwrap_err();
        assert!(message.contains("not a valid"));
    }

    #[test]
    fn k_rejects_negative() {
        let outcome = parse_k_range("-5");
        assert!(outcome.is_err());
    }

    // ---- parse_bool_flexible ----

    #[test]
    fn bool_flexible_truthy() {
        ["1", "true", "True", "TRUE", "yes", "Yes", "on", "ON"]
            .iter()
            .for_each(|v| assert_eq!(parse_bool_flexible(v), Ok(true), "should be true: {v}"));
    }

    /// The empty string is part of the falsy set.
    #[test]
    fn bool_flexible_falsy() {
        ["0", "false", "False", "FALSE", "no", "No", "off", "OFF", ""]
            .iter()
            .for_each(|v| assert_eq!(parse_bool_flexible(v), Ok(false), "should be false: {v}"));
    }

    #[test]
    fn bool_flexible_rejects_invalid() {
        for rejected in &["banana", "2", "nope"] {
            assert!(parse_bool_flexible(rejected).is_err());
        }
    }
}
150
/// The well-known relation names.
///
/// `is_canonical_relation` tests membership in this list, and
/// `map_to_canonical_relation` always returns one of these values.
/// Every entry is lowercase with underscores as word separators.
pub const CANONICAL_RELATIONS: &[&str] = &[
    "applies_to",
    "uses",
    "depends_on",
    "causes",
    "fixes",
    "contradicts",
    "supports",
    "follows",
    "related",
    "mentions",
    "replaces",
    "tracked_in",
];
168
169pub fn is_canonical_relation(s: &str) -> bool {
171 CANONICAL_RELATIONS.contains(&s)
172}
173
/// Puts a relation name into its normalised spelling: lowercased, with every
/// `-` replaced by `_`.
///
/// No validation is performed; any other character is passed through as
/// produced by `str::to_lowercase`.
pub fn normalize_relation(s: &str) -> String {
    // Lowercase the whole string first, then swap the separators.
    s.to_lowercase()
        .chars()
        .map(|ch| if ch == '-' { '_' } else { ch })
        .collect()
}
178
179pub fn normalize_entity_name(s: &str) -> String {
196 let ascii: String = s.nfkd().filter(|c| c.is_ascii()).collect();
199 let hyphenated: String = ascii
201 .to_lowercase()
202 .chars()
203 .map(|c| if c.is_ascii_alphanumeric() { c } else { '-' })
204 .collect();
205 let mut result = String::with_capacity(hyphenated.len());
207 let mut prev_was_hyphen = false;
208 for ch in hyphenated.chars() {
209 if ch == '-' {
210 if !prev_was_hyphen {
211 result.push('-');
212 }
213 prev_was_hyphen = true;
214 } else {
215 result.push(ch);
216 prev_was_hyphen = false;
217 }
218 }
219 result.trim_matches('-').to_string()
220}
221
/// Checks that `s` is a well-formed relation identifier.
///
/// A valid relation is non-empty, begins with an ASCII lowercase letter, and
/// consists solely of ASCII lowercase letters, ASCII digits and `_`.
///
/// # Errors
///
/// Returns a message describing the first rule that is violated, checked in
/// the order listed above. The last two messages quote the input.
pub fn validate_relation_format(s: &str) -> Result<(), String> {
    let first = match s.bytes().next() {
        Some(byte) => byte,
        None => return Err("relation must not be empty".to_string()),
    };
    if !first.is_ascii_lowercase() {
        return Err(format!(
            "relation must start with a lowercase letter, got '{s}'"
        ));
    }

    let has_illegal_byte = s
        .bytes()
        .any(|b| !matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'_'));
    if has_illegal_byte {
        return Err(format!(
            "relation must contain only lowercase letters, digits and underscores, got '{s}'"
        ));
    }
    Ok(())
}
242
243pub fn map_to_canonical_relation(s: &str) -> String {
257 let normalized = normalize_relation(s);
258 if is_canonical_relation(&normalized) {
259 return normalized;
260 }
261 match normalized.as_str() {
262 "adds" | "creates" => "causes",
263 "implements" => "supports",
264 "blocks" => "contradicts",
265 "tested_by" => "related",
266 "part_of" => "applies_to",
267 _ => "related",
270 }
271 .to_string()
272}
273
274pub fn warn_if_non_canonical(relation: &str) {
276 if !is_canonical_relation(relation) {
277 tracing::warn!(target: "parsers",
278 relation,
279 "non-canonical relation accepted; consider using a well-known value"
280 );
281 }
282}
283
284pub fn parse_relation(s: &str) -> Result<String, String> {
289 let normalized = normalize_relation(s);
290 validate_relation_format(&normalized)?;
291 Ok(normalized)
292}
293
#[cfg(test)]
mod relation_tests {
    use super::*;

    /// Every built-in relation must itself pass the format check.
    #[test]
    fn canonical_relations_all_valid() {
        CANONICAL_RELATIONS.iter().for_each(|r| {
            assert!(
                validate_relation_format(r).is_ok(),
                "canonical relation '{r}' should be valid"
            )
        });
    }

    #[test]
    fn normalize_converts_hyphens_and_uppercase() {
        let cases = [
            ("Depends-On", "depends_on"),
            ("TESTED-BY", "tested_by"),
            ("uses", "uses"),
        ];
        for &(raw, expected) in &cases {
            assert_eq!(normalize_relation(raw), expected);
        }
    }

    #[test]
    fn validate_rejects_empty() {
        let outcome = validate_relation_format("");
        assert!(outcome.is_err());
    }

    #[test]
    fn validate_rejects_digit_start() {
        let outcome = validate_relation_format("123abc");
        assert!(outcome.is_err());
    }

    #[test]
    fn validate_rejects_spaces() {
        let outcome = validate_relation_format("has spaces");
        assert!(outcome.is_err());
    }

    /// Well-formed names are accepted even when they are not canonical.
    #[test]
    fn validate_accepts_custom_relations() {
        for custom in &["implements", "tested_by", "part_of", "blocks"] {
            assert!(validate_relation_format(custom).is_ok());
        }
    }

    #[test]
    fn parse_relation_normalizes_and_validates() {
        assert_eq!(parse_relation("Tested-By"), Ok("tested_by".to_string()));
        assert_eq!(parse_relation("uses"), Ok("uses".to_string()));
        assert!(parse_relation("").is_err());
    }

    #[test]
    fn is_canonical_detects_known() {
        for known in &["uses", "applies_to"] {
            assert!(is_canonical_relation(known));
        }
        for custom in &["implements", "blocks"] {
            assert!(!is_canonical_relation(custom));
        }
    }

    /// Canonical names survive mapping, after case/hyphen normalisation.
    #[test]
    fn map_to_canonical_relation_passes_through_canonical() {
        let cases = [
            ("uses", "uses"),
            ("Applies-To", "applies_to"),
            ("DEPENDS_ON", "depends_on"),
        ];
        for &(raw, expected) in &cases {
            assert_eq!(map_to_canonical_relation(raw), expected);
        }
    }

    #[test]
    fn map_to_canonical_relation_rewrites_known_aliases() {
        let cases = [
            ("part-of", "applies_to"),
            ("part_of", "applies_to"),
            ("implements", "supports"),
            ("blocks", "contradicts"),
            ("adds", "causes"),
            ("creates", "causes"),
            ("tested-by", "related"),
        ];
        for &(alias, expected) in &cases {
            assert_eq!(map_to_canonical_relation(alias), expected);
        }
    }

    /// Anything unrecognised collapses to `related`, which is canonical.
    #[test]
    fn map_to_canonical_relation_unknown_folds_to_related() {
        assert_eq!(map_to_canonical_relation("some-weird-relation"), "related");
        let mapped = map_to_canonical_relation("xyz");
        assert!(is_canonical_relation(&mapped));
    }
}
379
#[cfg(test)]
mod entity_name_tests {
    use super::*;

    /// Asserts `normalize_entity_name(input) == expected` for every pair.
    fn check(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(normalize_entity_name(input), expected);
        }
    }

    // NOTE(review): the input here contains no accented characters, so this
    // effectively exercises lowercasing and space-to-hyphen only — confirm
    // whether an accented spelling was intended.
    #[test]
    fn strips_diacritics_from_accented_name() {
        check(&[("Danilo Aguiar", "danilo-aguiar")]);
    }

    #[test]
    fn strips_diacritics_unicode_accents() {
        check(&[("São Paulo", "sao-paulo"), ("Ünit Tëst", "unit-test")]);
    }

    #[test]
    fn converts_spaces_to_hyphens() {
        check(&[
            ("hello world", "hello-world"),
            (" hello world ", "hello-world"),
        ]);
    }

    #[test]
    fn converts_underscores_to_hyphens() {
        check(&[
            ("hello_world", "hello-world"),
            ("CANONICAL_RELATIONS", "canonical-relations"),
        ]);
    }

    #[test]
    fn all_caps_becomes_lowercase_kebab() {
        check(&[
            ("CANONICAL_RELATIONS", "canonical-relations"),
            ("MY_ENTITY_NAME", "my-entity-name"),
        ]);
    }

    /// Already-normalised names come back unchanged.
    #[test]
    fn idempotent_on_already_normalized() {
        for name in &["danilo-aguiar", "canonical-relations"] {
            assert_eq!(normalize_entity_name(name), *name);
        }
    }

    #[test]
    fn collapses_consecutive_hyphens() {
        check(&[("foo--bar", "foo-bar"), ("foo - bar", "foo-bar")]);
    }

    #[test]
    fn trims_leading_trailing_hyphens() {
        check(&[("-foo-", "foo"), ("--hello--", "hello")]);
    }

    #[test]
    fn empty_or_only_separators_returns_empty() {
        check(&[("", ""), ("---", "")]);
    }

    /// Every non-alphanumeric ASCII character acts as a separator.
    #[test]
    fn normalizes_dots_slashes_and_punctuation() {
        check(&[
            ("lei-14.478/2022", "lei-14-478-2022"),
            ("src/main.rs", "src-main-rs"),
            ("user@domain.com", "user-domain-com"),
            ("v1.0.66", "v1-0-66"),
            ("key:value", "key-value"),
        ]);
    }
}