sqlite_graphrag/commands/
ingest_heuristics.rs1pub fn extract_heuristic_description(body: &str, path_hint: Option<&str>) -> String {
27 let from_body = body
28 .lines()
29 .map(str::trim)
30 .find(|line| line.len() > 20 && !line.starts_with('#'))
31 .map(|line| line.chars().take(100).collect::<String>());
32 if let Some(desc) = from_body {
33 return desc;
34 }
35 if let Some(stem) = path_hint.and_then(derive_stem) {
37 return stem;
38 }
39 "ingested document".to_string()
40}
41
/// Extracts a usable file stem from `path`.
///
/// The stem is the final path component without its extension, with
/// surrounding whitespace trimmed and at most the first 100 characters kept.
///
/// Returns `None` when the path has no file stem, when the stem is not valid
/// UTF-8, or when the trimmed stem is shorter than two bytes.
fn derive_stem(path: &str) -> Option<String> {
    let stem = std::path::Path::new(path).file_stem()?.to_str()?.trim();

    // A length below two bytes also covers the empty stem.
    (stem.len() >= 2).then(|| stem.chars().take(100).collect())
}
54
#[cfg(test)]
mod tests {
    use super::*;

    /// The first non-heading line longer than the threshold becomes the description.
    #[test]
    fn extracts_first_meaningful_line() {
        let body = concat!(
            "# Title\n",
            "\n",
            "This is the actual first sentence of the document that has more than twenty characters.\n",
            "Second line should be ignored.\n",
        );

        let found = extract_heuristic_description(body, Some("/tmp/spec.md"));

        assert!(
            found.starts_with("This is the actual"),
            "desc deve começar com a primeira linha útil, got: {}",
            found
        );
    }

    /// A body made only of headings yields the stem of the path hint.
    #[test]
    fn falls_back_to_stem_when_only_headers() {
        let body = concat!("# Header 1\n", "## Header 2\n", "### Header 3\n");

        let found = extract_heuristic_description(body, Some("/tmp/headers-only.md"));

        assert_eq!(found, "headers-only");
    }

    /// With no usable line and no path hint, the fixed placeholder is returned.
    #[test]
    fn falls_back_to_ingested_document_when_no_path() {
        let found = extract_heuristic_description("# Only Header", None);

        assert_eq!(found, "ingested document");
    }

    /// A very long single line is cut down to at most 100 characters.
    #[test]
    fn truncates_at_100_chars() {
        let oversized = "a".repeat(200);

        let length = extract_heuristic_description(&oversized, None)
            .chars()
            .count();

        assert!(
            length <= 100,
            "desc deve ter no máximo 100 chars, got: {}",
            length
        );
    }

    /// Passing `None` as the path hint still extracts the description from the body.
    #[test]
    fn back_compat_single_arg_returns_body_only() {
        let body = concat!(
            "# H\n",
            "\n",
            "First sentence that has more than twenty characters of useful text.\n",
        );

        let found = extract_heuristic_description(body, None);

        assert!(found.starts_with("First sentence"));
    }
}