agentroot_core/index/
parser.rs1use lazy_static::lazy_static;
4use regex::Regex;
5use std::path::Path;
6
7lazy_static! {
8 static ref HEADING_RE: Regex = Regex::new(r"^##?\s+(.+)$").unwrap();
9 static ref SECOND_HEADING_RE: Regex = Regex::new(r"^##\s+(.+)$").unwrap();
10}
11
/// Heading titles treated as too generic to serve as a document title.
///
/// When the first heading of a document equals one of these, `extract_title`
/// looks for a `##` heading to use instead.
const SKIP_TITLES: &[&str] = &[
    "Notes",
    "README",
    "Index",
];
14
15pub fn extract_title(content: &str, filename: &str) -> String {
17 for line in content.lines().take(50) {
18 if let Some(caps) = HEADING_RE.captures(line) {
19 let title = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
20
21 if SKIP_TITLES
22 .iter()
23 .any(|&s| title == s || title.contains("Notes"))
24 {
25 for line2 in content.lines().skip(1).take(50) {
26 if let Some(caps2) = SECOND_HEADING_RE.captures(line2) {
27 if let Some(title2) = caps2.get(1) {
28 return title2.as_str().trim().to_string();
29 }
30 }
31 }
32 }
33
34 if !title.is_empty() {
35 return title.to_string();
36 }
37 }
38 }
39
40 Path::new(filename)
41 .file_stem()
42 .and_then(|s| s.to_str())
43 .map(|s| s.replace(['-', '_'], " "))
44 .unwrap_or_else(|| filename.to_string())
45}
46
/// Normalizes a path into a lowercase, dash-separated handle.
///
/// Steps, in order:
/// 1. Lowercase the whole path.
/// 2. Turn every `___` into `/`.
/// 3. Replace every character that is not alphanumeric, `/`, `.` or `-`
///    with `-`.
/// 4. Collapse each run of consecutive `-` into a single `-`.
/// 5. Strip leading and trailing `-` from the result.
///
/// Runs of any length are collapsed, so `"a - b"` becomes `"a-b"` rather
/// than `"a--b"`.
pub fn handelize(path: &str) -> String {
    let normalized = path.to_lowercase().replace("___", "/");

    let mut handle = String::with_capacity(normalized.len());
    for c in normalized.chars() {
        let mapped = if c.is_alphanumeric() || matches!(c, '/' | '.' | '-') {
            c
        } else {
            '-'
        };

        // Drop a dash that directly follows another dash; this collapses
        // runs of arbitrary length in a single pass.
        if mapped == '-' && handle.ends_with('-') {
            continue;
        }
        handle.push(mapped);
    }

    handle.trim_matches('-').to_string()
}
64
#[cfg(test)]
mod tests {
    use super::{extract_title, handelize};

    /// A leading `#` heading is returned as the title.
    #[test]
    fn test_extract_title_heading() {
        let title = extract_title("# My Document\n\nSome content here.", "doc.md");
        assert_eq!(title, "My Document");
    }

    /// Without any heading, the title comes from the file stem with dashes
    /// turned into spaces.
    #[test]
    fn test_extract_title_fallback() {
        let title = extract_title("No heading here, just text.", "my-doc.md");
        assert_eq!(title, "my doc");
    }

    /// Handles are lowercased, spaces become dashes, and `___` becomes `/`.
    #[test]
    fn test_handelize() {
        let cases = [
            ("My Docs/2024/Report.md", "my-docs/2024/report.md"),
            ("foo___bar.md", "foo/bar.md"),
        ];

        for (input, expected) in cases {
            assert_eq!(handelize(input), expected);
        }
    }
}