Skip to main content

synapse_core/ingest/
extractor.rs

/// A single subject–predicate–object statement extracted from a document.
///
/// All three components are stored as plain owned strings; no validation or
/// escaping is applied to them by this type.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ExtractedTriple {
    /// The entity the statement is about (for example a `file://…` URI or a
    /// section heading, as produced by `extract_metadata`).
    pub subject: String,
    /// The relationship between `subject` and `object`, typically a URI.
    pub predicate: String,
    /// The value or entity that the subject is related to.
    pub object: String,
}
6
7pub fn extract_metadata(content: &str, source_path: &str) -> Vec<ExtractedTriple> {
8    let mut triples = Vec::new();
9    let mut current_header = String::new();
10    let _filename = std::path::Path::new(source_path)
11        .file_name()
12        .unwrap_or_default()
13        .to_string_lossy();
14
15    for line in content.lines() {
16        let trimmed = line.trim();
17        if trimmed.is_empty() {
18            continue;
19        }
20
21        if let Some(header) = trimmed.strip_prefix('#') {
22            current_header = header.trim_start_matches('#').trim().to_string();
23            // Link file to header
24            triples.push(ExtractedTriple {
25                subject: format!("file://{}", source_path),
26                predicate: "http://synapse.os/contains_section".to_string(),
27                object: current_header.clone(),
28            });
29        } else if let Some(item) = trimmed
30            .strip_prefix("- ")
31            .or_else(|| trimmed.strip_prefix("* "))
32        {
33            if !current_header.is_empty() {
34                triples.push(ExtractedTriple {
35                    subject: current_header.clone(),
36                    predicate: "http://synapse.os/has_list_item".to_string(),
37                    object: item.trim().to_string(),
38                });
39            }
40        } else if trimmed.contains(':') {
41            let parts: Vec<&str> = trimmed.splitn(2, ':').collect();
42            if parts.len() == 2 {
43                let key = parts[0].trim();
44                let value = parts[1].trim();
45                if !key.is_empty() && !value.is_empty() {
46                    let subject = if current_header.is_empty() {
47                        format!("file://{}", source_path)
48                    } else {
49                        current_header.clone()
50                    };
51
52                    triples.push(ExtractedTriple {
53                        subject,
54                        predicate: format!("http://synapse.os/property/{}", key.replace(" ", "_")),
55                        object: value.to_string(),
56                    });
57                }
58            }
59        }
60    }
61
62    triples
63}