Skip to main content

synapse_core/ingest/
extractor.rs

/// A single subject / predicate / object statement extracted from a document.
///
/// All three components are plain owned strings; no URI validation or
/// escaping is performed on any of them.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ExtractedTriple {
    /// The entity the statement is about (e.g. a `file://` URI or a section title).
    pub subject: String,
    /// The relationship being asserted, as a URI string.
    pub predicate: String,
    /// The value of the statement (e.g. a section title, list item, or property value).
    pub object: String,
}
6
7pub fn extract_metadata(content: &str, source_path: &str) -> Vec<ExtractedTriple> {
8    let mut triples = Vec::new();
9    let mut current_header = String::new();
10    let _filename = std::path::Path::new(source_path)
11        .file_name()
12        .unwrap_or_default()
13        .to_string_lossy();
14
15    for line in content.lines() {
16        let trimmed = line.trim();
17        if trimmed.is_empty() {
18            continue;
19        }
20
21        if let Some(header) = trimmed.strip_prefix('#') {
22            current_header = header.trim_start_matches('#').trim().to_string();
23            // Link file to header
24            triples.push(ExtractedTriple {
25                subject: format!("file://{}", source_path),
26                predicate: "http://synapse.os/contains_section".to_string(),
27                object: current_header.clone(),
28            });
29        } else if let Some(item) = trimmed.strip_prefix("- ").or_else(|| trimmed.strip_prefix("* ")) {
30            if !current_header.is_empty() {
31                triples.push(ExtractedTriple {
32                    subject: current_header.clone(),
33                    predicate: "http://synapse.os/has_list_item".to_string(),
34                    object: item.trim().to_string(),
35                });
36            }
37        } else if trimmed.contains(':') {
38            let parts: Vec<&str> = trimmed.splitn(2, ':').collect();
39            if parts.len() == 2 {
40                let key = parts[0].trim();
41                let value = parts[1].trim();
42                if !key.is_empty() && !value.is_empty() {
43                    let subject = if current_header.is_empty() {
44                        format!("file://{}", source_path)
45                    } else {
46                        current_header.clone()
47                    };
48
49                    triples.push(ExtractedTriple {
50                        subject,
51                        predicate: format!("http://synapse.os/property/{}", key.replace(" ", "_")),
52                        object: value.to_string(),
53                    });
54                }
55            }
56        }
57    }
58
59    triples
60}