// synapse_core/ingest/extractor.rs

/// A single subject–predicate–object statement produced by [`extract_metadata`].
///
/// All three components are stored as plain owned strings; no validation is
/// performed on them by this type.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ExtractedTriple {
    /// The entity the statement is about.
    pub subject: String,
    /// The relation linking `subject` to `object`.
    pub predicate: String,
    /// The value or entity the statement points at.
    pub object: String,
}
6
7pub fn extract_metadata(content: &str, source_path: &str) -> Vec<ExtractedTriple> {
8 let mut triples = Vec::new();
9 let mut current_header = String::new();
10 let _filename = std::path::Path::new(source_path)
11 .file_name()
12 .unwrap_or_default()
13 .to_string_lossy();
14
15 for line in content.lines() {
16 let trimmed = line.trim();
17 if trimmed.is_empty() {
18 continue;
19 }
20
21 if let Some(header) = trimmed.strip_prefix('#') {
22 current_header = header.trim_start_matches('#').trim().to_string();
23 triples.push(ExtractedTriple {
25 subject: format!("file://{}", source_path),
26 predicate: "http://synapse.os/contains_section".to_string(),
27 object: current_header.clone(),
28 });
29 } else if let Some(item) = trimmed
30 .strip_prefix("- ")
31 .or_else(|| trimmed.strip_prefix("* "))
32 {
33 if !current_header.is_empty() {
34 triples.push(ExtractedTriple {
35 subject: current_header.clone(),
36 predicate: "http://synapse.os/has_list_item".to_string(),
37 object: item.trim().to_string(),
38 });
39 }
40 } else if trimmed.contains(':') {
41 let parts: Vec<&str> = trimmed.splitn(2, ':').collect();
42 if parts.len() == 2 {
43 let key = parts[0].trim();
44 let value = parts[1].trim();
45 if !key.is_empty() && !value.is_empty() {
46 let subject = if current_header.is_empty() {
47 format!("file://{}", source_path)
48 } else {
49 current_header.clone()
50 };
51
52 triples.push(ExtractedTriple {
53 subject,
54 predicate: format!("http://synapse.os/property/{}", key.replace(" ", "_")),
55 object: value.to_string(),
56 });
57 }
58 }
59 }
60 }
61
62 triples
63}