// synapse_core/ingest/extractor.rs

/// A single subject–predicate–object statement extracted from a document.
///
/// All three components are stored as owned strings exactly as the extractor
/// produced them; no validation or normalisation is applied by this type.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ExtractedTriple {
    /// The entity the statement is about.
    pub subject: String,
    /// The relation linking `subject` to `object`.
    pub predicate: String,
    /// The value or entity the statement points at.
    pub object: String,
}

7pub fn extract_metadata(content: &str, source_path: &str) -> Vec<ExtractedTriple> {
8 let mut triples = Vec::new();
9 let mut current_header = String::new();
10 let _filename = std::path::Path::new(source_path)
11 .file_name()
12 .unwrap_or_default()
13 .to_string_lossy();
14
15 for line in content.lines() {
16 let trimmed = line.trim();
17 if trimmed.is_empty() {
18 continue;
19 }
20
21 if let Some(header) = trimmed.strip_prefix('#') {
22 current_header = header.trim_start_matches('#').trim().to_string();
23 triples.push(ExtractedTriple {
25 subject: format!("file://{}", source_path),
26 predicate: "http://synapse.os/contains_section".to_string(),
27 object: current_header.clone(),
28 });
29 } else if let Some(item) = trimmed.strip_prefix("- ").or_else(|| trimmed.strip_prefix("* ")) {
30 if !current_header.is_empty() {
31 triples.push(ExtractedTriple {
32 subject: current_header.clone(),
33 predicate: "http://synapse.os/has_list_item".to_string(),
34 object: item.trim().to_string(),
35 });
36 }
37 } else if trimmed.contains(':') {
38 let parts: Vec<&str> = trimmed.splitn(2, ':').collect();
39 if parts.len() == 2 {
40 let key = parts[0].trim();
41 let value = parts[1].trim();
42 if !key.is_empty() && !value.is_empty() {
43 let subject = if current_header.is_empty() {
44 format!("file://{}", source_path)
45 } else {
46 current_header.clone()
47 };
48
49 triples.push(ExtractedTriple {
50 subject,
51 predicate: format!("http://synapse.os/property/{}", key.replace(" ", "_")),
52 object: value.to_string(),
53 });
54 }
55 }
56 }
57 }
58
59 triples
60}