Skip to main content

sem_core/parser/plugins/
toml_plugin.rs

1use crate::model::entity::{build_entity_id, SemanticEntity};
2use crate::parser::plugin::SemanticParserPlugin;
3use crate::utils::hash::content_hash;
4
/// Parser plugin that splits a TOML document into top-level entities
/// (root key-value pairs and `[section]` blocks).
///
/// The type carries no state, so it is cheap to copy and can be created with
/// `TomlParserPlugin` or `TomlParserPlugin::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct TomlParserPlugin;
6
7impl SemanticParserPlugin for TomlParserPlugin {
8    fn id(&self) -> &str {
9        "toml"
10    }
11
12    fn extensions(&self) -> &[&str] {
13        &[".toml"]
14    }
15
16    fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
17        // Extract top-level keys and [sections] with proper line ranges.
18        // TOML has two kinds of top-level entries:
19        //   1. Key-value pairs before any section header
20        //   2. Section headers like [package] or [dependencies]
21        let lines: Vec<&str> = content.lines().collect();
22        let sections = find_toml_sections(&lines);
23
24        if sections.is_empty() {
25            return Vec::new();
26        }
27
28        // Parse for content hashing
29        let parsed: toml::Value = match content.parse() {
30            Ok(v) => v,
31            Err(_) => return Vec::new(),
32        };
33        let table = match parsed.as_table() {
34            Some(t) => t,
35            None => return Vec::new(),
36        };
37
38        let mut entities = Vec::new();
39        for (i, section) in sections.iter().enumerate() {
40            let end_line = if i + 1 < sections.len() {
41                let next_start = sections[i + 1].line;
42                trim_trailing_blanks_toml(&lines, section.line, next_start)
43            } else {
44                trim_trailing_blanks_toml(&lines, section.line, lines.len() + 1)
45            };
46
47            let entity_content = lines[section.line - 1..end_line].join("\n");
48
49            // Look up in parsed table for content hash
50            let (value_str, entity_type) = if let Some(val) = table.get(&section.key) {
51                let is_table = val.is_table();
52                let vs = if is_table {
53                    serde_json::to_string_pretty(val).unwrap_or_default()
54                } else {
55                    toml_value_to_string(val)
56                };
57                (vs, if is_table { "section" } else { "property" })
58            } else {
59                (entity_content.clone(), "property")
60            };
61
62            entities.push(SemanticEntity {
63                id: build_entity_id(file_path, entity_type, &section.key, None),
64                file_path: file_path.to_string(),
65                entity_type: entity_type.to_string(),
66                name: section.key.clone(),
67                parent_id: None,
68                content_hash: content_hash(&value_str),
69                structural_hash: None,
70                content: entity_content,
71                start_line: section.line,
72                end_line,
73                metadata: None,
74            });
75        }
76
77        entities
78    }
79}
80
/// A top-level TOML entry located by the line scanner.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TomlSection {
    /// Header name without brackets (e.g. `package`, `profile.release`), or
    /// the key of a root key-value pair.
    key: String,
    /// Line on which the entry starts (1-based).
    line: usize,
}

/// Find top-level entries in TOML: section headers (`[name]`, `[[name]]`) and
/// root key-value pairs that appear before the first header.
///
/// The scan is purely line-based: blank lines and `#` comment lines are
/// skipped, any other line starting with `[` is treated as a header, and keys
/// after the first header are attributed to that header and not reported.
/// Multi-line values are not tracked, so a continuation line that starts with
/// `[` is still treated as a header.
fn find_toml_sections(lines: &[&str]) -> Vec<TomlSection> {
    let mut sections = Vec::new();
    // Set once any header-looking line has been passed; replaces a rescan of
    // all previous lines for every root key candidate.
    let mut seen_header = false;

    for (idx, raw) in lines.iter().enumerate() {
        let trimmed = raw.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }

        // Section header: [package], [[bin]], optionally followed by a comment.
        if trimmed.starts_with('[') {
            seen_header = true;
            let key = header_key(trimmed);
            if !key.is_empty() {
                sections.push(TomlSection {
                    key: key.to_string(),
                    line: idx + 1,
                });
            }
            continue;
        }

        // Root key-value pair: only counted while no header has been seen
        // (or while nothing has been recorded yet).
        if sections.is_empty() || !seen_header {
            if let Some((lhs, _)) = trimmed.split_once('=') {
                let key = lhs.trim();
                if !key.is_empty() {
                    sections.push(TomlSection {
                        key: key.to_string(),
                        line: idx + 1,
                    });
                }
            }
        }
    }

    sections
}

/// Extract the table name from a header line such as `[package]`, `[[bin]]`
/// or `[package] # comment`.
///
/// The name ends at the first `]` or `#` that is not inside a quoted key
/// segment, so trailing comments are ignored. Returns an empty string when the
/// header contains no name.
fn header_key(header: &str) -> &str {
    let body = header.trim_start_matches('[');

    let mut open_quote: Option<char> = None;
    let mut escaped = false;
    let mut end = body.len();

    for (pos, ch) in body.char_indices() {
        if let Some(quote) = open_quote {
            if escaped {
                escaped = false;
            } else if quote == '"' && ch == '\\' {
                // Backslash escapes only apply inside double-quoted keys.
                escaped = true;
            } else if ch == quote {
                open_quote = None;
            }
        } else if ch == '"' || ch == '\'' {
            open_quote = Some(ch);
        } else if ch == ']' || ch == '#' {
            end = pos;
            break;
        }
    }

    // The trailing-bracket trim only matters for unterminated input.
    body[..end].trim_end_matches(']').trim()
}
140
/// Return the 1-based line number of the last meaningful line of an entry.
///
/// `start` is the entry's first line and `next_start` is the first line of the
/// following entry (or one past the last line of the file). Blank lines and
/// `#` comment lines directly before `next_start` are dropped; the result
/// never goes below `start` when the search range is non-empty.
fn trim_trailing_blanks_toml(lines: &[&str], start: usize, next_start: usize) -> usize {
    let last_candidate = next_start - 1;

    // Walk backwards over (start, last_candidate] and stop at the first line
    // that carries real content.
    (start + 1..=last_candidate)
        .rev()
        .find(|&line_no| {
            let text = lines[line_no - 1].trim();
            !(text.is_empty() || text.starts_with('#'))
        })
        // Nothing meaningful after `start`: fall back to `start`, or to
        // `last_candidate` when the range was empty to begin with.
        .unwrap_or_else(|| last_candidate.min(start))
}
153
154fn toml_value_to_string(value: &toml::Value) -> String {
155    match value {
156        toml::Value::String(s) => s.clone(),
157        toml::Value::Integer(n) => n.to_string(),
158        toml::Value::Float(f) => f.to_string(),
159        toml::Value::Boolean(b) => b.to_string(),
160        toml::Value::Array(arr) => serde_json::to_string_pretty(arr).unwrap_or_default(),
161        _ => format!("{value}"),
162    }
163}
164
#[cfg(test)]
mod tests {
    use super::*;

    /// Each `[section]` must be reported with its name and 1-based line range,
    /// excluding the blank line that separates it from the next section.
    #[test]
    fn test_toml_line_positions() {
        let content = r#"[package]
name = "my-app"
version = "1.0.0"

[dependencies]
serde = "1.0"
tokio = { version = "1", features = ["full"] }
"#;
        let entities = TomlParserPlugin.extract_entities(content, "Cargo.toml");

        // (name, start_line, end_line) in document order.
        let expected = [("package", 1, 3), ("dependencies", 5, 7)];
        assert_eq!(entities.len(), expected.len());

        for (entity, (name, start_line, end_line)) in entities.iter().zip(expected) {
            assert_eq!(entity.name, name);
            assert_eq!(entity.start_line, start_line);
            assert_eq!(entity.end_line, end_line);
        }
    }
}