1use crate::model::entity::{build_entity_id, SemanticEntity};
2use crate::parser::plugin::SemanticParserPlugin;
3use crate::utils::hash::content_hash;
4
/// Semantic parser plugin for YAML documents.
///
/// The type carries no state; all behaviour lives in its
/// `SemanticParserPlugin` implementation.
#[derive(Debug, Clone, Copy, Default)]
pub struct YamlParserPlugin;
6
7impl SemanticParserPlugin for YamlParserPlugin {
8 fn id(&self) -> &str {
9 "yaml"
10 }
11
12 fn extensions(&self) -> &[&str] {
13 &[".yml", ".yaml"]
14 }
15
16 fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
17 let lines: Vec<&str> = content.lines().collect();
21 let top_level_keys = find_top_level_keys(&lines);
22
23 if top_level_keys.is_empty() {
24 if !content.trim().is_empty() {
27 return vec![SemanticEntity {
28 id: build_entity_id(file_path, "chunk", "(document)", None),
29 file_path: file_path.to_string(),
30 entity_type: "chunk".to_string(),
31 name: "(document)".to_string(),
32 parent_id: None,
33 content_hash: content_hash(content),
34 structural_hash: None,
35 content: content.to_string(),
36 start_line: 1,
37 end_line: lines.len(),
38 metadata: None,
39 }];
40 }
41 return Vec::new();
42 }
43
44 let section_keys: std::collections::HashSet<String> =
46 if let Ok(serde_yaml::Value::Mapping(mapping)) = serde_yaml::from_str(content) {
47 mapping
48 .iter()
49 .filter(|(_, v)| v.is_mapping() || v.is_sequence())
50 .filter_map(|(k, _)| k.as_str().map(String::from))
51 .collect()
52 } else {
53 std::collections::HashSet::new()
54 };
55
56 let mut entities = Vec::new();
57
58 if top_level_keys[0].line > 1 {
60 let preamble_end =
61 trim_trailing_blanks_yaml(&lines, 1, top_level_keys[0].line);
62 if preamble_end >= 1 {
63 let preamble_content = lines[..preamble_end].join("\n");
64 if !preamble_content.trim().is_empty() {
65 entities.push(SemanticEntity {
66 id: build_entity_id(file_path, "chunk", "(preamble)", None),
67 file_path: file_path.to_string(),
68 entity_type: "chunk".to_string(),
69 name: "(preamble)".to_string(),
70 parent_id: None,
71 content_hash: content_hash(&preamble_content),
72 structural_hash: None,
73 content: preamble_content,
74 start_line: 1,
75 end_line: preamble_end,
76 metadata: None,
77 });
78 }
79 }
80 }
81
82 for (i, tk) in top_level_keys.iter().enumerate() {
83 let end_line = if i + 1 < top_level_keys.len() {
84 let next_start = top_level_keys[i + 1].line;
85 trim_trailing_blanks_yaml(&lines, tk.line, next_start)
86 } else {
87 trim_trailing_blanks_yaml(&lines, tk.line, lines.len() + 1)
88 };
89
90 let entity_content = lines[tk.line - 1..end_line].join("\n");
91 let is_section = section_keys.contains(&tk.key);
92 let entity_type = if is_section { "section" } else { "property" };
93
94 entities.push(SemanticEntity {
96 id: build_entity_id(file_path, entity_type, &tk.key, None),
97 file_path: file_path.to_string(),
98 entity_type: entity_type.to_string(),
99 name: tk.key.clone(),
100 parent_id: None,
101 content_hash: content_hash(&entity_content),
102 structural_hash: None,
103 content: entity_content,
104 start_line: tk.line,
105 end_line,
106 metadata: None,
107 });
108 }
109
110 entities
111 }
112}
113
/// A mapping key found at column 0 of a YAML document.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TopLevelKey {
    /// Key text as it is used for the entity name.
    key: String,
    /// 1-based number of the line on which the key appears.
    line: usize,
}
118
119fn find_top_level_keys(lines: &[&str]) -> Vec<TopLevelKey> {
122 let mut keys = Vec::new();
123 for (i, line) in lines.iter().enumerate() {
124 if line.is_empty() || line.starts_with(' ') || line.starts_with('\t') {
125 continue;
126 }
127 if line.starts_with('#') || line.starts_with("---") || line.starts_with("...") {
129 continue;
130 }
131 if let Some(colon_pos) = line.find(':') {
133 let key = line[..colon_pos].trim().to_string();
134 if !key.is_empty() {
135 keys.push(TopLevelKey {
136 key,
137 line: i + 1,
138 });
139 }
140 }
141 }
142 keys
143}
144
/// Returns the 1-based number of the last line of the span that begins at
/// line `start` and ends just before line `next_start`, with trailing
/// whitespace-only lines removed.
///
/// The line at `start` itself is never inspected, so the result is at least
/// `start` whenever `next_start > start`.
fn trim_trailing_blanks_yaml(lines: &[&str], start: usize, next_start: usize) -> usize {
    let last = next_start - 1;

    // Walk backwards from the last candidate line down to `start + 1` and
    // count how many consecutive lines are blank.
    let trailing_blanks = (start + 1..=last)
        .rev()
        .take_while(|&line_no| lines[line_no - 1].trim().is_empty())
        .count();

    last - trailing_blanks
}
157
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the YAML plugin over `content` as if it had been read from `path`.
    fn parse(content: &str, path: &str) -> Vec<SemanticEntity> {
        YamlParserPlugin.extract_entities(content, path)
    }

    /// Each top-level key becomes one entity with the expected line span.
    #[test]
    fn test_yaml_line_positions() {
        let entities = parse(
            "name: my-app\nversion: 1.0.0\nscripts:\n build: tsc\n test: jest\ndescription: a test app\n",
            "config.yaml",
        );

        assert_eq!(entities.len(), 4);

        // (name, start_line, end_line) in document order.
        let expected: [(&str, usize, usize); 4] = [
            ("name", 1, 1),
            ("version", 2, 2),
            ("scripts", 3, 5),
            ("description", 6, 6),
        ];
        for (entity, &(name, start, end)) in entities.iter().zip(expected.iter()) {
            assert_eq!(entity.name, name);
            assert_eq!(entity.start_line, start);
            assert_eq!(entity.end_line, end);
        }

        // `scripts` holds a nested mapping, so it is reported as a section.
        assert_eq!(entities[2].entity_type, "section");
    }

    /// Comments and markers before the first key are kept as a preamble chunk.
    #[test]
    fn test_yaml_preamble() {
        let entities = parse(
            "# Config file\n---\nname: my-app\nversion: 1.0.0\n",
            "config.yaml",
        );

        let preamble = &entities[0];
        assert_eq!(preamble.name, "(preamble)");
        assert_eq!(preamble.entity_type, "chunk");
        assert_eq!(preamble.start_line, 1);

        assert_eq!(entities[1].name, "name");
        assert_eq!(entities[2].name, "version");
    }

    /// A file without any key is still reported, as a single document chunk.
    #[test]
    fn test_yaml_comment_only_file() {
        let entities = parse("# Just a comment\n# Another line\n", "notes.yaml");

        assert_eq!(entities.len(), 1);
        let document = &entities[0];
        assert_eq!(document.name, "(document)");
        assert_eq!(document.entity_type, "chunk");
    }

    /// Editing a comment that follows a key changes that key's content hash.
    #[test]
    fn test_yaml_comment_changes_detected() {
        let before = parse(
            "name: my-app\n# old comment\nversion: 1.0.0\n",
            "config.yaml",
        );
        let after = parse(
            "name: my-app\n# new comment\nversion: 1.0.0\n",
            "config.yaml",
        );

        assert_ne!(before[0].content_hash, after[0].content_hash);
    }
}