sem_core/parser/plugins/
yaml.rs1use crate::model::entity::{build_entity_id, SemanticEntity};
2use crate::parser::plugin::SemanticParserPlugin;
3use crate::utils::hash::content_hash;
4
/// Parser plugin for YAML files (`.yml` / `.yaml`).
///
/// Its `SemanticParserPlugin` implementation turns each top-level key of a
/// YAML mapping document into one `SemanticEntity`.
pub struct YamlParserPlugin;
6
7impl SemanticParserPlugin for YamlParserPlugin {
8 fn id(&self) -> &str {
9 "yaml"
10 }
11
12 fn extensions(&self) -> &[&str] {
13 &[".yml", ".yaml"]
14 }
15
16 fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
17 let lines: Vec<&str> = content.lines().collect();
21 let top_level_keys = find_top_level_keys(&lines);
22
23 if top_level_keys.is_empty() {
24 return Vec::new();
25 }
26
27 let parsed: serde_yaml::Value = match serde_yaml::from_str(content) {
29 Ok(v) => v,
30 Err(_) => return Vec::new(),
31 };
32 let mapping = match parsed.as_mapping() {
33 Some(m) => m,
34 None => return Vec::new(),
35 };
36
37 let mut value_map: std::collections::HashMap<String, (String, bool)> =
39 std::collections::HashMap::new();
40 for (key, value) in mapping {
41 let key_str = match key.as_str() {
42 Some(s) => s.to_string(),
43 None => format!("{:?}", key),
44 };
45 let is_section = value.is_mapping() || value.is_sequence();
46 let value_str = if is_section {
47 serde_yaml::to_string(value)
48 .unwrap_or_default()
49 .trim()
50 .to_string()
51 } else {
52 yaml_value_to_string(value)
53 };
54 value_map.insert(key_str, (value_str, is_section));
55 }
56
57 let mut entities = Vec::new();
58 for (i, tk) in top_level_keys.iter().enumerate() {
59 let end_line = if i + 1 < top_level_keys.len() {
60 let next_start = top_level_keys[i + 1].line;
62 trim_trailing_blanks_yaml(&lines, tk.line, next_start)
63 } else {
64 trim_trailing_blanks_yaml(&lines, tk.line, lines.len() + 1)
66 };
67
68 let entity_content = lines[tk.line - 1..end_line].join("\n");
69 let (value_str, is_section) = value_map
70 .get(&tk.key)
71 .cloned()
72 .unwrap_or_else(|| (entity_content.clone(), false));
73
74 let entity_type = if is_section { "section" } else { "property" };
75
76 entities.push(SemanticEntity {
77 id: build_entity_id(file_path, entity_type, &tk.key, None),
78 file_path: file_path.to_string(),
79 entity_type: entity_type.to_string(),
80 name: tk.key.clone(),
81 parent_id: None,
82 content_hash: content_hash(&value_str),
83 structural_hash: None,
84 content: entity_content,
85 start_line: tk.line,
86 end_line,
87 metadata: None,
88 });
89 }
90
91 entities
92 }
93}
94
/// A mapping key found at column 0 of a YAML document.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TopLevelKey {
    /// Key text, without surrounding quotes.
    key: String,
    /// 1-based line number on which the key appears.
    line: usize,
}

/// Scans `lines` for top-level mapping keys, in document order.
///
/// A line is considered only when it starts at column 0 and is not blank, a
/// comment (`#`), a document marker (`---` / `...`), or a block-sequence entry
/// (`- item`). Sequence entries may legally sit at the same indentation as
/// their parent key, so a line such as `- name: a` belongs to the preceding
/// key and must not be reported as a key of its own.
fn find_top_level_keys(lines: &[&str]) -> Vec<TopLevelKey> {
    lines
        .iter()
        .enumerate()
        .filter_map(|(idx, &line)| {
            if !is_key_candidate(line) {
                return None;
            }
            parse_top_level_key(line).map(|key| TopLevelKey { key, line: idx + 1 })
        })
        .collect()
}

/// Returns `true` when `line` starts at column 0 and could hold a mapping key.
fn is_key_candidate(line: &str) -> bool {
    let indented_or_blank = line.is_empty() || line.starts_with(' ') || line.starts_with('\t');
    let comment_or_marker =
        line.starts_with('#') || line.starts_with("---") || line.starts_with("...");
    let sequence_entry = line == "-" || line.starts_with("- ") || line.starts_with("-\t");
    !(indented_or_blank || comment_or_marker || sequence_entry)
}

/// Extracts the key from a candidate line, or `None` if the line holds no key.
///
/// * Quoted keys (`"a:b": 1`, `'k': 2`) yield the text between the quotes, so
///   colons inside the quotes are not mistaken for the key/value separator.
///   Escape sequences inside the quotes are not decoded.
/// * Plain keys end at the first `:` that is followed by a space, a tab, or
///   the end of the line, so `http://host: x` yields `http://host`.
fn parse_top_level_key(line: &str) -> Option<String> {
    let mut chars = line.chars();
    match chars.next() {
        Some(quote @ ('"' | '\'')) => {
            let rest = chars.as_str();
            let close = rest.find(quote)?;
            let after = rest[close + 1..].trim_start();
            after.starts_with(':').then(|| rest[..close].to_string())
        }
        _ => {
            let colon = line.match_indices(':').map(|(pos, _)| pos).find(|&pos| {
                line[pos + 1..]
                    .chars()
                    .next()
                    .map_or(true, |next| next == ' ' || next == '\t')
            })?;
            let key = line[..colon].trim();
            (!key.is_empty()).then(|| key.to_string())
        }
    }
}
125
/// Returns the 1-based last line of the entity that starts on line `start`,
/// given that the next entity starts on line `next_start`.
///
/// The candidate end is the line just before `next_start`; it is then moved
/// back over whitespace-only lines, but never before `start` itself.
fn trim_trailing_blanks_yaml(lines: &[&str], start: usize, next_start: usize) -> usize {
    let last = next_start - 1;
    if last <= start {
        return last;
    }
    // 1-based lines `start + 1 ..= last` are 0-based indices `start .. last`.
    let blank_run = lines[start..last]
        .iter()
        .rev()
        .take_while(|text| text.trim().is_empty())
        .count();
    last - blank_run
}
138
139fn yaml_value_to_string(value: &serde_yaml::Value) -> String {
140 match value {
141 serde_yaml::Value::String(s) => s.clone(),
142 serde_yaml::Value::Number(n) => n.to_string(),
143 serde_yaml::Value::Bool(b) => b.to_string(),
144 serde_yaml::Value::Null => "null".to_string(),
145 _ => format!("{:?}", value),
146 }
147}
148
#[cfg(test)]
mod tests {
    use super::*;

    /// Each top-level key becomes one entity whose 1-based line span covers
    /// the key line and any nested lines that follow it.
    #[test]
    fn test_yaml_line_positions() {
        let content = "name: my-app\nversion: 1.0.0\nscripts:\n build: tsc\n test: jest\ndescription: a test app\n";
        let entities = YamlParserPlugin.extract_entities(content, "config.yaml");

        // (name, start_line, end_line), in document order.
        let expected = [
            ("name", 1, 1),
            ("version", 2, 2),
            ("scripts", 3, 5),
            ("description", 6, 6),
        ];
        assert_eq!(entities.len(), 4);

        for (entity, &(name, start_line, end_line)) in entities.iter().zip(expected.iter()) {
            assert_eq!(entity.name, name);
            assert_eq!(entity.start_line, start_line);
            assert_eq!(entity.end_line, end_line);
        }

        // `scripts` holds a nested mapping, so it is a section.
        assert_eq!(entities[2].entity_type, "section");
    }
}