ooxml_codegen/
analysis.rs1use crate::ast::{Pattern, Schema};
7use crate::codegen::{CodegenConfig, to_pascal_case};
8use std::collections::{HashMap, HashSet};
9
/// Summary of how completely one module's schema is covered by the
/// name and feature mapping files.
#[derive(Default, Debug)]
pub struct ModuleReport {
    /// Spec names of types that have no entry in the name mappings.
    pub unmapped_types: Vec<String>,
    /// `Type.field` identifiers that have no entry in the feature mappings.
    pub unmapped_fields: Vec<String>,
    /// Number of types that were analyzed.
    pub total_types: usize,
    /// Number of fields counted across all analyzed types.
    pub total_fields: usize,
}
22
23impl ModuleReport {
24 pub fn has_unmapped(&self) -> bool {
26 !self.unmapped_types.is_empty() || !self.unmapped_fields.is_empty()
27 }
28
29 pub fn print(&self, module: &str) {
31 if self.unmapped_types.is_empty() && self.unmapped_fields.is_empty() {
32 eprintln!(
33 " {} types, {} fields - all mapped ✓",
34 self.total_types, self.total_fields
35 );
36 return;
37 }
38
39 eprintln!(
40 " {} types ({} unmapped), {} fields ({} unmapped)",
41 self.total_types,
42 self.unmapped_types.len(),
43 self.total_fields,
44 self.unmapped_fields.len()
45 );
46
47 if !self.unmapped_types.is_empty() {
48 eprintln!(" Unmapped types in ooxml-names.yaml [{}]:", module);
49 for t in &self.unmapped_types {
50 eprintln!(" - {}", t);
51 }
52 }
53
54 if !self.unmapped_fields.is_empty() {
55 eprintln!(" Unmapped fields in ooxml-features.yaml [{}]:", module);
56 for f in &self.unmapped_fields {
57 eprintln!(" - {}", f);
58 }
59 }
60 }
61}
62
63pub fn analyze_schema(schema: &Schema, config: &CodegenConfig) -> ModuleReport {
65 let mut report = ModuleReport::default();
66
67 let definitions: HashMap<&str, &Pattern> = schema
69 .definitions
70 .iter()
71 .map(|d| (d.name.as_str(), &d.pattern))
72 .collect();
73
74 for def in &schema.definitions {
76 if is_inline_attribute_ref(&def.name, &def.pattern)
83 || is_simple_type(&def.pattern)
84 || def.name.contains("_EG_")
85 || def.name.contains("_AG_")
86 || matches!(&def.pattern, Pattern::Element { .. })
87 {
88 continue;
89 }
90
91 let spec_name = strip_namespace_prefix(&def.name, &config.strip_prefix);
93
94 report.total_types += 1;
96 if !has_type_mapping(config, spec_name) {
97 report.unmapped_types.push(spec_name.to_string());
98 }
99
100 let mapped_name = get_mapped_name(config, spec_name);
103 let fields = collect_fields(&def.pattern, &definitions);
104 for field in fields {
105 report.total_fields += 1;
106 if !has_field_mapping(config, &mapped_name, &field) {
107 report
108 .unmapped_fields
109 .push(format!("{}.{}", mapped_name, field));
110 }
111 }
112 }
113
114 report
115}
116
117fn has_type_mapping(config: &CodegenConfig, spec_name: &str) -> bool {
119 if let Some(ref mappings) = config.name_mappings {
120 let module_mappings = mappings.for_module(&config.module_name);
121 if module_mappings.types.contains_key(spec_name) {
123 return true;
124 }
125 if mappings.shared.types.contains_key(spec_name) {
127 return true;
128 }
129 }
130 config.name_mappings.is_none()
132}
133
134fn get_mapped_name(config: &CodegenConfig, spec_name: &str) -> String {
136 if let Some(ref mappings) = config.name_mappings {
137 let module_mappings = mappings.for_module(&config.module_name);
138 if let Some(mapped) = module_mappings.types.get(spec_name) {
140 return mapped.clone();
141 }
142 if let Some(mapped) = mappings.shared.types.get(spec_name) {
144 return mapped.clone();
145 }
146 }
147 to_pascal_case(spec_name)
149}
150
151fn has_field_mapping(config: &CodegenConfig, type_name: &str, field_name: &str) -> bool {
153 if let Some(ref mappings) = config.feature_mappings {
154 let module_features = mappings.for_module(&config.module_name);
155 if let Some(type_fields) = module_features.get(type_name) {
157 return type_fields.contains_key(field_name) || type_fields.contains_key("*");
160 }
161 }
162 config.feature_mappings.is_none()
164}
165
166fn collect_fields(pattern: &Pattern, definitions: &HashMap<&str, &Pattern>) -> Vec<String> {
168 let mut fields = Vec::new();
169 collect_fields_recursive(pattern, definitions, &mut fields, &mut HashSet::new());
170 fields
171}
172
173fn collect_fields_recursive(
174 pattern: &Pattern,
175 definitions: &HashMap<&str, &Pattern>,
176 fields: &mut Vec<String>,
177 visited: &mut HashSet<String>,
178) {
179 match pattern {
180 Pattern::Group(inner)
181 | Pattern::Optional(inner)
182 | Pattern::ZeroOrMore(inner)
183 | Pattern::OneOrMore(inner)
184 | Pattern::Mixed(inner) => {
185 collect_fields_recursive(inner, definitions, fields, visited);
186 }
187 Pattern::Interleave(parts) | Pattern::Choice(parts) | Pattern::Sequence(parts) => {
188 for part in parts {
189 collect_fields_recursive(part, definitions, fields, visited);
190 }
191 }
192 Pattern::Attribute { name, .. } => {
193 let field_name = name.local.clone();
195 if !fields.contains(&field_name) {
196 fields.push(field_name);
197 }
198 }
199 Pattern::Element { name, .. } => {
200 let field_name = name.local.clone();
202 if !fields.contains(&field_name) {
203 fields.push(field_name);
204 }
205 }
206 Pattern::Ref(name) => {
207 if visited.insert(name.clone())
209 && let Some(ref_pattern) = definitions.get(name.as_str())
210 {
211 if name.contains("_AG_") || is_inline_attribute_ref(name, ref_pattern) {
213 collect_fields_recursive(ref_pattern, definitions, fields, visited);
214 } else if name.contains("_EG_") {
215 }
217 }
218 }
219 Pattern::Empty
220 | Pattern::Text
221 | Pattern::Any
222 | Pattern::StringLiteral(_)
223 | Pattern::Datatype { .. }
224 | Pattern::List(_) => {}
225 }
226}
227
/// Returns `name` with `prefix` removed from its start.
///
/// `name` is returned unchanged when no prefix is configured or when it
/// does not start with the prefix. At most one leading occurrence is
/// removed.
fn strip_namespace_prefix<'a>(name: &'a str, prefix: &Option<String>) -> &'a str {
    prefix
        .as_deref()
        .and_then(|configured| name.strip_prefix(configured))
        .unwrap_or(name)
}
235
236fn is_inline_attribute_ref(name: &str, pattern: &Pattern) -> bool {
237 matches!(pattern, Pattern::Attribute { .. }) && !name.contains("_CT_") && !name.contains("_AG_")
239}
240
241fn is_simple_type(pattern: &Pattern) -> bool {
242 match pattern {
243 Pattern::Choice(variants) => variants.iter().all(is_simple_type),
244 Pattern::StringLiteral(_) | Pattern::Datatype { .. } | Pattern::Text => true,
245 Pattern::Group(inner) => is_simple_type(inner),
246 _ => false,
247 }
248}
249
/// Converts a camelCase or PascalCase identifier to snake_case.
///
/// An underscore is inserted before an uppercase character only when the
/// preceding character was lowercase, so runs of capitals stay together
/// (`"XMLParser"` becomes `"xmlparser"`). Non-uppercase characters are
/// copied through unchanged.
#[cfg(test)]
fn to_snake_case(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut prev_lower = false;

    for c in s.chars() {
        if c.is_uppercase() {
            if prev_lower {
                result.push('_');
            }
            // `to_lowercase` may yield more than one char (e.g. 'İ' maps to
            // 'i' followed by U+0307); keep the full mapping instead of only
            // its first char.
            result.extend(c.to_lowercase());
            prev_lower = false;
        } else {
            result.push(c);
            prev_lower = c.is_lowercase();
        }
    }

    result
}
270
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `to_snake_case` against a table of (input, expected) pairs.
    #[test]
    fn test_to_snake_case() {
        let cases = [
            ("fooBar", "foo_bar"),
            ("FooBar", "foo_bar"),
            ("foo", "foo"),
            // Consecutive capitals are not split apart.
            ("XMLParser", "xmlparser"),
            ("val", "val"),
            ("colId", "col_id"),
        ];

        for (input, expected) in cases {
            assert_eq!(to_snake_case(input), expected);
        }
    }
}