// ooxml_codegen/analysis.rs

//! Static analysis for codegen configuration files.
//!
//! Analyzes schemas against `ooxml-names.yaml` and `ooxml-features.yaml` to find
//! unmapped types and fields.
5
6use crate::ast::{Pattern, Schema};
7use crate::codegen::{CodegenConfig, to_pascal_case};
8use std::collections::{HashMap, HashSet};
9
/// Analysis report for a single module.
///
/// Holds the totals gathered while analyzing one schema together with the
/// names of every type and field for which no mapping was found.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ModuleReport {
    /// Types that exist in the schema but have no name mapping.
    pub unmapped_types: Vec<String>,
    /// Fields (`Type.field`) that exist in the schema but have no feature mapping.
    pub unmapped_fields: Vec<String>,
    /// Total types analyzed.
    pub total_types: usize,
    /// Total fields analyzed.
    pub total_fields: usize,
}

impl ModuleReport {
    /// Returns `true` if at least one type or field is unmapped.
    pub fn has_unmapped(&self) -> bool {
        !(self.unmapped_types.is_empty() && self.unmapped_fields.is_empty())
    }

    /// Print the report to stderr.
    ///
    /// When everything is mapped a single summary line is written. Otherwise
    /// the summary carries the unmapped counts and is followed by one bullet
    /// per unmapped type and field. `module` only appears in the section
    /// headers of those bullet lists.
    pub fn print(&self, module: &str) {
        // Single source of truth for "is anything unmapped?".
        if !self.has_unmapped() {
            eprintln!(
                "  {} types, {} fields - all mapped ✓",
                self.total_types, self.total_fields
            );
            return;
        }

        eprintln!(
            "  {} types ({} unmapped), {} fields ({} unmapped)",
            self.total_types,
            self.unmapped_types.len(),
            self.total_fields,
            self.unmapped_fields.len()
        );

        if !self.unmapped_types.is_empty() {
            eprintln!("  Unmapped types in ooxml-names.yaml [{}]:", module);
            for t in &self.unmapped_types {
                eprintln!("    - {}", t);
            }
        }

        if !self.unmapped_fields.is_empty() {
            eprintln!("  Unmapped fields in ooxml-features.yaml [{}]:", module);
            for f in &self.unmapped_fields {
                eprintln!("    - {}", f);
            }
        }
    }
}
62
63/// Analyze a schema against configuration files.
64pub fn analyze_schema(schema: &Schema, config: &CodegenConfig) -> ModuleReport {
65    let mut report = ModuleReport::default();
66
67    // Build definition map
68    let definitions: HashMap<&str, &Pattern> = schema
69        .definitions
70        .iter()
71        .map(|d| (d.name.as_str(), &d.pattern))
72        .collect();
73
74    // Analyze each definition
75    for def in &schema.definitions {
76        // Skip inline refs, simple types, attribute groups, element groups,
77        // and root element wrappers (e.g., `document = element w:document { CT_Document }`).
78        // AG_* and EG_* are always inlined into parent types, so their fields
79        // are already accounted for in the parent type's feature mapping.
80        // Root element wrappers are deduplicated by the codegen (their PascalCase
81        // name matches the mapped CT_* type), so they don't generate separate structs.
82        if is_inline_attribute_ref(&def.name, &def.pattern)
83            || is_simple_type(&def.pattern)
84            || def.name.contains("_EG_")
85            || def.name.contains("_AG_")
86            || matches!(&def.pattern, Pattern::Element { .. })
87        {
88            continue;
89        }
90
91        // This is a complex type that generates a struct
92        let spec_name = strip_namespace_prefix(&def.name, &config.strip_prefix);
93
94        // Check if type has name mapping
95        report.total_types += 1;
96        if !has_type_mapping(config, spec_name) {
97            report.unmapped_types.push(spec_name.to_string());
98        }
99
100        // Collect and check fields
101        // Use mapped name for feature lookup (YAML uses Worksheet, not CT_Worksheet)
102        let mapped_name = get_mapped_name(config, spec_name);
103        let fields = collect_fields(&def.pattern, &definitions);
104        for field in fields {
105            report.total_fields += 1;
106            if !has_field_mapping(config, &mapped_name, &field) {
107                report
108                    .unmapped_fields
109                    .push(format!("{}.{}", mapped_name, field));
110            }
111        }
112    }
113
114    report
115}
116
117/// Check if a type has a name mapping in the config.
118fn has_type_mapping(config: &CodegenConfig, spec_name: &str) -> bool {
119    if let Some(ref mappings) = config.name_mappings {
120        let module_mappings = mappings.for_module(&config.module_name);
121        // Check if there's a type mapping
122        if module_mappings.types.contains_key(spec_name) {
123            return true;
124        }
125        // Check shared mappings
126        if mappings.shared.types.contains_key(spec_name) {
127            return true;
128        }
129    }
130    // If no mappings configured, consider it "mapped" (using default naming)
131    config.name_mappings.is_none()
132}
133
134/// Get the mapped name for a type (e.g., CT_Worksheet -> Worksheet).
135fn get_mapped_name(config: &CodegenConfig, spec_name: &str) -> String {
136    if let Some(ref mappings) = config.name_mappings {
137        let module_mappings = mappings.for_module(&config.module_name);
138        // Check module-specific mappings first
139        if let Some(mapped) = module_mappings.types.get(spec_name) {
140            return mapped.clone();
141        }
142        // Check shared mappings
143        if let Some(mapped) = mappings.shared.types.get(spec_name) {
144            return mapped.clone();
145        }
146    }
147    // No mapping - apply PascalCase like the codegen does
148    to_pascal_case(spec_name)
149}
150
151/// Check if a field has a feature mapping in the config.
152fn has_field_mapping(config: &CodegenConfig, type_name: &str, field_name: &str) -> bool {
153    if let Some(ref mappings) = config.feature_mappings {
154        let module_features = mappings.for_module(&config.module_name);
155        // Check if the type has any field mappings
156        if let Some(type_fields) = module_features.get(type_name) {
157            // If the type is listed, check if this specific field is mapped
158            // (or if there's a wildcard)
159            return type_fields.contains_key(field_name) || type_fields.contains_key("*");
160        }
161    }
162    // If no mappings configured, consider it "mapped"
163    config.feature_mappings.is_none()
164}
165
166/// Collect field names from a pattern.
167fn collect_fields(pattern: &Pattern, definitions: &HashMap<&str, &Pattern>) -> Vec<String> {
168    let mut fields = Vec::new();
169    collect_fields_recursive(pattern, definitions, &mut fields, &mut HashSet::new());
170    fields
171}
172
173fn collect_fields_recursive(
174    pattern: &Pattern,
175    definitions: &HashMap<&str, &Pattern>,
176    fields: &mut Vec<String>,
177    visited: &mut HashSet<String>,
178) {
179    match pattern {
180        Pattern::Group(inner)
181        | Pattern::Optional(inner)
182        | Pattern::ZeroOrMore(inner)
183        | Pattern::OneOrMore(inner)
184        | Pattern::Mixed(inner) => {
185            collect_fields_recursive(inner, definitions, fields, visited);
186        }
187        Pattern::Interleave(parts) | Pattern::Choice(parts) | Pattern::Sequence(parts) => {
188            for part in parts {
189                collect_fields_recursive(part, definitions, fields, visited);
190            }
191        }
192        Pattern::Attribute { name, .. } => {
193            // Use original XML name (camelCase) to match ooxml-features.yaml keys
194            let field_name = name.local.clone();
195            if !fields.contains(&field_name) {
196                fields.push(field_name);
197            }
198        }
199        Pattern::Element { name, .. } => {
200            // Use original XML name (camelCase) to match ooxml-features.yaml keys
201            let field_name = name.local.clone();
202            if !fields.contains(&field_name) {
203                fields.push(field_name);
204            }
205        }
206        Pattern::Ref(name) => {
207            // Follow refs to inline attributes/groups
208            if visited.insert(name.clone())
209                && let Some(ref_pattern) = definitions.get(name.as_str())
210            {
211                // Only follow AG_* (attribute groups) and CT_* base types
212                if name.contains("_AG_") || is_inline_attribute_ref(name, ref_pattern) {
213                    collect_fields_recursive(ref_pattern, definitions, fields, visited);
214                } else if name.contains("_EG_") {
215                    // Element groups are always included (no feature gating) — skip
216                }
217            }
218        }
219        Pattern::Empty
220        | Pattern::Text
221        | Pattern::Any
222        | Pattern::StringLiteral(_)
223        | Pattern::Datatype { .. }
224        | Pattern::List(_) => {}
225    }
226}
227
/// Remove `prefix` from the start of `name`, if a prefix is configured and
/// `name` begins with it.
///
/// Returns `name` unchanged when `prefix` is `None` or does not match. Only a
/// single leading occurrence is removed, and the result borrows from `name`.
fn strip_namespace_prefix<'a>(name: &'a str, prefix: &Option<String>) -> &'a str {
    prefix
        .as_deref()
        .and_then(|p| name.strip_prefix(p))
        .unwrap_or(name)
}
235
236fn is_inline_attribute_ref(name: &str, pattern: &Pattern) -> bool {
237    // Inline attribute refs like "r_id = attribute r:id {...}"
238    matches!(pattern, Pattern::Attribute { .. }) && !name.contains("_CT_") && !name.contains("_AG_")
239}
240
241fn is_simple_type(pattern: &Pattern) -> bool {
242    match pattern {
243        Pattern::Choice(variants) => variants.iter().all(is_simple_type),
244        Pattern::StringLiteral(_) | Pattern::Datatype { .. } | Pattern::Text => true,
245        Pattern::Group(inner) => is_simple_type(inner),
246        _ => false,
247    }
248}
249
/// Convert a camelCase or PascalCase identifier to snake_case.
///
/// An underscore is inserted before an uppercase character only when the
/// previous character was lowercase, so a run of capitals is lowercased
/// without separators (`"XMLParser"` becomes `"xmlparser"`). Uppercase
/// characters whose lowercase form is more than one `char` are emitted in
/// full.
#[cfg(test)]
fn to_snake_case(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut prev_lower = false;

    for c in s.chars() {
        if c.is_uppercase() {
            if prev_lower {
                result.push('_');
            }
            // `to_lowercase` may yield several chars (e.g. U+0130); keep them all.
            result.extend(c.to_lowercase());
            prev_lower = false;
        } else {
            result.push(c);
            prev_lower = c.is_lowercase();
        }
    }

    result
}
270
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of `to_snake_case` on representative OOXML-style names.
    #[test]
    fn test_to_snake_case() {
        let cases = [
            ("fooBar", "foo_bar"),
            ("FooBar", "foo_bar"),
            ("foo", "foo"),
            // All-caps at start stays lowercase (realistic for OOXML attr names)
            ("XMLParser", "xmlparser"),
            ("val", "val"),
            ("colId", "col_id"),
        ];

        for (input, expected) in cases {
            assert_eq!(to_snake_case(input), expected);
        }
    }
}