1use std::collections::HashMap;
8use std::path::Path;
9
10use xot::Xot;
11
12use clayers_xml::rnc::RncSchema;
13
14pub fn export_rnc(schema_dir: &Path) -> Result<RncSchema, crate::Error> {
20 let mut schema = clayers_xml::rnc::xsd_to_rnc(schema_dir, &[])?;
21 fuse_descriptions(schema_dir, &mut schema)?;
22 Ok(schema)
23}
24
25pub fn export_rnc_filtered(
31 schema_dir: &Path,
32 prefixes: &[&str],
33) -> Result<RncSchema, crate::Error> {
34 let mut schema = export_rnc(schema_dir)?;
35 schema
36 .layers
37 .retain(|layer| prefixes.contains(&layer.prefix.as_str()));
38 Ok(schema)
39}
40
/// Returns the local part of a possibly prefixed type reference.
///
/// Everything up to and including the last `:` is discarded; a reference without a
/// colon is returned unchanged.
fn split_type_local(type_ref: &str) -> &str {
    match type_ref.rfind(':') {
        // `:` is a single byte, so `colon + 1` is always a valid char boundary.
        Some(colon) => &type_ref[colon + 1..],
        None => type_ref,
    }
}
45
46fn extract_llm_describe(xot: &mut Xot, node: xot::Node, llm_uri: &str) -> Option<String> {
48 let xs_ns = xot.add_namespace("http://www.w3.org/2001/XMLSchema");
49 let annotation = xot.add_name_ns("annotation", xs_ns);
50 let appinfo = xot.add_name_ns("appinfo", xs_ns);
51 let llm_ns = xot.add_namespace(llm_uri);
52 let describe = xot.add_name_ns("describe", llm_ns);
53
54 for ann_child in xot.children(node) {
55 if !xot.is_element(ann_child)
56 || xot.element(ann_child).is_none_or(|e| e.name() != annotation)
57 {
58 continue;
59 }
60 for app_child in xot.children(ann_child) {
61 if !xot.is_element(app_child)
62 || xot
63 .element(app_child)
64 .is_none_or(|e| e.name() != appinfo)
65 {
66 continue;
67 }
68 for desc_child in xot.children(app_child) {
69 if xot.is_element(desc_child)
70 && xot
71 .element(desc_child)
72 .is_some_and(|e| e.name() == describe)
73 {
74 let text = xot.text_content_str(desc_child).unwrap_or("").trim().to_string();
75 if !text.is_empty() {
76 let normalized: String = text.split_whitespace().collect::<Vec<_>>().join(" ");
78 return Some(normalized);
79 }
80 }
81 }
82 }
83 }
84 None
85}
86
87fn fuse_descriptions(schema_dir: &Path, schema: &mut RncSchema) -> Result<(), crate::Error> {
89 let mut xsd_paths: Vec<_> = std::fs::read_dir(schema_dir)?
90 .filter_map(|e| e.ok().map(|e| e.path()))
91 .filter(|p| p.extension().is_some_and(|ext| ext == "xsd"))
92 .collect();
93 xsd_paths.sort();
94
95 let uri_to_prefix: HashMap<String, String> = schema
96 .namespaces
97 .iter()
98 .map(|ns| (ns.uri.clone(), ns.prefix.clone()))
99 .collect();
100
101 let llm_uri = schema
103 .namespaces
104 .iter()
105 .find(|ns| ns.uri == "urn:clayers:llm")
106 .map(|ns| ns.uri.clone());
107
108 let mut layer_descs: HashMap<String, String> = HashMap::new();
112 let mut type_descs: HashMap<(String, String), String> = HashMap::new();
113 let mut elem_descs: HashMap<(String, String), String> = HashMap::new();
114
115 let Some(llm_uri) = llm_uri else {
117 return Ok(());
118 };
119
120 for xsd_path in &xsd_paths {
121 let content = std::fs::read_to_string(xsd_path)?;
122 let mut xot = Xot::new();
123 let doc = xot.parse(&content).map_err(xot::Error::from)?;
124 let root = xot.document_element(doc)?;
125
126 let tns_attr = xot.add_name("targetNamespace");
127 let tns = xot.get_attribute(root, tns_attr)
128 .unwrap_or("")
129 .to_string();
130 let Some(pfx) = uri_to_prefix.get(&tns).cloned() else {
132 continue;
133 };
134
135 if let Some(desc) = extract_llm_describe(&mut xot, root, &llm_uri) {
137 layer_descs.insert(pfx.clone(), desc);
138 }
139
140 let xs_ns = xot.add_namespace("http://www.w3.org/2001/XMLSchema");
142 let complex_type = xot.add_name_ns("complexType", xs_ns);
143 let element_tag = xot.add_name_ns("element", xs_ns);
144 let name_attr = xot.add_name("name");
145 let type_attr = xot.add_name("type");
146
147 let child_info: Vec<(xot::Node, xot::NameId, Option<String>, Option<String>)> = xot
149 .children(root)
150 .filter(|c| xot.is_element(*c))
151 .filter_map(|c| {
152 let el = xot.element(c)?;
153 let cn = el.name();
154 let nm = xot.get_attribute(c, name_attr).map(String::from);
155 let tr = xot.get_attribute(c, type_attr).map(String::from);
156 Some((c, cn, nm, tr))
157 })
158 .collect();
159
160 for (child, child_name, name_val, type_ref_val) in child_info {
161 let Some(n) = name_val else { continue };
162 if child_name == complex_type {
163 if let Some(desc) = extract_llm_describe(&mut xot, child, &llm_uri) {
164 type_descs.insert((pfx.clone(), n), desc);
165 }
166 } else if child_name == element_tag {
167 if let Some(desc) = extract_llm_describe(&mut xot, child, &llm_uri) {
168 elem_descs.insert((pfx.clone(), n.clone()), desc);
169 }
170 let key = (pfx.clone(), n.clone());
171 if !elem_descs.contains_key(&key)
172 && let Some(type_ref) = &type_ref_val
173 {
174 let local = split_type_local(type_ref);
175 if let Some(desc) = type_descs.get(&(pfx.clone(), local.to_string())) {
176 elem_descs.insert(key, desc.clone());
177 }
178 }
179 }
180 }
181 }
182
183 for layer in &mut schema.layers {
185 if let Some(desc) = layer_descs.get(&layer.prefix) {
186 layer.description = Some(desc.clone());
187 }
188 for pat in &mut layer.patterns {
189 let key = (layer.prefix.clone(), pat.name.clone());
190 if let Some(desc) = type_descs.get(&key) {
191 pat.description = Some(desc.clone());
192 }
193 }
194 for elem in &mut layer.elements {
195 let key = (layer.prefix.clone(), elem.name.clone());
196 if let Some(desc) = elem_descs.get(&key) {
197 elem.description = Some(desc.clone());
198 }
199 }
200 }
201
202 Ok(())
203}
204
205#[must_use]
210pub fn render(schema: &RncSchema) -> String {
211 schema.to_string()
212}
213
214
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Locates the `schemas/` directory, two levels above this crate's manifest.
    fn schemas_dir() -> PathBuf {
        let relative = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../schemas");
        relative
            .canonicalize()
            .expect("schemas/ directory not found")
    }

    /// Runs the unfiltered export and returns the schema together with its rendering.
    fn exported() -> (RncSchema, String) {
        let schema = export_rnc(&schemas_dir()).expect("export_rnc failed");
        let rendered = schema.to_string();
        (schema, rendered)
    }

    #[test]
    fn export_rnc_produces_output_with_namespaces() {
        let (schema, output) = exported();
        assert!(output.len() > 100, "Output too short: {}", output.len());
        assert!(
            output.contains("namespace"),
            "Missing namespace declarations"
        );
        let layer_count = schema.layers.len();
        assert!(layer_count >= 10, "Expected 10+ layers, got {}", layer_count);
    }

    #[test]
    fn export_rnc_has_llm_describe_comments() {
        let (_, output) = exported();
        assert!(
            output.contains("# The prose schema provides"),
            "Missing prose layer llm:describe comment in output:\n{output}"
        );
    }

    #[test]
    fn export_rnc_filtered_returns_single_layer() {
        let dir = schemas_dir();
        let schema = export_rnc_filtered(&dir, &["pr"]).expect("export_rnc_filtered failed");
        assert_eq!(schema.layers.len(), 1);
        assert_eq!(schema.layers[0].prefix, "pr");
    }

    #[test]
    fn export_rnc_recursive_types_are_named_patterns() {
        let (_, output) = exported();
        assert!(
            output.contains("SectionType ="),
            "SectionType should be a named pattern: {output}"
        );
    }

    #[test]
    fn export_rnc_topicref_recursive() {
        let (_, output) = exported();
        assert!(
            output.contains("TopicRefType ="),
            "TopicRefType should be a named pattern: {output}"
        );
    }

    #[test]
    fn render_produces_same_as_display() {
        let (schema, displayed) = exported();
        assert_eq!(render(&schema), displayed);
    }
}