Skip to main content

docx_core/parsers/
csharp_xml.rs

1use std::{error::Error, fmt, path::Path};
2
3use docx_store::models::{
4    DocBlock,
5    DocExample,
6    DocException,
7    DocInherit,
8    DocParam,
9    DocTypeParam,
10    SeeAlso,
11    SourceId,
12    Symbol,
13};
14use docx_store::schema::{SOURCE_KIND_CSHARP_XML, make_csharp_symbol_key};
15use roxmltree::{Document, Node};
16
17/// Options for parsing C# XML documentation.
18#[derive(Debug, Clone)]
19pub struct CsharpParseOptions {
20    pub project_id: String,
21    pub ingest_id: Option<String>,
22    pub language: String,
23    pub source_kind: String,
24}
25
26impl CsharpParseOptions {
27    pub fn new(project_id: impl Into<String>) -> Self {
28        Self {
29            project_id: project_id.into(),
30            ingest_id: None,
31            language: "csharp".to_string(),
32            source_kind: SOURCE_KIND_CSHARP_XML.to_string(),
33        }
34    }
35
36    #[must_use]
37    pub fn with_ingest_id(mut self, ingest_id: impl Into<String>) -> Self {
38        self.ingest_id = Some(ingest_id.into());
39        self
40    }
41}
42
43/// Output from parsing C# XML documentation.
44#[derive(Debug, Clone)]
45pub struct CsharpParseOutput {
46    pub assembly_name: Option<String>,
47    pub symbols: Vec<Symbol>,
48    pub doc_blocks: Vec<DocBlock>,
49}
50
51/// Error type for C# XML parse failures.
52#[derive(Debug)]
53pub struct CsharpParseError {
54    message: String,
55}
56
57impl CsharpParseError {
58    fn new(message: impl Into<String>) -> Self {
59        Self {
60            message: message.into(),
61        }
62    }
63}
64
65impl fmt::Display for CsharpParseError {
66    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
67        write!(f, "C# XML parse error: {}", self.message)
68    }
69}
70
71impl Error for CsharpParseError {}
72
73impl From<roxmltree::Error> for CsharpParseError {
74    fn from(err: roxmltree::Error) -> Self {
75        Self::new(err.to_string())
76    }
77}
78
79impl From<std::io::Error> for CsharpParseError {
80    fn from(err: std::io::Error) -> Self {
81        Self::new(err.to_string())
82    }
83}
84
85impl From<tokio::task::JoinError> for CsharpParseError {
86    fn from(err: tokio::task::JoinError) -> Self {
87        Self::new(err.to_string())
88    }
89}
90
91/// Parser for C# XML documentation files.
92pub struct CsharpXmlParser;
93
94impl CsharpXmlParser {
95    /// Parses C# XML documentation into symbols and doc blocks.
96    ///
97    /// # Errors
98    /// Returns `CsharpParseError` if the XML is invalid or cannot be parsed.
99    #[allow(clippy::too_many_lines)]
100    pub fn parse(xml: &str, options: &CsharpParseOptions) -> Result<CsharpParseOutput, CsharpParseError> {
101        let doc = Document::parse(xml)?;
102        let assembly_name = extract_assembly_name(&doc);
103        let mut symbols = Vec::new();
104        let mut doc_blocks = Vec::new();
105
106        for member in doc.descendants().filter(|node| node.has_tag_name("member")) {
107            let Some(doc_id) = member.attribute("name") else {
108                continue;
109            };
110
111            let symbol_key = make_csharp_symbol_key(&options.project_id, doc_id);
112            let parts = parse_doc_id(doc_id);
113
114            let mut symbol = Symbol {
115                id: None,
116                project_id: options.project_id.clone(),
117                language: Some(options.language.clone()),
118                symbol_key: symbol_key.clone(),
119                kind: parts.kind,
120                name: parts.name,
121                qualified_name: parts.qualified_name,
122                display_name: parts.display_name,
123                signature: parts.signature,
124                signature_hash: None,
125                visibility: None,
126                is_static: None,
127                is_async: None,
128                is_const: None,
129                is_deprecated: None,
130                since: None,
131                stability: None,
132                source_path: None,
133                line: None,
134                col: None,
135                return_type: None,
136                params: Vec::new(),
137                type_params: Vec::new(),
138                attributes: Vec::new(),
139                source_ids: vec![SourceId {
140                    kind: "csharp_doc_id".to_string(),
141                    value: doc_id.to_string(),
142                }],
143                doc_summary: None,
144                extra: None,
145            };
146
147            let mut doc_block = DocBlock {
148                id: None,
149                project_id: options.project_id.clone(),
150                ingest_id: options.ingest_id.clone(),
151                symbol_key: Some(symbol_key.clone()),
152                language: Some(options.language.clone()),
153                source_kind: Some(options.source_kind.clone()),
154                doc_hash: None,
155                summary: None,
156                remarks: None,
157                returns: None,
158                value: None,
159                params: Vec::new(),
160                type_params: Vec::new(),
161                exceptions: Vec::new(),
162                examples: Vec::new(),
163                notes: Vec::new(),
164                warnings: Vec::new(),
165                safety: None,
166                panics: None,
167                errors: None,
168                see_also: Vec::new(),
169                deprecated: None,
170                inherit_doc: None,
171                sections: Vec::new(),
172                raw: None,
173                extra: None,
174            };
175
176            for child in member.children().filter(Node::is_element) {
177                match child.tag_name().name() {
178                    "summary" => doc_block.summary = optional_text(child),
179                    "remarks" => doc_block.remarks = optional_text(child),
180                    "returns" => doc_block.returns = optional_text(child),
181                    "value" => doc_block.value = optional_text(child),
182                    "param" => {
183                        if let Some(name) = child.attribute("name") {
184                        let description = render_doc_text(child);
185                        doc_block.params.push(DocParam {
186                            name: name.to_string(),
187                            description: if description.is_empty() { None } else { Some(description) },
188                            type_ref: None,
189                        });
190                        }
191                    }
192                    "typeparam" => {
193                        if let Some(name) = child.attribute("name") {
194                        let description = render_doc_text(child);
195                        doc_block.type_params.push(DocTypeParam {
196                            name: name.to_string(),
197                            description: if description.is_empty() { None } else { Some(description) },
198                        });
199                        }
200                    }
201                    "exception" => {
202                        let description = render_doc_text(child);
203                        let type_ref = child
204                            .attribute("cref")
205                            .map(|cref| docx_store::models::TypeRef {
206                                display: Some(cref.to_string()),
207                                canonical: Some(cref.to_string()),
208                                language: Some(options.language.clone()),
209                                symbol_key: Some(make_csharp_symbol_key(&options.project_id, cref)),
210                                generics: Vec::new(),
211                                modifiers: Vec::new(),
212                            });
213                        doc_block.exceptions.push(DocException {
214                            type_ref,
215                            description: if description.is_empty() { None } else { Some(description) },
216                        });
217                    }
218                    "example" => {
219                        let text = render_doc_text(child);
220                        if !text.is_empty() {
221                            doc_block.examples.push(DocExample {
222                                lang: None,
223                                code: Some(text),
224                                caption: None,
225                            });
226                        }
227                    }
228                    "seealso" => {
229                        if let Some(see) = parse_see_also(child) {
230                            doc_block.see_also.push(see);
231                        }
232                    }
233                    "note" => {
234                        let text = render_doc_text(child);
235                        if !text.is_empty() {
236                            doc_block.notes.push(text);
237                        }
238                    }
239                    "warning" => {
240                        let text = render_doc_text(child);
241                        if !text.is_empty() {
242                            doc_block.warnings.push(text);
243                        }
244                    }
245                    "inheritdoc" => {
246                        let cref = child.attribute("cref").map(str::to_string);
247                        let path = child.attribute("path").map(str::to_string);
248                        doc_block.inherit_doc = Some(DocInherit { cref, path });
249                    }
250                    "deprecated" => {
251                        let text = render_doc_text(child);
252                        if !text.is_empty() {
253                            doc_block.deprecated = Some(text);
254                        }
255                    }
256                    _ => {}
257                }
258            }
259
260            if doc_block.summary.is_some() {
261                symbol.doc_summary.clone_from(&doc_block.summary);
262            }
263
264            let range = member.range();
265            doc_block.raw = Some(xml[range].to_string());
266
267            symbols.push(symbol);
268            doc_blocks.push(doc_block);
269        }
270
271        Ok(CsharpParseOutput {
272            assembly_name,
273            symbols,
274            doc_blocks,
275        })
276    }
277
278    /// Parses XML asynchronously using a blocking task.
279    ///
280    /// # Errors
281    /// Returns `CsharpParseError` if parsing fails or the task panics.
282    pub async fn parse_async(
283        xml: String,
284        options: CsharpParseOptions,
285    ) -> Result<CsharpParseOutput, CsharpParseError> {
286        tokio::task::spawn_blocking(move || Self::parse(&xml, &options)).await?
287    }
288
289    /// Parses XML from a file path asynchronously.
290    ///
291    /// # Errors
292    /// Returns `CsharpParseError` if the file cannot be read or the XML cannot be parsed.
293    pub async fn parse_file(
294        path: impl AsRef<Path>,
295        options: CsharpParseOptions,
296    ) -> Result<CsharpParseOutput, CsharpParseError> {
297        let path = path.as_ref().to_path_buf();
298        let xml = tokio::task::spawn_blocking(move || std::fs::read_to_string(path)).await??;
299        Self::parse_async(xml, options).await
300    }
301}
302
/// Pieces of a C# documentation id (such as `M:Ns.Type.Method(System.Int32)`) as split
/// apart by `parse_doc_id`.
#[derive(Debug)]
struct DocIdParts {
    /// Symbol kind derived from the one-letter prefix; `None` for unrecognised prefixes.
    kind: Option<String>,
    /// Last segment of the qualified name.
    name: Option<String>,
    /// Text after the prefix, up to but excluding any parameter list.
    qualified_name: Option<String>,
    /// Same value as `name`.
    display_name: Option<String>,
    /// Full text after the prefix, including any parameter list.
    signature: Option<String>,
}
311
312fn parse_doc_id(doc_id: &str) -> DocIdParts {
313    let mut parts = doc_id.splitn(2, ':');
314    let prefix = parts.next().unwrap_or("");
315    let rest = parts.next().unwrap_or("");
316
317    let kind = match prefix {
318        "T" => Some("type".to_string()),
319        "M" => Some("method".to_string()),
320        "P" => Some("property".to_string()),
321        "F" => Some("field".to_string()),
322        "E" => Some("event".to_string()),
323        "N" => Some("namespace".to_string()),
324        _ => None,
325    };
326
327    let (qualified_name, signature) = if rest.is_empty() {
328        (None, None)
329    } else if let Some(pos) = rest.find('(') {
330        let qualified = rest[..pos].to_string();
331        (Some(qualified), Some(rest.to_string()))
332    } else {
333        (Some(rest.to_string()), Some(rest.to_string()))
334    };
335
336    let name = qualified_name
337        .as_deref()
338        .and_then(extract_simple_name)
339        .map(str::to_string);
340
341    DocIdParts {
342        kind,
343        name: name.clone(),
344        qualified_name,
345        display_name: name,
346        signature,
347    }
348}
349
/// Returns the part of `value` after its last `.`, `+` or `#` separator, or all of
/// `value` when it contains none. The result is always `Some`.
fn extract_simple_name(value: &str) -> Option<&str> {
    let last_separator = value.rfind(|ch: char| matches!(ch, '.' | '+' | '#'));
    // All three separators are one byte wide, so `index + 1` is a char boundary.
    let start = last_separator.map_or(0, |index| index + 1);
    Some(&value[start..])
}
353
354fn extract_assembly_name(doc: &Document<'_>) -> Option<String> {
355    let assembly_node = doc.descendants().find(|node| node.has_tag_name("assembly"))?;
356    let name_node = assembly_node
357        .children()
358        .find(|node| node.has_tag_name("name"))?;
359    name_node.text().map(|text| text.trim().to_string())
360}
361
362fn render_doc_text(node: Node<'_, '_>) -> String {
363    let text = render_children(node);
364    cleanup_text(&text)
365}
366
367fn optional_text(node: Node<'_, '_>) -> Option<String> {
368    let text = render_doc_text(node);
369    if text.is_empty() {
370        None
371    } else {
372        Some(text)
373    }
374}
375
376fn render_children(node: Node<'_, '_>) -> String {
377    let mut output = String::new();
378    for child in node.children() {
379        let fragment = render_node(child);
380        if fragment.is_empty() {
381            continue;
382        }
383        if needs_space(&output, &fragment) {
384            output.push(' ');
385        }
386        output.push_str(&fragment);
387    }
388    output
389}
390
391fn render_node(node: Node<'_, '_>) -> String {
392    match node.node_type() {
393        roxmltree::NodeType::Text => node.text().unwrap_or("").to_string(),
394        roxmltree::NodeType::Element => match node.tag_name().name() {
395            "para" => {
396                let text = render_children(node);
397                if text.is_empty() {
398                    String::new()
399                } else {
400                    format!("\n{}\n", text.trim())
401                }
402            }
403            "code" => render_code_block(node),
404            "see" | "seealso" => render_inline_link(node),
405            "paramref" | "typeparamref" => render_ref(node),
406            "list" => render_list(node),
407            _ => render_children(node),
408        },
409        _ => String::new(),
410    }
411}
412
413fn render_code_block(node: Node<'_, '_>) -> String {
414    let code_text = node.text().unwrap_or("").trim();
415    if code_text.is_empty() {
416        String::new()
417    } else {
418        format!("\n```\n{code_text}\n```\n")
419    }
420}
421
422fn render_inline_link(node: Node<'_, '_>) -> String {
423    let target = node
424        .attribute("cref")
425        .or_else(|| node.attribute("href"))
426        .unwrap_or("")
427        .trim();
428    let label = node.text().unwrap_or("").trim();
429    if target.is_empty() {
430        label.to_string()
431    } else if label.is_empty() {
432        target.to_string()
433    } else {
434        format!("[{label}]({target})")
435    }
436}
437
438fn render_ref(node: Node<'_, '_>) -> String {
439    let name = node.attribute("name").unwrap_or("").trim();
440    if name.is_empty() {
441        String::new()
442    } else {
443        format!("`{name}`")
444    }
445}
446
447fn render_list(node: Node<'_, '_>) -> String {
448    let mut lines = Vec::new();
449    for item in node.children().filter(|child| child.has_tag_name("item")) {
450        let term = item
451            .children()
452            .find(|child| child.has_tag_name("term"))
453            .map(render_children);
454        let description = item
455            .children()
456            .find(|child| child.has_tag_name("description"))
457            .map(render_children);
458        let text = match (term, description) {
459            (Some(term), Some(description)) => format!("{}: {}", term.trim(), description.trim()),
460            (Some(term), None) => term,
461            (None, Some(description)) => description,
462            (None, None) => render_children(item),
463        };
464        let text = text.trim();
465        if !text.is_empty() {
466            lines.push(format!("- {text}"));
467        }
468    }
469    if lines.is_empty() {
470        String::new()
471    } else {
472        format!("\n{}\n", lines.join("\n"))
473    }
474}
475
476fn cleanup_text(value: &str) -> String {
477    let mut lines = Vec::new();
478    let mut in_code_block = false;
479    for line in value.replace("\r\n", "\n").lines() {
480        let trimmed = line.trim_end();
481        if trimmed.trim_start().starts_with("```") {
482            in_code_block = !in_code_block;
483            lines.push(trimmed.to_string());
484            continue;
485        }
486        if in_code_block {
487            lines.push(trimmed.to_string());
488        } else {
489            lines.push(collapse_whitespace(trimmed).trim().to_string());
490        }
491    }
492
493    while matches!(lines.first(), Some(line) if line.is_empty()) {
494        lines.remove(0);
495    }
496    while matches!(lines.last(), Some(line) if line.is_empty()) {
497        lines.pop();
498    }
499
500    lines.join("\n")
501}
502
/// Replaces every run of whitespace characters in `value` with a single space.
///
/// Leading and trailing runs are collapsed too, not removed.
fn collapse_whitespace(value: &str) -> String {
    let mut collapsed = String::with_capacity(value.len());
    let mut previous_blank = false;
    for ch in value.chars() {
        let blank = ch.is_whitespace();
        match (blank, previous_blank) {
            // Still inside a whitespace run: already represented by one space.
            (true, true) => {}
            (true, false) => collapsed.push(' '),
            (false, _) => collapsed.push(ch),
        }
        previous_blank = blank;
    }
    collapsed
}
519
/// Decides whether a separating space must be inserted between the text rendered so
/// far (`current`) and the fragment about to be appended (`next`).
///
/// A space is needed only when both sides meet on non-whitespace characters. No space
/// is inserted when either side is empty, after an opening bracket, or before closing
/// punctuation, so `See <see cref="X"/>.` renders as `See X.` rather than `See X .`.
fn needs_space(current: &str, next: &str) -> bool {
    let (Some(last), Some(first)) = (current.chars().next_back(), next.chars().next()) else {
        return false;
    };
    // `char::is_whitespace` already covers '\n', so no separate newline test is needed.
    if last.is_whitespace() || first.is_whitespace() {
        return false;
    }
    let after_opening = matches!(last, '(' | '[' | '{');
    let before_closing = matches!(first, '.' | ',' | ';' | ':' | '!' | '?' | ')' | ']' | '}');
    !(after_opening || before_closing)
}
529
530fn parse_see_also(node: Node<'_, '_>) -> Option<SeeAlso> {
531    let target = node
532        .attribute("cref")
533        .or_else(|| node.attribute("href"))
534        .map(str::to_string)?;
535    let label = node.text().map(|text| text.trim().to_string());
536    let label = match label {
537        Some(text) if text.is_empty() => None,
538        other => other,
539    };
540    let target_kind = if node.attribute("cref").is_some() {
541        Some("cref".to_string())
542    } else {
543        Some("href".to_string())
544    };
545    Some(SeeAlso {
546        label,
547        target,
548        target_kind,
549    })
550}