Skip to main content

docx_core/parsers/
csharp_xml.rs

1use std::{error::Error, fmt, path::Path};
2
3use docx_store::models::{
4    DocBlock, DocExample, DocException, DocInherit, DocParam, DocTypeParam, SeeAlso, SourceId,
5    Symbol,
6};
7use docx_store::schema::{SOURCE_KIND_CSHARP_XML, make_csharp_symbol_key};
8use roxmltree::{Document, Node};
9
/// Options for parsing C# XML documentation.
///
/// The values are copied onto the records produced by the parser; nothing
/// here changes how the XML itself is interpreted.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CsharpParseOptions {
    /// Project identifier stamped on every symbol and doc block, and used
    /// when building symbol keys.
    pub project_id: String,
    /// Optional ingest identifier stamped on every doc block.
    pub ingest_id: Option<String>,
    /// Language label stamped on symbols, doc blocks, and type references.
    pub language: String,
    /// Source-kind label stamped on every doc block.
    pub source_kind: String,
}
18
19impl CsharpParseOptions {
20    pub fn new(project_id: impl Into<String>) -> Self {
21        Self {
22            project_id: project_id.into(),
23            ingest_id: None,
24            language: "csharp".to_string(),
25            source_kind: SOURCE_KIND_CSHARP_XML.to_string(),
26        }
27    }
28
29    #[must_use]
30    pub fn with_ingest_id(mut self, ingest_id: impl Into<String>) -> Self {
31        self.ingest_id = Some(ingest_id.into());
32        self
33    }
34}
35
36/// Output from parsing C# XML documentation.
37#[derive(Debug, Clone)]
38pub struct CsharpParseOutput {
39    pub assembly_name: Option<String>,
40    pub symbols: Vec<Symbol>,
41    pub doc_blocks: Vec<DocBlock>,
42}
43
/// Failure raised while reading or parsing C# XML documentation.
///
/// Only the human-readable message of the underlying error is retained.
#[derive(Debug)]
pub struct CsharpParseError {
    /// Description of what went wrong, without the `Display` prefix.
    message: String,
}
49
50impl CsharpParseError {
51    fn new(message: impl Into<String>) -> Self {
52        Self {
53            message: message.into(),
54        }
55    }
56}
57
58impl fmt::Display for CsharpParseError {
59    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
60        write!(f, "C# XML parse error: {}", self.message)
61    }
62}
63
64impl Error for CsharpParseError {}
65
66impl From<roxmltree::Error> for CsharpParseError {
67    fn from(err: roxmltree::Error) -> Self {
68        Self::new(err.to_string())
69    }
70}
71
72impl From<std::io::Error> for CsharpParseError {
73    fn from(err: std::io::Error) -> Self {
74        Self::new(err.to_string())
75    }
76}
77
78impl From<tokio::task::JoinError> for CsharpParseError {
79    fn from(err: tokio::task::JoinError) -> Self {
80        Self::new(err.to_string())
81    }
82}
83
/// Parser for C# XML documentation files.
///
/// A stateless marker type: all functionality is exposed through associated
/// functions, so the value itself carries no data.
#[derive(Debug, Clone, Copy, Default)]
pub struct CsharpXmlParser;
86
87impl CsharpXmlParser {
88    /// Parses C# XML documentation into symbols and doc blocks.
89    ///
90    /// # Errors
91    /// Returns `CsharpParseError` if the XML is invalid or cannot be parsed.
92    #[allow(clippy::too_many_lines)]
93    pub fn parse(
94        xml: &str,
95        options: &CsharpParseOptions,
96    ) -> Result<CsharpParseOutput, CsharpParseError> {
97        let doc = Document::parse(xml)?;
98        let assembly_name = extract_assembly_name(&doc);
99        let mut symbols = Vec::new();
100        let mut doc_blocks = Vec::new();
101
102        for member in doc.descendants().filter(|node| node.has_tag_name("member")) {
103            let Some(doc_id) = member.attribute("name") else {
104                continue;
105            };
106
107            let symbol_key = make_csharp_symbol_key(&options.project_id, doc_id);
108            let parts = parse_doc_id(doc_id);
109
110            let mut symbol = Symbol {
111                id: None,
112                project_id: options.project_id.clone(),
113                language: Some(options.language.clone()),
114                symbol_key: symbol_key.clone(),
115                kind: parts.kind,
116                name: parts.name,
117                qualified_name: parts.qualified_name,
118                display_name: parts.display_name,
119                signature: parts.signature,
120                signature_hash: None,
121                visibility: None,
122                is_static: None,
123                is_async: None,
124                is_const: None,
125                is_deprecated: None,
126                since: None,
127                stability: None,
128                source_path: None,
129                line: None,
130                col: None,
131                return_type: None,
132                params: Vec::new(),
133                type_params: Vec::new(),
134                attributes: Vec::new(),
135                source_ids: vec![SourceId {
136                    kind: "csharp_doc_id".to_string(),
137                    value: doc_id.to_string(),
138                }],
139                doc_summary: None,
140                extra: None,
141            };
142
143            let mut doc_block = DocBlock {
144                id: None,
145                project_id: options.project_id.clone(),
146                ingest_id: options.ingest_id.clone(),
147                symbol_key: Some(symbol_key.clone()),
148                language: Some(options.language.clone()),
149                source_kind: Some(options.source_kind.clone()),
150                doc_hash: None,
151                summary: None,
152                remarks: None,
153                returns: None,
154                value: None,
155                params: Vec::new(),
156                type_params: Vec::new(),
157                exceptions: Vec::new(),
158                examples: Vec::new(),
159                notes: Vec::new(),
160                warnings: Vec::new(),
161                safety: None,
162                panics: None,
163                errors: None,
164                see_also: Vec::new(),
165                deprecated: None,
166                inherit_doc: None,
167                sections: Vec::new(),
168                raw: None,
169                extra: None,
170            };
171
172            for child in member.children().filter(Node::is_element) {
173                match child.tag_name().name() {
174                    "summary" => doc_block.summary = optional_text(child),
175                    "remarks" => doc_block.remarks = optional_text(child),
176                    "returns" => doc_block.returns = optional_text(child),
177                    "value" => doc_block.value = optional_text(child),
178                    "param" => {
179                        if let Some(name) = child.attribute("name") {
180                            let description = render_doc_text(child);
181                            doc_block.params.push(DocParam {
182                                name: name.to_string(),
183                                description: if description.is_empty() {
184                                    None
185                                } else {
186                                    Some(description)
187                                },
188                                type_ref: None,
189                            });
190                        }
191                    }
192                    "typeparam" => {
193                        if let Some(name) = child.attribute("name") {
194                            let description = render_doc_text(child);
195                            doc_block.type_params.push(DocTypeParam {
196                                name: name.to_string(),
197                                description: if description.is_empty() {
198                                    None
199                                } else {
200                                    Some(description)
201                                },
202                            });
203                        }
204                    }
205                    "exception" => {
206                        let description = render_doc_text(child);
207                        let type_ref =
208                            child
209                                .attribute("cref")
210                                .map(|cref| docx_store::models::TypeRef {
211                                    display: Some(cref.to_string()),
212                                    canonical: Some(cref.to_string()),
213                                    language: Some(options.language.clone()),
214                                    symbol_key: Some(make_csharp_symbol_key(
215                                        &options.project_id,
216                                        cref,
217                                    )),
218                                    generics: Vec::new(),
219                                    modifiers: Vec::new(),
220                                });
221                        doc_block.exceptions.push(DocException {
222                            type_ref,
223                            description: if description.is_empty() {
224                                None
225                            } else {
226                                Some(description)
227                            },
228                        });
229                    }
230                    "example" => {
231                        let text = render_doc_text(child);
232                        if !text.is_empty() {
233                            doc_block.examples.push(DocExample {
234                                lang: None,
235                                code: Some(text),
236                                caption: None,
237                            });
238                        }
239                    }
240                    "seealso" => {
241                        if let Some(see) = parse_see_also(child) {
242                            doc_block.see_also.push(see);
243                        }
244                    }
245                    "note" => {
246                        let text = render_doc_text(child);
247                        if !text.is_empty() {
248                            doc_block.notes.push(text);
249                        }
250                    }
251                    "warning" => {
252                        let text = render_doc_text(child);
253                        if !text.is_empty() {
254                            doc_block.warnings.push(text);
255                        }
256                    }
257                    "inheritdoc" => {
258                        let cref = child.attribute("cref").map(str::to_string);
259                        let path = child.attribute("path").map(str::to_string);
260                        doc_block.inherit_doc = Some(DocInherit { cref, path });
261                    }
262                    "deprecated" => {
263                        let text = render_doc_text(child);
264                        if !text.is_empty() {
265                            doc_block.deprecated = Some(text);
266                        }
267                    }
268                    _ => {}
269                }
270            }
271
272            if doc_block.summary.is_some() {
273                symbol.doc_summary.clone_from(&doc_block.summary);
274            }
275
276            let range = member.range();
277            doc_block.raw = Some(xml[range].to_string());
278
279            symbols.push(symbol);
280            doc_blocks.push(doc_block);
281        }
282
283        Ok(CsharpParseOutput {
284            assembly_name,
285            symbols,
286            doc_blocks,
287        })
288    }
289
290    /// Parses XML asynchronously using a blocking task.
291    ///
292    /// # Errors
293    /// Returns `CsharpParseError` if parsing fails or the task panics.
294    pub async fn parse_async(
295        xml: String,
296        options: CsharpParseOptions,
297    ) -> Result<CsharpParseOutput, CsharpParseError> {
298        tokio::task::spawn_blocking(move || Self::parse(&xml, &options)).await?
299    }
300
301    /// Parses XML from a file path asynchronously.
302    ///
303    /// # Errors
304    /// Returns `CsharpParseError` if the file cannot be read or the XML cannot be parsed.
305    pub async fn parse_file(
306        path: impl AsRef<Path>,
307        options: CsharpParseOptions,
308    ) -> Result<CsharpParseOutput, CsharpParseError> {
309        let path = path.as_ref().to_path_buf();
310        let xml = tokio::task::spawn_blocking(move || std::fs::read_to_string(path)).await??;
311        Self::parse_async(xml, options).await
312    }
313}
314
/// Pieces extracted from a C# documentation ID such as `M:Ns.Type.Method(System.Int32)`.
#[derive(Debug)]
struct DocIdParts {
    /// Member kind derived from the single-letter prefix; `None` when the prefix is unknown.
    kind: Option<String>,
    /// Last segment of the qualified name.
    name: Option<String>,
    /// Text after the prefix, up to but excluding any parameter list.
    qualified_name: Option<String>,
    /// Same value as `name`.
    display_name: Option<String>,
    /// Full text after the prefix, including any parameter list.
    signature: Option<String>,
}
323
324fn parse_doc_id(doc_id: &str) -> DocIdParts {
325    let mut parts = doc_id.splitn(2, ':');
326    let prefix = parts.next().unwrap_or("");
327    let rest = parts.next().unwrap_or("");
328
329    let kind = match prefix {
330        "T" => Some("type".to_string()),
331        "M" => Some("method".to_string()),
332        "P" => Some("property".to_string()),
333        "F" => Some("field".to_string()),
334        "E" => Some("event".to_string()),
335        "N" => Some("namespace".to_string()),
336        _ => None,
337    };
338
339    let (qualified_name, signature) = if rest.is_empty() {
340        (None, None)
341    } else if let Some(pos) = rest.find('(') {
342        let qualified = rest[..pos].to_string();
343        (Some(qualified), Some(rest.to_string()))
344    } else {
345        (Some(rest.to_string()), Some(rest.to_string()))
346    };
347
348    let name = qualified_name
349        .as_deref()
350        .and_then(extract_simple_name)
351        .map(str::to_string);
352
353    DocIdParts {
354        kind,
355        name: name.clone(),
356        qualified_name,
357        display_name: name,
358        signature,
359    }
360}
361
/// Returns the text after the last `.`, `+`, or `#` in `value`, or all of
/// `value` when none of those separators occurs. The result is always `Some`.
fn extract_simple_name(value: &str) -> Option<&str> {
    // All three separators are one byte long, so `+ 1` stays on a char boundary.
    let start = value
        .rfind(['.', '+', '#'])
        .map_or(0, |separator| separator + 1);
    Some(&value[start..])
}
365
366fn extract_assembly_name(doc: &Document<'_>) -> Option<String> {
367    let assembly_node = doc
368        .descendants()
369        .find(|node| node.has_tag_name("assembly"))?;
370    let name_node = assembly_node
371        .children()
372        .find(|node| node.has_tag_name("name"))?;
373    name_node.text().map(|text| text.trim().to_string())
374}
375
376fn render_doc_text(node: Node<'_, '_>) -> String {
377    let text = render_children(node);
378    cleanup_text(&text)
379}
380
381fn optional_text(node: Node<'_, '_>) -> Option<String> {
382    let text = render_doc_text(node);
383    if text.is_empty() { None } else { Some(text) }
384}
385
386fn render_children(node: Node<'_, '_>) -> String {
387    let mut output = String::new();
388    for child in node.children() {
389        let fragment = render_node(child);
390        if fragment.is_empty() {
391            continue;
392        }
393        if needs_space(&output, &fragment) {
394            output.push(' ');
395        }
396        output.push_str(&fragment);
397    }
398    output
399}
400
401fn render_node(node: Node<'_, '_>) -> String {
402    match node.node_type() {
403        roxmltree::NodeType::Text => node.text().unwrap_or("").to_string(),
404        roxmltree::NodeType::Element => match node.tag_name().name() {
405            "para" => {
406                let text = render_children(node);
407                if text.is_empty() {
408                    String::new()
409                } else {
410                    format!("\n{}\n", text.trim())
411                }
412            }
413            "code" => render_code_block(node),
414            "see" | "seealso" => render_inline_link(node),
415            "paramref" | "typeparamref" => render_ref(node),
416            "list" => render_list(node),
417            _ => render_children(node),
418        },
419        _ => String::new(),
420    }
421}
422
423fn render_code_block(node: Node<'_, '_>) -> String {
424    let code_text = node.text().unwrap_or("").trim();
425    if code_text.is_empty() {
426        String::new()
427    } else {
428        format!("\n```\n{code_text}\n```\n")
429    }
430}
431
432fn render_inline_link(node: Node<'_, '_>) -> String {
433    let target = node
434        .attribute("cref")
435        .or_else(|| node.attribute("href"))
436        .unwrap_or("")
437        .trim();
438    let label = node.text().unwrap_or("").trim();
439    if target.is_empty() {
440        label.to_string()
441    } else if label.is_empty() {
442        target.to_string()
443    } else {
444        format!("[{label}]({target})")
445    }
446}
447
448fn render_ref(node: Node<'_, '_>) -> String {
449    let name = node.attribute("name").unwrap_or("").trim();
450    if name.is_empty() {
451        String::new()
452    } else {
453        format!("`{name}`")
454    }
455}
456
457fn render_list(node: Node<'_, '_>) -> String {
458    let mut lines = Vec::new();
459    for item in node.children().filter(|child| child.has_tag_name("item")) {
460        let term = item
461            .children()
462            .find(|child| child.has_tag_name("term"))
463            .map(render_children);
464        let description = item
465            .children()
466            .find(|child| child.has_tag_name("description"))
467            .map(render_children);
468        let text = match (term, description) {
469            (Some(term), Some(description)) => format!("{}: {}", term.trim(), description.trim()),
470            (Some(term), None) => term,
471            (None, Some(description)) => description,
472            (None, None) => render_children(item),
473        };
474        let text = text.trim();
475        if !text.is_empty() {
476            lines.push(format!("- {text}"));
477        }
478    }
479    if lines.is_empty() {
480        String::new()
481    } else {
482        format!("\n{}\n", lines.join("\n"))
483    }
484}
485
486fn cleanup_text(value: &str) -> String {
487    let mut lines = Vec::new();
488    let mut in_code_block = false;
489    for line in value.replace("\r\n", "\n").lines() {
490        let trimmed = line.trim_end();
491        if trimmed.trim_start().starts_with("```") {
492            in_code_block = !in_code_block;
493            lines.push(trimmed.to_string());
494            continue;
495        }
496        if in_code_block {
497            lines.push(trimmed.to_string());
498        } else {
499            lines.push(collapse_whitespace(trimmed).trim().to_string());
500        }
501    }
502
503    while matches!(lines.first(), Some(line) if line.is_empty()) {
504        lines.remove(0);
505    }
506    while matches!(lines.last(), Some(line) if line.is_empty()) {
507        lines.pop();
508    }
509
510    lines.join("\n")
511}
512
/// Replaces every run of whitespace characters in `value` with one space.
///
/// Leading and trailing runs are collapsed too, not removed, so the result
/// may start or end with a single space.
fn collapse_whitespace(value: &str) -> String {
    let mut collapsed = String::new();
    let mut chars = value.chars().peekable();
    while let Some(ch) = chars.next() {
        if !ch.is_whitespace() {
            collapsed.push(ch);
            continue;
        }
        collapsed.push(' ');
        // Swallow the remainder of this whitespace run.
        while chars.next_if(|next| next.is_whitespace()).is_some() {}
    }
    collapsed
}
529
/// Decides whether a space must be inserted between two rendered fragments.
///
/// A space is needed only when both fragments are non-empty, neither side
/// of the seam is whitespace, and the seam is not punctuation that should
/// hug its neighbour: no space after an opening bracket and none before
/// closing punctuation. This keeps `<see cref="X"/>.` from rendering as
/// `X .` and `(<see cref="X"/>)` from rendering as `( X )`.
fn needs_space(current: &str, next: &str) -> bool {
    let (Some(last), Some(first)) = (current.chars().next_back(), next.chars().next()) else {
        return false;
    };
    // `is_whitespace` already covers '\n'.
    if last.is_whitespace() || first.is_whitespace() {
        return false;
    }
    let after_opener = matches!(last, '(' | '[' | '{');
    let before_closer = matches!(
        first,
        '.' | ',' | ';' | ':' | '!' | '?' | ')' | ']' | '}'
    );
    !(after_opener || before_closer)
}
539
540fn parse_see_also(node: Node<'_, '_>) -> Option<SeeAlso> {
541    let target = node
542        .attribute("cref")
543        .or_else(|| node.attribute("href"))
544        .map(str::to_string)?;
545    let label = node.text().map(|text| text.trim().to_string());
546    let label = match label {
547        Some(text) if text.is_empty() => None,
548        other => other,
549    };
550    let target_kind = if node.attribute("cref").is_some() {
551        Some("cref".to_string())
552    } else {
553        Some("href".to_string())
554    };
555    Some(SeeAlso {
556        label,
557        target,
558        target_kind,
559    })
560}