1use std::{error::Error, fmt, path::Path};
2
3use docx_store::models::{
4 DocBlock, DocExample, DocException, DocInherit, DocParam, DocTypeParam, SeeAlso, SourceId,
5 Symbol,
6};
7use docx_store::schema::{SOURCE_KIND_CSHARP_XML, make_csharp_symbol_key};
8use roxmltree::{Document, Node};
9
/// Settings that control how a C# XML documentation file is turned into store records.
///
/// The type is plain data, so it also derives `PartialEq`/`Eq`; this lets callers and
/// tests compare two option sets by value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CsharpParseOptions {
    /// Project the parsed symbols and doc blocks are attributed to; also used when
    /// building symbol keys.
    pub project_id: String,
    /// Optional ingest identifier copied onto every emitted doc block.
    pub ingest_id: Option<String>,
    /// Language label recorded on emitted symbols, doc blocks and exception type refs.
    pub language: String,
    /// Source-kind label recorded on every emitted doc block.
    pub source_kind: String,
}
18
19impl CsharpParseOptions {
20 pub fn new(project_id: impl Into<String>) -> Self {
21 Self {
22 project_id: project_id.into(),
23 ingest_id: None,
24 language: "csharp".to_string(),
25 source_kind: SOURCE_KIND_CSHARP_XML.to_string(),
26 }
27 }
28
29 #[must_use]
30 pub fn with_ingest_id(mut self, ingest_id: impl Into<String>) -> Self {
31 self.ingest_id = Some(ingest_id.into());
32 self
33 }
34}
35
36#[derive(Debug, Clone)]
38pub struct CsharpParseOutput {
39 pub assembly_name: Option<String>,
40 pub symbols: Vec<Symbol>,
41 pub doc_blocks: Vec<DocBlock>,
42}
43
/// Error produced while reading or parsing a C# XML documentation file.
///
/// Underlying failures are flattened to their display text, so the type is plain data
/// and can be cloned and compared by value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CsharpParseError {
    /// Human-readable description of the failure, shown after the `Display` prefix.
    message: String,
}
49
50impl CsharpParseError {
51 fn new(message: impl Into<String>) -> Self {
52 Self {
53 message: message.into(),
54 }
55 }
56}
57
58impl fmt::Display for CsharpParseError {
59 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
60 write!(f, "C# XML parse error: {}", self.message)
61 }
62}
63
64impl Error for CsharpParseError {}
65
66impl From<roxmltree::Error> for CsharpParseError {
67 fn from(err: roxmltree::Error) -> Self {
68 Self::new(err.to_string())
69 }
70}
71
72impl From<std::io::Error> for CsharpParseError {
73 fn from(err: std::io::Error) -> Self {
74 Self::new(err.to_string())
75 }
76}
77
78impl From<tokio::task::JoinError> for CsharpParseError {
79 fn from(err: tokio::task::JoinError) -> Self {
80 Self::new(err.to_string())
81 }
82}
83
/// Stateless parser for C# XML documentation files.
///
/// The type carries no data; all functionality is exposed as associated functions.
/// The common marker-type traits are derived so the parser can be logged, copied,
/// compared and default-constructed like any other public value.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct CsharpXmlParser;
86
87impl CsharpXmlParser {
88 #[allow(clippy::too_many_lines)]
93 pub fn parse(
94 xml: &str,
95 options: &CsharpParseOptions,
96 ) -> Result<CsharpParseOutput, CsharpParseError> {
97 let doc = Document::parse(xml)?;
98 let assembly_name = extract_assembly_name(&doc);
99 let mut symbols = Vec::new();
100 let mut doc_blocks = Vec::new();
101
102 for member in doc.descendants().filter(|node| node.has_tag_name("member")) {
103 let Some(doc_id) = member.attribute("name") else {
104 continue;
105 };
106
107 let symbol_key = make_csharp_symbol_key(&options.project_id, doc_id);
108 let parts = parse_doc_id(doc_id);
109
110 let mut symbol = Symbol {
111 id: None,
112 project_id: options.project_id.clone(),
113 language: Some(options.language.clone()),
114 symbol_key: symbol_key.clone(),
115 kind: parts.kind,
116 name: parts.name,
117 qualified_name: parts.qualified_name,
118 display_name: parts.display_name,
119 signature: parts.signature,
120 signature_hash: None,
121 visibility: None,
122 is_static: None,
123 is_async: None,
124 is_const: None,
125 is_deprecated: None,
126 since: None,
127 stability: None,
128 source_path: None,
129 line: None,
130 col: None,
131 return_type: None,
132 params: Vec::new(),
133 type_params: Vec::new(),
134 attributes: Vec::new(),
135 source_ids: vec![SourceId {
136 kind: "csharp_doc_id".to_string(),
137 value: doc_id.to_string(),
138 }],
139 doc_summary: None,
140 extra: None,
141 };
142
143 let mut doc_block = DocBlock {
144 id: None,
145 project_id: options.project_id.clone(),
146 ingest_id: options.ingest_id.clone(),
147 symbol_key: Some(symbol_key.clone()),
148 language: Some(options.language.clone()),
149 source_kind: Some(options.source_kind.clone()),
150 doc_hash: None,
151 summary: None,
152 remarks: None,
153 returns: None,
154 value: None,
155 params: Vec::new(),
156 type_params: Vec::new(),
157 exceptions: Vec::new(),
158 examples: Vec::new(),
159 notes: Vec::new(),
160 warnings: Vec::new(),
161 safety: None,
162 panics: None,
163 errors: None,
164 see_also: Vec::new(),
165 deprecated: None,
166 inherit_doc: None,
167 sections: Vec::new(),
168 raw: None,
169 extra: None,
170 };
171
172 for child in member.children().filter(Node::is_element) {
173 match child.tag_name().name() {
174 "summary" => doc_block.summary = optional_text(child),
175 "remarks" => doc_block.remarks = optional_text(child),
176 "returns" => doc_block.returns = optional_text(child),
177 "value" => doc_block.value = optional_text(child),
178 "param" => {
179 if let Some(name) = child.attribute("name") {
180 let description = render_doc_text(child);
181 doc_block.params.push(DocParam {
182 name: name.to_string(),
183 description: if description.is_empty() {
184 None
185 } else {
186 Some(description)
187 },
188 type_ref: None,
189 });
190 }
191 }
192 "typeparam" => {
193 if let Some(name) = child.attribute("name") {
194 let description = render_doc_text(child);
195 doc_block.type_params.push(DocTypeParam {
196 name: name.to_string(),
197 description: if description.is_empty() {
198 None
199 } else {
200 Some(description)
201 },
202 });
203 }
204 }
205 "exception" => {
206 let description = render_doc_text(child);
207 let type_ref =
208 child
209 .attribute("cref")
210 .map(|cref| docx_store::models::TypeRef {
211 display: Some(cref.to_string()),
212 canonical: Some(cref.to_string()),
213 language: Some(options.language.clone()),
214 symbol_key: Some(make_csharp_symbol_key(
215 &options.project_id,
216 cref,
217 )),
218 generics: Vec::new(),
219 modifiers: Vec::new(),
220 });
221 doc_block.exceptions.push(DocException {
222 type_ref,
223 description: if description.is_empty() {
224 None
225 } else {
226 Some(description)
227 },
228 });
229 }
230 "example" => {
231 let text = render_doc_text(child);
232 if !text.is_empty() {
233 doc_block.examples.push(DocExample {
234 lang: None,
235 code: Some(text),
236 caption: None,
237 });
238 }
239 }
240 "seealso" => {
241 if let Some(see) = parse_see_also(child) {
242 doc_block.see_also.push(see);
243 }
244 }
245 "note" => {
246 let text = render_doc_text(child);
247 if !text.is_empty() {
248 doc_block.notes.push(text);
249 }
250 }
251 "warning" => {
252 let text = render_doc_text(child);
253 if !text.is_empty() {
254 doc_block.warnings.push(text);
255 }
256 }
257 "inheritdoc" => {
258 let cref = child.attribute("cref").map(str::to_string);
259 let path = child.attribute("path").map(str::to_string);
260 doc_block.inherit_doc = Some(DocInherit { cref, path });
261 }
262 "deprecated" => {
263 let text = render_doc_text(child);
264 if !text.is_empty() {
265 doc_block.deprecated = Some(text);
266 }
267 }
268 _ => {}
269 }
270 }
271
272 if doc_block.summary.is_some() {
273 symbol.doc_summary.clone_from(&doc_block.summary);
274 }
275
276 let range = member.range();
277 doc_block.raw = Some(xml[range].to_string());
278
279 symbols.push(symbol);
280 doc_blocks.push(doc_block);
281 }
282
283 Ok(CsharpParseOutput {
284 assembly_name,
285 symbols,
286 doc_blocks,
287 })
288 }
289
290 pub async fn parse_async(
295 xml: String,
296 options: CsharpParseOptions,
297 ) -> Result<CsharpParseOutput, CsharpParseError> {
298 tokio::task::spawn_blocking(move || Self::parse(&xml, &options)).await?
299 }
300
301 pub async fn parse_file(
306 path: impl AsRef<Path>,
307 options: CsharpParseOptions,
308 ) -> Result<CsharpParseOutput, CsharpParseError> {
309 let path = path.as_ref().to_path_buf();
310 let xml = tokio::task::spawn_blocking(move || std::fs::read_to_string(path)).await??;
311 Self::parse_async(xml, options).await
312 }
313}
314
/// Pieces of a C# documentation ID string as split by `parse_doc_id`.
#[derive(Debug)]
struct DocIdParts {
    /// Member kind derived from the one-letter prefix (`T`, `M`, `P`, `F`, `E`, `N`);
    /// `None` for any other prefix.
    kind: Option<String>,
    /// Last segment of the qualified name.
    name: Option<String>,
    /// Text after the prefix, up to but excluding the first `(`.
    qualified_name: Option<String>,
    /// Copy of `name`.
    display_name: Option<String>,
    /// Full text after the prefix, including any parameter list.
    signature: Option<String>,
}
323
324fn parse_doc_id(doc_id: &str) -> DocIdParts {
325 let mut parts = doc_id.splitn(2, ':');
326 let prefix = parts.next().unwrap_or("");
327 let rest = parts.next().unwrap_or("");
328
329 let kind = match prefix {
330 "T" => Some("type".to_string()),
331 "M" => Some("method".to_string()),
332 "P" => Some("property".to_string()),
333 "F" => Some("field".to_string()),
334 "E" => Some("event".to_string()),
335 "N" => Some("namespace".to_string()),
336 _ => None,
337 };
338
339 let (qualified_name, signature) = if rest.is_empty() {
340 (None, None)
341 } else if let Some(pos) = rest.find('(') {
342 let qualified = rest[..pos].to_string();
343 (Some(qualified), Some(rest.to_string()))
344 } else {
345 (Some(rest.to_string()), Some(rest.to_string()))
346 };
347
348 let name = qualified_name
349 .as_deref()
350 .and_then(extract_simple_name)
351 .map(str::to_string);
352
353 DocIdParts {
354 kind,
355 name: name.clone(),
356 qualified_name,
357 display_name: name,
358 signature,
359 }
360}
361
/// Returns the last segment of a qualified name, splitting on `.`, `+` and `#`.
///
/// Returns `None` when that segment is empty (an empty input or a name ending in a
/// separator), so callers never receive an empty string standing in for "no name".
fn extract_simple_name(value: &str) -> Option<&str> {
    value
        .rsplit(['.', '+', '#'])
        .next()
        .filter(|segment| !segment.is_empty())
}
365
366fn extract_assembly_name(doc: &Document<'_>) -> Option<String> {
367 let assembly_node = doc
368 .descendants()
369 .find(|node| node.has_tag_name("assembly"))?;
370 let name_node = assembly_node
371 .children()
372 .find(|node| node.has_tag_name("name"))?;
373 name_node.text().map(|text| text.trim().to_string())
374}
375
376fn render_doc_text(node: Node<'_, '_>) -> String {
377 let text = render_children(node);
378 cleanup_text(&text)
379}
380
381fn optional_text(node: Node<'_, '_>) -> Option<String> {
382 let text = render_doc_text(node);
383 if text.is_empty() { None } else { Some(text) }
384}
385
386fn render_children(node: Node<'_, '_>) -> String {
387 let mut output = String::new();
388 for child in node.children() {
389 let fragment = render_node(child);
390 if fragment.is_empty() {
391 continue;
392 }
393 if needs_space(&output, &fragment) {
394 output.push(' ');
395 }
396 output.push_str(&fragment);
397 }
398 output
399}
400
401fn render_node(node: Node<'_, '_>) -> String {
402 match node.node_type() {
403 roxmltree::NodeType::Text => node.text().unwrap_or("").to_string(),
404 roxmltree::NodeType::Element => match node.tag_name().name() {
405 "para" => {
406 let text = render_children(node);
407 if text.is_empty() {
408 String::new()
409 } else {
410 format!("\n{}\n", text.trim())
411 }
412 }
413 "code" => render_code_block(node),
414 "see" | "seealso" => render_inline_link(node),
415 "paramref" | "typeparamref" => render_ref(node),
416 "list" => render_list(node),
417 _ => render_children(node),
418 },
419 _ => String::new(),
420 }
421}
422
423fn render_code_block(node: Node<'_, '_>) -> String {
424 let code_text = node.text().unwrap_or("").trim();
425 if code_text.is_empty() {
426 String::new()
427 } else {
428 format!("\n```\n{code_text}\n```\n")
429 }
430}
431
432fn render_inline_link(node: Node<'_, '_>) -> String {
433 let target = node
434 .attribute("cref")
435 .or_else(|| node.attribute("href"))
436 .unwrap_or("")
437 .trim();
438 let label = node.text().unwrap_or("").trim();
439 if target.is_empty() {
440 label.to_string()
441 } else if label.is_empty() {
442 target.to_string()
443 } else {
444 format!("[{label}]({target})")
445 }
446}
447
448fn render_ref(node: Node<'_, '_>) -> String {
449 let name = node.attribute("name").unwrap_or("").trim();
450 if name.is_empty() {
451 String::new()
452 } else {
453 format!("`{name}`")
454 }
455}
456
457fn render_list(node: Node<'_, '_>) -> String {
458 let mut lines = Vec::new();
459 for item in node.children().filter(|child| child.has_tag_name("item")) {
460 let term = item
461 .children()
462 .find(|child| child.has_tag_name("term"))
463 .map(render_children);
464 let description = item
465 .children()
466 .find(|child| child.has_tag_name("description"))
467 .map(render_children);
468 let text = match (term, description) {
469 (Some(term), Some(description)) => format!("{}: {}", term.trim(), description.trim()),
470 (Some(term), None) => term,
471 (None, Some(description)) => description,
472 (None, None) => render_children(item),
473 };
474 let text = text.trim();
475 if !text.is_empty() {
476 lines.push(format!("- {text}"));
477 }
478 }
479 if lines.is_empty() {
480 String::new()
481 } else {
482 format!("\n{}\n", lines.join("\n"))
483 }
484}
485
486fn cleanup_text(value: &str) -> String {
487 let mut lines = Vec::new();
488 let mut in_code_block = false;
489 for line in value.replace("\r\n", "\n").lines() {
490 let trimmed = line.trim_end();
491 if trimmed.trim_start().starts_with("```") {
492 in_code_block = !in_code_block;
493 lines.push(trimmed.to_string());
494 continue;
495 }
496 if in_code_block {
497 lines.push(trimmed.to_string());
498 } else {
499 lines.push(collapse_whitespace(trimmed).trim().to_string());
500 }
501 }
502
503 while matches!(lines.first(), Some(line) if line.is_empty()) {
504 lines.remove(0);
505 }
506 while matches!(lines.last(), Some(line) if line.is_empty()) {
507 lines.pop();
508 }
509
510 lines.join("\n")
511}
512
/// Replaces every run of whitespace characters in `value` with a single space.
///
/// Leading and trailing runs are collapsed too, not removed.
fn collapse_whitespace(value: &str) -> String {
    let mut collapsed = String::with_capacity(value.len());
    for ch in value.chars() {
        if !ch.is_whitespace() {
            collapsed.push(ch);
        } else if !collapsed.ends_with(' ') {
            // The only spaces ever pushed come from whitespace runs, so a trailing
            // space means the current run has already been emitted.
            collapsed.push(' ');
        }
    }
    collapsed
}
529
/// Decides whether a separating space belongs between `current` and the `next` fragment.
///
/// No space is added when either side is empty, when whitespace already separates the
/// two, when `next` starts with closing punctuation (`.,;:!?)]}`) or when `current`
/// ends with an opening bracket. This keeps text such as `<see cref="Foo"/>, or null`
/// from rendering as `Foo , or null`.
fn needs_space(current: &str, next: &str) -> bool {
    let (Some(last), Some(first)) = (current.chars().next_back(), next.chars().next()) else {
        return false;
    };
    // `char::is_whitespace` already covers '\n', so no separate newline test is needed.
    if last.is_whitespace() || first.is_whitespace() {
        return false;
    }
    !matches!(first, '.' | ',' | ';' | ':' | '!' | '?' | ')' | ']' | '}')
        && !matches!(last, '(' | '[' | '{')
}
539
540fn parse_see_also(node: Node<'_, '_>) -> Option<SeeAlso> {
541 let target = node
542 .attribute("cref")
543 .or_else(|| node.attribute("href"))
544 .map(str::to_string)?;
545 let label = node.text().map(|text| text.trim().to_string());
546 let label = match label {
547 Some(text) if text.is_empty() => None,
548 other => other,
549 };
550 let target_kind = if node.attribute("cref").is_some() {
551 Some("cref".to_string())
552 } else {
553 Some("href".to_string())
554 };
555 Some(SeeAlso {
556 label,
557 target,
558 target_kind,
559 })
560}