1use std::{error::Error, fmt, path::Path};
2
3use docx_store::models::{
4 DocBlock,
5 DocExample,
6 DocException,
7 DocInherit,
8 DocParam,
9 DocTypeParam,
10 SeeAlso,
11 SourceId,
12 Symbol,
13};
14use docx_store::schema::{SOURCE_KIND_CSHARP_XML, make_csharp_symbol_key};
15use roxmltree::{Document, Node};
16
17#[derive(Debug, Clone)]
19pub struct CsharpParseOptions {
20 pub project_id: String,
21 pub ingest_id: Option<String>,
22 pub language: String,
23 pub source_kind: String,
24}
25
26impl CsharpParseOptions {
27 pub fn new(project_id: impl Into<String>) -> Self {
28 Self {
29 project_id: project_id.into(),
30 ingest_id: None,
31 language: "csharp".to_string(),
32 source_kind: SOURCE_KIND_CSHARP_XML.to_string(),
33 }
34 }
35
36 #[must_use]
37 pub fn with_ingest_id(mut self, ingest_id: impl Into<String>) -> Self {
38 self.ingest_id = Some(ingest_id.into());
39 self
40 }
41}
42
/// Result of parsing one C# XML documentation file.
#[derive(Debug, Clone)]
pub struct CsharpParseOutput {
    /// Trimmed text of the `<name>` child of the first `<assembly>` element, when present.
    pub assembly_name: Option<String>,
    /// One symbol per `<member>` element that carries a `name` attribute.
    pub symbols: Vec<Symbol>,
    /// One doc block per parsed member, pushed in the same order as `symbols`.
    pub doc_blocks: Vec<DocBlock>,
}
50
/// Error produced while reading or parsing a C# XML documentation file.
///
/// Only a human-readable message is retained.
#[derive(Debug)]
pub struct CsharpParseError {
    message: String,
}

impl CsharpParseError {
    /// Wraps `message` in a new error value.
    fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        Self { message }
    }
}

impl fmt::Display for CsharpParseError {
    /// Writes the stored message prefixed with `C# XML parse error: `.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { message } = self;
        write!(f, "C# XML parse error: {message}")
    }
}

impl Error for CsharpParseError {}
72
73impl From<roxmltree::Error> for CsharpParseError {
74 fn from(err: roxmltree::Error) -> Self {
75 Self::new(err.to_string())
76 }
77}
78
79impl From<std::io::Error> for CsharpParseError {
80 fn from(err: std::io::Error) -> Self {
81 Self::new(err.to_string())
82 }
83}
84
85impl From<tokio::task::JoinError> for CsharpParseError {
86 fn from(err: tokio::task::JoinError) -> Self {
87 Self::new(err.to_string())
88 }
89}
90
91pub struct CsharpXmlParser;
93
94impl CsharpXmlParser {
95 #[allow(clippy::too_many_lines)]
100 pub fn parse(xml: &str, options: &CsharpParseOptions) -> Result<CsharpParseOutput, CsharpParseError> {
101 let doc = Document::parse(xml)?;
102 let assembly_name = extract_assembly_name(&doc);
103 let mut symbols = Vec::new();
104 let mut doc_blocks = Vec::new();
105
106 for member in doc.descendants().filter(|node| node.has_tag_name("member")) {
107 let Some(doc_id) = member.attribute("name") else {
108 continue;
109 };
110
111 let symbol_key = make_csharp_symbol_key(&options.project_id, doc_id);
112 let parts = parse_doc_id(doc_id);
113
114 let mut symbol = Symbol {
115 id: None,
116 project_id: options.project_id.clone(),
117 language: Some(options.language.clone()),
118 symbol_key: symbol_key.clone(),
119 kind: parts.kind,
120 name: parts.name,
121 qualified_name: parts.qualified_name,
122 display_name: parts.display_name,
123 signature: parts.signature,
124 signature_hash: None,
125 visibility: None,
126 is_static: None,
127 is_async: None,
128 is_const: None,
129 is_deprecated: None,
130 since: None,
131 stability: None,
132 source_path: None,
133 line: None,
134 col: None,
135 return_type: None,
136 params: Vec::new(),
137 type_params: Vec::new(),
138 attributes: Vec::new(),
139 source_ids: vec![SourceId {
140 kind: "csharp_doc_id".to_string(),
141 value: doc_id.to_string(),
142 }],
143 doc_summary: None,
144 extra: None,
145 };
146
147 let mut doc_block = DocBlock {
148 id: None,
149 project_id: options.project_id.clone(),
150 ingest_id: options.ingest_id.clone(),
151 symbol_key: Some(symbol_key.clone()),
152 language: Some(options.language.clone()),
153 source_kind: Some(options.source_kind.clone()),
154 doc_hash: None,
155 summary: None,
156 remarks: None,
157 returns: None,
158 value: None,
159 params: Vec::new(),
160 type_params: Vec::new(),
161 exceptions: Vec::new(),
162 examples: Vec::new(),
163 notes: Vec::new(),
164 warnings: Vec::new(),
165 safety: None,
166 panics: None,
167 errors: None,
168 see_also: Vec::new(),
169 deprecated: None,
170 inherit_doc: None,
171 sections: Vec::new(),
172 raw: None,
173 extra: None,
174 };
175
176 for child in member.children().filter(Node::is_element) {
177 match child.tag_name().name() {
178 "summary" => doc_block.summary = optional_text(child),
179 "remarks" => doc_block.remarks = optional_text(child),
180 "returns" => doc_block.returns = optional_text(child),
181 "value" => doc_block.value = optional_text(child),
182 "param" => {
183 if let Some(name) = child.attribute("name") {
184 let description = render_doc_text(child);
185 doc_block.params.push(DocParam {
186 name: name.to_string(),
187 description: if description.is_empty() { None } else { Some(description) },
188 type_ref: None,
189 });
190 }
191 }
192 "typeparam" => {
193 if let Some(name) = child.attribute("name") {
194 let description = render_doc_text(child);
195 doc_block.type_params.push(DocTypeParam {
196 name: name.to_string(),
197 description: if description.is_empty() { None } else { Some(description) },
198 });
199 }
200 }
201 "exception" => {
202 let description = render_doc_text(child);
203 let type_ref = child
204 .attribute("cref")
205 .map(|cref| docx_store::models::TypeRef {
206 display: Some(cref.to_string()),
207 canonical: Some(cref.to_string()),
208 language: Some(options.language.clone()),
209 symbol_key: Some(make_csharp_symbol_key(&options.project_id, cref)),
210 generics: Vec::new(),
211 modifiers: Vec::new(),
212 });
213 doc_block.exceptions.push(DocException {
214 type_ref,
215 description: if description.is_empty() { None } else { Some(description) },
216 });
217 }
218 "example" => {
219 let text = render_doc_text(child);
220 if !text.is_empty() {
221 doc_block.examples.push(DocExample {
222 lang: None,
223 code: Some(text),
224 caption: None,
225 });
226 }
227 }
228 "seealso" => {
229 if let Some(see) = parse_see_also(child) {
230 doc_block.see_also.push(see);
231 }
232 }
233 "note" => {
234 let text = render_doc_text(child);
235 if !text.is_empty() {
236 doc_block.notes.push(text);
237 }
238 }
239 "warning" => {
240 let text = render_doc_text(child);
241 if !text.is_empty() {
242 doc_block.warnings.push(text);
243 }
244 }
245 "inheritdoc" => {
246 let cref = child.attribute("cref").map(str::to_string);
247 let path = child.attribute("path").map(str::to_string);
248 doc_block.inherit_doc = Some(DocInherit { cref, path });
249 }
250 "deprecated" => {
251 let text = render_doc_text(child);
252 if !text.is_empty() {
253 doc_block.deprecated = Some(text);
254 }
255 }
256 _ => {}
257 }
258 }
259
260 if doc_block.summary.is_some() {
261 symbol.doc_summary.clone_from(&doc_block.summary);
262 }
263
264 let range = member.range();
265 doc_block.raw = Some(xml[range].to_string());
266
267 symbols.push(symbol);
268 doc_blocks.push(doc_block);
269 }
270
271 Ok(CsharpParseOutput {
272 assembly_name,
273 symbols,
274 doc_blocks,
275 })
276 }
277
278 pub async fn parse_async(
283 xml: String,
284 options: CsharpParseOptions,
285 ) -> Result<CsharpParseOutput, CsharpParseError> {
286 tokio::task::spawn_blocking(move || Self::parse(&xml, &options)).await?
287 }
288
289 pub async fn parse_file(
294 path: impl AsRef<Path>,
295 options: CsharpParseOptions,
296 ) -> Result<CsharpParseOutput, CsharpParseError> {
297 let path = path.as_ref().to_path_buf();
298 let xml = tokio::task::spawn_blocking(move || std::fs::read_to_string(path)).await??;
299 Self::parse_async(xml, options).await
300 }
301}
302
/// Pieces extracted from a documentation ID such as
/// `M:Ns.Type.Method(System.Int32)`.
#[derive(Debug)]
struct DocIdParts {
    /// Symbol kind derived from the one-letter prefix, if recognized.
    kind: Option<String>,
    /// Last segment of `qualified_name`.
    name: Option<String>,
    /// Text after the prefix, cut at the first `(` when one exists.
    qualified_name: Option<String>,
    /// Same value as `name`.
    display_name: Option<String>,
    /// Full text after the prefix, including any parameter list.
    signature: Option<String>,
}

/// Splits a documentation ID into its kind, names, and signature.
///
/// The part before the first `:` selects the kind; everything after it is the
/// signature. When nothing follows the prefix, all name fields are `None`.
fn parse_doc_id(doc_id: &str) -> DocIdParts {
    let (prefix, rest) = doc_id.split_once(':').unwrap_or((doc_id, ""));

    let kind = match prefix {
        "T" => Some("type"),
        "M" => Some("method"),
        "P" => Some("property"),
        "F" => Some("field"),
        "E" => Some("event"),
        "N" => Some("namespace"),
        _ => None,
    }
    .map(str::to_string);

    let signature = (!rest.is_empty()).then(|| rest.to_string());
    let qualified_name = signature.as_deref().map(|full| {
        // Drop the parameter list, if any, to get the qualified name.
        let end = full.find('(').unwrap_or(full.len());
        full[..end].to_string()
    });

    let name = qualified_name
        .as_deref()
        .and_then(extract_simple_name)
        .map(str::to_string);
    let display_name = name.clone();

    DocIdParts {
        kind,
        name,
        qualified_name,
        display_name,
        signature,
    }
}

/// Returns the text after the last `.`, `+`, or `#` in `value`, or all of
/// `value` when none of those separators occurs.
fn extract_simple_name(value: &str) -> Option<&str> {
    // Every separator is one byte wide, so `index + 1` is a char boundary.
    let start = value.rfind(['.', '+', '#']).map_or(0, |index| index + 1);
    Some(&value[start..])
}
353
354fn extract_assembly_name(doc: &Document<'_>) -> Option<String> {
355 let assembly_node = doc.descendants().find(|node| node.has_tag_name("assembly"))?;
356 let name_node = assembly_node
357 .children()
358 .find(|node| node.has_tag_name("name"))?;
359 name_node.text().map(|text| text.trim().to_string())
360}
361
362fn render_doc_text(node: Node<'_, '_>) -> String {
363 let text = render_children(node);
364 cleanup_text(&text)
365}
366
367fn optional_text(node: Node<'_, '_>) -> Option<String> {
368 let text = render_doc_text(node);
369 if text.is_empty() {
370 None
371 } else {
372 Some(text)
373 }
374}
375
376fn render_children(node: Node<'_, '_>) -> String {
377 let mut output = String::new();
378 for child in node.children() {
379 let fragment = render_node(child);
380 if fragment.is_empty() {
381 continue;
382 }
383 if needs_space(&output, &fragment) {
384 output.push(' ');
385 }
386 output.push_str(&fragment);
387 }
388 output
389}
390
391fn render_node(node: Node<'_, '_>) -> String {
392 match node.node_type() {
393 roxmltree::NodeType::Text => node.text().unwrap_or("").to_string(),
394 roxmltree::NodeType::Element => match node.tag_name().name() {
395 "para" => {
396 let text = render_children(node);
397 if text.is_empty() {
398 String::new()
399 } else {
400 format!("\n{}\n", text.trim())
401 }
402 }
403 "code" => render_code_block(node),
404 "see" | "seealso" => render_inline_link(node),
405 "paramref" | "typeparamref" => render_ref(node),
406 "list" => render_list(node),
407 _ => render_children(node),
408 },
409 _ => String::new(),
410 }
411}
412
413fn render_code_block(node: Node<'_, '_>) -> String {
414 let code_text = node.text().unwrap_or("").trim();
415 if code_text.is_empty() {
416 String::new()
417 } else {
418 format!("\n```\n{code_text}\n```\n")
419 }
420}
421
422fn render_inline_link(node: Node<'_, '_>) -> String {
423 let target = node
424 .attribute("cref")
425 .or_else(|| node.attribute("href"))
426 .unwrap_or("")
427 .trim();
428 let label = node.text().unwrap_or("").trim();
429 if target.is_empty() {
430 label.to_string()
431 } else if label.is_empty() {
432 target.to_string()
433 } else {
434 format!("[{label}]({target})")
435 }
436}
437
438fn render_ref(node: Node<'_, '_>) -> String {
439 let name = node.attribute("name").unwrap_or("").trim();
440 if name.is_empty() {
441 String::new()
442 } else {
443 format!("`{name}`")
444 }
445}
446
447fn render_list(node: Node<'_, '_>) -> String {
448 let mut lines = Vec::new();
449 for item in node.children().filter(|child| child.has_tag_name("item")) {
450 let term = item
451 .children()
452 .find(|child| child.has_tag_name("term"))
453 .map(render_children);
454 let description = item
455 .children()
456 .find(|child| child.has_tag_name("description"))
457 .map(render_children);
458 let text = match (term, description) {
459 (Some(term), Some(description)) => format!("{}: {}", term.trim(), description.trim()),
460 (Some(term), None) => term,
461 (None, Some(description)) => description,
462 (None, None) => render_children(item),
463 };
464 let text = text.trim();
465 if !text.is_empty() {
466 lines.push(format!("- {text}"));
467 }
468 }
469 if lines.is_empty() {
470 String::new()
471 } else {
472 format!("\n{}\n", lines.join("\n"))
473 }
474}
475
/// Normalizes rendered documentation text line by line.
///
/// Outside fenced code blocks every line has its whitespace runs collapsed
/// to a single space and is trimmed on both sides. Fence lines (those whose
/// first non-whitespace characters are three backticks) and lines inside a
/// fence only lose trailing whitespace. Blank lines at the very start and
/// end are removed; interior blank lines are kept.
fn cleanup_text(value: &str) -> String {
    let mut in_code_block = false;
    // `str::lines` treats both `\n` and `\r\n` as terminators, and a stray
    // trailing `\r` is removed by `trim_end`, so no up-front replacement of
    // line endings is needed.
    let lines: Vec<String> = value
        .lines()
        .map(|line| {
            let trimmed = line.trim_end();
            if trimmed.trim_start().starts_with("```") {
                in_code_block = !in_code_block;
                trimmed.to_string()
            } else if in_code_block {
                trimmed.to_string()
            } else {
                collapse_whitespace(trimmed).trim().to_string()
            }
        })
        .collect();

    // Find the non-blank window once rather than repeatedly removing from
    // the front of the vector, which costs O(n) per removal.
    let Some(first) = lines.iter().position(|line| !line.is_empty()) else {
        return String::new();
    };
    let last = lines
        .iter()
        .rposition(|line| !line.is_empty())
        .unwrap_or(first);

    lines[first..=last].join("\n")
}

/// Replaces every run of whitespace characters in `value` with one space.
///
/// Leading and trailing runs are collapsed too, not removed.
fn collapse_whitespace(value: &str) -> String {
    let mut output = String::with_capacity(value.len());
    let mut last_was_space = false;
    for ch in value.chars() {
        let is_space = ch.is_whitespace();
        if !is_space {
            output.push(ch);
        } else if !last_was_space {
            output.push(' ');
        }
        last_was_space = is_space;
    }
    output
}
519
/// Decides whether a single space belongs between `current` and the
/// fragment `next` that is about to be appended to it.
///
/// No space is added when either side is empty, when whitespace already
/// separates the two, when `next` starts with closing punctuation, or when
/// `current` ends with an opening bracket. The punctuation rules keep inline
/// elements attached to the text around them, so `Foo` followed by `.`
/// renders as `Foo.` and `(` followed by `Foo` renders as `(Foo`.
fn needs_space(current: &str, next: &str) -> bool {
    let (Some(last), Some(first)) = (current.chars().next_back(), next.chars().next()) else {
        return false;
    };
    // `char::is_whitespace` already covers `\n`.
    if last.is_whitespace() || first.is_whitespace() {
        return false;
    }

    let after_opening = matches!(last, '(' | '[' | '{');
    let before_closing = matches!(
        first,
        '.' | ',' | ';' | ':' | '!' | '?' | ')' | ']' | '}'
    );
    !(after_opening || before_closing)
}
529
530fn parse_see_also(node: Node<'_, '_>) -> Option<SeeAlso> {
531 let target = node
532 .attribute("cref")
533 .or_else(|| node.attribute("href"))
534 .map(str::to_string)?;
535 let label = node.text().map(|text| text.trim().to_string());
536 let label = match label {
537 Some(text) if text.is_empty() => None,
538 other => other,
539 };
540 let target_kind = if node.attribute("cref").is_some() {
541 Some("cref".to_string())
542 } else {
543 Some("href".to_string())
544 };
545 Some(SeeAlso {
546 label,
547 target,
548 target_kind,
549 })
550}