Skip to main content

alef_codegen/
doc_emission.rs

1//! Language-native documentation comment emission.
2//! Provides standardized functions for emitting doc comments in different languages.
3
4/// Emit PHPDoc-style comments (/** ... */)
5/// Used for PHP classes, methods, and properties.
6///
7/// Sanitizes Rust-specific idioms before translating rustdoc sections
8/// (`# Arguments` → `@param`, `# Returns` → `@return`, `# Errors` → `@throws`,
9/// `# Example` → ` ```php ` fence) via [`render_phpdoc_sections`].
10///
11/// `exception_class` is the PHP exception class name to use in `@throws` tags.
12pub fn emit_phpdoc(out: &mut String, doc: &str, indent: &str, exception_class: &str) {
13    if doc.is_empty() {
14        return;
15    }
16    // Sanitize Rust-specific idioms before processing sections.
17    let sanitized = sanitize_rust_idioms(doc, DocTarget::PhpDoc);
18    let sections = parse_rustdoc_sections(&sanitized);
19    let any_section = sections.arguments.is_some()
20        || sections.returns.is_some()
21        || sections.errors.is_some()
22        || sections.example.is_some();
23    let body = if any_section {
24        render_phpdoc_sections(&sections, exception_class)
25    } else {
26        sanitized
27    };
28    out.push_str(indent);
29    out.push_str("/**\n");
30    for line in body.lines() {
31        out.push_str(indent);
32        out.push_str(" * ");
33        out.push_str(&escape_phpdoc_line(line));
34        out.push('\n');
35    }
36    out.push_str(indent);
37    out.push_str(" */\n");
38}
39
/// Break up every `*/` in `s` (as `* /`) so the text cannot close the
/// surrounding PHPDoc comment early.
fn escape_phpdoc_line(s: &str) -> String {
    // Splitting on the terminator and re-joining with the harmless form is
    // equivalent to a left-to-right, non-overlapping replacement.
    let pieces: Vec<&str> = s.split("*/").collect();
    pieces.join("* /")
}
44
45/// Emit C# XML documentation comments (/// <summary> ... </summary>)
46/// Used for C# classes, structs, methods, and properties.
47///
48/// Translates rustdoc sections (`# Arguments` → `<param>`,
49/// `# Returns` → `<returns>`, `# Errors` → `<exception>`,
50/// `# Example` → `<example><code>`) via [`render_csharp_xml_sections`].
51///
52/// `exception_class` is the C# exception class name to use in `<exception cref="...">` tags.
53pub fn emit_csharp_doc(out: &mut String, doc: &str, indent: &str, exception_class: &str) {
54    if doc.is_empty() {
55        return;
56    }
57    // Parse sections from the raw rustdoc first (so `# Examples` / `# Arguments`
58    // / `# Returns` / `# Errors` are routed into structured XML tags), then
59    // sanitise each section body to strip Rust idioms and XML-escape `<`/`>`/`&`.
60    let raw_sections = parse_rustdoc_sections(doc);
61    let sections = RustdocSections {
62        summary: sanitize_rust_idioms_keep_sections(&raw_sections.summary, DocTarget::CSharpDoc),
63        arguments: raw_sections
64            .arguments
65            .as_deref()
66            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
67        returns: raw_sections
68            .returns
69            .as_deref()
70            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
71        errors: raw_sections
72            .errors
73            .as_deref()
74            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
75        panics: raw_sections
76            .panics
77            .as_deref()
78            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
79        safety: raw_sections
80            .safety
81            .as_deref()
82            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
83        // Examples typically contain Rust code that doesn't compile as C#; drop the body
84        // entirely rather than risk leaking unparseable code into `<example>`.
85        example: None,
86    };
87    let any_section = sections.arguments.is_some()
88        || sections.returns.is_some()
89        || sections.errors.is_some()
90        || sections.example.is_some();
91    if !any_section {
92        // Backwards-compatible path: plain `<summary>` for prose-only docs.
93        out.push_str(indent);
94        out.push_str("/// <summary>\n");
95        for line in sections.summary.lines() {
96            out.push_str(indent);
97            out.push_str("/// ");
98            // Note: sanitise_rust_idioms_keep_sections already XML-escaped <, >, & for
99            // the CSharpDoc target. We deliberately do NOT call escape_csharp_doc_line
100            // here because that would double-encode (e.g. `&amp;` → `&amp;amp;`).
101            out.push_str(line);
102            out.push('\n');
103        }
104        out.push_str(indent);
105        out.push_str("/// </summary>\n");
106        return;
107    }
108    let rendered = render_csharp_xml_sections(&sections, exception_class);
109    for line in rendered.lines() {
110        out.push_str(indent);
111        out.push_str("/// ");
112        // The rendered tags already contain the canonical chars; we only
113        // escape XML special chars that aren't part of our tag syntax. Since
114        // render_csharp_xml_sections produces well-formed XML, raw passthrough
115        // is correct.
116        out.push_str(line);
117        out.push('\n');
118    }
119}
120
121/// Emit Elixir documentation comments (@doc)
122/// Used for Elixir modules and functions.
123pub fn emit_elixir_doc(out: &mut String, doc: &str) {
124    if doc.is_empty() {
125        return;
126    }
127    out.push_str("@doc \"\"\"\n");
128    for line in doc.lines() {
129        out.push_str(&escape_elixir_doc_line(line));
130        out.push('\n');
131    }
132    out.push_str("\"\"\"\n");
133}
134
/// Write `doc` into `out` as Rust `///` documentation comments.
///
/// Used by alef backends that emit Rust source (e.g., the Rustler NIF crate,
/// the swift-bridge wrapper crate, the FRB Dart bridge crate). Distinct from
/// `emit_swift_doc` only by intent — the syntax is identical (`/// ` per line).
///
/// Every line of `doc` becomes `indent` + `/// ` + the line + a newline.
pub fn emit_rustdoc(out: &mut String, doc: &str, indent: &str) {
    // An empty `doc` has no lines, so nothing is written in that case.
    for text in doc.lines() {
        out.extend([indent, "/// ", text, "\n"]);
    }
}
151
/// Escape Elixir doc line: break up triple-quote sequences that could close
/// the heredoc early.
///
/// A space is inserted after every second consecutive `"` that is followed by
/// another `"`, so the result never contains `"""`. For runs of up to four
/// quotes this yields the same text as replacing `"""` with `"" "`; longer
/// runs (where a single replacement pass would leave a `"""` behind, e.g.
/// five quotes becoming `"" """`) are now fully broken up as well.
fn escape_elixir_doc_line(s: &str) -> String {
    let mut out = String::with_capacity(s.len() + 2);
    // Number of consecutive `"` characters written since the last break.
    let mut run = 0usize;
    for ch in s.chars() {
        if ch == '"' {
            if run == 2 {
                // A third quote in a row would form `"""`; separate it.
                out.push(' ');
                run = 0;
            }
            run += 1;
        } else {
            run = 0;
        }
        out.push(ch);
    }
    out
}
156
/// Write `doc` into `out` as R roxygen2 comments, one `#' `-prefixed line per
/// line of `doc`. Intended for R functions.
///
/// No indentation is applied, and nothing is written for an empty `doc`.
pub fn emit_roxygen(out: &mut String, doc: &str) {
    let rendered: String = doc.lines().map(|text| format!("#' {text}\n")).collect();
    out.push_str(&rendered);
}
169
/// Write `doc` into `out` as Swift `///` documentation comments.
/// Intended for Swift structs, enums, and functions.
///
/// Each line of `doc` is emitted as `indent` + `/// ` + the line; nothing is
/// written for an empty `doc`.
pub fn emit_swift_doc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    let prefix = format!("{indent}/// ");
    doc.lines().for_each(|text| {
        out.push_str(&prefix);
        out.push_str(text);
        out.push('\n');
    });
}
183
184/// Emit Javadoc-style documentation comments (/** ... */)
185/// Used for Java classes, methods, and fields.
186/// Handles XML escaping and Javadoc tag formatting.
187pub fn emit_javadoc(out: &mut String, doc: &str, indent: &str) {
188    if doc.is_empty() {
189        return;
190    }
191    out.push_str(indent);
192    out.push_str("/**\n");
193    for line in doc.lines() {
194        let escaped = escape_javadoc_line(line);
195        let trimmed = escaped.trim_end();
196        if trimmed.is_empty() {
197            out.push_str(indent);
198            out.push_str(" *\n");
199        } else {
200            out.push_str(indent);
201            out.push_str(" * ");
202            out.push_str(trimmed);
203            out.push('\n');
204        }
205    }
206    out.push_str(indent);
207    out.push_str(" */\n");
208}
209
/// Write `doc` into `out` as a multi-line KDoc block (`/** ... */`).
/// Intended for Kotlin classes, methods, and properties.
///
/// Trailing whitespace is removed from each line; blank lines are emitted as
/// a bare ` *`. Every emitted line is prefixed with `indent`, and nothing is
/// written for an empty `doc`.
///
/// NOTE(review): unlike `emit_phpdoc`, the text is written without escaping
/// `*/` — confirm callers never pass documentation containing a comment
/// terminator.
pub fn emit_kdoc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    out.push_str(&format!("{indent}/**\n"));
    for text in doc.lines().map(str::trim_end) {
        let row = if text.is_empty() {
            format!("{indent} *\n")
        } else {
            format!("{indent} * {text}\n")
        };
        out.push_str(&row);
    }
    out.push_str(&format!("{indent} */\n"));
}
233
/// Emit KDoc-style documentation comments in ktfmt-canonical format.
///
/// ktfmt collapses short KDoc comments to single-line format (`/** ... */`)
/// when they fit within the 100-character line width limit. This function
/// generates KDoc in that canonical form to avoid unnecessary formatting
/// diffs when the generated code is passed through ktfmt.
///
/// - A `doc` consisting of exactly one line whose rendered form
///   (`indent` + `/** ` + trimmed content + ` */`) is at most 100 characters
///   is emitted on a single line.
/// - Anything else is emitted as a multi-line block with ` * ` prefixes,
///   trailing whitespace stripped and blank lines written as a bare ` *`.
/// - Nothing is written for an empty `doc`.
///
/// The width is measured in characters (Unicode scalar values) rather than
/// UTF-8 bytes, so comments containing non-ASCII text are not pushed onto
/// multiple lines merely because their byte length exceeds the limit.
pub fn emit_kdoc_ktfmt_canonical(out: &mut String, doc: &str, indent: &str) {
    const KTFMT_LINE_WIDTH: usize = 100;
    const OPENER: &str = "/** ";
    const CLOSER: &str = " */";

    if doc.is_empty() {
        return;
    }

    let lines: Vec<&str> = doc.lines().collect();

    // Single-line candidate: exactly one line of input.
    if let [only] = lines.as_slice() {
        let content = only.trim();
        let width =
            indent.chars().count() + OPENER.len() + content.chars().count() + CLOSER.len();
        if width <= KTFMT_LINE_WIDTH {
            out.extend([indent, OPENER, content, CLOSER, "\n"]);
            return;
        }
    }

    // Multi-line format (default for long or multi-paragraph comments).
    out.extend([indent, "/**\n"]);
    for line in lines {
        match line.trim_end() {
            "" => out.extend([indent, " *\n"]),
            text => out.extend([indent, " * ", text, "\n"]),
        }
    }
    out.extend([indent, " */\n"]);
}
288
/// Write `doc` into `out` as Dartdoc `///` comments.
/// Intended for Dart classes, methods, and properties.
///
/// Each line of `doc` is emitted as `indent` + `/// ` + the line; nothing is
/// written for an empty `doc`.
pub fn emit_dartdoc(out: &mut String, doc: &str, indent: &str) {
    out.extend(
        doc.lines()
            .flat_map(|text| [indent, "/// ", text, "\n"]),
    );
}
302
/// Write `doc` into `out` as Gleam `///` documentation comments.
/// Intended for Gleam functions and types.
///
/// Each line of `doc` is emitted as `indent` + `/// ` + the line; nothing is
/// written for an empty `doc`.
pub fn emit_gleam_doc(out: &mut String, doc: &str, indent: &str) {
    let mut remaining = doc.lines();
    while let Some(text) = remaining.next() {
        *out += indent;
        *out += "/// ";
        *out += text;
        out.push('\n');
    }
}
316
317/// Emit Doxygen-style C documentation comments using `///`-prefixed lines.
318///
319/// Used by `alef-backend-ffi` above every `extern "C" fn`, the `*_len()`
320/// companion, opaque-handle typedef, and (post-cbindgen) the type/enum
321/// declarations cbindgen surfaces in the generated `.h`. cbindgen translates
322/// `///` source lines into a single `/** ... */` Doxygen block per item, so we
323/// only need to emit per-line `///` content here.
324///
325/// Translates rustdoc sections via [`render_doxygen_sections`]:
326///
327/// - `# Arguments` → `\param <name> <description>` (one per arg).
328/// - `# Returns`   → `\return <description>`.
329/// - `# Errors`    → `\note <description>` (Doxygen has no `\throws` for C;
330///   `\note` is the convention).
331/// - `# Safety`    → `\note SAFETY: <description>`.
332/// - `# Example`   → `\code` ... `\endcode` block.
333///
334/// Markdown links (`[text](url)`) are flattened to `text (url)`. Body lines
335/// are word-wrapped at ~100 columns so the rendered `/** */` block stays
336/// readable in IDE tooltips and terminal viewers.
337pub fn emit_c_doxygen(out: &mut String, doc: &str, indent: &str) {
338    if doc.trim().is_empty() {
339        return;
340    }
341    let sections = parse_rustdoc_sections(doc);
342    let any_section = sections.arguments.is_some()
343        || sections.returns.is_some()
344        || sections.errors.is_some()
345        || sections.safety.is_some()
346        || sections.example.is_some();
347    let mut body = if any_section {
348        render_doxygen_sections_with_notes(&sections)
349    } else {
350        sections.summary.clone()
351    };
352    body = strip_markdown_links(&body);
353    let wrapped = word_wrap(&body, DOXYGEN_WRAP_WIDTH);
354    for line in wrapped.lines() {
355        out.push_str(indent);
356        out.push_str("/// ");
357        out.push_str(line);
358        out.push('\n');
359    }
360}
361
362const DOXYGEN_WRAP_WIDTH: usize = 100;
363
364/// Render `RustdocSections` as a Doxygen body but route `# Errors` and
365/// `# Safety` to `\note` lines instead of plain prose. This is the variant
366/// `emit_c_doxygen` uses; the public `render_doxygen_sections` keeps its
367/// long-standing plain-prose semantics so existing callers don't shift.
368fn render_doxygen_sections_with_notes(sections: &RustdocSections) -> String {
369    let mut out = String::new();
370    if !sections.summary.is_empty() {
371        out.push_str(&sections.summary);
372    }
373    if let Some(args) = sections.arguments.as_deref() {
374        for (name, desc) in parse_arguments_bullets(args) {
375            if !out.is_empty() {
376                out.push('\n');
377            }
378            if desc.is_empty() {
379                out.push_str("\\param ");
380                out.push_str(&name);
381            } else {
382                out.push_str("\\param ");
383                out.push_str(&name);
384                out.push(' ');
385                out.push_str(&desc);
386            }
387        }
388    }
389    if let Some(ret) = sections.returns.as_deref() {
390        if !out.is_empty() {
391            out.push('\n');
392        }
393        out.push_str("\\return ");
394        out.push_str(ret.trim());
395    }
396    if let Some(err) = sections.errors.as_deref() {
397        if !out.is_empty() {
398            out.push('\n');
399        }
400        out.push_str("\\note ");
401        out.push_str(err.trim());
402    }
403    if let Some(safety) = sections.safety.as_deref() {
404        if !out.is_empty() {
405            out.push('\n');
406        }
407        out.push_str("\\note SAFETY: ");
408        out.push_str(safety.trim());
409    }
410    if let Some(example) = sections.example.as_deref() {
411        if !out.is_empty() {
412            out.push('\n');
413        }
414        out.push_str("\\code\n");
415        for line in example.lines() {
416            let t = line.trim_start();
417            if t.starts_with("```") {
418                continue;
419            }
420            out.push_str(line);
421            out.push('\n');
422        }
423        out.push_str("\\endcode");
424    }
425    out
426}
427
/// Flatten Markdown inline links `[text](url)` to `text (url)` so the rendered
/// Doxygen block stays readable when consumed without a Markdown filter.
///
/// Matching is deliberately simple: from each `[`, the link text runs to the
/// next `]`; if that is immediately followed by `(`, the URL runs to the next
/// `)`. Anything that does not fit this shape is copied through unchanged.
///
/// Text is copied as `&str` slices, so non-ASCII characters are preserved
/// exactly (copying byte-by-byte would turn each UTF-8 byte into a separate
/// character and corrupt them).
fn strip_markdown_links(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;

    while let Some(open) = rest.find('[') {
        // Everything before the bracket is plain text.
        out.push_str(&rest[..open]);
        let tail = &rest[open + 1..];

        // Try to read `text](url)` starting right after the `[`.
        let mut link = None;
        if let Some(close) = tail.find(']') {
            if let Some(after_paren) = tail[close + 1..].strip_prefix('(') {
                if let Some(paren_close) = after_paren.find(')') {
                    link = Some((
                        &tail[..close],
                        &after_paren[..paren_close],
                        &after_paren[paren_close + 1..],
                    ));
                }
            }
        }

        match link {
            Some((text, url, remainder)) => {
                out.push_str(text);
                out.push_str(" (");
                out.push_str(url);
                out.push(')');
                rest = remainder;
            }
            None => {
                // Not a link: keep the bracket and resume scanning after it.
                out.push('[');
                rest = tail;
            }
        }
    }

    out.push_str(rest);
    out
}
460
/// Word-wrap each input line at `width`, measured with `str::len` (bytes).
///
/// Lines that start with `\code` or `\endcode`, lines between those markers,
/// and lines starting with a Markdown fence are passed through verbatim to
/// preserve example formatting, as are lines that already fit. Longer lines
/// are split on whitespace and re-joined with single spaces, so their
/// original leading indentation and inner spacing are not kept. Trailing
/// newlines are removed from the result.
fn word_wrap(input: &str, width: usize) -> String {
    let mut out = String::with_capacity(input.len());
    let mut inside_code = false;

    for line in input.lines() {
        let head = line.trim_start();
        let opens_code = head.starts_with("\\code");
        let closes_code = !opens_code && head.starts_with("\\endcode");

        let verbatim = opens_code
            || closes_code
            || inside_code
            || head.starts_with("```")
            || line.len() <= width;

        if opens_code {
            inside_code = true;
        } else if closes_code {
            inside_code = false;
        }

        if verbatim {
            out.push_str(line);
            out.push('\n');
            continue;
        }

        // Greedy fill: start a new output line whenever the next word would
        // push the pending one past `width`.
        let mut pending = String::with_capacity(width);
        for word in line.split_whitespace() {
            let fits = pending.is_empty() || pending.len() + 1 + word.len() <= width;
            if !fits {
                out.push_str(&pending);
                out.push('\n');
                pending.clear();
            }
            if !pending.is_empty() {
                pending.push(' ');
            }
            pending.push_str(word);
        }
        if !pending.is_empty() {
            out.push_str(&pending);
            out.push('\n');
        }
    }

    out.trim_end_matches('\n').to_string()
}
512
/// Write `doc` into `out` as Zig `///` documentation comments.
/// Intended for Zig functions, types, and declarations.
///
/// Each line of `doc` is emitted as `indent` + `/// ` + the line; nothing is
/// written for an empty `doc`.
pub fn emit_zig_doc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    let rendered = doc.lines().fold(String::new(), |mut acc, text| {
        acc.push_str(indent);
        acc.push_str("/// ");
        acc.push_str(text);
        acc.push('\n');
        acc
    });
    out.push_str(&rendered);
}
526
527/// Emit YARD documentation comments for Ruby.
528/// Used for Ruby classes, methods, and attributes.
529///
530/// YARD syntax: each line prefixed with `# ` (with space). Translates rustdoc
531/// sections (`# Arguments` → `@param`, `# Returns` → `@return`, `# Errors` → `@raise`)
532/// via [`render_yard_sections`].
533pub fn emit_yard_doc(out: &mut String, doc: &str, indent: &str) {
534    if doc.is_empty() {
535        return;
536    }
537    let sections = parse_rustdoc_sections(doc);
538    let any_section = sections.arguments.is_some()
539        || sections.returns.is_some()
540        || sections.errors.is_some()
541        || sections.example.is_some();
542    let body = if any_section {
543        render_yard_sections(&sections)
544    } else {
545        doc.to_string()
546    };
547    for line in body.lines() {
548        out.push_str(indent);
549        out.push_str("# ");
550        out.push_str(line);
551        out.push('\n');
552    }
553}
554
555/// Render `RustdocSections` as YARD documentation comment body.
556///
557/// - `# Arguments` → `@param name desc` (one per arg)
558/// - `# Returns`   → `@return desc`
559/// - `# Errors`    → `@raise desc`
560/// - `# Example`   → `@example` block.
561///
562/// Output is a plain string with `\n` separators; the emitter wraps each line
563/// in `# ` itself.
564pub fn render_yard_sections(sections: &RustdocSections) -> String {
565    let mut out = String::new();
566    if !sections.summary.is_empty() {
567        out.push_str(&sections.summary);
568    }
569    if let Some(args) = sections.arguments.as_deref() {
570        for (name, desc) in parse_arguments_bullets(args) {
571            if !out.is_empty() {
572                out.push('\n');
573            }
574            if desc.is_empty() {
575                out.push_str("@param ");
576                out.push_str(&name);
577            } else {
578                out.push_str("@param ");
579                out.push_str(&name);
580                out.push(' ');
581                out.push_str(&desc);
582            }
583        }
584    }
585    if let Some(ret) = sections.returns.as_deref() {
586        if !out.is_empty() {
587            out.push('\n');
588        }
589        out.push_str("@return ");
590        out.push_str(ret.trim());
591    }
592    if let Some(err) = sections.errors.as_deref() {
593        if !out.is_empty() {
594            out.push('\n');
595        }
596        out.push_str("@raise ");
597        out.push_str(err.trim());
598    }
599    if let Some(example) = sections.example.as_deref() {
600        if let Some(body) = example_for_target(example, "ruby") {
601            if !out.is_empty() {
602                out.push('\n');
603            }
604            out.push_str("@example\n");
605            out.push_str(&body);
606        }
607    }
608    out
609}
610
611/// Escape Javadoc line: handle XML special chars and backtick code blocks.
612///
613/// HTML entities (`<`, `>`, `&`) are also escaped *inside* `{@code …}` blocks.
614/// Without that, content like `` `<pre><code>` `` would emit raw `<pre>`
615/// inside the Javadoc tag — Eclipse-formatter Spotless then treats it as a
616/// real `<pre>` block element and shatters the line across multiple `* `
617/// rows, breaking `alef-verify`'s embedded hash. Escaped content is
618/// rendered identically by Javadoc readers (the `{@code}` tag shows literal
619/// characters) and is stable under any post-formatter pass.
620fn escape_javadoc_line(s: &str) -> String {
621    let mut result = String::with_capacity(s.len());
622    let mut chars = s.chars().peekable();
623    while let Some(ch) = chars.next() {
624        if ch == '`' {
625            let mut code = String::new();
626            for c in chars.by_ref() {
627                if c == '`' {
628                    break;
629                }
630                code.push(c);
631            }
632            result.push_str("{@code ");
633            result.push_str(&escape_javadoc_html_entities(&code));
634            result.push('}');
635        } else if ch == '<' {
636            result.push_str("&lt;");
637        } else if ch == '>' {
638            result.push_str("&gt;");
639        } else if ch == '&' {
640            result.push_str("&amp;");
641        } else {
642            result.push(ch);
643        }
644    }
645    result
646}
647
/// Escape only the HTML special characters that would otherwise be parsed by
/// downstream Javadoc/Eclipse formatters as block-level HTML (e.g. `<pre>`):
/// `<` → `&lt;`, `>` → `&gt;`, `&` → `&amp;`.
fn escape_javadoc_html_entities(s: &str) -> String {
    // `&` must be handled first so the ampersands introduced by the other two
    // replacements are not escaped a second time.
    s.replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
}
662
/// A rustdoc comment split into the sections that binding emitters translate.
///
/// `summary` holds the leading prose before any recognised `# Heading`.
/// Each section body is stored verbatim, without its `# Heading` line;
/// turning bullet lists and code fences into host-language conventions is
/// left to the individual emitters.
///
/// Field contents are trimmed of leading/trailing whitespace by the parser so
/// emitters can concatenate them without producing `* ` lines that contain
/// only spaces.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct RustdocSections {
    /// Text that precedes the first recognised `# Section` heading.
    pub summary: String,
    /// Contents of `# Arguments`, when that section exists.
    pub arguments: Option<String>,
    /// Contents of `# Returns`, when that section exists.
    pub returns: Option<String>,
    /// Contents of `# Errors`, when that section exists.
    pub errors: Option<String>,
    /// Contents of `# Panics`, when that section exists.
    pub panics: Option<String>,
    /// Contents of `# Safety`, when that section exists.
    pub safety: Option<String>,
    /// Contents of `# Example` / `# Examples`, when that section exists.
    pub example: Option<String>,
}
690
691/// Parse a rustdoc string into [`RustdocSections`].
692///
693/// Recognises level-1 ATX headings whose name matches one of the standard
694/// rustdoc section names (`Arguments`, `Returns`, `Errors`, `Panics`,
695/// `Safety`, `Example`, `Examples`). Anything before the first heading
696/// becomes `summary`. Unrecognised headings are folded into the
697/// preceding section verbatim, so unconventional rustdoc isn't lost.
698///
699/// The input is expected to already have rustdoc-hidden lines stripped
700/// and intra-doc-link syntax rewritten by
701/// [`crate::extractor::helpers::normalize_rustdoc`].
702pub fn parse_rustdoc_sections(doc: &str) -> RustdocSections {
703    if doc.trim().is_empty() {
704        return RustdocSections::default();
705    }
706    let mut summary = String::new();
707    let mut arguments: Option<String> = None;
708    let mut returns: Option<String> = None;
709    let mut errors: Option<String> = None;
710    let mut panics: Option<String> = None;
711    let mut safety: Option<String> = None;
712    let mut example: Option<String> = None;
713    let mut current: Option<&'static str> = None;
714    let mut buf = String::new();
715    let mut in_fence = false;
716    let flush = |target: Option<&'static str>,
717                 buf: &mut String,
718                 summary: &mut String,
719                 arguments: &mut Option<String>,
720                 returns: &mut Option<String>,
721                 errors: &mut Option<String>,
722                 panics: &mut Option<String>,
723                 safety: &mut Option<String>,
724                 example: &mut Option<String>| {
725        let body = std::mem::take(buf).trim().to_string();
726        if body.is_empty() {
727            return;
728        }
729        match target {
730            None => {
731                if !summary.is_empty() {
732                    summary.push('\n');
733                }
734                summary.push_str(&body);
735            }
736            Some("arguments") => *arguments = Some(body),
737            Some("returns") => *returns = Some(body),
738            Some("errors") => *errors = Some(body),
739            Some("panics") => *panics = Some(body),
740            Some("safety") => *safety = Some(body),
741            Some("example") => *example = Some(body),
742            _ => {}
743        }
744    };
745    for line in doc.lines() {
746        let trimmed = line.trim_start();
747        if trimmed.starts_with("```") {
748            in_fence = !in_fence;
749            buf.push_str(line);
750            buf.push('\n');
751            continue;
752        }
753        if !in_fence {
754            if let Some(rest) = trimmed.strip_prefix("# ") {
755                let head = rest.trim().to_ascii_lowercase();
756                let target = match head.as_str() {
757                    "arguments" | "args" => Some("arguments"),
758                    "returns" => Some("returns"),
759                    "errors" => Some("errors"),
760                    "panics" => Some("panics"),
761                    "safety" => Some("safety"),
762                    "example" | "examples" => Some("example"),
763                    _ => None,
764                };
765                if target.is_some() {
766                    flush(
767                        current,
768                        &mut buf,
769                        &mut summary,
770                        &mut arguments,
771                        &mut returns,
772                        &mut errors,
773                        &mut panics,
774                        &mut safety,
775                        &mut example,
776                    );
777                    current = target;
778                    continue;
779                }
780            }
781        }
782        buf.push_str(line);
783        buf.push('\n');
784    }
785    flush(
786        current,
787        &mut buf,
788        &mut summary,
789        &mut arguments,
790        &mut returns,
791        &mut errors,
792        &mut panics,
793        &mut safety,
794        &mut example,
795    );
796    RustdocSections {
797        summary,
798        arguments,
799        returns,
800        errors,
801        panics,
802        safety,
803        example,
804    }
805}
806
/// Parse `# Arguments` body into `(name, description)` pairs.
///
/// Recognises both Markdown bullet styles `*` and `-`, with optional
/// backticks (or `*` emphasis markers) around the name:
/// `* `name` - description` or `- name: description`. Continuation lines
/// under a bullet are appended to the previous entry's description,
/// separated by a single space; text before the first bullet is ignored.
///
/// The name/description separator is whichever of ` - ` and `: ` appears
/// first on the line, so a description that itself contains the other
/// separator (e.g. `- path: input file - must exist`) is kept intact. If
/// neither is present the first space splits name from description, and a
/// bullet with a single word yields an empty description.
///
/// Used by emitters that translate to per-parameter documentation tags
/// (`@param`, `<param>`, `\param`).
pub fn parse_arguments_bullets(body: &str) -> Vec<(String, String)> {
    let mut out: Vec<(String, String)> = Vec::new();
    for raw in body.lines() {
        let trimmed = raw.trim();
        let bullet = trimmed
            .strip_prefix("* ")
            .or_else(|| trimmed.strip_prefix("- "));

        if let Some(after) = bullet {
            // Candidate separators as (byte offset, separator length).
            let dash = after.find(" - ").map(|idx| (idx, 3));
            let colon = after.find(": ").map(|idx| (idx, 2));
            let separator = match (dash, colon) {
                (Some(d), Some(c)) => Some(if d.0 < c.0 { d } else { c }),
                (d, c) => d.or(c),
            }
            .or_else(|| after.find(' ').map(|idx| (idx, 1)));

            let (name, desc) = match separator {
                Some((idx, len)) => (after[..idx].trim(), after[idx + len..].trim()),
                None => (after.trim(), ""),
            };
            let name = name.trim_matches('`').trim_matches('*').to_string();
            out.push((name, desc.to_string()));
        } else if !trimmed.is_empty() {
            if let Some(last) = out.last_mut() {
                if !last.1.is_empty() {
                    last.1.push(' ');
                }
                last.1.push_str(trimmed);
            }
        }
    }
    out
}
847
/// Detect the language tag on the first code fence in `body`.
///
/// Looks for the first line that, ignoring leading whitespace, starts with
/// ` ``` ` and returns the tag that follows, up to the first comma and
/// trimmed (e.g. `"rust"`, `"php"`, `"typescript"`). A bare ` ``` ` with no
/// tag returns `"rust"` because rustdoc treats unlabelled fences as Rust by
/// default. Returns `"rust"` when no fence is found at all.
fn detect_first_fence_lang(body: &str) -> &str {
    body.lines()
        .find_map(|line| line.trim_start().strip_prefix("```"))
        .map(|info| info.split(',').next().unwrap_or("").trim())
        .filter(|tag| !tag.is_empty())
        .unwrap_or("rust")
}
864
865/// Return `Some(transformed_example)` if the example should be emitted for
866/// `target_lang`, or `None` when the example is Rust source that would be
867/// meaningless in the foreign language.
868///
869/// When the original fence language is `rust` (including bare ` ``` ` which
870/// rustdoc defaults to Rust) and the target is not `rust`, the example is
871/// suppressed entirely — better absent than misleading. Cross-language
872/// transliteration of example bodies is intentionally out of scope.
873pub fn example_for_target(example: &str, target_lang: &str) -> Option<String> {
874    let trimmed = example.trim();
875    let source_lang = detect_first_fence_lang(trimmed);
876    if source_lang == "rust" && target_lang != "rust" {
877        None
878    } else {
879        Some(replace_fence_lang(trimmed, target_lang))
880    }
881}
882
/// Rewrite the language tag of every *opening* code fence in `body`.
///
/// For an opening ` ``` ` fence, everything between the backticks and the
/// first comma (or the end of the line) is replaced with `lang_replacement`.
/// The line's indentation and any comma-separated suffix are preserved.
/// Closing fences and all other lines are copied through untouched, so
/// ` ```rust ` … ` ``` ` becomes ` ```typescript ` … ` ``` `.
///
/// Fence state is tracked by toggling on each line that starts with
/// ` ``` `, so several fenced blocks in one body are each retagged.
///
/// Lines are re-joined with `\n` and trailing newlines are trimmed from the
/// result. A body without fences comes back with the same line contents.
/// Used by emitters that convert examples into ` ```typescript ` /
/// ` ```python ` / ` ```swift ` etc.
pub fn replace_fence_lang(body: &str, lang_replacement: &str) -> String {
    let mut out = String::with_capacity(body.len() + lang_replacement.len());
    let mut in_fence = false;
    for line in body.lines() {
        let trimmed = line.trim_start();
        match trimmed.strip_prefix("```") {
            Some(rest) if !in_fence => {
                // Opening fence: swap the tag, keep indent and `,attr` suffix.
                in_fence = true;
                let indent = &line[..line.len() - trimmed.len()];
                let after_lang = rest.find(',').map_or("", |i| &rest[i..]);
                out.push_str(indent);
                out.push_str("```");
                out.push_str(lang_replacement);
                out.push_str(after_lang);
            }
            Some(_) => {
                // Closing fence: must stay bare, otherwise the block never closes.
                in_fence = false;
                out.push_str(line);
            }
            None => out.push_str(line),
        }
        out.push('\n');
    }
    out.trim_end_matches('\n').to_string()
}
911
912/// Render `RustdocSections` as a JSDoc comment body (without the `/**` /
913/// ` */` wrappers — those are added by the caller's emitter, which knows
914/// the indent/escape conventions).
915///
916/// - `# Arguments` → `@param name - desc`
917/// - `# Returns`   → `@returns desc`
918/// - `# Errors`    → `@throws desc`
919/// - `# Example`   → `@example` block. Replaces ` ```rust ` fences with
920///   ` ```typescript ` so the example highlights properly in TypeDoc.
921///
922/// Output is a plain string with `\n` separators; emitters wrap each line
923/// in ` * ` themselves.
924pub fn render_jsdoc_sections(sections: &RustdocSections) -> String {
925    let mut out = String::new();
926    if !sections.summary.is_empty() {
927        out.push_str(&sections.summary);
928    }
929    if let Some(args) = sections.arguments.as_deref() {
930        for (name, desc) in parse_arguments_bullets(args) {
931            if !out.is_empty() {
932                out.push('\n');
933            }
934            if desc.is_empty() {
935                out.push_str(&crate::template_env::render(
936                    "doc_jsdoc_param.jinja",
937                    minijinja::context! { name => &name },
938                ));
939            } else {
940                out.push_str(&crate::template_env::render(
941                    "doc_jsdoc_param_desc.jinja",
942                    minijinja::context! { name => &name, desc => &desc },
943                ));
944            }
945        }
946    }
947    if let Some(ret) = sections.returns.as_deref() {
948        if !out.is_empty() {
949            out.push('\n');
950        }
951        out.push_str(&crate::template_env::render(
952            "doc_jsdoc_returns.jinja",
953            minijinja::context! { content => ret.trim() },
954        ));
955    }
956    if let Some(err) = sections.errors.as_deref() {
957        if !out.is_empty() {
958            out.push('\n');
959        }
960        out.push_str(&crate::template_env::render(
961            "doc_jsdoc_throws.jinja",
962            minijinja::context! { content => err.trim() },
963        ));
964    }
965    if let Some(example) = sections.example.as_deref() {
966        if let Some(body) = example_for_target(example, "typescript") {
967            if !out.is_empty() {
968                out.push('\n');
969            }
970            out.push_str("@example\n");
971            out.push_str(&body);
972        }
973    }
974    out
975}
976
977/// Render `RustdocSections` as a JavaDoc comment body.
978///
979/// - `# Arguments` → `@param name desc` (one per param)
980/// - `# Returns`   → `@return desc`
981/// - `# Errors`    → `@throws KreuzbergRsException desc`
982/// - `# Example`   → `<pre>{@code ...}</pre>` block.
983///
984/// `throws_class` is the FQN/simple name of the exception class to use in
985/// the `@throws` tag (e.g. `"KreuzbergRsException"`).
986pub fn render_javadoc_sections(sections: &RustdocSections, throws_class: &str) -> String {
987    let mut out = String::new();
988    if !sections.summary.is_empty() {
989        out.push_str(&sections.summary);
990    }
991    if let Some(args) = sections.arguments.as_deref() {
992        for (name, desc) in parse_arguments_bullets(args) {
993            if !out.is_empty() {
994                out.push('\n');
995            }
996            if desc.is_empty() {
997                out.push_str(&crate::template_env::render(
998                    "doc_javadoc_param.jinja",
999                    minijinja::context! { name => &name },
1000                ));
1001            } else {
1002                out.push_str(&crate::template_env::render(
1003                    "doc_javadoc_param_desc.jinja",
1004                    minijinja::context! { name => &name, desc => &desc },
1005                ));
1006            }
1007        }
1008    }
1009    if let Some(ret) = sections.returns.as_deref() {
1010        if !out.is_empty() {
1011            out.push('\n');
1012        }
1013        out.push_str(&crate::template_env::render(
1014            "doc_javadoc_return.jinja",
1015            minijinja::context! { content => ret.trim() },
1016        ));
1017    }
1018    if let Some(err) = sections.errors.as_deref() {
1019        if !out.is_empty() {
1020            out.push('\n');
1021        }
1022        out.push_str(&crate::template_env::render(
1023            "doc_javadoc_throws.jinja",
1024            minijinja::context! { throws_class => throws_class, content => err.trim() },
1025        ));
1026    }
1027    out
1028}
1029
1030/// Render `RustdocSections` as a C# XML doc comment body (without the
1031/// `/// ` line prefixes — the emitter adds those).
1032///
1033/// - summary  → `<summary>...</summary>`
1034/// - args     → `<param name="x">desc</param>` (one per arg)
1035/// - returns  → `<returns>desc</returns>`
1036/// - errors   → `<exception cref="KreuzbergException">desc</exception>`
1037/// - example  → `<example><code language="csharp">...</code></example>`
1038pub fn render_csharp_xml_sections(sections: &RustdocSections, exception_class: &str) -> String {
1039    let mut out = String::new();
1040    out.push_str("<summary>\n");
1041    let summary = if sections.summary.is_empty() {
1042        ""
1043    } else {
1044        sections.summary.as_str()
1045    };
1046    for line in summary.lines() {
1047        out.push_str(line);
1048        out.push('\n');
1049    }
1050    out.push_str("</summary>");
1051    if let Some(args) = sections.arguments.as_deref() {
1052        for (name, desc) in parse_arguments_bullets(args) {
1053            out.push('\n');
1054            if desc.is_empty() {
1055                out.push_str(&crate::template_env::render(
1056                    "doc_csharp_param.jinja",
1057                    minijinja::context! { name => &name },
1058                ));
1059            } else {
1060                out.push_str(&crate::template_env::render(
1061                    "doc_csharp_param_desc.jinja",
1062                    minijinja::context! { name => &name, desc => &desc },
1063                ));
1064            }
1065        }
1066    }
1067    if let Some(ret) = sections.returns.as_deref() {
1068        out.push('\n');
1069        out.push_str(&crate::template_env::render(
1070            "doc_csharp_returns.jinja",
1071            minijinja::context! { content => ret.trim() },
1072        ));
1073    }
1074    if let Some(err) = sections.errors.as_deref() {
1075        out.push('\n');
1076        out.push_str(&crate::template_env::render(
1077            "doc_csharp_exception.jinja",
1078            minijinja::context! {
1079                exception_class => exception_class,
1080                content => err.trim(),
1081            },
1082        ));
1083    }
1084    if let Some(example) = sections.example.as_deref() {
1085        out.push('\n');
1086        out.push_str("<example><code language=\"csharp\">\n");
1087        // Drop fence markers, keep code.
1088        for line in example.lines() {
1089            let t = line.trim_start();
1090            if t.starts_with("```") {
1091                continue;
1092            }
1093            out.push_str(line);
1094            out.push('\n');
1095        }
1096        out.push_str("</code></example>");
1097    }
1098    out
1099}
1100
1101/// Render `RustdocSections` as a PHPDoc comment body.
1102///
1103/// - `# Arguments` → `@param mixed $name desc`
1104/// - `# Returns`   → `@return desc`
1105/// - `# Errors`    → `@throws KreuzbergException desc`
1106/// - `# Example`   → ` ```php ` fence (replaces ` ```rust `).
1107pub fn render_phpdoc_sections(sections: &RustdocSections, throws_class: &str) -> String {
1108    let mut out = String::new();
1109    if !sections.summary.is_empty() {
1110        out.push_str(&sections.summary);
1111    }
1112    if let Some(args) = sections.arguments.as_deref() {
1113        for (name, desc) in parse_arguments_bullets(args) {
1114            if !out.is_empty() {
1115                out.push('\n');
1116            }
1117            if desc.is_empty() {
1118                out.push_str(&crate::template_env::render(
1119                    "doc_phpdoc_param.jinja",
1120                    minijinja::context! { name => &name },
1121                ));
1122            } else {
1123                out.push_str(&crate::template_env::render(
1124                    "doc_phpdoc_param_desc.jinja",
1125                    minijinja::context! { name => &name, desc => &desc },
1126                ));
1127            }
1128        }
1129    }
1130    if let Some(ret) = sections.returns.as_deref() {
1131        if !out.is_empty() {
1132            out.push('\n');
1133        }
1134        out.push_str(&crate::template_env::render(
1135            "doc_phpdoc_return.jinja",
1136            minijinja::context! { content => ret.trim() },
1137        ));
1138    }
1139    if let Some(err) = sections.errors.as_deref() {
1140        if !out.is_empty() {
1141            out.push('\n');
1142        }
1143        out.push_str(&crate::template_env::render(
1144            "doc_phpdoc_throws.jinja",
1145            minijinja::context! { throws_class => throws_class, content => err.trim() },
1146        ));
1147    }
1148    if let Some(example) = sections.example.as_deref() {
1149        if let Some(body) = example_for_target(example, "php") {
1150            if !out.is_empty() {
1151                out.push('\n');
1152            }
1153            out.push_str(&body);
1154        }
1155    }
1156    out
1157}
1158
1159/// Render `RustdocSections` as a Doxygen comment body for the C header.
1160///
1161/// - args    → `\param name desc`
1162/// - returns → `\return desc`
1163/// - errors  → prose paragraph (Doxygen has no semantic tag for FFI errors)
1164/// - example → `\code` ... `\endcode`
1165pub fn render_doxygen_sections(sections: &RustdocSections) -> String {
1166    let mut out = String::new();
1167    if !sections.summary.is_empty() {
1168        out.push_str(&sections.summary);
1169    }
1170    if let Some(args) = sections.arguments.as_deref() {
1171        for (name, desc) in parse_arguments_bullets(args) {
1172            if !out.is_empty() {
1173                out.push('\n');
1174            }
1175            if desc.is_empty() {
1176                out.push_str(&crate::template_env::render(
1177                    "doc_doxygen_param.jinja",
1178                    minijinja::context! { name => &name },
1179                ));
1180            } else {
1181                out.push_str(&crate::template_env::render(
1182                    "doc_doxygen_param_desc.jinja",
1183                    minijinja::context! { name => &name, desc => &desc },
1184                ));
1185            }
1186        }
1187    }
1188    if let Some(ret) = sections.returns.as_deref() {
1189        if !out.is_empty() {
1190            out.push('\n');
1191        }
1192        out.push_str(&crate::template_env::render(
1193            "doc_doxygen_return.jinja",
1194            minijinja::context! { content => ret.trim() },
1195        ));
1196    }
1197    if let Some(err) = sections.errors.as_deref() {
1198        if !out.is_empty() {
1199            out.push('\n');
1200        }
1201        out.push_str(&crate::template_env::render(
1202            "doc_doxygen_errors.jinja",
1203            minijinja::context! { content => err.trim() },
1204        ));
1205    }
1206    if let Some(example) = sections.example.as_deref() {
1207        if !out.is_empty() {
1208            out.push('\n');
1209        }
1210        out.push_str("\\code\n");
1211        for line in example.lines() {
1212            let t = line.trim_start();
1213            if t.starts_with("```") {
1214                continue;
1215            }
1216            out.push_str(line);
1217            out.push('\n');
1218        }
1219        out.push_str("\\endcode");
1220    }
1221    out
1222}
1223
/// Collapse the first paragraph of a doc comment onto a single line.
///
/// Lines are consumed up to (not including) the first blank line; each one
/// is trimmed and the pieces are joined with a single space. A doc comment
/// that is empty or starts with a blank line yields an empty string.
///
/// This keeps wrapped sentences intact, e.g.
///
/// ```text
/// Convert HTML to Markdown, returning
/// a `ConversionResult`.
/// ```
///
/// becomes one sentence, whereas `.lines().next()` would cut it off at the
/// end of the first line.
pub fn doc_first_paragraph_joined(doc: &str) -> String {
    let mut joined = String::new();
    for line in doc.lines() {
        let piece = line.trim();
        if piece.is_empty() {
            // Blank line: the first paragraph is over.
            break;
        }
        if !joined.is_empty() {
            joined.push(' ');
        }
        joined.push_str(piece);
    }
    joined
}
1243
/// Target language for [`sanitize_rust_idioms`].
///
/// Each variant selects the idiomatic mapping for Rust constructs that do not
/// translate directly to foreign-language doc syntax. The variant also
/// decides what happens to Rust code fences during sanitisation: they are
/// kept (with the language tag removed) for [`DocTarget::PhpDoc`] and
/// [`DocTarget::JavaDoc`], and dropped together with their content for
/// [`DocTarget::TsDoc`], [`DocTarget::JsDoc`] and [`DocTarget::CSharpDoc`].
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocTarget {
    /// PHPDoc (`/** ... */`), e.g. phpstan-typed prose.
    PhpDoc,
    /// Javadoc (`/** ... */`), e.g. OpenJDK-style annotations.
    JavaDoc,
    /// TSDoc (`/** ... */`), TypeScript variant of JSDoc.
    TsDoc,
    /// JSDoc (`/** ... */`), JavaScript variant.
    JsDoc,
    /// C# XML doc (`/// <summary>...</summary>`).
    ///
    /// Rust code fences are dropped. When sanitising through
    /// [`sanitize_rust_idioms`], the first rustdoc section heading
    /// (`# Examples`, `# Arguments`, `# Returns`, etc.) and everything after
    /// it is dropped as well. Finally any remaining `<` / `>` / `&` are
    /// XML-escaped so the result is safe to embed inside a `<summary>`
    /// element.
    CSharpDoc,
}
1266
/// Sanitize Rust-specific idioms in a prose string for the given foreign-language
/// documentation target.
///
/// Most transformations are applied **outside** backtick spans and code fences
/// only, so inline code examples and fenced blocks are largely left alone.
/// The exceptions are:
///
/// - `::` → `.` and `.unwrap()` / `.expect("…")` stripping run on the whole
///   prose line, including inside inline backtick spans;
/// - ` ```rust ` fences are dropped entirely for [`DocTarget::TsDoc`] /
///   [`DocTarget::JsDoc`] / [`DocTarget::CSharpDoc`] and have their language
///   tag stripped for [`DocTarget::PhpDoc`] / [`DocTarget::JavaDoc`].
///
/// For [`DocTarget::CSharpDoc`] this entry point additionally drops the first
/// rustdoc section heading and everything after it, and XML-escapes the
/// result; use [`sanitize_rust_idioms_keep_sections`] to keep section content.
///
/// # Transformations
///
/// - Intra-doc links `` [`Type::method`] `` → `` `Type.method` ``.
/// - `[`Foo`]` (backtick inside square brackets) → `` `Foo` ``.
/// - `None` (word boundary) → `null` (PHP/Java) or `undefined` (TS/JS).
/// - `Some(x)` → `the value (x)`.
/// - `Option<T>` → `T?` (PHP) / `T | null` (Java) / `T | undefined` (TS/JS).
/// - `Vec<u8>` → `string` (PHP) / `byte[]` (Java) / `Uint8Array` (TS/JS).
/// - `Vec<T>` → `T[]` (all targets).
/// - `HashMap<K, V>` → `array<K, V>` (PHP) / `Map<K, V>` (Java) / `Record<K, V>` (TS/JS).
/// - `Arc<T>`, `Box<T>`, `Mutex<T>`, `RwLock<T>`, `Rc<T>`, `Cell<T>`, `RefCell<T>` → `T`.
/// - `Send + Sync`, `Send`, `Sync`, `'static` → stripped.
/// - Standalone `::` between identifiers → `.`.
/// - `pub fn `, `crate::`, `&self`, `&mut self` → stripped.
/// - `#[…]` attribute macros on their own line or inline → stripped.
/// - `.unwrap()`, `.expect("…")` → stripped.
/// - ` ```rust ` fences → dropped entirely (TS/JS/C#) or tag removed (PHP/Java).
pub fn sanitize_rust_idioms(text: &str, target: DocTarget) -> String {
    // For C# XML doc the default is to drop rustdoc section headings
    // (`# Examples`, `# Arguments`, …) and the remainder of the comment,
    // because those bodies routinely contain content that cannot be embedded
    // safely inside `<summary>`. Callers that have already extracted sections
    // (`emit_csharp_doc`) sanitise each section body via [`sanitize_rust_idioms_keep_sections`].
    // The `true` flag requests that dropping; it has no effect for other targets.
    sanitize_rust_idioms_inner(text, target, true)
}
1301
/// Same as [`sanitize_rust_idioms`] but never drops rustdoc section headings.
///
/// Used by emitters that have already split the doc into sections and need to
/// sanitise each body fragment independently (e.g. C# XML doc emission with
/// per-section `<param>` / `<returns>` / `<exception>` tags).
///
/// The only difference from [`sanitize_rust_idioms`] is the flag passed to the
/// shared implementation; that flag is only consulted for
/// [`DocTarget::CSharpDoc`], so other targets produce the same output from
/// either entry point.
pub fn sanitize_rust_idioms_keep_sections(text: &str, target: DocTarget) -> String {
    // `false`: keep section headings and the text that follows them.
    sanitize_rust_idioms_inner(text, target, false)
}
1310
1311fn sanitize_rust_idioms_inner(text: &str, target: DocTarget, drop_csharp_sections: bool) -> String {
1312    let mut out = String::with_capacity(text.len());
1313    let mut in_rust_fence = false;
1314    let mut in_other_fence = false;
1315    // For C# XML doc: once a `# Examples` / `# Arguments` / etc. heading is
1316    // encountered, drop the entire remainder of the comment. Rustdoc section
1317    // headings cannot be safely embedded inside `<summary>` and the per-section
1318    // content (code fences, intra-doc links, generics) is the leading cause
1319    // of CS1002/CS1519 leakage. The plain `<summary>` path collapses to the
1320    // top-level prose only.
1321    let mut csharp_section_dropped = false;
1322
1323    for line in text.lines() {
1324        if csharp_section_dropped {
1325            continue;
1326        }
1327        let trimmed = line.trim_start();
1328        if drop_csharp_sections
1329            && matches!(target, DocTarget::CSharpDoc)
1330            && !in_rust_fence
1331            && !in_other_fence
1332            && is_rustdoc_section_heading(trimmed)
1333        {
1334            csharp_section_dropped = true;
1335            continue;
1336        }
1337
1338        // Detect code fence boundaries.
1339        if let Some(rest) = trimmed.strip_prefix("```") {
1340            if in_rust_fence {
1341                // Closing fence of a rust block.
1342                in_rust_fence = false;
1343                match target {
1344                    DocTarget::TsDoc | DocTarget::JsDoc | DocTarget::CSharpDoc => {
1345                        // Entire rust block dropped — don't emit closing fence.
1346                    }
1347                    DocTarget::PhpDoc | DocTarget::JavaDoc => {
1348                        out.push_str(line);
1349                        out.push('\n');
1350                    }
1351                }
1352                continue;
1353            }
1354            if in_other_fence {
1355                // Closing fence of a non-rust block.
1356                in_other_fence = false;
1357                out.push_str(line);
1358                out.push('\n');
1359                continue;
1360            }
1361            // Opening fence — determine language.
1362            let lang = rest.split(',').next().unwrap_or("").trim();
1363            let is_rust = lang.is_empty() || lang == "rust" || lang.starts_with("rust,");
1364            if is_rust {
1365                in_rust_fence = true;
1366                match target {
1367                    DocTarget::TsDoc | DocTarget::JsDoc | DocTarget::CSharpDoc => {
1368                        // Drop the entire rust fence block — skip opening line.
1369                    }
1370                    DocTarget::PhpDoc | DocTarget::JavaDoc => {
1371                        // Emit fence without language tag.
1372                        let indent = &line[..line.len() - trimmed.len()];
1373                        let after_lang = rest.find(',').map(|i| &rest[i..]).unwrap_or("");
1374                        out.push_str(indent);
1375                        out.push_str("```");
1376                        out.push_str(after_lang);
1377                        out.push('\n');
1378                    }
1379                }
1380                continue;
1381            }
1382            // Non-rust fence: pass through verbatim.
1383            in_other_fence = true;
1384            out.push_str(line);
1385            out.push('\n');
1386            continue;
1387        }
1388
1389        // Inside a rust fence.
1390        if in_rust_fence {
1391            match target {
1392                DocTarget::TsDoc | DocTarget::JsDoc | DocTarget::CSharpDoc => {
1393                    // Drop content of rust fences.
1394                }
1395                DocTarget::PhpDoc | DocTarget::JavaDoc => {
1396                    out.push_str(line);
1397                    out.push('\n');
1398                }
1399            }
1400            continue;
1401        }
1402
1403        // Inside a non-rust fence: pass through verbatim.
1404        if in_other_fence {
1405            out.push_str(line);
1406            out.push('\n');
1407            continue;
1408        }
1409
1410        // Check if this line is a bare `#[...]` attribute line.
1411        let stripped_indent = line.trim_start();
1412        if stripped_indent.starts_with("#[") && stripped_indent.ends_with(']') {
1413            // Attribute-only line — drop entirely.
1414            continue;
1415        }
1416
1417        // Normal prose line: apply token-level transformations.
1418        let sanitized = apply_prose_transforms(line, target);
1419        out.push_str(&sanitized);
1420        out.push('\n');
1421    }
1422
1423    // Trim trailing newline added by the loop (preserve internal newlines).
1424    if out.ends_with('\n') && !text.ends_with('\n') {
1425        out.pop();
1426    }
1427
1428    // For C# XML doc, escape any remaining `<`, `>`, `&` so the result is
1429    // safe to embed inside `<summary>...</summary>`. By this point the
1430    // Rust-idiom substitutions have replaced `Vec<T>` / `Option<T>` /
1431    // `HashMap<K, V>` / `Result<T, E>` with their idiomatic forms, but
1432    // unrecognised generic constructs (e.g. trait-object references) may
1433    // still contain raw angle brackets that would break C# XML parsing.
1434    if matches!(target, DocTarget::CSharpDoc) {
1435        out = xml_escape_for_csharp(&out);
1436    }
1437
1438    out
1439}
1440
/// Tell whether an already left-trimmed line is a rustdoc section heading.
///
/// The line must start with `# ` (a single `#` followed by a space); the rest
/// of the line, trimmed, is compared ASCII-case-insensitively against the
/// known heading names: `Arguments`, `Args`, `Returns`, `Errors`, `Panics`,
/// `Safety`, `Example` and `Examples`.
fn is_rustdoc_section_heading(trimmed: &str) -> bool {
    const HEADINGS: [&str; 8] = [
        "arguments",
        "args",
        "returns",
        "errors",
        "panics",
        "safety",
        "example",
        "examples",
    ];
    trimmed
        .strip_prefix("# ")
        .map(str::trim)
        .map_or(false, |title| {
            HEADINGS.iter().any(|known| title.eq_ignore_ascii_case(known))
        })
}
1454
/// Entity-escape the XML-significant characters `&`, `<` and `>`.
///
/// Used so that sanitised prose can be embedded inside a C# `<summary>`
/// element: even C#-friendly forms such as `Dictionary<K, V>` contain angle
/// brackets that an XML parser would otherwise treat as markup. Every `&` is
/// escaped, including those of entities already present in the input.
fn xml_escape_for_csharp(s: &str) -> String {
    // `&` must go first so the ampersands introduced by the `<` / `>`
    // replacements are not escaped a second time.
    s.replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
}
1473
1474/// Apply prose-level Rust-idiom transformations to a single line.
1475///
1476/// Some transformations span or precede backtick boundaries and must be applied
1477/// to the full line before tokenisation:
1478///
1479/// 1. Intra-doc links (`` [`...`] ``) — they wrap a backtick pair.
1480/// 2. `::` path separator — even inside backtick spans it should become `.`
1481///    for all foreign-language targets, since the target language uses `.` for
1482///    member access and package paths in code examples too.
1483///
1484/// All remaining transformations are applied only to literal (non-code) segments
1485/// after tokenisation.
1486fn apply_prose_transforms(line: &str, target: DocTarget) -> String {
1487    // Step 1: replace intra-doc links before tokenisation (they span backtick pairs).
1488    let line = replace_intradoc_links(line, target);
1489
1490    // Step 2: replace :: everywhere (including inside backtick spans).
1491    // All targets use `.` as the member/package separator, so this is always safe.
1492    let line = replace_path_separator(&line);
1493
1494    // Step 3: strip .unwrap() and .expect() everywhere, including inside backtick spans,
1495    // since these Rust error-handling idioms are meaningless in all target languages.
1496    let line = strip_unwrap_expect(&line);
1497
1498    // Step 4: tokenise and apply remaining transforms only to literal segments.
1499    let segments = tokenize_backtick_spans(&line);
1500    let mut result = String::with_capacity(line.len());
1501    for (is_code, span) in segments {
1502        if is_code {
1503            result.push('`');
1504            result.push_str(span);
1505            result.push('`');
1506        } else {
1507            result.push_str(&transform_prose_segment(span, target));
1508        }
1509    }
1510    result
1511}
1512
/// Split a line into literal and inline-code segments.
///
/// Each element is `(is_code, text)`. `is_code` is `true` for the text
/// between a matched pair of backticks (the backticks themselves are not
/// included), and `false` for everything else. Empty literal segments are
/// omitted; an empty code span (two adjacent backticks) is kept.
///
/// A backtick without a closing partner is not treated as code: the rest of
/// the line, starting at that backtick, becomes one literal segment.
fn tokenize_backtick_spans(line: &str) -> Vec<(bool, &str)> {
    let mut spans = Vec::new();
    let mut remaining = line;

    while let Some(open) = remaining.find('`') {
        // Literal text in front of the opening backtick.
        if open > 0 {
            spans.push((false, &remaining[..open]));
        }
        let after_open = &remaining[open + 1..];
        match after_open.find('`') {
            Some(close) => {
                spans.push((true, &after_open[..close]));
                remaining = &after_open[close + 1..];
            }
            None => {
                // Unmatched backtick: the tail (backtick included) is literal.
                spans.push((false, &remaining[open..]));
                return spans;
            }
        }
    }

    if !remaining.is_empty() {
        spans.push((false, remaining));
    }
    spans
}
1553
1554/// Apply all prose-level Rust substitutions to a literal text segment.
1555///
1556/// Intra-doc links have already been replaced by `apply_prose_transforms`
1557/// before tokenisation; this function handles the remaining transformations.
1558fn transform_prose_segment(text: &str, target: DocTarget) -> String {
1559    let mut s = text.to_string();
1560
1561    // 1. Strip #[derive(...)] and other inline attribute-style references.
1562    s = strip_inline_attributes(&s);
1563
1564    // 2. Strip pub fn, crate::, &self, &mut self.
1565    s = s.replace("pub fn ", "");
1566    s = s.replace("crate::", "");
1567    s = s.replace("&mut self", "");
1568    s = s.replace("&self", "");
1569
1570    // 3. Strip lifetime and bound markers.
1571    s = strip_lifetime_and_bounds(&s);
1572
1573    // 4. Type substitutions (order matters — most specific first).
1574    s = replace_type_wrappers(&s, target);
1575
1576    // 5. Some(x) -> the value (x).
1577    s = replace_some_calls(&s);
1578
1579    // 5b. Bare "Some <lowercase>" in prose -> drop "Some ".
1580    s = replace_some_keyword_in_prose(&s);
1581
1582    // 6. None -> null / undefined (word boundary, uppercase only).
1583    s = replace_none_keyword(&s, target);
1584
1585    // Note: :: -> . and .unwrap()/.expect() stripping are applied to the full
1586    // line before tokenisation in apply_prose_transforms and therefore do not
1587    // need to be repeated here.
1588
1589    s
1590}
1591
/// Copy the character starting at byte offset `i` of `s` into `out` and
/// return the byte offset just past it.
///
/// The byte-crawling helpers in this file only look for ASCII markers; when
/// no marker matches they call this to step over one whole character rather
/// than one byte, so multi-byte UTF-8 sequences are never split.
///
/// # Panics
///
/// Panics if `i` is not a char boundary of `s` or if no character starts at
/// `i` (i.e. `i == s.len()`).
#[inline]
fn advance_char(s: &str, out: &mut String, i: usize) -> usize {
    // Callers only branch on ASCII bytes, which are always single-byte char
    // boundaries, so slicing at `i` is expected to be valid.
    let mut tail = s[i..].chars();
    let ch = tail.next().expect("valid UTF-8 position");
    out.push(ch);
    // Whatever the iterator has not consumed starts right after `ch`.
    s.len() - tail.as_str().len()
}
1607
1608/// Replace `` [`Type::method()`] `` and `` [`Foo`] `` intra-doc links with
1609/// backtick-wrapped identifiers, converting `::` to `.`.
1610fn replace_intradoc_links(s: &str, _target: DocTarget) -> String {
1611    let mut out = String::with_capacity(s.len());
1612    let bytes = s.as_bytes();
1613    let mut i = 0;
1614    while i < bytes.len() {
1615        // Look for [`
1616        if i + 1 < bytes.len() && bytes[i] == b'[' && bytes[i + 1] == b'`' {
1617            // Find closing `]
1618            let search_start = i + 2;
1619            let mut found = false;
1620            let mut j = search_start;
1621            while j + 1 < bytes.len() {
1622                if bytes[j] == b'`' && bytes[j + 1] == b']' {
1623                    let inner = &s[search_start..j];
1624                    // Convert :: to . in the inner part.
1625                    let converted = inner.replace("::", ".");
1626                    out.push('`');
1627                    out.push_str(&converted);
1628                    out.push('`');
1629                    i = j + 2;
1630                    found = true;
1631                    break;
1632                }
1633                j += 1;
1634            }
1635            if !found {
1636                i = advance_char(s, &mut out, i);
1637            }
1638        } else {
1639            i = advance_char(s, &mut out, i);
1640        }
1641    }
1642    out
1643}
1644
1645/// Strip inline `#[...]` attribute references (not on their own line — those
1646/// are handled as full-line drops in the main loop).
1647fn strip_inline_attributes(s: &str) -> String {
1648    let mut out = String::with_capacity(s.len());
1649    let bytes = s.as_bytes();
1650    let mut i = 0;
1651    while i < bytes.len() {
1652        if bytes[i] == b'#' && i + 1 < bytes.len() && bytes[i + 1] == b'[' {
1653            // Skip until matching ']', handling nesting.
1654            let mut depth = 0usize;
1655            let mut j = i + 1;
1656            while j < bytes.len() {
1657                if bytes[j] == b'[' {
1658                    depth += 1;
1659                } else if bytes[j] == b']' {
1660                    depth -= 1;
1661                    if depth == 0 {
1662                        i = j + 1;
1663                        break;
1664                    }
1665                }
1666                j += 1;
1667            }
1668            if depth != 0 {
1669                // Unmatched bracket: emit literally.
1670                i = advance_char(s, &mut out, i);
1671            }
1672        } else {
1673            i = advance_char(s, &mut out, i);
1674        }
1675    }
1676    out
1677}
1678
1679/// Strip `'static`, `Send + Sync`, `Send`, `Sync` from prose text.
1680fn strip_lifetime_and_bounds(s: &str) -> String {
1681    // Order matters: match compound forms before simple forms.
1682    let mut out = s.to_string();
1683    // Strip `Send + Sync` (with optional spaces around `+`).
1684    out = regex_replace_all(&out, r"Send\s*\+\s*Sync", "");
1685    out = regex_replace_all(&out, r"Sync\s*\+\s*Send", "");
1686    // Strip standalone Send/Sync only at word boundaries.
1687    out = regex_replace_word_boundary(&out, "Send", "");
1688    out = regex_replace_word_boundary(&out, "Sync", "");
1689    // Strip 'static lifetime markers.
1690    out = regex_replace_all(&out, r"'\s*static\b", "");
1691    out
1692}
1693
1694/// Replace occurrences of `pattern` (treated as a simple substring pattern
1695/// with `\s*` only, no full regex) with `replacement` in `s`.
1696///
1697/// This is a lightweight regex-free replacement for simple patterns that
1698/// only need literal text or `\s*` between tokens.
1699fn regex_replace_all(s: &str, pattern: &str, replacement: &str) -> String {
1700    // Inline tiny pattern compiler for the three patterns we actually use.
1701    match pattern {
1702        r"Send\s*\+\s*Sync" => replace_with_optional_spaces(s, "Send", "+", "Sync", replacement),
1703        r"Sync\s*\+\s*Send" => replace_with_optional_spaces(s, "Sync", "+", "Send", replacement),
1704        r"'\s*static\b" => replace_static_lifetime(s, replacement),
1705        _ => s.replace(pattern, replacement),
1706    }
1707}
1708
1709/// Replace `word_boundary(keyword)` occurrences in `s` with `replacement`.
1710fn regex_replace_word_boundary(s: &str, keyword: &str, replacement: &str) -> String {
1711    let mut out = String::with_capacity(s.len());
1712    let klen = keyword.len();
1713    let bytes = s.as_bytes();
1714    let kbytes = keyword.as_bytes();
1715    if klen == 0 || klen > bytes.len() {
1716        return s.to_string();
1717    }
1718    let mut i = 0;
1719    while i + klen <= bytes.len() {
1720        if &bytes[i..i + klen] == kbytes {
1721            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1722            let after_ok =
1723                i + klen >= bytes.len() || !bytes[i + klen].is_ascii_alphanumeric() && bytes[i + klen] != b'_';
1724            if before_ok && after_ok {
1725                out.push_str(replacement);
1726                i += klen;
1727                continue;
1728            }
1729        }
1730        i = advance_char(s, &mut out, i);
1731    }
1732    if i < bytes.len() {
1733        out.push_str(&s[i..]);
1734    }
1735    out
1736}
1737
/// Replace every `a <spaces> op <spaces> b` triplet in `s` with `replacement`.
///
/// Any number of ASCII space characters (including none) may separate `a`
/// from `op` and `op` from `b`; other whitespace such as tabs does not count.
/// No word-boundary check is applied, so the triplet is also matched inside
/// longer words. Text that is not part of a triplet is copied unchanged.
fn replace_with_optional_spaces(s: &str, a: &str, op: &str, b: &str, replacement: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(ch) = rest.chars().next() {
        // Try to consume `a`, spaces, `op`, spaces, `b` from the current
        // position; `matched_tail` is whatever follows a successful match.
        let matched_tail = rest
            .strip_prefix(a)
            .and_then(|tail| tail.trim_start_matches(' ').strip_prefix(op))
            .and_then(|tail| tail.trim_start_matches(' ').strip_prefix(b));
        match matched_tail {
            // Require progress so a zero-length match can never loop forever.
            Some(tail) if tail.len() < rest.len() => {
                out.push_str(replacement);
                rest = tail;
            }
            _ => {
                out.push(ch);
                rest = &rest[ch.len_utf8()..];
            }
        }
    }
    out
}
1777
1778/// Replace `'static` lifetime markers (with optional spaces after `'`).
1779fn replace_static_lifetime(s: &str, replacement: &str) -> String {
1780    let mut out = String::with_capacity(s.len());
1781    let bytes = s.as_bytes();
1782    let mut i = 0;
1783    while i < bytes.len() {
1784        if bytes[i] == b'\'' {
1785            // Peek ahead skipping spaces.
1786            let mut j = i + 1;
1787            while j < bytes.len() && bytes[j] == b' ' {
1788                j += 1;
1789            }
1790            let keyword = b"static";
1791            if bytes[j..].starts_with(keyword) {
1792                let end = j + keyword.len();
1793                // Must be followed by non-identifier char or end.
1794                let after_ok = end >= bytes.len() || !bytes[end].is_ascii_alphanumeric() && bytes[end] != b'_';
1795                if after_ok {
1796                    out.push_str(replacement);
1797                    i = end;
1798                    continue;
1799                }
1800            }
1801        }
1802        i = advance_char(s, &mut out, i);
1803    }
1804    out
1805}
1806
1807/// Replace Rust generic type wrappers in prose.
1808fn replace_type_wrappers(s: &str, target: DocTarget) -> String {
1809    // Order: most specific patterns first.
1810    let mut out = s.to_string();
1811
1812    // Vec<u8> — must come before Vec<T>.
1813    let vec_u8_replacement = match target {
1814        DocTarget::PhpDoc => "string",
1815        DocTarget::JavaDoc => "byte[]",
1816        DocTarget::TsDoc | DocTarget::JsDoc => "Uint8Array",
1817        DocTarget::CSharpDoc => "byte[]",
1818    };
1819    out = replace_generic1(&out, "Vec", "u8", vec_u8_replacement);
1820
1821    // HashMap<K, V> — must come before Vec<T> to avoid order-dependency issues.
1822    let map_replacement_fn = |k: &str, v: &str| match target {
1823        DocTarget::PhpDoc => format!("array<{k}, {v}>"),
1824        DocTarget::JavaDoc => format!("Map<{k}, {v}>"),
1825        DocTarget::TsDoc | DocTarget::JsDoc => format!("Record<{k}, {v}>"),
1826        DocTarget::CSharpDoc => format!("Dictionary<{k}, {v}>"),
1827    };
1828    out = replace_generic2(&out, "HashMap", &map_replacement_fn);
1829
1830    // Vec<T> — generic.
1831    out = replace_generic1_passthrough(&out, "Vec", |inner| format!("{inner}[]"));
1832
1833    // Option<T>.
1834    let option_replacement_fn = |inner: &str| match target {
1835        DocTarget::PhpDoc => format!("{inner}?"),
1836        DocTarget::JavaDoc => format!("{inner} | null"),
1837        DocTarget::TsDoc | DocTarget::JsDoc => format!("{inner} | undefined"),
1838        DocTarget::CSharpDoc => format!("{inner}?"),
1839    };
1840    out = replace_generic1_passthrough(&out, "Option", option_replacement_fn);
1841
1842    // Result<T, E> — drop the error type, keep the success type.
1843    // C# has no Result type; the binding throws exceptions, so just the success type
1844    // is meaningful in prose. We do this for C# only; other targets historically left
1845    // `Result<T, E>` unchanged (their tests assert nothing about it).
1846    if matches!(target, DocTarget::CSharpDoc) {
1847        out = replace_generic2(&out, "Result", &|t: &str, _e: &str| t.to_string());
1848    }
1849
1850    // Smart pointer wrappers: strip to inner type.
1851    for wrapper in &["Arc", "Box", "Mutex", "RwLock", "Rc", "Cell", "RefCell"] {
1852        out = replace_generic1_passthrough(&out, wrapper, |inner| inner.to_string());
1853    }
1854
1855    out
1856}
1857
/// Replace the exact text `name<arg>` (for example `Vec<u8>`) with
/// `replacement` everywhere in `s`.
///
/// This is a plain substring replacement: no word-boundary check is made and
/// no whitespace inside the angle brackets is tolerated.
fn replace_generic1(s: &str, name: &str, arg: &str, replacement: &str) -> String {
    let needle = [name, "<", arg, ">"].concat();
    s.replace(needle.as_str(), replacement)
}
1863
/// Replace every `Name<T>` in `s` with `f(T)`, where `T` is an arbitrary
/// inner type expression.
///
/// A match must begin at a word boundary: the byte before `name` may not be
/// an ASCII letter, digit or `_`. The closing `>` is located by tracking
/// angle-bracket depth, so nested generics are passed to `f` intact. The `>`
/// of a `->` return arrow (as in `Box<dyn Fn(A) -> B>`) is not counted as a
/// closing bracket. `f` receives the inner text verbatim and its result is
/// not rescanned. Text that does not form a complete `Name<...>` is copied
/// through unchanged.
fn replace_generic1_passthrough<F>(s: &str, name: &str, f: F) -> String
where
    F: Fn(&str) -> String,
{
    let prefix = format!("{name}<");
    let bytes = s.as_bytes();
    let mut out = String::with_capacity(s.len());
    // `copied` is the end of the input already written to `out`;
    // `cursor` is where the next search for `prefix` starts.
    let mut copied = 0;
    let mut cursor = 0;

    while let Some(found) = s[cursor..].find(prefix.as_str()) {
        let start = cursor + found;
        let inner_start = start + prefix.len();
        let at_boundary =
            start == 0 || !(bytes[start - 1].is_ascii_alphanumeric() || bytes[start - 1] == b'_');

        // Find the `>` that balances the `<` ending the prefix.
        let mut close = None;
        if at_boundary {
            let mut depth = 1usize;
            let mut idx = inner_start;
            while idx < bytes.len() {
                match bytes[idx] {
                    b'<' => depth += 1,
                    // Part of a `->` arrow, not a bracket. `idx - 1` is in
                    // range because the prefix is never empty.
                    b'>' if bytes[idx - 1] == b'-' => {}
                    b'>' => {
                        depth -= 1;
                        if depth == 0 {
                            close = Some(idx);
                            break;
                        }
                    }
                    _ => {}
                }
                idx += 1;
            }
        }

        match close {
            Some(end) => {
                out.push_str(&s[copied..start]);
                out.push_str(&f(&s[inner_start..end]));
                copied = end + 1;
                cursor = copied;
            }
            None => {
                // No usable match here: resume the search one character on.
                cursor = start + s[start..].chars().next().map_or(1, char::len_utf8);
            }
        }
    }
    out.push_str(&s[copied..]);
    out
}
1911
1912/// Replace `Name<K, V>` → `f(K, V)` for two-argument generics (e.g. `HashMap`).
1913fn replace_generic2<F>(s: &str, name: &str, f: &F) -> String
1914where
1915    F: Fn(&str, &str) -> String,
1916{
1917    let mut out = String::with_capacity(s.len());
1918    let mut i = 0;
1919    let prefix = format!("{name}<");
1920    let pbytes = prefix.as_bytes();
1921    let bytes = s.as_bytes();
1922
1923    while i < bytes.len() {
1924        if bytes[i..].starts_with(pbytes) {
1925            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1926            if before_ok {
1927                let inner_start = i + pbytes.len();
1928                // Find the matching '>' respecting nesting.
1929                let mut depth = 1usize;
1930                let mut j = inner_start;
1931                while j < bytes.len() {
1932                    match bytes[j] {
1933                        b'<' => depth += 1,
1934                        b'>' => {
1935                            depth -= 1;
1936                            if depth == 0 {
1937                                break;
1938                            }
1939                        }
1940                        _ => {}
1941                    }
1942                    j += 1;
1943                }
1944                if depth == 0 && j < bytes.len() {
1945                    let inner = &s[inner_start..j];
1946                    // Split on the first ',' at depth 0.
1947                    let split = split_on_comma_at_top_level(inner);
1948                    if let Some((k, v)) = split {
1949                        out.push_str(&f(k.trim(), v.trim()));
1950                        i = j + 1;
1951                        continue;
1952                    }
1953                }
1954            }
1955        }
1956        i = advance_char(s, &mut out, i);
1957    }
1958    out
1959}
1960
/// Split `s` at the first comma that is not nested inside any bracket pair.
///
/// Angle brackets, parentheses and square brackets all contribute to the
/// nesting depth, so commas inside `Vec<(A, B)>`, `(A, B)` or `[T; N]` are
/// skipped. The `>` of a `->` return arrow is ignored rather than treated as
/// a closing bracket. Returns the text before and after the comma (neither
/// half is trimmed), or `None` when no comma occurs at depth 0.
fn split_on_comma_at_top_level(s: &str) -> Option<(&str, &str)> {
    let mut depth = 0i32;
    let mut previous = None;
    for (idx, ch) in s.char_indices() {
        match ch {
            '<' | '(' | '[' => depth += 1,
            // `->` belongs to a function signature, not to a generic list.
            '>' if previous == Some('-') => {}
            '>' | ')' | ']' => depth -= 1,
            ',' if depth == 0 => return Some((&s[..idx], &s[idx + 1..])),
            _ => {}
        }
        previous = Some(ch);
    }
    None
}
1974
1975/// Replace `Some(x)` in prose with `the value (x)`.
1976fn replace_some_calls(s: &str) -> String {
1977    let mut out = String::with_capacity(s.len());
1978    let bytes = s.as_bytes();
1979    let prefix = b"Some(";
1980    let mut i = 0;
1981
1982    while i < bytes.len() {
1983        if bytes[i..].starts_with(prefix) {
1984            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1985            if before_ok {
1986                let arg_start = i + prefix.len();
1987                // Find matching ')' respecting nesting.
1988                let mut depth = 1usize;
1989                let mut j = arg_start;
1990                while j < bytes.len() {
1991                    match bytes[j] {
1992                        b'(' => depth += 1,
1993                        b')' => {
1994                            depth -= 1;
1995                            if depth == 0 {
1996                                break;
1997                            }
1998                        }
1999                        _ => {}
2000                    }
2001                    j += 1;
2002                }
2003                if depth == 0 && j < bytes.len() {
2004                    let arg = &s[arg_start..j];
2005                    out.push_str("the value (");
2006                    out.push_str(arg);
2007                    out.push(')');
2008                    i = j + 1;
2009                    continue;
2010                }
2011            }
2012        }
2013        i = advance_char(s, &mut out, i);
2014    }
2015    out
2016}
2017
2018/// Drop bare `Some ` when it appears as a Rust-idiom modifier in prose
2019/// ("(Some values)", "Some keys leave the previous", etc.). The `Some(...)`
2020/// call form is handled separately by [`replace_some_calls`].
2021///
2022/// Match shape: word-boundary `Some` + single ASCII space + ASCII-lowercase
2023/// letter. The "Some " prefix is dropped; the following word is preserved.
2024/// `SomeType`, `Some.method()`, `Some(x)`, and sentence-initial `Some `
2025/// followed by an uppercase noun stay untouched.
2026fn replace_some_keyword_in_prose(s: &str) -> String {
2027    let keyword = b"Some ";
2028    let klen = keyword.len();
2029    let bytes = s.as_bytes();
2030    if klen >= bytes.len() {
2031        return s.to_string();
2032    }
2033    let mut out = String::with_capacity(s.len());
2034    let mut i = 0;
2035    while i + klen < bytes.len() {
2036        if &bytes[i..i + klen] == keyword {
2037            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
2038            let after_ok = bytes[i + klen].is_ascii_lowercase();
2039            if before_ok && after_ok {
2040                i += klen;
2041                continue;
2042            }
2043        }
2044        i = advance_char(s, &mut out, i);
2045    }
2046    if i < bytes.len() {
2047        out.push_str(&s[i..]);
2048    }
2049    out
2050}
2051
2052/// Replace `None` (at word boundaries, uppercase) with the target-appropriate nil.
2053fn replace_none_keyword(s: &str, target: DocTarget) -> String {
2054    let replacement = match target {
2055        DocTarget::PhpDoc | DocTarget::JavaDoc | DocTarget::CSharpDoc => "null",
2056        DocTarget::TsDoc | DocTarget::JsDoc => "undefined",
2057    };
2058    let keyword = b"None";
2059    let klen = keyword.len();
2060    let mut out = String::with_capacity(s.len());
2061    let bytes = s.as_bytes();
2062    if klen > bytes.len() {
2063        return s.to_string();
2064    }
2065    let mut i = 0;
2066
2067    while i + klen <= bytes.len() {
2068        if &bytes[i..i + klen] == keyword {
2069            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
2070            let after_ok =
2071                i + klen >= bytes.len() || !bytes[i + klen].is_ascii_alphanumeric() && bytes[i + klen] != b'_';
2072            if before_ok && after_ok {
2073                out.push_str(replacement);
2074                i += klen;
2075                continue;
2076            }
2077        }
2078        i = advance_char(s, &mut out, i);
2079    }
2080    if i < bytes.len() {
2081        out.push_str(&s[i..]);
2082    }
2083    out
2084}
2085
2086/// Replace standalone `::` between identifiers with `.`.
2087fn replace_path_separator(s: &str) -> String {
2088    let mut out = String::with_capacity(s.len());
2089    let bytes = s.as_bytes();
2090    let mut i = 0;
2091
2092    while i < bytes.len() {
2093        if i + 1 < bytes.len() && bytes[i] == b':' && bytes[i + 1] == b':' {
2094            // Only replace if surrounded by identifier characters or end/start of string.
2095            let before_ok = i > 0 && (bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_');
2096            let after_ok = i + 2 < bytes.len() && (bytes[i + 2].is_ascii_alphanumeric() || bytes[i + 2] == b'_');
2097            if before_ok || after_ok {
2098                out.push('.');
2099                i += 2;
2100                continue;
2101            }
2102        }
2103        i = advance_char(s, &mut out, i);
2104    }
2105    out
2106}
2107
2108/// Strip `.unwrap()` and `.expect("...")` calls from prose.
2109fn strip_unwrap_expect(s: &str) -> String {
2110    let mut out = String::with_capacity(s.len());
2111    let bytes = s.as_bytes();
2112    let mut i = 0;
2113
2114    while i < bytes.len() {
2115        // Match .unwrap().
2116        if bytes[i..].starts_with(b".unwrap()") {
2117            i += b".unwrap()".len();
2118            continue;
2119        }
2120        // Match .expect(...).
2121        if bytes[i..].starts_with(b".expect(") {
2122            let arg_start = i + b".expect(".len();
2123            let mut depth = 1usize;
2124            let mut j = arg_start;
2125            while j < bytes.len() {
2126                match bytes[j] {
2127                    b'(' => depth += 1,
2128                    b')' => {
2129                        depth -= 1;
2130                        if depth == 0 {
2131                            break;
2132                        }
2133                    }
2134                    _ => {}
2135                }
2136                j += 1;
2137            }
2138            if depth == 0 {
2139                i = j + 1;
2140                continue;
2141            }
2142        }
2143        i = advance_char(s, &mut out, i);
2144    }
2145    out
2146}
2147
2148#[cfg(test)]
2149mod tests {
2150    use super::*;
2151
2152    #[test]
2153    fn test_emit_phpdoc() {
2154        let mut out = String::new();
2155        emit_phpdoc(&mut out, "Simple documentation", "    ", "TestException");
2156        assert!(out.contains("/**"));
2157        assert!(out.contains("Simple documentation"));
2158        assert!(out.contains("*/"));
2159    }
2160
2161    #[test]
2162    fn test_phpdoc_escaping() {
2163        let mut out = String::new();
2164        emit_phpdoc(&mut out, "Handle */ sequences", "", "TestException");
2165        assert!(out.contains("Handle * / sequences"));
2166    }
2167
2168    #[test]
2169    fn test_emit_csharp_doc() {
2170        let mut out = String::new();
2171        emit_csharp_doc(&mut out, "C# documentation", "    ", "TestException");
2172        assert!(out.contains("<summary>"));
2173        assert!(out.contains("C# documentation"));
2174        assert!(out.contains("</summary>"));
2175    }
2176
2177    #[test]
2178    fn test_csharp_xml_escaping() {
2179        let mut out = String::new();
2180        emit_csharp_doc(&mut out, "foo < bar & baz > qux", "", "TestException");
2181        assert!(out.contains("foo &lt; bar &amp; baz &gt; qux"));
2182    }
2183
2184    #[test]
2185    fn test_emit_elixir_doc() {
2186        let mut out = String::new();
2187        emit_elixir_doc(&mut out, "Elixir documentation");
2188        assert!(out.contains("@doc \"\"\""));
2189        assert!(out.contains("Elixir documentation"));
2190        assert!(out.contains("\"\"\""));
2191    }
2192
2193    #[test]
2194    fn test_elixir_heredoc_escaping() {
2195        let mut out = String::new();
2196        emit_elixir_doc(&mut out, "Handle \"\"\" sequences");
2197        assert!(out.contains("Handle \"\" \" sequences"));
2198    }
2199
2200    #[test]
2201    fn test_emit_roxygen() {
2202        let mut out = String::new();
2203        emit_roxygen(&mut out, "R documentation");
2204        assert!(out.contains("#' R documentation"));
2205    }
2206
2207    #[test]
2208    fn test_emit_swift_doc() {
2209        let mut out = String::new();
2210        emit_swift_doc(&mut out, "Swift documentation", "    ");
2211        assert!(out.contains("/// Swift documentation"));
2212    }
2213
2214    #[test]
2215    fn test_emit_javadoc() {
2216        let mut out = String::new();
2217        emit_javadoc(&mut out, "Java documentation", "    ");
2218        assert!(out.contains("/**"));
2219        assert!(out.contains("Java documentation"));
2220        assert!(out.contains("*/"));
2221    }
2222
2223    #[test]
2224    fn test_emit_kdoc() {
2225        let mut out = String::new();
2226        emit_kdoc(&mut out, "Kotlin documentation", "    ");
2227        assert!(out.contains("/**"));
2228        assert!(out.contains("Kotlin documentation"));
2229        assert!(out.contains("*/"));
2230    }
2231
2232    #[test]
2233    fn test_emit_dartdoc() {
2234        let mut out = String::new();
2235        emit_dartdoc(&mut out, "Dart documentation", "    ");
2236        assert!(out.contains("/// Dart documentation"));
2237    }
2238
2239    #[test]
2240    fn test_emit_gleam_doc() {
2241        let mut out = String::new();
2242        emit_gleam_doc(&mut out, "Gleam documentation", "    ");
2243        assert!(out.contains("/// Gleam documentation"));
2244    }
2245
2246    #[test]
2247    fn test_emit_zig_doc() {
2248        let mut out = String::new();
2249        emit_zig_doc(&mut out, "Zig documentation", "    ");
2250        assert!(out.contains("/// Zig documentation"));
2251    }
2252
2253    #[test]
2254    fn test_empty_doc_skipped() {
2255        let mut out = String::new();
2256        emit_phpdoc(&mut out, "", "", "TestException");
2257        emit_csharp_doc(&mut out, "", "", "TestException");
2258        emit_elixir_doc(&mut out, "");
2259        emit_roxygen(&mut out, "");
2260        emit_kdoc(&mut out, "", "");
2261        emit_dartdoc(&mut out, "", "");
2262        emit_gleam_doc(&mut out, "", "");
2263        emit_zig_doc(&mut out, "", "");
2264        assert!(out.is_empty());
2265    }
2266
2267    #[test]
2268    fn test_doc_first_paragraph_joined_single_line() {
2269        assert_eq!(doc_first_paragraph_joined("Simple doc."), "Simple doc.");
2270    }
2271
2272    #[test]
2273    fn test_doc_first_paragraph_joined_wrapped_sentence() {
2274        // Simulates a docstring like convert's: "Convert HTML to Markdown,\nreturning a result."
2275        let doc = "Convert HTML to Markdown,\nreturning a result.";
2276        assert_eq!(
2277            doc_first_paragraph_joined(doc),
2278            "Convert HTML to Markdown, returning a result."
2279        );
2280    }
2281
2282    #[test]
2283    fn test_doc_first_paragraph_joined_stops_at_blank_line() {
2284        let doc = "First paragraph.\nStill first.\n\nSecond paragraph.";
2285        assert_eq!(doc_first_paragraph_joined(doc), "First paragraph. Still first.");
2286    }
2287
2288    #[test]
2289    fn test_doc_first_paragraph_joined_empty() {
2290        assert_eq!(doc_first_paragraph_joined(""), "");
2291    }
2292
2293    #[test]
2294    fn test_parse_rustdoc_sections_basic() {
2295        let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n\n# Returns\n\nThe extracted text.\n\n# Errors\n\nReturns `KreuzbergError` on failure.";
2296        let sections = parse_rustdoc_sections(doc);
2297        assert_eq!(sections.summary, "Extracts text from a file.");
2298        assert_eq!(sections.arguments.as_deref(), Some("* `path` - The file path."));
2299        assert_eq!(sections.returns.as_deref(), Some("The extracted text."));
2300        assert_eq!(sections.errors.as_deref(), Some("Returns `KreuzbergError` on failure."));
2301        assert!(sections.panics.is_none());
2302    }
2303
2304    #[test]
2305    fn test_parse_rustdoc_sections_example_with_fence() {
2306        let doc = "Run the thing.\n\n# Example\n\n```rust\nlet x = run();\n```";
2307        let sections = parse_rustdoc_sections(doc);
2308        assert_eq!(sections.summary, "Run the thing.");
2309        assert!(sections.example.as_ref().unwrap().contains("```rust"));
2310        assert!(sections.example.as_ref().unwrap().contains("let x = run();"));
2311    }
2312
2313    #[test]
2314    fn test_parse_rustdoc_sections_pound_inside_fence_is_not_a_heading() {
2315        // Even though we get rustdoc-hidden lines pre-stripped, a literal
2316        // `# foo` inside a non-rust fence (e.g. shell example) must not
2317        // start a new section.
2318        let doc = "Summary.\n\n# Example\n\n```bash\n# install deps\nrun --foo\n```";
2319        let sections = parse_rustdoc_sections(doc);
2320        assert_eq!(sections.summary, "Summary.");
2321        assert!(sections.example.as_ref().unwrap().contains("# install deps"));
2322    }
2323
2324    #[test]
2325    fn test_parse_arguments_bullets_dash_separator() {
2326        let body = "* `path` - The file path.\n* `config` - Optional configuration.";
2327        let pairs = parse_arguments_bullets(body);
2328        assert_eq!(pairs.len(), 2);
2329        assert_eq!(pairs[0], ("path".to_string(), "The file path.".to_string()));
2330        assert_eq!(pairs[1], ("config".to_string(), "Optional configuration.".to_string()));
2331    }
2332
2333    #[test]
2334    fn test_parse_arguments_bullets_continuation_line() {
2335        let body = "* `path` - The file path,\n  resolved relative to cwd.\n* `mode` - Open mode.";
2336        let pairs = parse_arguments_bullets(body);
2337        assert_eq!(pairs.len(), 2);
2338        assert_eq!(pairs[0].1, "The file path, resolved relative to cwd.");
2339    }
2340
2341    #[test]
2342    fn test_replace_fence_lang_rust_to_typescript() {
2343        let body = "```rust\nlet x = run();\n```";
2344        let out = replace_fence_lang(body, "typescript");
2345        assert!(out.starts_with("```typescript"));
2346        assert!(out.contains("let x = run();"));
2347    }
2348
2349    #[test]
2350    fn test_replace_fence_lang_preserves_attrs() {
2351        let body = "```rust,no_run\nlet x = run();\n```";
2352        let out = replace_fence_lang(body, "typescript");
2353        assert!(out.starts_with("```typescript,no_run"));
2354    }
2355
2356    #[test]
2357    fn test_replace_fence_lang_no_fence_unchanged() {
2358        let body = "Plain prose with `inline code`.";
2359        let out = replace_fence_lang(body, "typescript");
2360        assert_eq!(out, "Plain prose with `inline code`.");
2361    }
2362
2363    fn fixture_sections() -> RustdocSections {
2364        let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n* `config` - Optional configuration.\n\n# Returns\n\nThe extracted text and metadata.\n\n# Errors\n\nReturns an error when the file is unreadable.\n\n# Example\n\n```rust\nlet result = extract(\"file.pdf\")?;\n```";
2365        parse_rustdoc_sections(doc)
2366    }
2367
2368    #[test]
2369    fn test_render_jsdoc_sections() {
2370        let sections = fixture_sections();
2371        let out = render_jsdoc_sections(&sections);
2372        assert!(out.starts_with("Extracts text from a file."));
2373        assert!(out.contains("@param path - The file path."));
2374        assert!(out.contains("@param config - Optional configuration."));
2375        assert!(out.contains("@returns The extracted text and metadata."));
2376        assert!(out.contains("@throws Returns an error when the file is unreadable."));
2377        // fixture example is ```rust — stripped when target is TypeScript
2378        assert!(!out.contains("@example"), "Rust example must not appear in TSDoc");
2379        assert!(!out.contains("```typescript"));
2380        assert!(!out.contains("```rust"));
2381    }
2382
2383    #[test]
2384    fn test_render_jsdoc_sections_preserves_typescript_example() {
2385        let doc = "Do something.\n\n# Example\n\n```typescript\nconst x = doSomething();\n```";
2386        let sections = parse_rustdoc_sections(doc);
2387        let out = render_jsdoc_sections(&sections);
2388        assert!(out.contains("@example"), "TypeScript example must be preserved");
2389        assert!(out.contains("```typescript"));
2390    }
2391
2392    #[test]
2393    fn test_render_javadoc_sections() {
2394        let sections = fixture_sections();
2395        let out = render_javadoc_sections(&sections, "KreuzbergRsException");
2396        assert!(out.contains("@param path The file path."));
2397        assert!(out.contains("@return The extracted text and metadata."));
2398        assert!(out.contains("@throws KreuzbergRsException Returns an error when the file is unreadable."));
2399        // Java rendering omits the example block (handled separately by emit_javadoc which
2400        // wraps code in `<pre>{@code}</pre>`); we just confirm summary survives.
2401        assert!(out.starts_with("Extracts text from a file."));
2402    }
2403
2404    #[test]
2405    fn test_render_csharp_xml_sections() {
2406        let sections = fixture_sections();
2407        let out = render_csharp_xml_sections(&sections, "KreuzbergException");
2408        assert!(out.contains("<summary>\nExtracts text from a file.\n</summary>"));
2409        assert!(out.contains("<param name=\"path\">The file path.</param>"));
2410        assert!(out.contains("<returns>The extracted text and metadata.</returns>"));
2411        assert!(out.contains("<exception cref=\"KreuzbergException\">"));
2412        assert!(out.contains("<example><code language=\"csharp\">"));
2413        assert!(out.contains("let result = extract"));
2414    }
2415
2416    #[test]
2417    fn test_render_phpdoc_sections() {
2418        let sections = fixture_sections();
2419        let out = render_phpdoc_sections(&sections, "KreuzbergException");
2420        assert!(out.contains("@param mixed $path The file path."));
2421        assert!(out.contains("@return The extracted text and metadata."));
2422        assert!(out.contains("@throws KreuzbergException"));
2423        // fixture example is ```rust — stripped when target is PHP
2424        assert!(!out.contains("```php"), "Rust example must not appear in PHPDoc");
2425        assert!(!out.contains("```rust"));
2426    }
2427
2428    #[test]
2429    fn test_render_phpdoc_sections_preserves_php_example() {
2430        let doc = "Do something.\n\n# Example\n\n```php\n$x = doSomething();\n```";
2431        let sections = parse_rustdoc_sections(doc);
2432        let out = render_phpdoc_sections(&sections, "MyException");
2433        assert!(out.contains("```php"), "PHP example must be preserved");
2434    }
2435
2436    #[test]
2437    fn test_render_doxygen_sections() {
2438        let sections = fixture_sections();
2439        let out = render_doxygen_sections(&sections);
2440        assert!(out.contains("\\param path The file path."));
2441        assert!(out.contains("\\return The extracted text and metadata."));
2442        assert!(out.contains("\\code"));
2443        assert!(out.contains("\\endcode"));
2444    }
2445
2446    #[test]
2447    fn test_emit_yard_doc_simple() {
2448        let mut out = String::new();
2449        emit_yard_doc(&mut out, "Simple Ruby documentation", "    ");
2450        assert!(out.contains("# Simple Ruby documentation"));
2451    }
2452
2453    #[test]
2454    fn test_emit_yard_doc_empty() {
2455        let mut out = String::new();
2456        emit_yard_doc(&mut out, "", "    ");
2457        assert!(out.is_empty());
2458    }
2459
2460    #[test]
2461    fn test_emit_yard_doc_with_sections() {
2462        let mut out = String::new();
2463        let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n\n# Returns\n\nThe extracted text.\n\n# Errors\n\nReturns error on failure.";
2464        emit_yard_doc(&mut out, doc, "  ");
2465        assert!(out.contains("# Extracts text from a file."));
2466        assert!(out.contains("# @param path The file path."));
2467        assert!(out.contains("# @return The extracted text."));
2468        assert!(out.contains("# @raise Returns error on failure."));
2469    }
2470
2471    #[test]
2472    fn test_emit_c_doxygen_simple_prose() {
2473        let mut out = String::new();
2474        emit_c_doxygen(&mut out, "Free a string.", "");
2475        assert!(out.contains("/// Free a string."), "got: {out}");
2476    }
2477
2478    #[test]
2479    fn test_emit_c_doxygen_with_sections() {
2480        let mut out = String::new();
2481        let doc = "Extract content from a file.\n\n# Arguments\n\n* `path` - Path to the file.\n* `mode` - Read mode.\n\n# Returns\n\nA newly allocated string the caller owns.\n\n# Errors\n\nReturns null when the file is unreadable.";
2482        emit_c_doxygen(&mut out, doc, "");
2483        assert!(out.contains("/// Extract content from a file."));
2484        assert!(out.contains("/// \\param path Path to the file."));
2485        assert!(out.contains("/// \\param mode Read mode."));
2486        assert!(out.contains("/// \\return A newly allocated string the caller owns."));
2487        assert!(out.contains("/// \\note Returns null when the file is unreadable."));
2488    }
2489
2490    #[test]
2491    fn test_emit_c_doxygen_safety_section_maps_to_note() {
2492        let mut out = String::new();
2493        let doc = "Free a buffer.\n\n# Safety\n\nPointer must have been returned by this library.";
2494        emit_c_doxygen(&mut out, doc, "");
2495        assert!(out.contains("/// \\note SAFETY: Pointer must have been returned by this library."));
2496    }
2497
2498    #[test]
2499    fn test_emit_c_doxygen_example_renders_code_fence() {
2500        let mut out = String::new();
2501        let doc = "Demo.\n\n# Example\n\n```rust\nlet x = run();\n```";
2502        emit_c_doxygen(&mut out, doc, "");
2503        assert!(out.contains("/// \\code"));
2504        assert!(out.contains("/// \\endcode"));
2505        assert!(out.contains("let x = run();"));
2506    }
2507
2508    #[test]
2509    fn test_emit_c_doxygen_strips_markdown_links() {
2510        let mut out = String::new();
2511        let doc = "See [the docs](https://example.com/x) for details.";
2512        emit_c_doxygen(&mut out, doc, "");
2513        assert!(
2514            out.contains("the docs (https://example.com/x)"),
2515            "expected flattened link, got: {out}"
2516        );
2517        assert!(!out.contains("](https://"));
2518    }
2519
2520    #[test]
2521    fn test_emit_c_doxygen_word_wraps_long_lines() {
2522        let mut out = String::new();
2523        let long = "a ".repeat(80);
2524        emit_c_doxygen(&mut out, long.trim(), "");
2525        for line in out.lines() {
2526            // Each emitted prefix is "/// " (4 chars); the body after that
2527            // should be ≤ 100 chars per `DOXYGEN_WRAP_WIDTH`.
2528            let body = line.trim_start_matches("/// ");
2529            assert!(body.len() <= 100, "line too long ({}): {line}", body.len());
2530        }
2531    }
2532
2533    #[test]
2534    fn test_emit_c_doxygen_empty_input_is_noop() {
2535        let mut out = String::new();
2536        emit_c_doxygen(&mut out, "", "");
2537        emit_c_doxygen(&mut out, "   \n\t  ", "");
2538        assert!(out.is_empty());
2539    }
2540
2541    #[test]
2542    fn test_emit_c_doxygen_indent_applied() {
2543        let mut out = String::new();
2544        emit_c_doxygen(&mut out, "Hello.", "    ");
2545        assert!(out.starts_with("    /// Hello."));
2546    }
2547
2548    #[test]
2549    fn test_render_yard_sections() {
2550        let sections = fixture_sections();
2551        let out = render_yard_sections(&sections);
2552        assert!(out.contains("@param path The file path."));
2553        assert!(out.contains("@return The extracted text and metadata."));
2554        assert!(out.contains("@raise Returns an error when the file is unreadable."));
2555        // fixture example is ```rust — stripped when target is Ruby
2556        assert!(!out.contains("@example"), "Rust example must not appear in YARD");
2557        assert!(!out.contains("```ruby"));
2558        assert!(!out.contains("```rust"));
2559    }
2560
2561    #[test]
2562    fn test_render_yard_sections_preserves_ruby_example() {
2563        let doc = "Do something.\n\n# Example\n\n```ruby\nputs :hi\n```";
2564        let sections = parse_rustdoc_sections(doc);
2565        let out = render_yard_sections(&sections);
2566        assert!(out.contains("@example"), "Ruby example must be preserved");
2567        assert!(out.contains("```ruby"));
2568    }
2569
2570    // --- M1: example_for_target unit tests ---
2571
2572    #[test]
2573    fn example_for_target_rust_fenced_suppressed_for_php() {
2574        let example = "```rust\nlet x = 1;\n```";
2575        assert_eq!(
2576            example_for_target(example, "php"),
2577            None,
2578            "rust-fenced example must be omitted for PHP target"
2579        );
2580    }
2581
2582    #[test]
2583    fn example_for_target_bare_fence_defaults_to_rust_suppressed_for_ruby() {
2584        let example = "```\nlet x = 1;\n```";
2585        assert_eq!(
2586            example_for_target(example, "ruby"),
2587            None,
2588            "bare fence is treated as Rust and must be omitted for Ruby target"
2589        );
2590    }
2591
2592    #[test]
2593    fn example_for_target_php_example_preserved_for_php() {
2594        let example = "```php\n$x = 1;\n```";
2595        let result = example_for_target(example, "php");
2596        assert!(result.is_some(), "PHP example must be preserved for PHP target");
2597        assert!(result.unwrap().contains("```php"));
2598    }
2599
2600    #[test]
2601    fn example_for_target_ruby_example_preserved_for_ruby() {
2602        let example = "```ruby\nputs :hi\n```";
2603        let result = example_for_target(example, "ruby");
2604        assert!(result.is_some(), "Ruby example must be preserved for Ruby target");
2605        assert!(result.unwrap().contains("```ruby"));
2606    }
2607
2608    #[test]
2609    fn render_phpdoc_sections_with_rust_example_emits_no_at_example_block() {
2610        let doc = "Convert HTML.\n\n# Arguments\n\n* `html` - The HTML input.\n\n# Example\n\n```rust\nlet result = convert(html, None)?;\n```";
2611        let sections = parse_rustdoc_sections(doc);
2612        let out = render_phpdoc_sections(&sections, "HtmlToMarkdownException");
2613        assert!(!out.contains("```php"), "no PHP @example block for Rust source");
2614        assert!(!out.contains("```rust"), "raw Rust must not leak into PHPDoc");
2615        assert!(out.contains("@param"), "other sections must still be emitted");
2616    }
2617
2618    // --- KDoc ktfmt-canonical format tests ---
2619
2620    #[test]
2621    fn test_emit_kdoc_ktfmt_canonical_short_single_line() {
2622        let mut out = String::new();
2623        emit_kdoc_ktfmt_canonical(&mut out, "Simple doc.", "");
2624        assert_eq!(
2625            out, "/** Simple doc. */\n",
2626            "short single-line comment should collapse to canonical format"
2627        );
2628    }
2629
2630    #[test]
2631    fn test_emit_kdoc_ktfmt_canonical_short_with_indent() {
2632        let mut out = String::new();
2633        emit_kdoc_ktfmt_canonical(&mut out, "Text node (most frequent - 100+ per document)", "    ");
2634        assert_eq!(out, "    /** Text node (most frequent - 100+ per document) */\n");
2635    }
2636
2637    #[test]
2638    fn test_emit_kdoc_ktfmt_canonical_long_comment_uses_multiline() {
2639        let mut out = String::new();
2640        let long_text = "This is a very long documentation comment that exceeds the 100-character line width limit and should therefore be emitted in multi-line format";
2641        emit_kdoc_ktfmt_canonical(&mut out, long_text, "");
2642        assert!(out.contains("/**\n"), "long comment should start with newline");
2643        assert!(out.contains(" * "), "long comment should use multi-line format");
2644        assert!(out.contains(" */\n"), "long comment should end with newline");
2645    }
2646
2647    #[test]
2648    fn test_emit_kdoc_ktfmt_canonical_multiline_comment() {
2649        let mut out = String::new();
2650        let doc = "First line.\n\nSecond paragraph.";
2651        emit_kdoc_ktfmt_canonical(&mut out, doc, "");
2652        assert!(out.contains("/**\n"), "multi-paragraph should use multi-line format");
2653        assert!(out.contains(" * First line."), "first paragraph preserved");
2654        assert!(out.contains(" *\n"), "blank line preserved");
2655        assert!(out.contains(" * Second paragraph."), "second paragraph preserved");
2656    }
2657
2658    #[test]
2659    fn test_emit_kdoc_ktfmt_canonical_empty_doc() {
2660        let mut out = String::new();
2661        emit_kdoc_ktfmt_canonical(&mut out, "", "");
2662        assert!(out.is_empty(), "empty doc should produce no output");
2663    }
2664
2665    #[test]
2666    fn test_emit_kdoc_ktfmt_canonical_fits_within_100_chars() {
2667        let mut out = String::new();
2668        // Construct exactly at the boundary: indent(0) + "/** " + content + " */" = 100 chars
2669        // "/** " = 4 chars, " */" = 3 chars, so content can be 93 chars
2670        let content = "a".repeat(93);
2671        emit_kdoc_ktfmt_canonical(&mut out, &content, "");
2672        let line = out.lines().next().unwrap();
2673        assert_eq!(
2674            line.len(),
2675            100,
2676            "should fit exactly at 100 chars and use single-line format"
2677        );
2678        assert!(out.starts_with("/**"), "should use single-line format");
2679    }
2680
2681    #[test]
2682    fn test_emit_kdoc_ktfmt_canonical_exceeds_100_chars() {
2683        let mut out = String::new();
2684        // Exceed 100 chars: content of 94 chars with "/** " + " */" = 101 chars
2685        let content = "a".repeat(94);
2686        emit_kdoc_ktfmt_canonical(&mut out, &content, "");
2687        assert!(
2688            out.contains("/**\n"),
2689            "should use multi-line format when exceeding 100 chars"
2690        );
2691        assert!(out.contains(" * "), "multi-line format with ` * ` prefix");
2692    }
2693
2694    #[test]
2695    fn test_emit_kdoc_ktfmt_canonical_respects_indent() {
2696        let mut out = String::new();
2697        // With 4-char indent, max content is 89 chars (4 + 4 + 89 + 3 = 100)
2698        let content = "a".repeat(89);
2699        emit_kdoc_ktfmt_canonical(&mut out, &content, "    ");
2700        let line = out.lines().next().unwrap();
2701        assert_eq!(line.len(), 100, "should respect indent in 100-char calculation");
2702        assert!(line.starts_with("    /** "), "should include indent");
2703    }
2704
2705    #[test]
2706    fn test_emit_kdoc_ktfmt_canonical_real_world_enum_variant() {
2707        let mut out = String::new();
2708        emit_kdoc_ktfmt_canonical(&mut out, "Text node (most frequent - 100+ per document)", "    ");
2709        // This is from NodeType enum; should collapse to single-line
2710        assert!(out.starts_with("    /** "), "should preserve 4-space indent");
2711        assert!(out.contains(" */\n"), "should end with newline");
2712        // Verify it's single-line format
2713        let line_count = out.lines().count();
2714        assert_eq!(line_count, 1, "should be single-line format");
2715    }
2716
2717    #[test]
2718    fn test_emit_kdoc_ktfmt_canonical_real_world_data_class_field() {
2719        let mut out = String::new();
2720        let doc = "Heading style to use in Markdown output (ATX `#` or Setext underline).";
2721        emit_kdoc_ktfmt_canonical(&mut out, doc, "    ");
2722        // This is from ConversionOptions data class; should collapse to single-line
2723        let line_count = out.lines().count();
2724        assert_eq!(line_count, 1, "should be single-line format");
2725        assert!(out.starts_with("    /** "), "should have correct indent");
2726    }
2727
2728    // --- sanitize_rust_idioms tests ---
2729
2730    #[test]
2731    fn sanitize_intradoc_link_with_path_separator_java() {
2732        let input = "See [`ConversionOptions::builder()`] for details.";
2733        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2734        assert!(out.contains("`ConversionOptions.builder()`"), "got: {out}");
2735        assert!(!out.contains("[`"), "brackets must be removed, got: {out}");
2736    }
2737
2738    #[test]
2739    fn sanitize_intradoc_link_simple_type_php() {
2740        let input = "Returns a [`ConversionResult`].";
2741        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
2742        assert!(out.contains("`ConversionResult`"), "got: {out}");
2743        assert!(!out.contains("[`"), "got: {out}");
2744    }
2745
2746    #[test]
2747    fn sanitize_none_to_null_javadoc() {
2748        let input = "Returns None when no value is found.";
2749        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2750        assert!(out.contains("null"), "got: {out}");
2751        assert!(!out.contains("None"), "got: {out}");
2752    }
2753
2754    #[test]
2755    fn sanitize_none_to_undefined_tsdoc() {
2756        let input = "Returns None if absent.";
2757        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2758        assert!(out.contains("undefined"), "got: {out}");
2759        assert!(!out.contains("None"), "got: {out}");
2760    }
2761
2762    #[test]
2763    fn sanitize_some_x_to_the_value_x() {
2764        let input = "Pass Some(value) to enable.";
2765        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2766        assert!(out.contains("the value (value)"), "got: {out}");
2767        assert!(!out.contains("Some("), "got: {out}");
2768    }
2769
2770    #[test]
2771    fn sanitize_bare_some_followed_by_lowercase_noun_is_dropped() {
2772        // Real leak from html-to-markdown PreprocessingOptionsUpdate.java:16.
2773        let input =
2774            "Only specified fields (Some values) will override existing options; None values leave the previous";
2775        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2776        assert!(
2777            out.contains("(values)"),
2778            "bare `Some ` before lowercase noun must be stripped; got: {out}"
2779        );
2780        assert!(
2781            out.contains("null values"),
2782            "bare `None ` must also be replaced; got: {out}"
2783        );
2784        assert!(!out.contains("Some "), "Some prefix must not survive; got: {out}");
2785    }
2786
2787    #[test]
2788    fn sanitize_bare_some_does_not_touch_identifiers_or_uppercase_followers() {
2789        // SomeType, Some.method(), Some(x), and "Some Title" (proper noun) all preserved.
2790        let cases = [
2791            "SomeType lives on.",
2792            "Some.method() returns Self.",
2793            "Some Title",
2794            "Some(x) is a value.",
2795        ];
2796        for case in cases {
2797            let out = sanitize_rust_idioms(case, DocTarget::JavaDoc);
2798            // For the Some(x) case, replace_some_calls (run earlier) converts to "the value (x)"
2799            // so "Some" itself is gone — that's expected; everything else preserves "Some".
2800            if case.starts_with("Some(") {
2801                assert!(out.contains("the value (x)"), "got: {out}");
2802            } else {
2803                assert!(out.contains("Some"), "Some must survive in {case:?}; got: {out}");
2804            }
2805        }
2806    }
2807
2808    #[test]
2809    fn sanitize_option_t_to_nullable_php() {
2810        let input = "The result is Option<String>.";
2811        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
2812        assert!(out.contains("String?"), "got: {out}");
2813        assert!(!out.contains("Option<"), "got: {out}");
2814    }
2815
2816    #[test]
2817    fn sanitize_option_t_to_or_null_java() {
2818        let input = "The result is Option<String>.";
2819        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2820        assert!(out.contains("String | null"), "got: {out}");
2821    }
2822
2823    #[test]
2824    fn sanitize_option_t_to_or_undefined_tsdoc() {
2825        let input = "The result is Option<String>.";
2826        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2827        assert!(out.contains("String | undefined"), "got: {out}");
2828    }
2829
2830    #[test]
2831    fn sanitize_vec_u8_per_target() {
2832        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::PhpDoc).contains("string"));
2833        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::JavaDoc).contains("byte[]"));
2834        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::TsDoc).contains("Uint8Array"));
2835        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::JsDoc).contains("Uint8Array"));
2836    }
2837
2838    #[test]
2839    fn sanitize_vec_t_to_array() {
2840        let input = "Returns Vec<String>.";
2841        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2842        assert!(out.contains("String[]"), "got: {out}");
2843        assert!(!out.contains("Vec<"), "got: {out}");
2844    }
2845
2846    #[test]
2847    fn sanitize_hashmap_per_target() {
2848        let input = "Uses HashMap<String, u32>.";
2849        assert!(sanitize_rust_idioms(input, DocTarget::PhpDoc).contains("array<String, u32>"));
2850        assert!(sanitize_rust_idioms(input, DocTarget::JavaDoc).contains("Map<String, u32>"));
2851        assert!(sanitize_rust_idioms(input, DocTarget::TsDoc).contains("Record<String, u32>"));
2852    }
2853
2854    #[test]
2855    fn sanitize_arc_wrapper_stripped() {
2856        let input = "Holds Arc<Config>.";
2857        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2858        assert!(out.contains("Config"), "got: {out}");
2859        assert!(!out.contains("Arc<"), "got: {out}");
2860    }
2861
2862    #[test]
2863    fn sanitize_box_mutex_rwlock_rc_cell_refcell_stripped() {
2864        for wrapper in &["Box", "Mutex", "RwLock", "Rc", "Cell", "RefCell"] {
2865            let input = format!("Contains {wrapper}<Inner>.");
2866            let out = sanitize_rust_idioms(&input, DocTarget::JavaDoc);
2867            assert!(out.contains("Inner"), "wrapper {wrapper} not stripped, got: {out}");
2868            assert!(
2869                !out.contains(&format!("{wrapper}<")),
2870                "wrapper {wrapper} still present, got: {out}"
2871            );
2872        }
2873    }
2874
2875    #[test]
2876    fn sanitize_send_sync_stripped() {
2877        let input = "The type is Send + Sync.";
2878        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2879        assert!(!out.contains("Send"), "got: {out}");
2880        assert!(!out.contains("Sync"), "got: {out}");
2881    }
2882
2883    #[test]
2884    fn sanitize_static_lifetime_stripped() {
2885        let input = "Requires 'static lifetime.";
2886        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2887        assert!(!out.contains("'static"), "got: {out}");
2888    }
2889
2890    #[test]
2891    fn sanitize_pub_fn_stripped() {
2892        let input = "Calls pub fn convert().";
2893        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
2894        assert!(!out.contains("pub fn"), "got: {out}");
2895        assert!(out.contains("convert()"), "got: {out}");
2896    }
2897
2898    #[test]
2899    fn sanitize_crate_prefix_stripped() {
2900        let input = "See crate::error::ConversionError.";
2901        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2902        assert!(!out.contains("crate::"), "got: {out}");
2903        assert!(out.contains("error.ConversionError"), "got: {out}");
2904    }
2905
2906    #[test]
2907    fn sanitize_unwrap_expect_stripped() {
2908        let input = "Call result.unwrap() or result.expect(\"msg\").";
2909        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2910        assert!(!out.contains(".unwrap()"), "got: {out}");
2911        assert!(!out.contains(".expect("), "got: {out}");
2912    }
2913
2914    #[test]
2915    fn sanitize_no_mutation_inside_backticks() {
2916        // None inside backtick span must not be replaced.
2917        let input = "Use `None` as the argument.";
2918        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2919        assert!(out.contains("`None`"), "backtick span must be preserved, got: {out}");
2920    }
2921
2922    #[test]
2923    fn sanitize_rust_fence_dropped_for_tsdoc() {
2924        let input = "Intro.\n\n```rust\nlet x = 1;\n```\n\nTrailer.";
2925        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2926        assert!(
2927            !out.contains("let x = 1;"),
2928            "rust fence content must be dropped, got: {out}"
2929        );
2930        assert!(!out.contains("```rust"), "got: {out}");
2931        assert!(out.contains("Trailer."), "text after fence must survive, got: {out}");
2932    }
2933
2934    #[test]
2935    fn sanitize_rust_fence_tag_stripped_for_java() {
2936        let input = "Intro.\n\n```rust\nlet x = 1;\n```\n\nTrailer.";
2937        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2938        // Language tag is stripped; content is kept.
2939        assert!(
2940            out.contains("let x = 1;"),
2941            "fence content must survive for Java, got: {out}"
2942        );
2943        assert!(!out.contains("```rust"), "rust tag must be stripped, got: {out}");
2944        assert!(out.contains("```\n"), "bare fence must be kept, got: {out}");
2945    }
2946
2947    #[test]
2948    fn sanitize_non_rust_fence_passed_through() {
2949        let input = "Example:\n\n```typescript\nconst x = 1;\n```";
2950        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2951        assert!(out.contains("```typescript"), "non-rust fence must survive, got: {out}");
2952        assert!(out.contains("const x = 1;"), "got: {out}");
2953    }
2954
2955    #[test]
2956    fn sanitize_backtick_code_span_not_mutated_option() {
2957        // Option<T> inside backtick span must not be replaced.
2958        let input = "The type is `Option<String>`.";
2959        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2960        // The backtick-protected span should be preserved verbatim.
2961        assert!(
2962            out.contains("`Option<String>`"),
2963            "code span must be preserved, got: {out}"
2964        );
2965    }
2966
2967    #[test]
2968    fn sanitize_idempotent() {
2969        // Running twice should produce the same result as running once.
2970        let input = "Returns None when Vec<String> is empty.";
2971        let once = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2972        let twice = sanitize_rust_idioms(&once, DocTarget::JavaDoc);
2973        assert_eq!(once, twice, "sanitize_rust_idioms should be idempotent");
2974    }
2975
2976    #[test]
2977    fn sanitize_multiline_prose() {
2978        let input = "Convert HTML to Markdown.\n\nReturns None on failure.\nUse Option<String> for the result.";
2979        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2980        assert!(out.contains("null"), "None must be replaced on line 2, got: {out}");
2981        assert!(
2982            out.contains("String | null"),
2983            "Option<String> must be replaced on line 3, got: {out}"
2984        );
2985    }
2986
2987    #[test]
2988    fn sanitize_attribute_line_dropped() {
2989        let input = "#[derive(Debug, Clone)]\nSome documentation.";
2990        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2991        assert!(!out.contains("#[derive("), "attribute line must be dropped, got: {out}");
2992        // Prose survives, though bare "Some " before a lowercase noun is stripped
2993        // by `replace_some_keyword_in_prose`, so accept either form.
2994        assert!(out.contains("documentation."), "prose must survive, got: {out}");
2995    }
2996
2997    #[test]
2998    fn sanitize_path_separator_in_prose() {
2999        let input = "See std::collections::HashMap for details.";
3000        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
3001        assert!(out.contains("std.collections.HashMap"), ":: must become ., got: {out}");
3002    }
3003
3004    #[test]
3005    fn sanitize_none_not_replaced_inside_identifier() {
3006        // "NoneType" must not be replaced.
3007        let input = "Unlike NoneType in Python.";
3008        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
3009        assert!(out.contains("NoneType"), "NoneType must not be replaced, got: {out}");
3010    }
3011
3012    // --- CSharpDoc target tests ---
3013
3014    #[test]
3015    fn sanitize_csharp_drops_rust_section_headings_and_example_body() {
3016        // The GraphQLErrorException case: `# Examples` heading followed by a
3017        // ```ignore code fence containing `Self::error_code`, `Result<T, E>`,
3018        // intra-doc links — all of which previously leaked into `<summary>`.
3019        let input = "Convert error to HTTP status code\n\n\
3020            Maps GraphQL error types to status codes.\n\n\
3021            # Examples\n\n\
3022            ```ignore\n\
3023            use spikard_graphql::error::GraphQLError;\n\
3024            let error = GraphQLError::AuthenticationError(\"Invalid token\".to_string());\n\
3025            assert_eq!(error.status_code(), 401);\n\
3026            ```\n";
3027        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3028        assert!(
3029            out.contains("Convert error to HTTP status code"),
3030            "summary preserved: {out}"
3031        );
3032        assert!(out.contains("Maps GraphQL error types"), "prose preserved: {out}");
3033        assert!(!out.contains("# Examples"), "heading dropped: {out}");
3034        assert!(!out.contains("```"), "code fence dropped: {out}");
3035        assert!(!out.contains("Self::error_code"), "Self::method dropped: {out}");
3036        assert!(
3037            !out.contains("GraphQLError::AuthenticationError"),
3038            "rust path dropped: {out}"
3039        );
3040    }
3041
3042    #[test]
3043    fn sanitize_csharp_intradoc_link_with_path_separator() {
3044        let input = "See [`Self::error_code`] for the variant codes.";
3045        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3046        assert!(out.contains("`Self.error_code`"), "intra-doc link normalised: {out}");
3047        assert!(!out.contains("[`"), "square brackets removed: {out}");
3048        assert!(!out.contains("::"), ":: replaced with .: {out}");
3049    }
3050
3051    #[test]
3052    fn sanitize_csharp_result_type_keeps_success_drops_error() {
3053        let input = "Returns Result<String, ConversionError> on failure.";
3054        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3055        assert!(out.contains("String"), "success type kept: {out}");
3056        assert!(!out.contains("Result<"), "Result wrapper dropped: {out}");
3057        assert!(!out.contains("ConversionError"), "error type dropped: {out}");
3058    }
3059
3060    #[test]
3061    fn sanitize_csharp_option_becomes_nullable() {
3062        let input = "Returns Option<String>.";
3063        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3064        // After XML-escaping, the `?` survives but any surviving `<`/`>` get escaped.
3065        assert!(out.contains("String?"), "Option<T> -> T?: {out}");
3066        assert!(!out.contains("Option<"), "Option dropped: {out}");
3067    }
3068
3069    #[test]
3070    fn sanitize_csharp_vec_u8_becomes_byte_array() {
3071        let input = "Accepts Vec<u8>.";
3072        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3073        // `byte[]` survives — the `[` is not XML-significant.
3074        assert!(out.contains("byte[]"), "Vec<u8> -> byte[]: {out}");
3075    }
3076
3077    #[test]
3078    fn sanitize_csharp_hashmap_becomes_dictionary() {
3079        let input = "Holds HashMap<String, u32>.";
3080        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3081        // The `<` / `>` produced by Dictionary<K, V> must be XML-escaped.
3082        assert!(
3083            out.contains("Dictionary&lt;String, u32&gt;"),
3084            "HashMap -> Dictionary with XML-escaped brackets: {out}"
3085        );
3086    }
3087
3088    #[test]
3089    fn sanitize_csharp_none_to_null() {
3090        let input = "Returns None on miss.";
3091        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3092        assert!(out.contains("null"), "None -> null: {out}");
3093        assert!(!out.contains("None"), "None replaced: {out}");
3094    }
3095
3096    #[test]
3097    fn sanitize_csharp_escapes_raw_angle_brackets_and_amp() {
3098        // Unrecognised `<...>` constructs (e.g. trait objects, generic params on
3099        // unknown names) must still be XML-escaped so the result is valid inside
3100        // `<summary>`.
3101        let input = "Accepts Box<dyn Trait> and combines a & b.";
3102        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3103        // Box<T> wrapper is stripped to inner type, leaving `dyn Trait`.
3104        assert!(out.contains("dyn Trait"), "Box<T> stripped: {out}");
3105        assert!(out.contains("&amp;"), "ampersand escaped: {out}");
3106    }
3107
3108    #[test]
3109    fn sanitize_csharp_drops_rust_code_fence_entirely() {
3110        let input = "Intro.\n\n```rust\nlet x: Vec<u8> = vec![];\n```\n\nTrailer.";
3111        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3112        assert!(!out.contains("let x"), "code fence body dropped: {out}");
3113        assert!(!out.contains("```"), "fence markers dropped: {out}");
3114        assert!(out.contains("Intro."), "prose before fence kept: {out}");
3115        assert!(out.contains("Trailer."), "prose after fence kept: {out}");
3116    }
3117
3118    #[test]
3119    fn sanitize_csharp_keep_sections_does_not_drop_headings() {
3120        // The sections-preserving variant leaves heading lines alone so callers
3121        // that have already extracted sections can sanitise each body fragment.
3122        let input = "Summary.\n\n# Arguments\n\n* `name` - the value.";
3123        let out = sanitize_rust_idioms_keep_sections(input, DocTarget::CSharpDoc);
3124        assert!(out.contains("# Arguments"), "heading preserved: {out}");
3125        assert!(out.contains("name"), "body preserved: {out}");
3126    }
3127
3128    #[test]
3129    fn sanitize_csharp_idempotent() {
3130        let input = "Returns Option<String> or None.";
3131        let once = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3132        let twice = sanitize_rust_idioms(&once, DocTarget::CSharpDoc);
3133        assert_eq!(once, twice, "CSharpDoc sanitisation must be idempotent");
3134    }
3135}