Skip to main content

alef_codegen/
doc_emission.rs

1//! Language-native documentation comment emission.
2//! Provides standardized functions for emitting doc comments in different languages.
3
4/// Emit PHPDoc-style comments (/** ... */)
5/// Used for PHP classes, methods, and properties.
6///
7/// Sanitizes Rust-specific idioms before translating rustdoc sections
8/// (`# Arguments` → `@param`, `# Returns` → `@return`, `# Errors` → `@throws`,
9/// `# Example` → ` ```php ` fence) via [`render_phpdoc_sections`].
10///
11/// `exception_class` is the PHP exception class name to use in `@throws` tags.
12pub fn emit_phpdoc(out: &mut String, doc: &str, indent: &str, exception_class: &str) {
13    if doc.is_empty() {
14        return;
15    }
16    // Sanitize Rust-specific idioms before processing sections.
17    let sanitized = sanitize_rust_idioms(doc, DocTarget::PhpDoc);
18    let sections = parse_rustdoc_sections(&sanitized);
19    let any_section = sections.arguments.is_some()
20        || sections.returns.is_some()
21        || sections.errors.is_some()
22        || sections.example.is_some();
23    let body = if any_section {
24        render_phpdoc_sections(&sections, exception_class)
25    } else {
26        sanitized
27    };
28    out.push_str(indent);
29    out.push_str("/**\n");
30    for line in body.lines() {
31        out.push_str(indent);
32        out.push_str(" * ");
33        out.push_str(&escape_phpdoc_line(line));
34        out.push('\n');
35    }
36    out.push_str(indent);
37    out.push_str(" */\n");
38}
39
/// Defuse `*/` in a PHPDoc body line so it cannot close the surrounding
/// `/** ... */` comment: every occurrence becomes `* /`.
fn escape_phpdoc_line(s: &str) -> String {
    // Splitting on the terminator and re-joining with the spaced form rewrites
    // each non-overlapping occurrence, left to right.
    let pieces: Vec<&str> = s.split("*/").collect();
    pieces.join("* /")
}
44
45/// Emit C# XML documentation comments (/// <summary> ... </summary>)
46/// Used for C# classes, structs, methods, and properties.
47///
48/// Translates rustdoc sections (`# Arguments` → `<param>`,
49/// `# Returns` → `<returns>`, `# Errors` → `<exception>`,
50/// `# Example` → `<example><code>`) via [`render_csharp_xml_sections`].
51///
52/// `exception_class` is the C# exception class name to use in `<exception cref="...">` tags.
53pub fn emit_csharp_doc(out: &mut String, doc: &str, indent: &str, exception_class: &str) {
54    if doc.is_empty() {
55        return;
56    }
57    // Parse sections from the raw rustdoc first (so `# Examples` / `# Arguments`
58    // / `# Returns` / `# Errors` are routed into structured XML tags), then
59    // sanitise each section body to strip Rust idioms and XML-escape `<`/`>`/`&`.
60    let raw_sections = parse_rustdoc_sections(doc);
61    let sections = RustdocSections {
62        summary: sanitize_rust_idioms_keep_sections(&raw_sections.summary, DocTarget::CSharpDoc),
63        arguments: raw_sections
64            .arguments
65            .as_deref()
66            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
67        returns: raw_sections
68            .returns
69            .as_deref()
70            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
71        errors: raw_sections
72            .errors
73            .as_deref()
74            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
75        panics: raw_sections
76            .panics
77            .as_deref()
78            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
79        safety: raw_sections
80            .safety
81            .as_deref()
82            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
83        // Examples typically contain Rust code that doesn't compile as C#; drop the body
84        // entirely rather than risk leaking unparseable code into `<example>`.
85        example: None,
86    };
87    let any_section = sections.arguments.is_some()
88        || sections.returns.is_some()
89        || sections.errors.is_some()
90        || sections.example.is_some();
91    if !any_section {
92        // Backwards-compatible path: plain `<summary>` for prose-only docs.
93        out.push_str(indent);
94        out.push_str("/// <summary>\n");
95        for line in sections.summary.lines() {
96            out.push_str(indent);
97            out.push_str("/// ");
98            // Note: sanitise_rust_idioms_keep_sections already XML-escaped <, >, & for
99            // the CSharpDoc target. We deliberately do NOT call escape_csharp_doc_line
100            // here because that would double-encode (e.g. `&amp;` → `&amp;amp;`).
101            out.push_str(line);
102            out.push('\n');
103        }
104        out.push_str(indent);
105        out.push_str("/// </summary>\n");
106        return;
107    }
108    let rendered = render_csharp_xml_sections(&sections, exception_class);
109    for line in rendered.lines() {
110        out.push_str(indent);
111        out.push_str("/// ");
112        // The rendered tags already contain the canonical chars; we only
113        // escape XML special chars that aren't part of our tag syntax. Since
114        // render_csharp_xml_sections produces well-formed XML, raw passthrough
115        // is correct.
116        out.push_str(line);
117        out.push('\n');
118    }
119}
120
121/// Emit Elixir documentation comments (@doc)
122/// Used for Elixir modules and functions.
123pub fn emit_elixir_doc(out: &mut String, doc: &str) {
124    if doc.is_empty() {
125        return;
126    }
127    out.push_str("@doc \"\"\"\n");
128    for line in doc.lines() {
129        out.push_str(&escape_elixir_doc_line(line));
130        out.push('\n');
131    }
132    out.push_str("\"\"\"\n");
133}
134
/// Emit Rust `///` documentation comments, one per line of `doc`.
///
/// Used by alef backends that emit Rust source (e.g., the Rustler NIF crate,
/// the swift-bridge wrapper crate, the FRB Dart bridge crate). Distinct from
/// `emit_swift_doc` only by intent — the syntax is identical (`/// ` per line).
///
/// Each output line is `indent`, then `/// `, then the doc line. Nothing is
/// written when `doc` is empty.
pub fn emit_rustdoc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    doc.lines().for_each(|text| {
        out.push_str(indent);
        out.push_str("/// ");
        out.push_str(text);
        out.push('\n');
    });
}
151
/// Escape an Elixir doc line so it never contains a `"""` sequence that could
/// close the surrounding heredoc early.
///
/// A space is inserted before every third consecutive double quote, so a run
/// of quotes is split into groups of at most two (`"""` → `"" "`,
/// `""""""` → `"" "" ""`). A plain `replace("\"\"\"", "\"\" \"")` is not
/// enough: for runs of five or more quotes the tail of one replacement joins
/// the following quotes and forms a new `"""`.
fn escape_elixir_doc_line(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    // Number of consecutive `"` characters written immediately before `ch`.
    let mut quote_run = 0usize;
    for ch in s.chars() {
        if ch == '"' {
            if quote_run == 2 {
                // A third quote in a row would complete `"""`; break the run.
                escaped.push(' ');
                quote_run = 0;
            }
            quote_run += 1;
        } else {
            quote_run = 0;
        }
        escaped.push(ch);
    }
    escaped
}
156
/// Emit R roxygen2 documentation comments for R functions.
///
/// Every line of `doc` is written prefixed with `#' `. Nothing is written when
/// `doc` is empty.
pub fn emit_roxygen(out: &mut String, doc: &str) {
    if doc.is_empty() {
        return;
    }
    for text in doc.lines() {
        out.extend(["#' ", text, "\n"]);
    }
}
169
/// Emit Swift `///` documentation comments for structs, enums, and functions.
///
/// Each line of `doc` is written as `indent` + `/// ` + line. Nothing is
/// written when `doc` is empty.
pub fn emit_swift_doc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    // The per-line prefix is the same for every line, so build it once.
    let prefix = [indent, "/// "].concat();
    for text in doc.lines() {
        out.push_str(&prefix);
        out.push_str(text);
        out.push('\n');
    }
}
183
184/// Emit Javadoc-style documentation comments (/** ... */)
185/// Used for Java classes, methods, and fields.
186/// Handles XML escaping and Javadoc tag formatting.
187pub fn emit_javadoc(out: &mut String, doc: &str, indent: &str) {
188    if doc.is_empty() {
189        return;
190    }
191    out.push_str(indent);
192    out.push_str("/**\n");
193    for line in doc.lines() {
194        let escaped = escape_javadoc_line(line);
195        let trimmed = escaped.trim_end();
196        if trimmed.is_empty() {
197            out.push_str(indent);
198            out.push_str(" *\n");
199        } else {
200            out.push_str(indent);
201            out.push_str(" * ");
202            out.push_str(trimmed);
203            out.push('\n');
204        }
205    }
206    out.push_str(indent);
207    out.push_str(" */\n");
208}
209
/// Emit a KDoc block (`/** ... */`) for Kotlin classes, methods, and
/// properties.
///
/// Trailing whitespace is removed from every line of `doc`; lines that are
/// empty afterwards are written as a bare ` *`. Nothing is written when `doc`
/// is empty.
pub fn emit_kdoc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    out.extend([indent, "/**\n"]);
    for row in doc.lines().map(str::trim_end) {
        out.push_str(indent);
        if row.is_empty() {
            out.push_str(" *\n");
        } else {
            out.extend([" * ", row, "\n"]);
        }
    }
    out.extend([indent, " */\n"]);
}
233
/// Emit KDoc-style documentation comments in ktfmt-canonical format.
///
/// ktfmt collapses short KDoc comments to single-line format (`/** ... */`)
/// when they fit within the 100-character line width limit. This function
/// generates KDoc in that canonical form to avoid unnecessary formatting
/// diffs when the generated code is passed through ktfmt.
///
/// - A single-line doc that fits in 100 characters (indent + `/** ` +
///   content + ` */`) is emitted as `/** content */`.
/// - Anything longer, or spanning several lines, is emitted with newlines and
///   ` * ` prefixes; trailing whitespace is stripped and blank lines become a
///   bare ` *`.
///
/// The width is measured in characters, not UTF-8 bytes, so documentation
/// containing non-ASCII text is not forced into multi-line form while it
/// still fits the line width. Nothing is written when `doc` is empty.
pub fn emit_kdoc_ktfmt_canonical(out: &mut String, doc: &str, indent: &str) {
    const KTFMT_LINE_WIDTH: usize = 100;
    const OPENER: &str = "/** ";
    const CLOSER: &str = " */";

    if doc.is_empty() {
        return;
    }

    let lines: Vec<&str> = doc.lines().collect();

    // Only a doc consisting of exactly one line can use the collapsed form.
    if let [only] = lines.as_slice() {
        let content = only.trim();
        let width =
            indent.chars().count() + OPENER.len() + content.chars().count() + CLOSER.len();
        if width <= KTFMT_LINE_WIDTH {
            out.push_str(indent);
            out.push_str(OPENER);
            out.push_str(content);
            out.push_str(CLOSER);
            out.push('\n');
            return;
        }
    }

    // Multi-line format (default for long or multi-paragraph comments).
    out.push_str(indent);
    out.push_str("/**\n");
    for line in lines {
        let trimmed = line.trim_end();
        out.push_str(indent);
        if trimmed.is_empty() {
            out.push_str(" *\n");
        } else {
            out.push_str(" * ");
            out.push_str(trimmed);
            out.push('\n');
        }
    }
    out.push_str(indent);
    out.push_str(" */\n");
}
288
/// Emit Dartdoc `///` documentation comments for Dart classes, methods, and
/// properties.
///
/// Each line of `doc` is written as `indent` + `/// ` + line. Nothing is
/// written when `doc` is empty.
pub fn emit_dartdoc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    doc.lines()
        .for_each(|text| out.extend([indent, "/// ", text, "\n"]));
}
302
/// Emit Gleam `///` documentation comments for functions and types.
///
/// Each line of `doc` is written as `indent` + `/// ` + line. Nothing is
/// written when `doc` is empty.
pub fn emit_gleam_doc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    // Flatten every doc line into its four output fragments and append them
    // in one pass.
    out.extend(doc.lines().flat_map(|text| [indent, "/// ", text, "\n"]));
}
316
317/// Emit Doxygen-style C documentation comments using `///`-prefixed lines.
318///
319/// Used by `alef-backend-ffi` above every `extern "C" fn`, the `*_len()`
320/// companion, opaque-handle typedef, and (post-cbindgen) the type/enum
321/// declarations cbindgen surfaces in the generated `.h`. cbindgen translates
322/// `///` source lines into a single `/** ... */` Doxygen block per item, so we
323/// only need to emit per-line `///` content here.
324///
325/// Translates rustdoc sections via [`render_doxygen_sections`]:
326///
327/// - `# Arguments` → `\param <name> <description>` (one per arg).
328/// - `# Returns`   → `\return <description>`.
329/// - `# Errors`    → `\note <description>` (Doxygen has no `\throws` for C;
330///   `\note` is the convention).
331/// - `# Safety`    → `\note SAFETY: <description>`.
332/// - `# Example`   → `\code` ... `\endcode` block.
333///
334/// Markdown links (`[text](url)`) are flattened to `text (url)`. Body lines
335/// are word-wrapped at ~100 columns so the rendered `/** */` block stays
336/// readable in IDE tooltips and terminal viewers.
337pub fn emit_c_doxygen(out: &mut String, doc: &str, indent: &str) {
338    if doc.trim().is_empty() {
339        return;
340    }
341    let sections = parse_rustdoc_sections(doc);
342    let any_section = sections.arguments.is_some()
343        || sections.returns.is_some()
344        || sections.errors.is_some()
345        || sections.safety.is_some()
346        || sections.example.is_some();
347    let mut body = if any_section {
348        render_doxygen_sections_with_notes(&sections)
349    } else {
350        sections.summary.clone()
351    };
352    body = strip_markdown_links(&body);
353    let wrapped = word_wrap(&body, DOXYGEN_WRAP_WIDTH);
354    for line in wrapped.lines() {
355        out.push_str(indent);
356        out.push_str("/// ");
357        out.push_str(line);
358        out.push('\n');
359    }
360}
361
/// Column width that `emit_c_doxygen` passes to `word_wrap` when wrapping
/// Doxygen body lines.
const DOXYGEN_WRAP_WIDTH: usize = 100;
363
364/// Render `RustdocSections` as a Doxygen body but route `# Errors` and
365/// `# Safety` to `\note` lines instead of plain prose. This is the variant
366/// `emit_c_doxygen` uses; the public `render_doxygen_sections` keeps its
367/// long-standing plain-prose semantics so existing callers don't shift.
368fn render_doxygen_sections_with_notes(sections: &RustdocSections) -> String {
369    let mut out = String::new();
370    if !sections.summary.is_empty() {
371        out.push_str(&sections.summary);
372    }
373    if let Some(args) = sections.arguments.as_deref() {
374        for (name, desc) in parse_arguments_bullets(args) {
375            if !out.is_empty() {
376                out.push('\n');
377            }
378            if desc.is_empty() {
379                out.push_str("\\param ");
380                out.push_str(&name);
381            } else {
382                out.push_str("\\param ");
383                out.push_str(&name);
384                out.push(' ');
385                out.push_str(&desc);
386            }
387        }
388    }
389    if let Some(ret) = sections.returns.as_deref() {
390        if !out.is_empty() {
391            out.push('\n');
392        }
393        out.push_str("\\return ");
394        out.push_str(ret.trim());
395    }
396    if let Some(err) = sections.errors.as_deref() {
397        if !out.is_empty() {
398            out.push('\n');
399        }
400        out.push_str("\\note ");
401        out.push_str(err.trim());
402    }
403    if let Some(safety) = sections.safety.as_deref() {
404        if !out.is_empty() {
405            out.push('\n');
406        }
407        out.push_str("\\note SAFETY: ");
408        out.push_str(safety.trim());
409    }
410    if let Some(example) = sections.example.as_deref() {
411        if !out.is_empty() {
412            out.push('\n');
413        }
414        out.push_str("\\code\n");
415        for line in example.lines() {
416            let t = line.trim_start();
417            if t.starts_with("```") {
418                continue;
419            }
420            out.push_str(line);
421            out.push('\n');
422        }
423        out.push_str("\\endcode");
424    }
425    out
426}
427
/// Flatten Markdown inline links `[text](url)` to `text (url)` so the rendered
/// Doxygen block stays readable when consumed without a Markdown filter.
///
/// A link is recognised when a `[` is followed by the nearest `]`, that `]` is
/// immediately followed by `(`, and a `)` occurs somewhere after it (no
/// nested-bracket handling). Any `[` that does not start such a link is copied
/// through unchanged, as is all other text.
///
/// The input is processed as string slices rather than byte-by-byte, so
/// non-ASCII text is preserved exactly.
fn strip_markdown_links(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(open) = rest.find('[') {
        // Everything before the bracket cannot be part of a link.
        out.push_str(&rest[..open]);
        let tail = &rest[open + 1..];
        let link = tail.find(']').and_then(|close| {
            let after_paren = tail[close + 1..].strip_prefix('(')?;
            let url_end = after_paren.find(')')?;
            Some((
                &tail[..close],
                &after_paren[..url_end],
                &after_paren[url_end + 1..],
            ))
        });
        match link {
            Some((text, url, remainder)) => {
                out.push_str(text);
                out.push_str(" (");
                out.push_str(url);
                out.push(')');
                rest = remainder;
            }
            None => {
                // Not a link: keep the bracket and resume scanning right after it.
                out.push('[');
                rest = tail;
            }
        }
    }
    out.push_str(rest);
    out
}
460
/// Word-wrap each input line at `width` columns.
///
/// The following lines are passed through verbatim to preserve example
/// formatting:
///
/// - lines starting with `\code` or `\endcode`, and everything between them;
/// - Markdown fence marker lines (` ``` `) and every line between an opening
///   and a closing fence (an unterminated fence runs to the end of the input).
///
/// Any other line longer than `width` bytes is split on whitespace and
/// re-flowed greedily; runs of whitespace collapse to a single space and
/// leading indentation of a wrapped line is not kept. A single word longer
/// than `width` is placed on a line of its own. Trailing newlines are removed
/// from the result.
fn word_wrap(input: &str, width: usize) -> String {
    let mut out = String::with_capacity(input.len());
    let mut in_code = false;
    let mut in_fence = false;
    for raw in input.lines() {
        let trimmed = raw.trim_start();
        // Decide whether this line belongs to a verbatim region, updating the
        // `\code` / fence state as marker lines are seen.
        let verbatim = if trimmed.starts_with("\\code") {
            in_code = true;
            true
        } else if trimmed.starts_with("\\endcode") {
            in_code = false;
            true
        } else if in_code {
            true
        } else if trimmed.starts_with("```") {
            in_fence = !in_fence;
            true
        } else {
            in_fence
        };
        if verbatim || raw.len() <= width {
            out.push_str(raw);
            out.push('\n');
            continue;
        }
        let mut current = String::with_capacity(width);
        for word in raw.split_whitespace() {
            if current.is_empty() {
                current.push_str(word);
            } else if current.len() + 1 + word.len() > width {
                out.push_str(&current);
                out.push('\n');
                current.clear();
                current.push_str(word);
            } else {
                current.push(' ');
                current.push_str(word);
            }
        }
        if !current.is_empty() {
            out.push_str(&current);
            out.push('\n');
        }
    }
    out.trim_end_matches('\n').to_string()
}
512
/// Emit Zig `///` documentation comments for functions, types, and
/// declarations.
///
/// Each line of `doc` is written as `indent` + `/// ` + line. Nothing is
/// written when `doc` is empty.
pub fn emit_zig_doc(out: &mut String, doc: &str, indent: &str) {
    if !doc.is_empty() {
        for text in doc.lines() {
            out.extend([indent, "/// ", text, "\n"]);
        }
    }
}
526
527/// Emit YARD documentation comments for Ruby.
528/// Used for Ruby classes, methods, and attributes.
529///
530/// YARD syntax: each line prefixed with `# ` (with space). Translates rustdoc
531/// sections (`# Arguments` → `@param`, `# Returns` → `@return`, `# Errors` → `@raise`)
532/// via [`render_yard_sections`].
533pub fn emit_yard_doc(out: &mut String, doc: &str, indent: &str) {
534    if doc.is_empty() {
535        return;
536    }
537    let sections = parse_rustdoc_sections(doc);
538    let any_section = sections.arguments.is_some()
539        || sections.returns.is_some()
540        || sections.errors.is_some()
541        || sections.example.is_some();
542    let body = if any_section {
543        render_yard_sections(&sections)
544    } else {
545        doc.to_string()
546    };
547    for line in body.lines() {
548        out.push_str(indent);
549        out.push_str("# ");
550        out.push_str(line);
551        out.push('\n');
552    }
553}
554
555/// Render `RustdocSections` as YARD documentation comment body.
556///
557/// - `# Arguments` → `@param name desc` (one per arg)
558/// - `# Returns`   → `@return desc`
559/// - `# Errors`    → `@raise desc`
560/// - `# Example`   → `@example` block.
561///
562/// Output is a plain string with `\n` separators; the emitter wraps each line
563/// in `# ` itself.
564pub fn render_yard_sections(sections: &RustdocSections) -> String {
565    let mut out = String::new();
566    if !sections.summary.is_empty() {
567        out.push_str(&sections.summary);
568    }
569    if let Some(args) = sections.arguments.as_deref() {
570        for (name, desc) in parse_arguments_bullets(args) {
571            if !out.is_empty() {
572                out.push('\n');
573            }
574            if desc.is_empty() {
575                out.push_str("@param ");
576                out.push_str(&name);
577            } else {
578                out.push_str("@param ");
579                out.push_str(&name);
580                out.push(' ');
581                out.push_str(&desc);
582            }
583        }
584    }
585    if let Some(ret) = sections.returns.as_deref() {
586        if !out.is_empty() {
587            out.push('\n');
588        }
589        out.push_str("@return ");
590        out.push_str(ret.trim());
591    }
592    if let Some(err) = sections.errors.as_deref() {
593        if !out.is_empty() {
594            out.push('\n');
595        }
596        out.push_str("@raise ");
597        out.push_str(err.trim());
598    }
599    if let Some(example) = sections.example.as_deref() {
600        if let Some(body) = example_for_target(example, "ruby") {
601            if !out.is_empty() {
602                out.push('\n');
603            }
604            out.push_str("@example\n");
605            out.push_str(&body);
606        }
607    }
608    out
609}
610
611/// Escape Javadoc line: handle XML special chars and backtick code blocks.
612///
613/// HTML entities (`<`, `>`, `&`) are also escaped *inside* `{@code …}` blocks.
614/// Without that, content like `` `<pre><code>` `` would emit raw `<pre>`
615/// inside the Javadoc tag — Eclipse-formatter Spotless then treats it as a
616/// real `<pre>` block element and shatters the line across multiple `* `
617/// rows, breaking `alef-verify`'s embedded hash. Escaped content is
618/// rendered identically by Javadoc readers (the `{@code}` tag shows literal
619/// characters) and is stable under any post-formatter pass.
620fn escape_javadoc_line(s: &str) -> String {
621    let mut result = String::with_capacity(s.len());
622    let mut chars = s.chars().peekable();
623    while let Some(ch) = chars.next() {
624        if ch == '`' {
625            let mut code = String::new();
626            for c in chars.by_ref() {
627                if c == '`' {
628                    break;
629                }
630                code.push(c);
631            }
632            result.push_str("{@code ");
633            result.push_str(&escape_javadoc_html_entities(&code));
634            result.push('}');
635        } else if ch == '<' {
636            result.push_str("&lt;");
637        } else if ch == '>' {
638            result.push_str("&gt;");
639        } else if ch == '&' {
640            result.push_str("&amp;");
641        } else {
642            result.push(ch);
643        }
644    }
645    result
646}
647
/// Escape only the HTML special characters that would otherwise be parsed by
/// downstream Javadoc/Eclipse formatters as block-level HTML (e.g. `<pre>`):
/// `<` → `&lt;`, `>` → `&gt;`, `&` → `&amp;`.
fn escape_javadoc_html_entities(s: &str) -> String {
    // `&` must be rewritten first so the ampersands introduced by the other
    // two replacements are not escaped a second time.
    s.replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
}
662
/// A rustdoc comment split into the sections that binding emitters consume.
///
/// `summary` holds the leading prose before any recognised `# Heading`. Each
/// optional field holds the body of the matching section verbatim, without
/// the `# Heading` line itself; translating bullet lists and code fences into
/// host-native conventions is left to each binding.
///
/// Field contents are trimmed of leading/trailing whitespace so emitters can
/// concatenate them without producing `* ` lines containing only spaces.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct RustdocSections {
    /// Prose that precedes the first `# Section` heading.
    pub summary: String,
    /// Body of `# Arguments`, when the section exists.
    pub arguments: Option<String>,
    /// Body of `# Returns`, when the section exists.
    pub returns: Option<String>,
    /// Body of `# Errors`, when the section exists.
    pub errors: Option<String>,
    /// Body of `# Panics`, when the section exists.
    pub panics: Option<String>,
    /// Body of `# Safety`, when the section exists.
    pub safety: Option<String>,
    /// Body of `# Example` / `# Examples`, when the section exists.
    pub example: Option<String>,
}
690
691/// Parse a rustdoc string into [`RustdocSections`].
692///
693/// Recognises level-1 ATX headings whose name matches one of the standard
694/// rustdoc section names (`Arguments`, `Returns`, `Errors`, `Panics`,
695/// `Safety`, `Example`, `Examples`). Anything before the first heading
696/// becomes `summary`. Unrecognised headings are folded into the
697/// preceding section verbatim, so unconventional rustdoc isn't lost.
698///
699/// The input is expected to already have rustdoc-hidden lines stripped
700/// and intra-doc-link syntax rewritten by
701/// [`crate::extractor::helpers::normalize_rustdoc`].
702pub fn parse_rustdoc_sections(doc: &str) -> RustdocSections {
703    if doc.trim().is_empty() {
704        return RustdocSections::default();
705    }
706    let mut summary = String::new();
707    let mut arguments: Option<String> = None;
708    let mut returns: Option<String> = None;
709    let mut errors: Option<String> = None;
710    let mut panics: Option<String> = None;
711    let mut safety: Option<String> = None;
712    let mut example: Option<String> = None;
713    let mut current: Option<&'static str> = None;
714    let mut buf = String::new();
715    let mut in_fence = false;
716    let flush = |target: Option<&'static str>,
717                 buf: &mut String,
718                 summary: &mut String,
719                 arguments: &mut Option<String>,
720                 returns: &mut Option<String>,
721                 errors: &mut Option<String>,
722                 panics: &mut Option<String>,
723                 safety: &mut Option<String>,
724                 example: &mut Option<String>| {
725        let body = std::mem::take(buf).trim().to_string();
726        if body.is_empty() {
727            return;
728        }
729        match target {
730            None => {
731                if !summary.is_empty() {
732                    summary.push('\n');
733                }
734                summary.push_str(&body);
735            }
736            Some("arguments") => *arguments = Some(body),
737            Some("returns") => *returns = Some(body),
738            Some("errors") => *errors = Some(body),
739            Some("panics") => *panics = Some(body),
740            Some("safety") => *safety = Some(body),
741            Some("example") => *example = Some(body),
742            _ => {}
743        }
744    };
745    for line in doc.lines() {
746        let trimmed = line.trim_start();
747        if trimmed.starts_with("```") {
748            in_fence = !in_fence;
749            buf.push_str(line);
750            buf.push('\n');
751            continue;
752        }
753        if !in_fence {
754            if let Some(rest) = trimmed.strip_prefix("# ") {
755                let head = rest.trim().to_ascii_lowercase();
756                let target = match head.as_str() {
757                    "arguments" | "args" => Some("arguments"),
758                    "returns" => Some("returns"),
759                    "errors" => Some("errors"),
760                    "panics" => Some("panics"),
761                    "safety" => Some("safety"),
762                    "example" | "examples" => Some("example"),
763                    _ => None,
764                };
765                if target.is_some() {
766                    flush(
767                        current,
768                        &mut buf,
769                        &mut summary,
770                        &mut arguments,
771                        &mut returns,
772                        &mut errors,
773                        &mut panics,
774                        &mut safety,
775                        &mut example,
776                    );
777                    current = target;
778                    continue;
779                }
780            }
781        }
782        buf.push_str(line);
783        buf.push('\n');
784    }
785    flush(
786        current,
787        &mut buf,
788        &mut summary,
789        &mut arguments,
790        &mut returns,
791        &mut errors,
792        &mut panics,
793        &mut safety,
794        &mut example,
795    );
796    RustdocSections {
797        summary,
798        arguments,
799        returns,
800        errors,
801        panics,
802        safety,
803        example,
804    }
805}
806
/// Parse `# Arguments` body into `(name, description)` pairs.
///
/// Recognises both Markdown bullet styles `*` and `-`, with optional
/// backticks (or `*` emphasis) around the name: `* `name` - description` or
/// `- name: description`. Continuation lines under a bullet are appended to
/// the previous entry's description, separated by a single space; text
/// before the first bullet is ignored.
///
/// The name ends at whichever of the separators ` - ` and `: ` occurs first,
/// so a description that itself contains the other separator
/// (`- name: first - then second`) stays intact. When neither separator is
/// present the name ends at the first space; a bullet with no space at all is
/// a name with an empty description.
///
/// Used by emitters that translate to per-parameter documentation tags
/// (`@param`, `<param>`, `\param`).
pub fn parse_arguments_bullets(body: &str) -> Vec<(String, String)> {
    let mut out: Vec<(String, String)> = Vec::new();
    for raw in body.lines() {
        let trimmed = raw.trim();
        let bullet = trimmed
            .strip_prefix("* ")
            .or_else(|| trimmed.strip_prefix("- "));
        if let Some(after) = bullet {
            // Candidate separators as (byte offset, separator length).
            let dash = after.find(" - ").map(|idx| (idx, 3));
            let colon = after.find(": ").map(|idx| (idx, 2));
            let separator = match (dash, colon) {
                (Some(d), Some(c)) => Some(if c.0 < d.0 { c } else { d }),
                (d, c) => d.or(c),
            }
            .or_else(|| after.find(' ').map(|idx| (idx, 1)));
            let (name, desc) = match separator {
                Some((idx, len)) => (after[..idx].trim(), after[idx + len..].trim()),
                None => (after.trim(), ""),
            };
            let name = name.trim_matches('`').trim_matches('*').to_string();
            out.push((name, desc.to_string()));
        } else if !trimmed.is_empty() {
            if let Some(last) = out.last_mut() {
                if !last.1.is_empty() {
                    last.1.push(' ');
                }
                last.1.push_str(trimmed);
            }
        }
    }
    out
}
847
/// Detect the language tag on the first code fence in `body`.
///
/// Scans `body` for the first line that starts with ` ``` ` and returns the
/// first token of its info string (e.g. `"php"`, `"typescript"`), where
/// tokens are separated by commas or whitespace. `"rust"` is returned when:
///
/// - no fence is found at all;
/// - the fence is bare (rustdoc treats unlabelled fences as Rust);
/// - the first token is a rustdoc code-block attribute rather than a
///   language (`no_run`, `ignore`, `ignore-<target>`, `should_panic`,
///   `compile_fail`, `test_harness`, `standalone_crate`, `edition…`). Such
///   fences are still Rust code and must not be mistaken for a foreign
///   language.
fn detect_first_fence_lang(body: &str) -> &str {
    /// Whether `tag` is a rustdoc fence attribute (implying Rust code) rather
    /// than a language name.
    fn is_rustdoc_attribute(tag: &str) -> bool {
        matches!(
            tag,
            "ignore"
                | "should_panic"
                | "no_run"
                | "compile_fail"
                | "test_harness"
                | "standalone_crate"
        ) || tag.starts_with("ignore-")
            || tag.starts_with("edition")
    }

    let first_tag = body
        .lines()
        .find_map(|line| line.trim_start().strip_prefix("```"))
        .and_then(|info| {
            info.split(|c: char| c == ',' || c.is_whitespace())
                .find(|token| !token.is_empty())
        });
    match first_tag {
        Some(tag) if !is_rustdoc_attribute(tag) => tag,
        _ => "rust",
    }
}
864
865/// Return `Some(transformed_example)` if the example should be emitted for
866/// `target_lang`, or `None` when the example is Rust source that would be
867/// meaningless in the foreign language.
868///
869/// When the original fence language is `rust` (including bare ` ``` ` which
870/// rustdoc defaults to Rust) and the target is not `rust`, the example is
871/// suppressed entirely — better absent than misleading. Cross-language
872/// transliteration of example bodies is intentionally out of scope.
873pub fn example_for_target(example: &str, target_lang: &str) -> Option<String> {
874    let trimmed = example.trim();
875    let source_lang = detect_first_fence_lang(trimmed);
876    if source_lang == "rust" && target_lang != "rust" {
877        None
878    } else {
879        Some(replace_fence_lang(trimmed, target_lang))
880    }
881}
882
/// Retag the code fences in `body` with `lang_replacement`.
///
/// Every *opening* fence line (` ```rust `, ` ```php `, a bare ` ``` `, …) is
/// rewritten to ` ```<lang_replacement> `. Leading indentation is preserved,
/// and so is anything from the first comma onwards (e.g. `,no_run`). The
/// matching *closing* fence and all other lines are copied through
/// unchanged, so the fence pair stays balanced.
///
/// When no fence is present the body is returned unchanged, apart from
/// trailing newlines, which are always trimmed. Used by emitters that
/// convert fenced examples into ` ```typescript ` / ` ```python ` /
/// ` ```swift ` etc.
pub fn replace_fence_lang(body: &str, lang_replacement: &str) -> String {
    let mut out = String::with_capacity(body.len() + lang_replacement.len());
    // Tracks whether the next fence line opens or closes a block; only
    // opening fences carry a language tag.
    let mut in_fence = false;
    for line in body.lines() {
        let trimmed = line.trim_start();
        match trimmed.strip_prefix("```") {
            Some(rest) if !in_fence => {
                let indent = &line[..line.len() - trimmed.len()];
                // Keep rustdoc-style attributes that follow the language tag.
                let attributes = rest.find(',').map_or("", |comma| &rest[comma..]);
                out.push_str(indent);
                out.push_str("```");
                out.push_str(lang_replacement);
                out.push_str(attributes);
                in_fence = true;
            }
            Some(_) => {
                // Closing fence: emit verbatim so it does not open a new block.
                out.push_str(line);
                in_fence = false;
            }
            None => out.push_str(line),
        }
        out.push('\n');
    }
    let kept = out.trim_end_matches('\n').len();
    out.truncate(kept);
    out
}
911
912/// Render `RustdocSections` as a JSDoc comment body (without the `/**` /
913/// ` */` wrappers — those are added by the caller's emitter, which knows
914/// the indent/escape conventions).
915///
916/// - `# Arguments` → `@param name - desc`
917/// - `# Returns`   → `@returns desc`
918/// - `# Errors`    → `@throws desc`
919/// - `# Example`   → `@example` block. Replaces ` ```rust ` fences with
920///   ` ```typescript ` so the example highlights properly in TypeDoc.
921///
922/// Output is a plain string with `\n` separators; emitters wrap each line
923/// in ` * ` themselves.
924pub fn render_jsdoc_sections(sections: &RustdocSections) -> String {
925    let mut out = String::new();
926    if !sections.summary.is_empty() {
927        out.push_str(&sections.summary);
928    }
929    if let Some(args) = sections.arguments.as_deref() {
930        for (name, desc) in parse_arguments_bullets(args) {
931            if !out.is_empty() {
932                out.push('\n');
933            }
934            if desc.is_empty() {
935                out.push_str(&crate::template_env::render(
936                    "doc_jsdoc_param.jinja",
937                    minijinja::context! { name => &name },
938                ));
939            } else {
940                out.push_str(&crate::template_env::render(
941                    "doc_jsdoc_param_desc.jinja",
942                    minijinja::context! { name => &name, desc => &desc },
943                ));
944            }
945        }
946    }
947    if let Some(ret) = sections.returns.as_deref() {
948        if !out.is_empty() {
949            out.push('\n');
950        }
951        out.push_str(&crate::template_env::render(
952            "doc_jsdoc_returns.jinja",
953            minijinja::context! { content => ret.trim() },
954        ));
955    }
956    if let Some(err) = sections.errors.as_deref() {
957        if !out.is_empty() {
958            out.push('\n');
959        }
960        out.push_str(&crate::template_env::render(
961            "doc_jsdoc_throws.jinja",
962            minijinja::context! { content => err.trim() },
963        ));
964    }
965    if let Some(example) = sections.example.as_deref() {
966        if let Some(body) = example_for_target(example, "typescript") {
967            if !out.is_empty() {
968                out.push('\n');
969            }
970            out.push_str("@example\n");
971            out.push_str(&body);
972        }
973    }
974    out
975}
976
977/// Render `RustdocSections` as a JavaDoc comment body.
978///
979/// - `# Arguments` → `@param name desc` (one per param)
980/// - `# Returns`   → `@return desc`
981/// - `# Errors`    → `@throws KreuzbergRsException desc`
982/// - `# Example`   → `<pre>{@code ...}</pre>` block.
983///
984/// `throws_class` is the FQN/simple name of the exception class to use in
985/// the `@throws` tag (e.g. `"KreuzbergRsException"`).
986pub fn render_javadoc_sections(sections: &RustdocSections, throws_class: &str) -> String {
987    let mut out = String::new();
988    if !sections.summary.is_empty() {
989        out.push_str(&sections.summary);
990    }
991    if let Some(args) = sections.arguments.as_deref() {
992        for (name, desc) in parse_arguments_bullets(args) {
993            if !out.is_empty() {
994                out.push('\n');
995            }
996            if desc.is_empty() {
997                out.push_str(&crate::template_env::render(
998                    "doc_javadoc_param.jinja",
999                    minijinja::context! { name => &name },
1000                ));
1001            } else {
1002                out.push_str(&crate::template_env::render(
1003                    "doc_javadoc_param_desc.jinja",
1004                    minijinja::context! { name => &name, desc => &desc },
1005                ));
1006            }
1007        }
1008    }
1009    if let Some(ret) = sections.returns.as_deref() {
1010        if !out.is_empty() {
1011            out.push('\n');
1012        }
1013        out.push_str(&crate::template_env::render(
1014            "doc_javadoc_return.jinja",
1015            minijinja::context! { content => ret.trim() },
1016        ));
1017    }
1018    if let Some(err) = sections.errors.as_deref() {
1019        if !out.is_empty() {
1020            out.push('\n');
1021        }
1022        out.push_str(&crate::template_env::render(
1023            "doc_javadoc_throws.jinja",
1024            minijinja::context! { throws_class => throws_class, content => err.trim() },
1025        ));
1026    }
1027    out
1028}
1029
1030/// Render `RustdocSections` as a C# XML doc comment body (without the
1031/// `/// ` line prefixes — the emitter adds those).
1032///
1033/// - summary  → `<summary>...</summary>`
1034/// - args     → `<param name="x">desc</param>` (one per arg)
1035/// - returns  → `<returns>desc</returns>`
1036/// - errors   → `<exception cref="KreuzbergException">desc</exception>`
1037/// - example  → `<example><code language="csharp">...</code></example>`
1038pub fn render_csharp_xml_sections(sections: &RustdocSections, exception_class: &str) -> String {
1039    let mut out = String::new();
1040    out.push_str("<summary>\n");
1041    let summary = if sections.summary.is_empty() {
1042        ""
1043    } else {
1044        sections.summary.as_str()
1045    };
1046    for line in summary.lines() {
1047        out.push_str(line);
1048        out.push('\n');
1049    }
1050    out.push_str("</summary>");
1051    if let Some(args) = sections.arguments.as_deref() {
1052        for (name, desc) in parse_arguments_bullets(args) {
1053            out.push('\n');
1054            if desc.is_empty() {
1055                out.push_str(&crate::template_env::render(
1056                    "doc_csharp_param.jinja",
1057                    minijinja::context! { name => &name },
1058                ));
1059            } else {
1060                out.push_str(&crate::template_env::render(
1061                    "doc_csharp_param_desc.jinja",
1062                    minijinja::context! { name => &name, desc => &desc },
1063                ));
1064            }
1065        }
1066    }
1067    if let Some(ret) = sections.returns.as_deref() {
1068        out.push('\n');
1069        out.push_str(&crate::template_env::render(
1070            "doc_csharp_returns.jinja",
1071            minijinja::context! { content => ret.trim() },
1072        ));
1073    }
1074    if let Some(err) = sections.errors.as_deref() {
1075        out.push('\n');
1076        out.push_str(&crate::template_env::render(
1077            "doc_csharp_exception.jinja",
1078            minijinja::context! {
1079                exception_class => exception_class,
1080                content => err.trim(),
1081            },
1082        ));
1083    }
1084    if let Some(example) = sections.example.as_deref() {
1085        out.push('\n');
1086        out.push_str("<example><code language=\"csharp\">\n");
1087        // Drop fence markers, keep code.
1088        for line in example.lines() {
1089            let t = line.trim_start();
1090            if t.starts_with("```") {
1091                continue;
1092            }
1093            out.push_str(line);
1094            out.push('\n');
1095        }
1096        out.push_str("</code></example>");
1097    }
1098    out
1099}
1100
1101/// Render `RustdocSections` as a PHPDoc comment body.
1102///
1103/// - `# Arguments` → `@param mixed $name desc`
1104/// - `# Returns`   → `@return desc`
1105/// - `# Errors`    → `@throws KreuzbergException desc`
1106/// - `# Example`   → ` ```php ` fence (replaces ` ```rust `).
1107pub fn render_phpdoc_sections(sections: &RustdocSections, throws_class: &str) -> String {
1108    let mut out = String::new();
1109    if !sections.summary.is_empty() {
1110        out.push_str(&sections.summary);
1111    }
1112    if let Some(args) = sections.arguments.as_deref() {
1113        for (name, desc) in parse_arguments_bullets(args) {
1114            if !out.is_empty() {
1115                out.push('\n');
1116            }
1117            if desc.is_empty() {
1118                out.push_str(&crate::template_env::render(
1119                    "doc_phpdoc_param.jinja",
1120                    minijinja::context! { name => &name },
1121                ));
1122            } else {
1123                out.push_str(&crate::template_env::render(
1124                    "doc_phpdoc_param_desc.jinja",
1125                    minijinja::context! { name => &name, desc => &desc },
1126                ));
1127            }
1128        }
1129    }
1130    if let Some(ret) = sections.returns.as_deref() {
1131        if !out.is_empty() {
1132            out.push('\n');
1133        }
1134        out.push_str(&crate::template_env::render(
1135            "doc_phpdoc_return.jinja",
1136            minijinja::context! { content => ret.trim() },
1137        ));
1138    }
1139    if let Some(err) = sections.errors.as_deref() {
1140        if !out.is_empty() {
1141            out.push('\n');
1142        }
1143        out.push_str(&crate::template_env::render(
1144            "doc_phpdoc_throws.jinja",
1145            minijinja::context! { throws_class => throws_class, content => err.trim() },
1146        ));
1147    }
1148    if let Some(example) = sections.example.as_deref() {
1149        if let Some(body) = example_for_target(example, "php") {
1150            if !out.is_empty() {
1151                out.push('\n');
1152            }
1153            out.push_str(&body);
1154        }
1155    }
1156    out
1157}
1158
1159/// Render `RustdocSections` as a Doxygen comment body for the C header.
1160///
1161/// - args    → `\param name desc`
1162/// - returns → `\return desc`
1163/// - errors  → prose paragraph (Doxygen has no semantic tag for FFI errors)
1164/// - example → `\code` ... `\endcode`
1165pub fn render_doxygen_sections(sections: &RustdocSections) -> String {
1166    let mut out = String::new();
1167    if !sections.summary.is_empty() {
1168        out.push_str(&sections.summary);
1169    }
1170    if let Some(args) = sections.arguments.as_deref() {
1171        for (name, desc) in parse_arguments_bullets(args) {
1172            if !out.is_empty() {
1173                out.push('\n');
1174            }
1175            if desc.is_empty() {
1176                out.push_str(&crate::template_env::render(
1177                    "doc_doxygen_param.jinja",
1178                    minijinja::context! { name => &name },
1179                ));
1180            } else {
1181                out.push_str(&crate::template_env::render(
1182                    "doc_doxygen_param_desc.jinja",
1183                    minijinja::context! { name => &name, desc => &desc },
1184                ));
1185            }
1186        }
1187    }
1188    if let Some(ret) = sections.returns.as_deref() {
1189        if !out.is_empty() {
1190            out.push('\n');
1191        }
1192        out.push_str(&crate::template_env::render(
1193            "doc_doxygen_return.jinja",
1194            minijinja::context! { content => ret.trim() },
1195        ));
1196    }
1197    if let Some(err) = sections.errors.as_deref() {
1198        if !out.is_empty() {
1199            out.push('\n');
1200        }
1201        out.push_str(&crate::template_env::render(
1202            "doc_doxygen_errors.jinja",
1203            minijinja::context! { content => err.trim() },
1204        ));
1205    }
1206    if let Some(example) = sections.example.as_deref() {
1207        if !out.is_empty() {
1208            out.push('\n');
1209        }
1210        out.push_str("\\code\n");
1211        for line in example.lines() {
1212            let t = line.trim_start();
1213            if t.starts_with("```") {
1214                continue;
1215            }
1216            out.push_str(line);
1217            out.push('\n');
1218        }
1219        out.push_str("\\endcode");
1220    }
1221    out
1222}
1223
/// Return the first paragraph of a doc comment collapsed onto one line.
///
/// Lines are consumed up to (not including) the first blank or
/// whitespace-only line. Each one is trimmed and the pieces are joined with
/// a single space. A wrapped sentence such as
///
/// ```text
/// Convert HTML to Markdown, returning
/// a `ConversionResult`.
/// ```
///
/// therefore comes back whole instead of being cut off after the first
/// physical line, as `.lines().next()` would do. If `doc` is empty or starts
/// with a blank line the result is an empty string.
pub fn doc_first_paragraph_joined(doc: &str) -> String {
    let mut paragraph = String::new();
    for line in doc.lines() {
        let piece = line.trim();
        if piece.is_empty() {
            break;
        }
        if !paragraph.is_empty() {
            paragraph.push(' ');
        }
        paragraph.push_str(piece);
    }
    paragraph
}
1243
/// Target documentation dialect for [`sanitize_rust_idioms`].
///
/// Each variant selects the idiomatic mapping for Rust constructs that do not
/// translate directly to foreign-language doc syntax (for example what `None`
/// or `Option<T>` is rewritten to in prose).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocTarget {
    /// PHPDoc (`/** ... */`) comments for PHP.
    PhpDoc,
    /// Javadoc (`/** ... */`) comments for Java.
    JavaDoc,
    /// TSDoc (`/** ... */`), the TypeScript variant of JSDoc.
    TsDoc,
    /// JSDoc (`/** ... */`) comments for JavaScript.
    JsDoc,
    /// C# XML doc (`/// <summary>...</summary>`).
    ///
    /// In addition to the shared transformations, sanitising for this target
    /// XML-escapes any remaining `<` / `>` / `&` so the result is safe to
    /// embed inside a `<summary>` element. With [`sanitize_rust_idioms`] it
    /// also discards everything from the first rustdoc section heading
    /// (`# Examples`, `# Arguments`, `# Returns`, etc.) onwards.
    CSharpDoc,
}
1266
/// Sanitize Rust-specific idioms in a prose string for the given foreign-language
/// documentation target.
///
/// Most transformations are applied **outside** backtick spans and code fences
/// only, so inline code and non-Rust fenced blocks are not otherwise mutated.
/// The exceptions are the `::` → `.` rewrite and the `.unwrap()` / `.expect()`
/// stripping, which are applied to the whole prose line including backtick
/// spans. ` ```rust ` fences and unmarked ` ``` ` code blocks are dropped
/// entirely for every target ([`DocTarget::TsDoc`], [`DocTarget::JsDoc`],
/// [`DocTarget::PhpDoc`], [`DocTarget::JavaDoc`] and [`DocTarget::CSharpDoc`]).
///
/// For [`DocTarget::CSharpDoc`] this entry point additionally discards the
/// first rustdoc section heading and everything after it, and XML-escapes the
/// result. Use [`sanitize_rust_idioms_keep_sections`] to keep section bodies.
///
/// # Transformations
///
/// - Intra-doc links `` [`Type::method`] `` → `` `Type.method` ``.
/// - `[`Foo`]` (backtick inside square brackets) → `` `Foo` ``.
/// - `None` (word boundary) → `null` (PHP/Java) or `undefined` (TS/JS).
/// - `Some(x)` → `the value (x)`.
/// - `Option<T>` → `T?` (PHP) / `T | null` (Java) / `T | undefined` (TS/JS).
/// - `Vec<u8>` → `string` (PHP) / `byte[]` (Java) / `Uint8Array` (TS/JS).
/// - `Vec<T>` → `T[]` (all targets).
/// - `HashMap<K, V>` → `array<K, V>` (PHP) / `Map<K, V>` (Java) / `Record<K, V>` (TS/JS).
/// - `Arc<T>`, `Box<T>`, `Mutex<T>`, `RwLock<T>`, `Rc<T>`, `Cell<T>`, `RefCell<T>` → `T`.
/// - `Send + Sync`, `Send`, `Sync`, `'static` → stripped.
/// - Standalone `::` between identifiers → `.`.
/// - `pub fn `, `crate::`, `&self`, `&mut self` → stripped.
/// - `#[…]` attribute macros on their own line or inline → stripped.
/// - `.unwrap()`, `.expect("…")` → stripped.
/// - ` ```rust ` and unmarked ` ``` ` code fences → dropped entirely.
pub fn sanitize_rust_idioms(text: &str, target: DocTarget) -> String {
    // For C# XML doc the default is to drop rustdoc section headings
    // (`# Examples`, `# Arguments`, …) and the remainder of the comment,
    // because those bodies routinely contain content that cannot be embedded
    // safely inside `<summary>`. Callers that have already extracted sections
    // (`emit_csharp_doc`) sanitise each section body via [`sanitize_rust_idioms_keep_sections`].
    // The `true` flag requests that truncation; it has no effect on other targets.
    sanitize_rust_idioms_inner(text, target, true)
}
1301
/// Same as [`sanitize_rust_idioms`] but never drops rustdoc section headings.
///
/// Used by emitters that have already split the doc into sections and need to
/// sanitise each body fragment independently (e.g. C# XML doc emission with
/// per-section `<param>` / `<returns>` / `<exception>` tags). All other
/// sanitisation, including the XML escaping for [`DocTarget::CSharpDoc`],
/// still applies.
pub fn sanitize_rust_idioms_keep_sections(text: &str, target: DocTarget) -> String {
    // `false` disables the C#-only "truncate at the first section heading" step.
    sanitize_rust_idioms_inner(text, target, false)
}
1310
1311fn sanitize_rust_idioms_inner(text: &str, target: DocTarget, drop_csharp_sections: bool) -> String {
1312    let mut out = String::with_capacity(text.len());
1313    let mut in_rust_fence = false;
1314    let mut in_other_fence = false;
1315    // For C# XML doc: once a `# Examples` / `# Arguments` / etc. heading is
1316    // encountered, drop the entire remainder of the comment. Rustdoc section
1317    // headings cannot be safely embedded inside `<summary>` and the per-section
1318    // content (code fences, intra-doc links, generics) is the leading cause
1319    // of CS1002/CS1519 leakage. The plain `<summary>` path collapses to the
1320    // top-level prose only.
1321    let mut csharp_section_dropped = false;
1322
1323    for line in text.lines() {
1324        if csharp_section_dropped {
1325            continue;
1326        }
1327        let trimmed = line.trim_start();
1328        if drop_csharp_sections
1329            && matches!(target, DocTarget::CSharpDoc)
1330            && !in_rust_fence
1331            && !in_other_fence
1332            && is_rustdoc_section_heading(trimmed)
1333        {
1334            csharp_section_dropped = true;
1335            continue;
1336        }
1337
1338        // Detect code fence boundaries.
1339        if let Some(rest) = trimmed.strip_prefix("```") {
1340            if in_rust_fence {
1341                // Closing fence of a rust block.
1342                in_rust_fence = false;
1343                match target {
1344                    DocTarget::TsDoc | DocTarget::JsDoc | DocTarget::CSharpDoc | DocTarget::PhpDoc | DocTarget::JavaDoc => {
1345                        // Entire rust block dropped — don't emit closing fence.
1346                    }
1347                }
1348                continue;
1349            }
1350            if in_other_fence {
1351                // Closing fence of a non-rust block.
1352                in_other_fence = false;
1353                out.push_str(line);
1354                out.push('\n');
1355                continue;
1356            }
1357            // Opening fence — determine language.
1358            let lang = rest.split(',').next().unwrap_or("").trim();
1359            let is_rust = lang.is_empty() || lang == "rust" || lang.starts_with("rust,");
1360            if is_rust {
1361                in_rust_fence = true;
1362                match target {
1363                    DocTarget::TsDoc | DocTarget::JsDoc | DocTarget::CSharpDoc | DocTarget::PhpDoc | DocTarget::JavaDoc => {
1364                        // Drop the entire rust fence block — skip opening line.
1365                        // Rust code examples are not portable to any of the target languages.
1366                    }
1367                }
1368                continue;
1369            }
1370            // Non-rust fence: pass through verbatim.
1371            in_other_fence = true;
1372            out.push_str(line);
1373            out.push('\n');
1374            continue;
1375        }
1376
1377        // Inside a rust fence.
1378        if in_rust_fence {
1379            match target {
1380                DocTarget::TsDoc | DocTarget::JsDoc | DocTarget::CSharpDoc | DocTarget::PhpDoc | DocTarget::JavaDoc => {
1381                    // Drop content of rust fences — all targets filter out Rust code examples.
1382                }
1383            }
1384            continue;
1385        }
1386
1387        // Inside a non-rust fence: pass through verbatim.
1388        if in_other_fence {
1389            out.push_str(line);
1390            out.push('\n');
1391            continue;
1392        }
1393
1394        // Check if this line is a bare `#[...]` attribute line.
1395        let stripped_indent = line.trim_start();
1396        if stripped_indent.starts_with("#[") && stripped_indent.ends_with(']') {
1397            // Attribute-only line — drop entirely.
1398            continue;
1399        }
1400
1401        // Normal prose line: apply token-level transformations.
1402        let sanitized = apply_prose_transforms(line, target);
1403        out.push_str(&sanitized);
1404        out.push('\n');
1405    }
1406
1407    // Trim trailing newline added by the loop (preserve internal newlines).
1408    if out.ends_with('\n') && !text.ends_with('\n') {
1409        out.pop();
1410    }
1411
1412    // For C# XML doc, escape any remaining `<`, `>`, `&` so the result is
1413    // safe to embed inside `<summary>...</summary>`. By this point the
1414    // Rust-idiom substitutions have replaced `Vec<T>` / `Option<T>` /
1415    // `HashMap<K, V>` / `Result<T, E>` with their idiomatic forms, but
1416    // unrecognised generic constructs (e.g. trait-object references) may
1417    // still contain raw angle brackets that would break C# XML parsing.
1418    if matches!(target, DocTarget::CSharpDoc) {
1419        out = xml_escape_for_csharp(&out);
1420    }
1421
1422    out
1423}
1424
/// Report whether `trimmed` (a line with leading whitespace already removed)
/// is one of the recognised rustdoc section headings.
///
/// The line must start with `# ` (a level-one heading). The remaining text,
/// trimmed, is compared ASCII-case-insensitively against: `Arguments`,
/// `Args`, `Returns`, `Errors`, `Panics`, `Safety`, `Example`, `Examples`.
fn is_rustdoc_section_heading(trimmed: &str) -> bool {
    const SECTION_NAMES: [&str; 8] = [
        "arguments",
        "args",
        "returns",
        "errors",
        "panics",
        "safety",
        "example",
        "examples",
    ];
    match trimmed.strip_prefix("# ") {
        Some(rest) => {
            let title = rest.trim();
            SECTION_NAMES.iter().any(|name| title.eq_ignore_ascii_case(name))
        }
        None => false,
    }
}
1438
/// Entity-escape the XML-significant characters `&`, `<` and `>` so `s` can
/// be embedded inside a C# `<summary>` element.
///
/// Angle brackets can legitimately remain in prose after the Rust-idiom
/// substitutions (e.g. `Dictionary<K, V>`), but they are still markup to an
/// XML parser and must be written as `&lt;` / `&gt;`; `&` becomes `&amp;`.
/// All other characters are copied unchanged.
fn xml_escape_for_csharp(s: &str) -> String {
    s.chars().fold(String::with_capacity(s.len()), |mut escaped, ch| {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
        escaped
    })
}
1457
1458/// Apply prose-level Rust-idiom transformations to a single line.
1459///
1460/// Some transformations span or precede backtick boundaries and must be applied
1461/// to the full line before tokenisation:
1462///
1463/// 1. Intra-doc links (`` [`...`] ``) — they wrap a backtick pair.
1464/// 2. `::` path separator — even inside backtick spans it should become `.`
1465///    for all foreign-language targets, since the target language uses `.` for
1466///    member access and package paths in code examples too.
1467///
1468/// All remaining transformations are applied only to literal (non-code) segments
1469/// after tokenisation.
1470fn apply_prose_transforms(line: &str, target: DocTarget) -> String {
1471    // Step 1: replace intra-doc links before tokenisation (they span backtick pairs).
1472    let line = replace_intradoc_links(line, target);
1473
1474    // Step 2: replace :: everywhere (including inside backtick spans).
1475    // All targets use `.` as the member/package separator, so this is always safe.
1476    let line = replace_path_separator(&line);
1477
1478    // Step 3: strip .unwrap() and .expect() everywhere, including inside backtick spans,
1479    // since these Rust error-handling idioms are meaningless in all target languages.
1480    let line = strip_unwrap_expect(&line);
1481
1482    // Step 4: tokenise and apply remaining transforms only to literal segments.
1483    let segments = tokenize_backtick_spans(&line);
1484    let mut result = String::with_capacity(line.len());
1485    for (is_code, span) in segments {
1486        if is_code {
1487            result.push('`');
1488            result.push_str(span);
1489            result.push('`');
1490        } else {
1491            result.push_str(&transform_prose_segment(span, target));
1492        }
1493    }
1494    result
1495}
1496
/// Split a line into literal and inline-code segments.
///
/// Returns `(is_code, text)` pairs in source order. `is_code` is `true` for
/// the content between a matched pair of backticks (delimiters excluded) and
/// `false` for literal text. Empty literal segments are not emitted, but an
/// empty code span is. A backtick with no closing partner is not a
/// delimiter: it and the rest of the line form one literal segment.
fn tokenize_backtick_spans(line: &str) -> Vec<(bool, &str)> {
    let mut spans = Vec::new();
    let mut rest = line;

    while let Some(open) = rest.find('`') {
        if open > 0 {
            spans.push((false, &rest[..open]));
        }
        let after_open = &rest[open + 1..];
        match after_open.find('`') {
            Some(close) => {
                spans.push((true, &after_open[..close]));
                rest = &after_open[close + 1..];
            }
            None => {
                // Unmatched backtick: keep it, and what follows, as literal text.
                spans.push((false, &rest[open..]));
                rest = "";
            }
        }
    }

    if !rest.is_empty() {
        spans.push((false, rest));
    }
    spans
}
1537
1538/// Apply all prose-level Rust substitutions to a literal text segment.
1539///
1540/// Intra-doc links have already been replaced by `apply_prose_transforms`
1541/// before tokenisation; this function handles the remaining transformations.
1542fn transform_prose_segment(text: &str, target: DocTarget) -> String {
1543    let mut s = text.to_string();
1544
1545    // 1. Strip #[derive(...)] and other inline attribute-style references.
1546    s = strip_inline_attributes(&s);
1547
1548    // 2. Strip pub fn, crate::, &self, &mut self.
1549    s = s.replace("pub fn ", "");
1550    s = s.replace("crate::", "");
1551    s = s.replace("&mut self", "");
1552    s = s.replace("&self", "");
1553
1554    // 3. Strip lifetime and bound markers.
1555    s = strip_lifetime_and_bounds(&s);
1556
1557    // 4. Type substitutions (order matters — most specific first).
1558    s = replace_type_wrappers(&s, target);
1559
1560    // 5. Some(x) -> the value (x).
1561    s = replace_some_calls(&s);
1562
1563    // 5b. Bare "Some <lowercase>" in prose -> drop "Some ".
1564    s = replace_some_keyword_in_prose(&s);
1565
1566    // 6. None -> null / undefined (word boundary, uppercase only).
1567    s = replace_none_keyword(&s, target);
1568
1569    // Note: :: -> . and .unwrap()/.expect() stripping are applied to the full
1570    // line before tokenisation in apply_prose_transforms and therefore do not
1571    // need to be repeated here.
1572
1573    s
1574}
1575
/// Copy the character that starts at byte offset `i` of `s` into `out` and
/// return the byte offset just past it.
///
/// The byte-crawling helpers in this file only search for ASCII bytes. When
/// nothing matches they must step over one whole character rather than one
/// byte, otherwise a multi-byte UTF-8 sequence would be split.
///
/// # Panics
///
/// Panics if `i` is not a char boundary of `s`, or if no character starts at
/// `i` (that is, `i == s.len()`). Callers only advance past ASCII bytes or
/// via this function, so `i` is expected to be a valid boundary.
#[inline]
fn advance_char(s: &str, out: &mut String, i: usize) -> usize {
    let mut tail = s[i..].chars();
    let ch = tail.next().expect("valid UTF-8 position");
    out.push(ch);
    // Whatever the iterator has not consumed starts right after `ch`.
    s.len() - tail.as_str().len()
}
1591
1592/// Replace `` [`Type::method()`] `` and `` [`Foo`] `` intra-doc links with
1593/// backtick-wrapped identifiers, converting `::` to `.`.
1594fn replace_intradoc_links(s: &str, _target: DocTarget) -> String {
1595    let mut out = String::with_capacity(s.len());
1596    let bytes = s.as_bytes();
1597    let mut i = 0;
1598    while i < bytes.len() {
1599        // Look for [`
1600        if i + 1 < bytes.len() && bytes[i] == b'[' && bytes[i + 1] == b'`' {
1601            // Find closing `]
1602            let search_start = i + 2;
1603            let mut found = false;
1604            let mut j = search_start;
1605            while j + 1 < bytes.len() {
1606                if bytes[j] == b'`' && bytes[j + 1] == b']' {
1607                    let inner = &s[search_start..j];
1608                    // Convert :: to . in the inner part.
1609                    let converted = inner.replace("::", ".");
1610                    out.push('`');
1611                    out.push_str(&converted);
1612                    out.push('`');
1613                    i = j + 2;
1614                    found = true;
1615                    break;
1616                }
1617                j += 1;
1618            }
1619            if !found {
1620                i = advance_char(s, &mut out, i);
1621            }
1622        } else {
1623            i = advance_char(s, &mut out, i);
1624        }
1625    }
1626    out
1627}
1628
1629/// Strip inline `#[...]` attribute references (not on their own line — those
1630/// are handled as full-line drops in the main loop).
1631fn strip_inline_attributes(s: &str) -> String {
1632    let mut out = String::with_capacity(s.len());
1633    let bytes = s.as_bytes();
1634    let mut i = 0;
1635    while i < bytes.len() {
1636        if bytes[i] == b'#' && i + 1 < bytes.len() && bytes[i + 1] == b'[' {
1637            // Skip until matching ']', handling nesting.
1638            let mut depth = 0usize;
1639            let mut j = i + 1;
1640            while j < bytes.len() {
1641                if bytes[j] == b'[' {
1642                    depth += 1;
1643                } else if bytes[j] == b']' {
1644                    depth -= 1;
1645                    if depth == 0 {
1646                        i = j + 1;
1647                        break;
1648                    }
1649                }
1650                j += 1;
1651            }
1652            if depth != 0 {
1653                // Unmatched bracket: emit literally.
1654                i = advance_char(s, &mut out, i);
1655            }
1656        } else {
1657            i = advance_char(s, &mut out, i);
1658        }
1659    }
1660    out
1661}
1662
1663/// Strip `'static`, `Send + Sync`, `Send`, `Sync` from prose text.
1664fn strip_lifetime_and_bounds(s: &str) -> String {
1665    // Order matters: match compound forms before simple forms.
1666    let mut out = s.to_string();
1667    // Strip `Send + Sync` (with optional spaces around `+`).
1668    out = regex_replace_all(&out, r"Send\s*\+\s*Sync", "");
1669    out = regex_replace_all(&out, r"Sync\s*\+\s*Send", "");
1670    // Strip standalone Send/Sync only at word boundaries.
1671    out = regex_replace_word_boundary(&out, "Send", "");
1672    out = regex_replace_word_boundary(&out, "Sync", "");
1673    // Strip 'static lifetime markers.
1674    out = regex_replace_all(&out, r"'\s*static\b", "");
1675    out
1676}
1677
1678/// Replace occurrences of `pattern` (treated as a simple substring pattern
1679/// with `\s*` only, no full regex) with `replacement` in `s`.
1680///
1681/// This is a lightweight regex-free replacement for simple patterns that
1682/// only need literal text or `\s*` between tokens.
1683fn regex_replace_all(s: &str, pattern: &str, replacement: &str) -> String {
1684    // Inline tiny pattern compiler for the three patterns we actually use.
1685    match pattern {
1686        r"Send\s*\+\s*Sync" => replace_with_optional_spaces(s, "Send", "+", "Sync", replacement),
1687        r"Sync\s*\+\s*Send" => replace_with_optional_spaces(s, "Sync", "+", "Send", replacement),
1688        r"'\s*static\b" => replace_static_lifetime(s, replacement),
1689        _ => s.replace(pattern, replacement),
1690    }
1691}
1692
1693/// Replace `word_boundary(keyword)` occurrences in `s` with `replacement`.
1694fn regex_replace_word_boundary(s: &str, keyword: &str, replacement: &str) -> String {
1695    let mut out = String::with_capacity(s.len());
1696    let klen = keyword.len();
1697    let bytes = s.as_bytes();
1698    let kbytes = keyword.as_bytes();
1699    if klen == 0 || klen > bytes.len() {
1700        return s.to_string();
1701    }
1702    let mut i = 0;
1703    while i + klen <= bytes.len() {
1704        if &bytes[i..i + klen] == kbytes {
1705            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1706            let after_ok =
1707                i + klen >= bytes.len() || !bytes[i + klen].is_ascii_alphanumeric() && bytes[i + klen] != b'_';
1708            if before_ok && after_ok {
1709                out.push_str(replacement);
1710                i += klen;
1711                continue;
1712            }
1713        }
1714        i = advance_char(s, &mut out, i);
1715    }
1716    if i < bytes.len() {
1717        out.push_str(&s[i..]);
1718    }
1719    out
1720}
1721
/// Replace every `a <spaces> op <spaces> b` triplet in `s` with
/// `replacement`.
///
/// `<spaces>` is zero or more ASCII space characters (`' '`); tabs and other
/// whitespace do not count. Matching is left-to-right and non-overlapping.
/// Text that is not part of a triplet is copied through unchanged.
///
/// A "match" that would consume no input (only possible when `a`, `op` and
/// `b` are all empty) is ignored, so the scan always makes progress.
fn replace_with_optional_spaces(s: &str, a: &str, op: &str, b: &str, replacement: &str) -> String {
    // If `text` starts with a full triplet, return what follows it.
    fn triplet_tail<'t>(text: &'t str, a: &str, op: &str, b: &str) -> Option<&'t str> {
        let after_a = text.strip_prefix(a)?;
        let after_op = after_a.trim_start_matches(' ').strip_prefix(op)?;
        after_op.trim_start_matches(' ').strip_prefix(b)
    }

    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(ch) = rest.chars().next() {
        match triplet_tail(rest, a, op, b) {
            // Only accept matches that actually consume something.
            Some(tail) if tail.len() < rest.len() => {
                out.push_str(replacement);
                rest = tail;
            }
            _ => {
                out.push(ch);
                rest = &rest[ch.len_utf8()..];
            }
        }
    }
    out
}
1761
1762/// Replace `'static` lifetime markers (with optional spaces after `'`).
1763fn replace_static_lifetime(s: &str, replacement: &str) -> String {
1764    let mut out = String::with_capacity(s.len());
1765    let bytes = s.as_bytes();
1766    let mut i = 0;
1767    while i < bytes.len() {
1768        if bytes[i] == b'\'' {
1769            // Peek ahead skipping spaces.
1770            let mut j = i + 1;
1771            while j < bytes.len() && bytes[j] == b' ' {
1772                j += 1;
1773            }
1774            let keyword = b"static";
1775            if bytes[j..].starts_with(keyword) {
1776                let end = j + keyword.len();
1777                // Must be followed by non-identifier char or end.
1778                let after_ok = end >= bytes.len() || !bytes[end].is_ascii_alphanumeric() && bytes[end] != b'_';
1779                if after_ok {
1780                    out.push_str(replacement);
1781                    i = end;
1782                    continue;
1783                }
1784            }
1785        }
1786        i = advance_char(s, &mut out, i);
1787    }
1788    out
1789}
1790
1791/// Replace Rust generic type wrappers in prose.
1792fn replace_type_wrappers(s: &str, target: DocTarget) -> String {
1793    // Order: most specific patterns first.
1794    let mut out = s.to_string();
1795
1796    // Vec<u8> — must come before Vec<T>.
1797    let vec_u8_replacement = match target {
1798        DocTarget::PhpDoc => "string",
1799        DocTarget::JavaDoc => "byte[]",
1800        DocTarget::TsDoc | DocTarget::JsDoc => "Uint8Array",
1801        DocTarget::CSharpDoc => "byte[]",
1802    };
1803    out = replace_generic1(&out, "Vec", "u8", vec_u8_replacement);
1804
1805    // HashMap<K, V> — must come before Vec<T> to avoid order-dependency issues.
1806    let map_replacement_fn = |k: &str, v: &str| match target {
1807        DocTarget::PhpDoc => format!("array<{k}, {v}>"),
1808        DocTarget::JavaDoc => format!("Map<{k}, {v}>"),
1809        DocTarget::TsDoc | DocTarget::JsDoc => format!("Record<{k}, {v}>"),
1810        DocTarget::CSharpDoc => format!("Dictionary<{k}, {v}>"),
1811    };
1812    out = replace_generic2(&out, "HashMap", &map_replacement_fn);
1813
1814    // Vec<T> — generic.
1815    out = replace_generic1_passthrough(&out, "Vec", |inner| format!("{inner}[]"));
1816
1817    // Option<T>.
1818    let option_replacement_fn = |inner: &str| match target {
1819        DocTarget::PhpDoc => format!("{inner}?"),
1820        DocTarget::JavaDoc => format!("{inner} | null"),
1821        DocTarget::TsDoc | DocTarget::JsDoc => format!("{inner} | undefined"),
1822        DocTarget::CSharpDoc => format!("{inner}?"),
1823    };
1824    out = replace_generic1_passthrough(&out, "Option", option_replacement_fn);
1825
1826    // Result<T, E> — drop the error type, keep the success type.
1827    // C# has no Result type; the binding throws exceptions, so just the success type
1828    // is meaningful in prose. We do this for C# only; other targets historically left
1829    // `Result<T, E>` unchanged (their tests assert nothing about it).
1830    if matches!(target, DocTarget::CSharpDoc) {
1831        out = replace_generic2(&out, "Result", &|t: &str, _e: &str| t.to_string());
1832    }
1833
1834    // Smart pointer wrappers: strip to inner type.
1835    for wrapper in &["Arc", "Box", "Mutex", "RwLock", "Rc", "Cell", "RefCell"] {
1836        out = replace_generic1_passthrough(&out, wrapper, |inner| inner.to_string());
1837    }
1838
1839    out
1840}
1841
/// Replace `Name<Arg>` where `Arg` is an exact literal (e.g. `Vec<u8>`) with
/// `replacement`.
///
/// Like the other generic rewriters in this file, a match is only accepted
/// at a word boundary: if the byte before `Name` is an ASCII alphanumeric or
/// `_` (as in `SmallVec<u8>`), the text is left untouched.
fn replace_generic1(s: &str, name: &str, arg: &str, replacement: &str) -> String {
    let pattern = format!("{name}<{arg}>");
    let bytes = s.as_bytes();
    let mut out = String::with_capacity(s.len());
    // Start of the input that has not been copied to `out` yet.
    let mut copied_to = 0;
    for (start, matched) in s.match_indices(pattern.as_str()) {
        let at_boundary =
            start == 0 || !(bytes[start - 1].is_ascii_alphanumeric() || bytes[start - 1] == b'_');
        if at_boundary {
            out.push_str(&s[copied_to..start]);
            out.push_str(replacement);
            copied_to = start + matched.len();
        }
    }
    out.push_str(&s[copied_to..]);
    out
}
1847
1848/// Replace `Name<T>` → `f(T)` for an arbitrary inner type expression.
1849///
1850/// Handles nested generics by counting angle-bracket depth.
1851fn replace_generic1_passthrough<F>(s: &str, name: &str, f: F) -> String
1852where
1853    F: Fn(&str) -> String,
1854{
1855    let mut out = String::with_capacity(s.len());
1856    let mut i = 0;
1857    let prefix = format!("{name}<");
1858    let pbytes = prefix.as_bytes();
1859    let bytes = s.as_bytes();
1860
1861    while i < bytes.len() {
1862        if bytes[i..].starts_with(pbytes) {
1863            // Check that the char before is not alphanumeric (word boundary).
1864            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1865            if before_ok {
1866                let inner_start = i + pbytes.len();
1867                // Find the matching '>'.
1868                let mut depth = 1usize;
1869                let mut j = inner_start;
1870                while j < bytes.len() {
1871                    match bytes[j] {
1872                        b'<' => depth += 1,
1873                        b'>' => {
1874                            depth -= 1;
1875                            if depth == 0 {
1876                                break;
1877                            }
1878                        }
1879                        _ => {}
1880                    }
1881                    j += 1;
1882                }
1883                if depth == 0 && j < bytes.len() {
1884                    let inner = &s[inner_start..j];
1885                    out.push_str(&f(inner));
1886                    i = j + 1;
1887                    continue;
1888                }
1889            }
1890        }
1891        i = advance_char(s, &mut out, i);
1892    }
1893    out
1894}
1895
1896/// Replace `Name<K, V>` → `f(K, V)` for two-argument generics (e.g. `HashMap`).
1897fn replace_generic2<F>(s: &str, name: &str, f: &F) -> String
1898where
1899    F: Fn(&str, &str) -> String,
1900{
1901    let mut out = String::with_capacity(s.len());
1902    let mut i = 0;
1903    let prefix = format!("{name}<");
1904    let pbytes = prefix.as_bytes();
1905    let bytes = s.as_bytes();
1906
1907    while i < bytes.len() {
1908        if bytes[i..].starts_with(pbytes) {
1909            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1910            if before_ok {
1911                let inner_start = i + pbytes.len();
1912                // Find the matching '>' respecting nesting.
1913                let mut depth = 1usize;
1914                let mut j = inner_start;
1915                while j < bytes.len() {
1916                    match bytes[j] {
1917                        b'<' => depth += 1,
1918                        b'>' => {
1919                            depth -= 1;
1920                            if depth == 0 {
1921                                break;
1922                            }
1923                        }
1924                        _ => {}
1925                    }
1926                    j += 1;
1927                }
1928                if depth == 0 && j < bytes.len() {
1929                    let inner = &s[inner_start..j];
1930                    // Split on the first ',' at depth 0.
1931                    let split = split_on_comma_at_top_level(inner);
1932                    if let Some((k, v)) = split {
1933                        out.push_str(&f(k.trim(), v.trim()));
1934                        i = j + 1;
1935                        continue;
1936                    }
1937                }
1938            }
1939        }
1940        i = advance_char(s, &mut out, i);
1941    }
1942    out
1943}
1944
/// Split `s` on the first comma that is not nested inside any bracket pair.
///
/// Angle brackets, parentheses and square brackets all count as nesting, so
/// a tuple or array argument such as `(A, B), C` is split after the tuple
/// rather than inside it. The comma itself is dropped; surrounding
/// whitespace is kept. Returns `None` when there is no such comma.
fn split_on_comma_at_top_level(s: &str) -> Option<(&str, &str)> {
    let mut depth = 0i32;
    for (idx, ch) in s.char_indices() {
        match ch {
            '<' | '(' | '[' => depth += 1,
            '>' | ')' | ']' => depth -= 1,
            ',' if depth == 0 => return Some((&s[..idx], &s[idx + 1..])),
            _ => {}
        }
    }
    None
}
1958
1959/// Replace `Some(x)` in prose with `the value (x)`.
1960fn replace_some_calls(s: &str) -> String {
1961    let mut out = String::with_capacity(s.len());
1962    let bytes = s.as_bytes();
1963    let prefix = b"Some(";
1964    let mut i = 0;
1965
1966    while i < bytes.len() {
1967        if bytes[i..].starts_with(prefix) {
1968            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1969            if before_ok {
1970                let arg_start = i + prefix.len();
1971                // Find matching ')' respecting nesting.
1972                let mut depth = 1usize;
1973                let mut j = arg_start;
1974                while j < bytes.len() {
1975                    match bytes[j] {
1976                        b'(' => depth += 1,
1977                        b')' => {
1978                            depth -= 1;
1979                            if depth == 0 {
1980                                break;
1981                            }
1982                        }
1983                        _ => {}
1984                    }
1985                    j += 1;
1986                }
1987                if depth == 0 && j < bytes.len() {
1988                    let arg = &s[arg_start..j];
1989                    out.push_str("the value (");
1990                    out.push_str(arg);
1991                    out.push(')');
1992                    i = j + 1;
1993                    continue;
1994                }
1995            }
1996        }
1997        i = advance_char(s, &mut out, i);
1998    }
1999    out
2000}
2001
2002/// Drop bare `Some ` when it appears as a Rust-idiom modifier in prose
2003/// ("(Some values)", "Some keys leave the previous", etc.). The `Some(...)`
2004/// call form is handled separately by [`replace_some_calls`].
2005///
2006/// Match shape: word-boundary `Some` + single ASCII space + ASCII-lowercase
2007/// letter. The "Some " prefix is dropped; the following word is preserved.
2008/// `SomeType`, `Some.method()`, `Some(x)`, and sentence-initial `Some `
2009/// followed by an uppercase noun stay untouched.
2010fn replace_some_keyword_in_prose(s: &str) -> String {
2011    let keyword = b"Some ";
2012    let klen = keyword.len();
2013    let bytes = s.as_bytes();
2014    if klen >= bytes.len() {
2015        return s.to_string();
2016    }
2017    let mut out = String::with_capacity(s.len());
2018    let mut i = 0;
2019    while i + klen < bytes.len() {
2020        if &bytes[i..i + klen] == keyword {
2021            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
2022            let after_ok = bytes[i + klen].is_ascii_lowercase();
2023            if before_ok && after_ok {
2024                i += klen;
2025                continue;
2026            }
2027        }
2028        i = advance_char(s, &mut out, i);
2029    }
2030    if i < bytes.len() {
2031        out.push_str(&s[i..]);
2032    }
2033    out
2034}
2035
2036/// Replace `None` (at word boundaries, uppercase) with the target-appropriate nil.
2037fn replace_none_keyword(s: &str, target: DocTarget) -> String {
2038    let replacement = match target {
2039        DocTarget::PhpDoc | DocTarget::JavaDoc | DocTarget::CSharpDoc => "null",
2040        DocTarget::TsDoc | DocTarget::JsDoc => "undefined",
2041    };
2042    let keyword = b"None";
2043    let klen = keyword.len();
2044    let mut out = String::with_capacity(s.len());
2045    let bytes = s.as_bytes();
2046    if klen > bytes.len() {
2047        return s.to_string();
2048    }
2049    let mut i = 0;
2050
2051    while i + klen <= bytes.len() {
2052        if &bytes[i..i + klen] == keyword {
2053            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
2054            let after_ok =
2055                i + klen >= bytes.len() || !bytes[i + klen].is_ascii_alphanumeric() && bytes[i + klen] != b'_';
2056            if before_ok && after_ok {
2057                out.push_str(replacement);
2058                i += klen;
2059                continue;
2060            }
2061        }
2062        i = advance_char(s, &mut out, i);
2063    }
2064    if i < bytes.len() {
2065        out.push_str(&s[i..]);
2066    }
2067    out
2068}
2069
2070/// Replace standalone `::` between identifiers with `.`.
2071fn replace_path_separator(s: &str) -> String {
2072    let mut out = String::with_capacity(s.len());
2073    let bytes = s.as_bytes();
2074    let mut i = 0;
2075
2076    while i < bytes.len() {
2077        if i + 1 < bytes.len() && bytes[i] == b':' && bytes[i + 1] == b':' {
2078            // Only replace if surrounded by identifier characters or end/start of string.
2079            let before_ok = i > 0 && (bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_');
2080            let after_ok = i + 2 < bytes.len() && (bytes[i + 2].is_ascii_alphanumeric() || bytes[i + 2] == b'_');
2081            if before_ok || after_ok {
2082                out.push('.');
2083                i += 2;
2084                continue;
2085            }
2086        }
2087        i = advance_char(s, &mut out, i);
2088    }
2089    out
2090}
2091
2092/// Strip `.unwrap()` and `.expect("...")` calls from prose.
2093fn strip_unwrap_expect(s: &str) -> String {
2094    let mut out = String::with_capacity(s.len());
2095    let bytes = s.as_bytes();
2096    let mut i = 0;
2097
2098    while i < bytes.len() {
2099        // Match .unwrap().
2100        if bytes[i..].starts_with(b".unwrap()") {
2101            i += b".unwrap()".len();
2102            continue;
2103        }
2104        // Match .expect(...).
2105        if bytes[i..].starts_with(b".expect(") {
2106            let arg_start = i + b".expect(".len();
2107            let mut depth = 1usize;
2108            let mut j = arg_start;
2109            while j < bytes.len() {
2110                match bytes[j] {
2111                    b'(' => depth += 1,
2112                    b')' => {
2113                        depth -= 1;
2114                        if depth == 0 {
2115                            break;
2116                        }
2117                    }
2118                    _ => {}
2119                }
2120                j += 1;
2121            }
2122            if depth == 0 {
2123                i = j + 1;
2124                continue;
2125            }
2126        }
2127        i = advance_char(s, &mut out, i);
2128    }
2129    out
2130}
2131
2132#[cfg(test)]
2133mod tests {
2134    use super::*;
2135
2136    #[test]
2137    fn test_emit_phpdoc() {
2138        let mut out = String::new();
2139        emit_phpdoc(&mut out, "Simple documentation", "    ", "TestException");
2140        assert!(out.contains("/**"));
2141        assert!(out.contains("Simple documentation"));
2142        assert!(out.contains("*/"));
2143    }
2144
2145    #[test]
2146    fn test_phpdoc_escaping() {
2147        let mut out = String::new();
2148        emit_phpdoc(&mut out, "Handle */ sequences", "", "TestException");
2149        assert!(out.contains("Handle * / sequences"));
2150    }
2151
2152    #[test]
2153    fn test_emit_csharp_doc() {
2154        let mut out = String::new();
2155        emit_csharp_doc(&mut out, "C# documentation", "    ", "TestException");
2156        assert!(out.contains("<summary>"));
2157        assert!(out.contains("C# documentation"));
2158        assert!(out.contains("</summary>"));
2159    }
2160
2161    #[test]
2162    fn test_csharp_xml_escaping() {
2163        let mut out = String::new();
2164        emit_csharp_doc(&mut out, "foo < bar & baz > qux", "", "TestException");
2165        assert!(out.contains("foo &lt; bar &amp; baz &gt; qux"));
2166    }
2167
2168    #[test]
2169    fn test_emit_elixir_doc() {
2170        let mut out = String::new();
2171        emit_elixir_doc(&mut out, "Elixir documentation");
2172        assert!(out.contains("@doc \"\"\""));
2173        assert!(out.contains("Elixir documentation"));
2174        assert!(out.contains("\"\"\""));
2175    }
2176
2177    #[test]
2178    fn test_elixir_heredoc_escaping() {
2179        let mut out = String::new();
2180        emit_elixir_doc(&mut out, "Handle \"\"\" sequences");
2181        assert!(out.contains("Handle \"\" \" sequences"));
2182    }
2183
2184    #[test]
2185    fn test_emit_roxygen() {
2186        let mut out = String::new();
2187        emit_roxygen(&mut out, "R documentation");
2188        assert!(out.contains("#' R documentation"));
2189    }
2190
2191    #[test]
2192    fn test_emit_swift_doc() {
2193        let mut out = String::new();
2194        emit_swift_doc(&mut out, "Swift documentation", "    ");
2195        assert!(out.contains("/// Swift documentation"));
2196    }
2197
2198    #[test]
2199    fn test_emit_javadoc() {
2200        let mut out = String::new();
2201        emit_javadoc(&mut out, "Java documentation", "    ");
2202        assert!(out.contains("/**"));
2203        assert!(out.contains("Java documentation"));
2204        assert!(out.contains("*/"));
2205    }
2206
2207    #[test]
2208    fn test_emit_kdoc() {
2209        let mut out = String::new();
2210        emit_kdoc(&mut out, "Kotlin documentation", "    ");
2211        assert!(out.contains("/**"));
2212        assert!(out.contains("Kotlin documentation"));
2213        assert!(out.contains("*/"));
2214    }
2215
2216    #[test]
2217    fn test_emit_dartdoc() {
2218        let mut out = String::new();
2219        emit_dartdoc(&mut out, "Dart documentation", "    ");
2220        assert!(out.contains("/// Dart documentation"));
2221    }
2222
2223    #[test]
2224    fn test_emit_gleam_doc() {
2225        let mut out = String::new();
2226        emit_gleam_doc(&mut out, "Gleam documentation", "    ");
2227        assert!(out.contains("/// Gleam documentation"));
2228    }
2229
2230    #[test]
2231    fn test_emit_zig_doc() {
2232        let mut out = String::new();
2233        emit_zig_doc(&mut out, "Zig documentation", "    ");
2234        assert!(out.contains("/// Zig documentation"));
2235    }
2236
2237    #[test]
2238    fn test_empty_doc_skipped() {
2239        let mut out = String::new();
2240        emit_phpdoc(&mut out, "", "", "TestException");
2241        emit_csharp_doc(&mut out, "", "", "TestException");
2242        emit_elixir_doc(&mut out, "");
2243        emit_roxygen(&mut out, "");
2244        emit_kdoc(&mut out, "", "");
2245        emit_dartdoc(&mut out, "", "");
2246        emit_gleam_doc(&mut out, "", "");
2247        emit_zig_doc(&mut out, "", "");
2248        assert!(out.is_empty());
2249    }
2250
2251    #[test]
2252    fn test_doc_first_paragraph_joined_single_line() {
2253        assert_eq!(doc_first_paragraph_joined("Simple doc."), "Simple doc.");
2254    }
2255
2256    #[test]
2257    fn test_doc_first_paragraph_joined_wrapped_sentence() {
2258        // Simulates a docstring like convert's: "Convert HTML to Markdown,\nreturning a result."
2259        let doc = "Convert HTML to Markdown,\nreturning a result.";
2260        assert_eq!(
2261            doc_first_paragraph_joined(doc),
2262            "Convert HTML to Markdown, returning a result."
2263        );
2264    }
2265
2266    #[test]
2267    fn test_doc_first_paragraph_joined_stops_at_blank_line() {
2268        let doc = "First paragraph.\nStill first.\n\nSecond paragraph.";
2269        assert_eq!(doc_first_paragraph_joined(doc), "First paragraph. Still first.");
2270    }
2271
2272    #[test]
2273    fn test_doc_first_paragraph_joined_empty() {
2274        assert_eq!(doc_first_paragraph_joined(""), "");
2275    }
2276
2277    #[test]
2278    fn test_parse_rustdoc_sections_basic() {
2279        let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n\n# Returns\n\nThe extracted text.\n\n# Errors\n\nReturns `KreuzbergError` on failure.";
2280        let sections = parse_rustdoc_sections(doc);
2281        assert_eq!(sections.summary, "Extracts text from a file.");
2282        assert_eq!(sections.arguments.as_deref(), Some("* `path` - The file path."));
2283        assert_eq!(sections.returns.as_deref(), Some("The extracted text."));
2284        assert_eq!(sections.errors.as_deref(), Some("Returns `KreuzbergError` on failure."));
2285        assert!(sections.panics.is_none());
2286    }
2287
2288    #[test]
2289    fn test_parse_rustdoc_sections_example_with_fence() {
2290        let doc = "Run the thing.\n\n# Example\n\n```rust\nlet x = run();\n```";
2291        let sections = parse_rustdoc_sections(doc);
2292        assert_eq!(sections.summary, "Run the thing.");
2293        assert!(sections.example.as_ref().unwrap().contains("```rust"));
2294        assert!(sections.example.as_ref().unwrap().contains("let x = run();"));
2295    }
2296
2297    #[test]
2298    fn test_parse_rustdoc_sections_pound_inside_fence_is_not_a_heading() {
2299        // Even though we get rustdoc-hidden lines pre-stripped, a literal
2300        // `# foo` inside a non-rust fence (e.g. shell example) must not
2301        // start a new section.
2302        let doc = "Summary.\n\n# Example\n\n```bash\n# install deps\nrun --foo\n```";
2303        let sections = parse_rustdoc_sections(doc);
2304        assert_eq!(sections.summary, "Summary.");
2305        assert!(sections.example.as_ref().unwrap().contains("# install deps"));
2306    }
2307
2308    #[test]
2309    fn test_parse_arguments_bullets_dash_separator() {
2310        let body = "* `path` - The file path.\n* `config` - Optional configuration.";
2311        let pairs = parse_arguments_bullets(body);
2312        assert_eq!(pairs.len(), 2);
2313        assert_eq!(pairs[0], ("path".to_string(), "The file path.".to_string()));
2314        assert_eq!(pairs[1], ("config".to_string(), "Optional configuration.".to_string()));
2315    }
2316
2317    #[test]
2318    fn test_parse_arguments_bullets_continuation_line() {
2319        let body = "* `path` - The file path,\n  resolved relative to cwd.\n* `mode` - Open mode.";
2320        let pairs = parse_arguments_bullets(body);
2321        assert_eq!(pairs.len(), 2);
2322        assert_eq!(pairs[0].1, "The file path, resolved relative to cwd.");
2323    }
2324
2325    #[test]
2326    fn test_replace_fence_lang_rust_to_typescript() {
2327        let body = "```rust\nlet x = run();\n```";
2328        let out = replace_fence_lang(body, "typescript");
2329        assert!(out.starts_with("```typescript"));
2330        assert!(out.contains("let x = run();"));
2331    }
2332
2333    #[test]
2334    fn test_replace_fence_lang_preserves_attrs() {
2335        let body = "```rust,no_run\nlet x = run();\n```";
2336        let out = replace_fence_lang(body, "typescript");
2337        assert!(out.starts_with("```typescript,no_run"));
2338    }
2339
2340    #[test]
2341    fn test_replace_fence_lang_no_fence_unchanged() {
2342        let body = "Plain prose with `inline code`.";
2343        let out = replace_fence_lang(body, "typescript");
2344        assert_eq!(out, "Plain prose with `inline code`.");
2345    }
2346
2347    fn fixture_sections() -> RustdocSections {
2348        let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n* `config` - Optional configuration.\n\n# Returns\n\nThe extracted text and metadata.\n\n# Errors\n\nReturns an error when the file is unreadable.\n\n# Example\n\n```rust\nlet result = extract(\"file.pdf\")?;\n```";
2349        parse_rustdoc_sections(doc)
2350    }
2351
2352    #[test]
2353    fn test_render_jsdoc_sections() {
2354        let sections = fixture_sections();
2355        let out = render_jsdoc_sections(&sections);
2356        assert!(out.starts_with("Extracts text from a file."));
2357        assert!(out.contains("@param path - The file path."));
2358        assert!(out.contains("@param config - Optional configuration."));
2359        assert!(out.contains("@returns The extracted text and metadata."));
2360        assert!(out.contains("@throws Returns an error when the file is unreadable."));
2361        // fixture example is ```rust — stripped when target is TypeScript
2362        assert!(!out.contains("@example"), "Rust example must not appear in TSDoc");
2363        assert!(!out.contains("```typescript"));
2364        assert!(!out.contains("```rust"));
2365    }
2366
2367    #[test]
2368    fn test_render_jsdoc_sections_preserves_typescript_example() {
2369        let doc = "Do something.\n\n# Example\n\n```typescript\nconst x = doSomething();\n```";
2370        let sections = parse_rustdoc_sections(doc);
2371        let out = render_jsdoc_sections(&sections);
2372        assert!(out.contains("@example"), "TypeScript example must be preserved");
2373        assert!(out.contains("```typescript"));
2374    }
2375
2376    #[test]
2377    fn test_render_javadoc_sections() {
2378        let sections = fixture_sections();
2379        let out = render_javadoc_sections(&sections, "KreuzbergRsException");
2380        assert!(out.contains("@param path The file path."));
2381        assert!(out.contains("@return The extracted text and metadata."));
2382        assert!(out.contains("@throws KreuzbergRsException Returns an error when the file is unreadable."));
2383        // Java rendering omits the example block (handled separately by emit_javadoc which
2384        // wraps code in `<pre>{@code}</pre>`); we just confirm summary survives.
2385        assert!(out.starts_with("Extracts text from a file."));
2386    }
2387
2388    #[test]
2389    fn test_render_csharp_xml_sections() {
2390        let sections = fixture_sections();
2391        let out = render_csharp_xml_sections(&sections, "KreuzbergException");
2392        assert!(out.contains("<summary>\nExtracts text from a file.\n</summary>"));
2393        assert!(out.contains("<param name=\"path\">The file path.</param>"));
2394        assert!(out.contains("<returns>The extracted text and metadata.</returns>"));
2395        assert!(out.contains("<exception cref=\"KreuzbergException\">"));
2396        assert!(out.contains("<example><code language=\"csharp\">"));
2397        assert!(out.contains("let result = extract"));
2398    }
2399
2400    #[test]
2401    fn test_render_phpdoc_sections() {
2402        let sections = fixture_sections();
2403        let out = render_phpdoc_sections(&sections, "KreuzbergException");
2404        assert!(out.contains("@param mixed $path The file path."));
2405        assert!(out.contains("@return The extracted text and metadata."));
2406        assert!(out.contains("@throws KreuzbergException"));
2407        // fixture example is ```rust — stripped when target is PHP
2408        assert!(!out.contains("```php"), "Rust example must not appear in PHPDoc");
2409        assert!(!out.contains("```rust"));
2410    }
2411
2412    #[test]
2413    fn test_render_phpdoc_sections_preserves_php_example() {
2414        let doc = "Do something.\n\n# Example\n\n```php\n$x = doSomething();\n```";
2415        let sections = parse_rustdoc_sections(doc);
2416        let out = render_phpdoc_sections(&sections, "MyException");
2417        assert!(out.contains("```php"), "PHP example must be preserved");
2418    }
2419
2420    #[test]
2421    fn test_render_doxygen_sections() {
2422        let sections = fixture_sections();
2423        let out = render_doxygen_sections(&sections);
2424        assert!(out.contains("\\param path The file path."));
2425        assert!(out.contains("\\return The extracted text and metadata."));
2426        assert!(out.contains("\\code"));
2427        assert!(out.contains("\\endcode"));
2428    }
2429
2430    #[test]
2431    fn test_emit_yard_doc_simple() {
2432        let mut out = String::new();
2433        emit_yard_doc(&mut out, "Simple Ruby documentation", "    ");
2434        assert!(out.contains("# Simple Ruby documentation"));
2435    }
2436
2437    #[test]
2438    fn test_emit_yard_doc_empty() {
2439        let mut out = String::new();
2440        emit_yard_doc(&mut out, "", "    ");
2441        assert!(out.is_empty());
2442    }
2443
2444    #[test]
2445    fn test_emit_yard_doc_with_sections() {
2446        let mut out = String::new();
2447        let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n\n# Returns\n\nThe extracted text.\n\n# Errors\n\nReturns error on failure.";
2448        emit_yard_doc(&mut out, doc, "  ");
2449        assert!(out.contains("# Extracts text from a file."));
2450        assert!(out.contains("# @param path The file path."));
2451        assert!(out.contains("# @return The extracted text."));
2452        assert!(out.contains("# @raise Returns error on failure."));
2453    }
2454
2455    #[test]
2456    fn test_emit_c_doxygen_simple_prose() {
2457        let mut out = String::new();
2458        emit_c_doxygen(&mut out, "Free a string.", "");
2459        assert!(out.contains("/// Free a string."), "got: {out}");
2460    }
2461
2462    #[test]
2463    fn test_emit_c_doxygen_with_sections() {
2464        let mut out = String::new();
2465        let doc = "Extract content from a file.\n\n# Arguments\n\n* `path` - Path to the file.\n* `mode` - Read mode.\n\n# Returns\n\nA newly allocated string the caller owns.\n\n# Errors\n\nReturns null when the file is unreadable.";
2466        emit_c_doxygen(&mut out, doc, "");
2467        assert!(out.contains("/// Extract content from a file."));
2468        assert!(out.contains("/// \\param path Path to the file."));
2469        assert!(out.contains("/// \\param mode Read mode."));
2470        assert!(out.contains("/// \\return A newly allocated string the caller owns."));
2471        assert!(out.contains("/// \\note Returns null when the file is unreadable."));
2472    }
2473
2474    #[test]
2475    fn test_emit_c_doxygen_safety_section_maps_to_note() {
2476        let mut out = String::new();
2477        let doc = "Free a buffer.\n\n# Safety\n\nPointer must have been returned by this library.";
2478        emit_c_doxygen(&mut out, doc, "");
2479        assert!(out.contains("/// \\note SAFETY: Pointer must have been returned by this library."));
2480    }
2481
2482    #[test]
2483    fn test_emit_c_doxygen_example_renders_code_fence() {
2484        let mut out = String::new();
2485        let doc = "Demo.\n\n# Example\n\n```rust\nlet x = run();\n```";
2486        emit_c_doxygen(&mut out, doc, "");
2487        assert!(out.contains("/// \\code"));
2488        assert!(out.contains("/// \\endcode"));
2489        assert!(out.contains("let x = run();"));
2490    }
2491
2492    #[test]
2493    fn test_emit_c_doxygen_strips_markdown_links() {
2494        let mut out = String::new();
2495        let doc = "See [the docs](https://example.com/x) for details.";
2496        emit_c_doxygen(&mut out, doc, "");
2497        assert!(
2498            out.contains("the docs (https://example.com/x)"),
2499            "expected flattened link, got: {out}"
2500        );
2501        assert!(!out.contains("](https://"));
2502    }
2503
2504    #[test]
2505    fn test_emit_c_doxygen_word_wraps_long_lines() {
2506        let mut out = String::new();
2507        let long = "a ".repeat(80);
2508        emit_c_doxygen(&mut out, long.trim(), "");
2509        for line in out.lines() {
2510            // Each emitted prefix is "/// " (4 chars); the body after that
2511            // should be ≤ 100 chars per `DOXYGEN_WRAP_WIDTH`.
2512            let body = line.trim_start_matches("/// ");
2513            assert!(body.len() <= 100, "line too long ({}): {line}", body.len());
2514        }
2515    }
2516
2517    #[test]
2518    fn test_emit_c_doxygen_empty_input_is_noop() {
2519        let mut out = String::new();
2520        emit_c_doxygen(&mut out, "", "");
2521        emit_c_doxygen(&mut out, "   \n\t  ", "");
2522        assert!(out.is_empty());
2523    }
2524
2525    #[test]
2526    fn test_emit_c_doxygen_indent_applied() {
2527        let mut out = String::new();
2528        emit_c_doxygen(&mut out, "Hello.", "    ");
2529        assert!(out.starts_with("    /// Hello."));
2530    }
2531
2532    #[test]
2533    fn test_render_yard_sections() {
2534        let sections = fixture_sections();
2535        let out = render_yard_sections(&sections);
2536        assert!(out.contains("@param path The file path."));
2537        assert!(out.contains("@return The extracted text and metadata."));
2538        assert!(out.contains("@raise Returns an error when the file is unreadable."));
2539        // fixture example is ```rust — stripped when target is Ruby
2540        assert!(!out.contains("@example"), "Rust example must not appear in YARD");
2541        assert!(!out.contains("```ruby"));
2542        assert!(!out.contains("```rust"));
2543    }
2544
2545    #[test]
2546    fn test_render_yard_sections_preserves_ruby_example() {
2547        let doc = "Do something.\n\n# Example\n\n```ruby\nputs :hi\n```";
2548        let sections = parse_rustdoc_sections(doc);
2549        let out = render_yard_sections(&sections);
2550        assert!(out.contains("@example"), "Ruby example must be preserved");
2551        assert!(out.contains("```ruby"));
2552    }
2553
2554    // --- M1: example_for_target unit tests ---
2555
2556    #[test]
2557    fn example_for_target_rust_fenced_suppressed_for_php() {
2558        let example = "```rust\nlet x = 1;\n```";
2559        assert_eq!(
2560            example_for_target(example, "php"),
2561            None,
2562            "rust-fenced example must be omitted for PHP target"
2563        );
2564    }
2565
2566    #[test]
2567    fn example_for_target_bare_fence_defaults_to_rust_suppressed_for_ruby() {
2568        let example = "```\nlet x = 1;\n```";
2569        assert_eq!(
2570            example_for_target(example, "ruby"),
2571            None,
2572            "bare fence is treated as Rust and must be omitted for Ruby target"
2573        );
2574    }
2575
2576    #[test]
2577    fn example_for_target_php_example_preserved_for_php() {
2578        let example = "```php\n$x = 1;\n```";
2579        let result = example_for_target(example, "php");
2580        assert!(result.is_some(), "PHP example must be preserved for PHP target");
2581        assert!(result.unwrap().contains("```php"));
2582    }
2583
2584    #[test]
2585    fn example_for_target_ruby_example_preserved_for_ruby() {
2586        let example = "```ruby\nputs :hi\n```";
2587        let result = example_for_target(example, "ruby");
2588        assert!(result.is_some(), "Ruby example must be preserved for Ruby target");
2589        assert!(result.unwrap().contains("```ruby"));
2590    }
2591
2592    #[test]
2593    fn render_phpdoc_sections_with_rust_example_emits_no_at_example_block() {
2594        let doc = "Convert HTML.\n\n# Arguments\n\n* `html` - The HTML input.\n\n# Example\n\n```rust\nlet result = convert(html, None)?;\n```";
2595        let sections = parse_rustdoc_sections(doc);
2596        let out = render_phpdoc_sections(&sections, "HtmlToMarkdownException");
2597        assert!(!out.contains("```php"), "no PHP @example block for Rust source");
2598        assert!(!out.contains("```rust"), "raw Rust must not leak into PHPDoc");
2599        assert!(out.contains("@param"), "other sections must still be emitted");
2600    }
2601
2602    // --- KDoc ktfmt-canonical format tests ---
2603
2604    #[test]
2605    fn test_emit_kdoc_ktfmt_canonical_short_single_line() {
2606        let mut out = String::new();
2607        emit_kdoc_ktfmt_canonical(&mut out, "Simple doc.", "");
2608        assert_eq!(
2609            out, "/** Simple doc. */\n",
2610            "short single-line comment should collapse to canonical format"
2611        );
2612    }
2613
2614    #[test]
2615    fn test_emit_kdoc_ktfmt_canonical_short_with_indent() {
2616        let mut out = String::new();
2617        emit_kdoc_ktfmt_canonical(&mut out, "Text node (most frequent - 100+ per document)", "    ");
2618        assert_eq!(out, "    /** Text node (most frequent - 100+ per document) */\n");
2619    }
2620
2621    #[test]
2622    fn test_emit_kdoc_ktfmt_canonical_long_comment_uses_multiline() {
2623        let mut out = String::new();
2624        let long_text = "This is a very long documentation comment that exceeds the 100-character line width limit and should therefore be emitted in multi-line format";
2625        emit_kdoc_ktfmt_canonical(&mut out, long_text, "");
2626        assert!(out.contains("/**\n"), "long comment should start with newline");
2627        assert!(out.contains(" * "), "long comment should use multi-line format");
2628        assert!(out.contains(" */\n"), "long comment should end with newline");
2629    }
2630
2631    #[test]
2632    fn test_emit_kdoc_ktfmt_canonical_multiline_comment() {
2633        let mut out = String::new();
2634        let doc = "First line.\n\nSecond paragraph.";
2635        emit_kdoc_ktfmt_canonical(&mut out, doc, "");
2636        assert!(out.contains("/**\n"), "multi-paragraph should use multi-line format");
2637        assert!(out.contains(" * First line."), "first paragraph preserved");
2638        assert!(out.contains(" *\n"), "blank line preserved");
2639        assert!(out.contains(" * Second paragraph."), "second paragraph preserved");
2640    }
2641
2642    #[test]
2643    fn test_emit_kdoc_ktfmt_canonical_empty_doc() {
2644        let mut out = String::new();
2645        emit_kdoc_ktfmt_canonical(&mut out, "", "");
2646        assert!(out.is_empty(), "empty doc should produce no output");
2647    }
2648
2649    #[test]
2650    fn test_emit_kdoc_ktfmt_canonical_fits_within_100_chars() {
2651        let mut out = String::new();
2652        // Construct exactly at the boundary: indent(0) + "/** " + content + " */" = 100 chars
2653        // "/** " = 4 chars, " */" = 3 chars, so content can be 93 chars
2654        let content = "a".repeat(93);
2655        emit_kdoc_ktfmt_canonical(&mut out, &content, "");
2656        let line = out.lines().next().unwrap();
2657        assert_eq!(
2658            line.len(),
2659            100,
2660            "should fit exactly at 100 chars and use single-line format"
2661        );
2662        assert!(out.starts_with("/**"), "should use single-line format");
2663    }
2664
2665    #[test]
2666    fn test_emit_kdoc_ktfmt_canonical_exceeds_100_chars() {
2667        let mut out = String::new();
2668        // Exceed 100 chars: content of 94 chars with "/** " + " */" = 101 chars
2669        let content = "a".repeat(94);
2670        emit_kdoc_ktfmt_canonical(&mut out, &content, "");
2671        assert!(
2672            out.contains("/**\n"),
2673            "should use multi-line format when exceeding 100 chars"
2674        );
2675        assert!(out.contains(" * "), "multi-line format with ` * ` prefix");
2676    }
2677
2678    #[test]
2679    fn test_emit_kdoc_ktfmt_canonical_respects_indent() {
2680        let mut out = String::new();
2681        // With 4-char indent, max content is 89 chars (4 + 4 + 89 + 3 = 100)
2682        let content = "a".repeat(89);
2683        emit_kdoc_ktfmt_canonical(&mut out, &content, "    ");
2684        let line = out.lines().next().unwrap();
2685        assert_eq!(line.len(), 100, "should respect indent in 100-char calculation");
2686        assert!(line.starts_with("    /** "), "should include indent");
2687    }
2688
2689    #[test]
2690    fn test_emit_kdoc_ktfmt_canonical_real_world_enum_variant() {
2691        let mut out = String::new();
2692        emit_kdoc_ktfmt_canonical(&mut out, "Text node (most frequent - 100+ per document)", "    ");
2693        // This is from NodeType enum; should collapse to single-line
2694        assert!(out.starts_with("    /** "), "should preserve 4-space indent");
2695        assert!(out.contains(" */\n"), "should end with newline");
2696        // Verify it's single-line format
2697        let line_count = out.lines().count();
2698        assert_eq!(line_count, 1, "should be single-line format");
2699    }
2700
2701    #[test]
2702    fn test_emit_kdoc_ktfmt_canonical_real_world_data_class_field() {
2703        let mut out = String::new();
2704        let doc = "Heading style to use in Markdown output (ATX `#` or Setext underline).";
2705        emit_kdoc_ktfmt_canonical(&mut out, doc, "    ");
2706        // This is from ConversionOptions data class; should collapse to single-line
2707        let line_count = out.lines().count();
2708        assert_eq!(line_count, 1, "should be single-line format");
2709        assert!(out.starts_with("    /** "), "should have correct indent");
2710    }
2711
2712    // --- sanitize_rust_idioms tests ---
2713
2714    #[test]
2715    fn sanitize_intradoc_link_with_path_separator_java() {
2716        let input = "See [`ConversionOptions::builder()`] for details.";
2717        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2718        assert!(out.contains("`ConversionOptions.builder()`"), "got: {out}");
2719        assert!(!out.contains("[`"), "brackets must be removed, got: {out}");
2720    }
2721
2722    #[test]
2723    fn sanitize_intradoc_link_simple_type_php() {
2724        let input = "Returns a [`ConversionResult`].";
2725        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
2726        assert!(out.contains("`ConversionResult`"), "got: {out}");
2727        assert!(!out.contains("[`"), "got: {out}");
2728    }
2729
2730    #[test]
2731    fn sanitize_none_to_null_javadoc() {
2732        let input = "Returns None when no value is found.";
2733        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2734        assert!(out.contains("null"), "got: {out}");
2735        assert!(!out.contains("None"), "got: {out}");
2736    }
2737
2738    #[test]
2739    fn sanitize_none_to_undefined_tsdoc() {
2740        let input = "Returns None if absent.";
2741        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2742        assert!(out.contains("undefined"), "got: {out}");
2743        assert!(!out.contains("None"), "got: {out}");
2744    }
2745
2746    #[test]
2747    fn sanitize_some_x_to_the_value_x() {
2748        let input = "Pass Some(value) to enable.";
2749        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2750        assert!(out.contains("the value (value)"), "got: {out}");
2751        assert!(!out.contains("Some("), "got: {out}");
2752    }
2753
2754    #[test]
2755    fn sanitize_bare_some_followed_by_lowercase_noun_is_dropped() {
2756        // Real leak from html-to-markdown PreprocessingOptionsUpdate.java:16.
2757        let input =
2758            "Only specified fields (Some values) will override existing options; None values leave the previous";
2759        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2760        assert!(
2761            out.contains("(values)"),
2762            "bare `Some ` before lowercase noun must be stripped; got: {out}"
2763        );
2764        assert!(
2765            out.contains("null values"),
2766            "bare `None ` must also be replaced; got: {out}"
2767        );
2768        assert!(!out.contains("Some "), "Some prefix must not survive; got: {out}");
2769    }
2770
2771    #[test]
2772    fn sanitize_bare_some_does_not_touch_identifiers_or_uppercase_followers() {
2773        // SomeType, Some.method(), Some(x), and "Some Title" (proper noun) all preserved.
2774        let cases = [
2775            "SomeType lives on.",
2776            "Some.method() returns Self.",
2777            "Some Title",
2778            "Some(x) is a value.",
2779        ];
2780        for case in cases {
2781            let out = sanitize_rust_idioms(case, DocTarget::JavaDoc);
2782            // For the Some(x) case, replace_some_calls (run earlier) converts to "the value (x)"
2783            // so "Some" itself is gone — that's expected; everything else preserves "Some".
2784            if case.starts_with("Some(") {
2785                assert!(out.contains("the value (x)"), "got: {out}");
2786            } else {
2787                assert!(out.contains("Some"), "Some must survive in {case:?}; got: {out}");
2788            }
2789        }
2790    }
2791
2792    #[test]
2793    fn sanitize_option_t_to_nullable_php() {
2794        let input = "The result is Option<String>.";
2795        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
2796        assert!(out.contains("String?"), "got: {out}");
2797        assert!(!out.contains("Option<"), "got: {out}");
2798    }
2799
2800    #[test]
2801    fn sanitize_option_t_to_or_null_java() {
2802        let input = "The result is Option<String>.";
2803        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2804        assert!(out.contains("String | null"), "got: {out}");
2805    }
2806
2807    #[test]
2808    fn sanitize_option_t_to_or_undefined_tsdoc() {
2809        let input = "The result is Option<String>.";
2810        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2811        assert!(out.contains("String | undefined"), "got: {out}");
2812    }
2813
2814    #[test]
2815    fn sanitize_vec_u8_per_target() {
2816        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::PhpDoc).contains("string"));
2817        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::JavaDoc).contains("byte[]"));
2818        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::TsDoc).contains("Uint8Array"));
2819        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::JsDoc).contains("Uint8Array"));
2820    }
2821
2822    #[test]
2823    fn sanitize_vec_t_to_array() {
2824        let input = "Returns Vec<String>.";
2825        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2826        assert!(out.contains("String[]"), "got: {out}");
2827        assert!(!out.contains("Vec<"), "got: {out}");
2828    }
2829
2830    #[test]
2831    fn sanitize_hashmap_per_target() {
2832        let input = "Uses HashMap<String, u32>.";
2833        assert!(sanitize_rust_idioms(input, DocTarget::PhpDoc).contains("array<String, u32>"));
2834        assert!(sanitize_rust_idioms(input, DocTarget::JavaDoc).contains("Map<String, u32>"));
2835        assert!(sanitize_rust_idioms(input, DocTarget::TsDoc).contains("Record<String, u32>"));
2836    }
2837
2838    #[test]
2839    fn sanitize_arc_wrapper_stripped() {
2840        let input = "Holds Arc<Config>.";
2841        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2842        assert!(out.contains("Config"), "got: {out}");
2843        assert!(!out.contains("Arc<"), "got: {out}");
2844    }
2845
2846    #[test]
2847    fn sanitize_box_mutex_rwlock_rc_cell_refcell_stripped() {
2848        for wrapper in &["Box", "Mutex", "RwLock", "Rc", "Cell", "RefCell"] {
2849            let input = format!("Contains {wrapper}<Inner>.");
2850            let out = sanitize_rust_idioms(&input, DocTarget::JavaDoc);
2851            assert!(out.contains("Inner"), "wrapper {wrapper} not stripped, got: {out}");
2852            assert!(
2853                !out.contains(&format!("{wrapper}<")),
2854                "wrapper {wrapper} still present, got: {out}"
2855            );
2856        }
2857    }
2858
2859    #[test]
2860    fn sanitize_send_sync_stripped() {
2861        let input = "The type is Send + Sync.";
2862        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2863        assert!(!out.contains("Send"), "got: {out}");
2864        assert!(!out.contains("Sync"), "got: {out}");
2865    }
2866
2867    #[test]
2868    fn sanitize_static_lifetime_stripped() {
2869        let input = "Requires 'static lifetime.";
2870        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2871        assert!(!out.contains("'static"), "got: {out}");
2872    }
2873
2874    #[test]
2875    fn sanitize_pub_fn_stripped() {
2876        let input = "Calls pub fn convert().";
2877        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
2878        assert!(!out.contains("pub fn"), "got: {out}");
2879        assert!(out.contains("convert()"), "got: {out}");
2880    }
2881
2882    #[test]
2883    fn sanitize_crate_prefix_stripped() {
2884        let input = "See crate::error::ConversionError.";
2885        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2886        assert!(!out.contains("crate::"), "got: {out}");
2887        assert!(out.contains("error.ConversionError"), "got: {out}");
2888    }
2889
2890    #[test]
2891    fn sanitize_unwrap_expect_stripped() {
2892        let input = "Call result.unwrap() or result.expect(\"msg\").";
2893        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2894        assert!(!out.contains(".unwrap()"), "got: {out}");
2895        assert!(!out.contains(".expect("), "got: {out}");
2896    }
2897
2898    #[test]
2899    fn sanitize_no_mutation_inside_backticks() {
2900        // None inside backtick span must not be replaced.
2901        let input = "Use `None` as the argument.";
2902        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2903        assert!(out.contains("`None`"), "backtick span must be preserved, got: {out}");
2904    }
2905
2906    #[test]
2907    fn sanitize_rust_fence_dropped_for_tsdoc() {
2908        let input = "Intro.\n\n```rust\nlet x = 1;\n```\n\nTrailer.";
2909        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2910        assert!(
2911            !out.contains("let x = 1;"),
2912            "rust fence content must be dropped, got: {out}"
2913        );
2914        assert!(!out.contains("```rust"), "got: {out}");
2915        assert!(out.contains("Trailer."), "text after fence must survive, got: {out}");
2916    }
2917
2918    #[test]
2919    fn sanitize_rust_fence_dropped_for_java() {
2920        let input = "Intro.\n\n```rust\nlet x = 1;\n```\n\nTrailer.";
2921        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2922        // Rust fences are now dropped entirely for Java (Rust code is not portable).
2923        assert!(
2924            !out.contains("let x = 1;"),
2925            "fence content must be dropped for Java, got: {out}"
2926        );
2927        assert!(!out.contains("```"), "fence markers must be dropped, got: {out}");
2928        assert!(out.contains("Intro."), "prose before fence kept: {out}");
2929        assert!(out.contains("Trailer."), "prose after fence kept: {out}");
2930    }
2931
2932    #[test]
2933    fn sanitize_non_rust_fence_passed_through() {
2934        let input = "Example:\n\n```typescript\nconst x = 1;\n```";
2935        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2936        assert!(out.contains("```typescript"), "non-rust fence must survive, got: {out}");
2937        assert!(out.contains("const x = 1;"), "got: {out}");
2938    }
2939
2940    #[test]
2941    fn sanitize_backtick_code_span_not_mutated_option() {
2942        // Option<T> inside backtick span must not be replaced.
2943        let input = "The type is `Option<String>`.";
2944        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2945        // The backtick-protected span should be preserved verbatim.
2946        assert!(
2947            out.contains("`Option<String>`"),
2948            "code span must be preserved, got: {out}"
2949        );
2950    }
2951
2952    #[test]
2953    fn sanitize_idempotent() {
2954        // Running twice should produce the same result as running once.
2955        let input = "Returns None when Vec<String> is empty.";
2956        let once = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2957        let twice = sanitize_rust_idioms(&once, DocTarget::JavaDoc);
2958        assert_eq!(once, twice, "sanitize_rust_idioms should be idempotent");
2959    }
2960
2961    #[test]
2962    fn sanitize_multiline_prose() {
2963        let input = "Convert HTML to Markdown.\n\nReturns None on failure.\nUse Option<String> for the result.";
2964        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2965        assert!(out.contains("null"), "None must be replaced on line 2, got: {out}");
2966        assert!(
2967            out.contains("String | null"),
2968            "Option<String> must be replaced on line 3, got: {out}"
2969        );
2970    }
2971
2972    #[test]
2973    fn sanitize_attribute_line_dropped() {
2974        let input = "#[derive(Debug, Clone)]\nSome documentation.";
2975        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2976        assert!(!out.contains("#[derive("), "attribute line must be dropped, got: {out}");
2977        // Prose survives, though bare "Some " before a lowercase noun is stripped
2978        // by `replace_some_keyword_in_prose`, so accept either form.
2979        assert!(out.contains("documentation."), "prose must survive, got: {out}");
2980    }
2981
2982    #[test]
2983    fn sanitize_path_separator_in_prose() {
2984        let input = "See std::collections::HashMap for details.";
2985        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2986        assert!(out.contains("std.collections.HashMap"), ":: must become ., got: {out}");
2987    }
2988
2989    #[test]
2990    fn sanitize_none_not_replaced_inside_identifier() {
2991        // "NoneType" must not be replaced.
2992        let input = "Unlike NoneType in Python.";
2993        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2994        assert!(out.contains("NoneType"), "NoneType must not be replaced, got: {out}");
2995    }
2996
2997    // --- CSharpDoc target tests ---
2998
2999    #[test]
3000    fn sanitize_csharp_drops_rust_section_headings_and_example_body() {
3001        // The GraphQLErrorException case: `# Examples` heading followed by a
3002        // ```ignore code fence containing `Self::error_code`, `Result<T, E>`,
3003        // intra-doc links — all of which previously leaked into `<summary>`.
3004        let input = "Convert error to HTTP status code\n\n\
3005            Maps GraphQL error types to status codes.\n\n\
3006            # Examples\n\n\
3007            ```ignore\n\
3008            use spikard_graphql::error::GraphQLError;\n\
3009            let error = GraphQLError::AuthenticationError(\"Invalid token\".to_string());\n\
3010            assert_eq!(error.status_code(), 401);\n\
3011            ```\n";
3012        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3013        assert!(
3014            out.contains("Convert error to HTTP status code"),
3015            "summary preserved: {out}"
3016        );
3017        assert!(out.contains("Maps GraphQL error types"), "prose preserved: {out}");
3018        assert!(!out.contains("# Examples"), "heading dropped: {out}");
3019        assert!(!out.contains("```"), "code fence dropped: {out}");
3020        assert!(!out.contains("Self::error_code"), "Self::method dropped: {out}");
3021        assert!(
3022            !out.contains("GraphQLError::AuthenticationError"),
3023            "rust path dropped: {out}"
3024        );
3025    }
3026
3027    #[test]
3028    fn sanitize_csharp_intradoc_link_with_path_separator() {
3029        let input = "See [`Self::error_code`] for the variant codes.";
3030        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3031        assert!(out.contains("`Self.error_code`"), "intra-doc link normalised: {out}");
3032        assert!(!out.contains("[`"), "square brackets removed: {out}");
3033        assert!(!out.contains("::"), ":: replaced with .: {out}");
3034    }
3035
3036    #[test]
3037    fn sanitize_csharp_result_type_keeps_success_drops_error() {
3038        let input = "Returns Result<String, ConversionError> on failure.";
3039        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3040        assert!(out.contains("String"), "success type kept: {out}");
3041        assert!(!out.contains("Result<"), "Result wrapper dropped: {out}");
3042        assert!(!out.contains("ConversionError"), "error type dropped: {out}");
3043    }
3044
3045    #[test]
3046    fn sanitize_csharp_option_becomes_nullable() {
3047        let input = "Returns Option<String>.";
3048        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3049        // After XML-escaping, the `?` survives but any surviving `<`/`>` get escaped.
3050        assert!(out.contains("String?"), "Option<T> -> T?: {out}");
3051        assert!(!out.contains("Option<"), "Option dropped: {out}");
3052    }
3053
3054    #[test]
3055    fn sanitize_csharp_vec_u8_becomes_byte_array() {
3056        let input = "Accepts Vec<u8>.";
3057        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3058        // `byte[]` survives — the `[` is not XML-significant.
3059        assert!(out.contains("byte[]"), "Vec<u8> -> byte[]: {out}");
3060    }
3061
3062    #[test]
3063    fn sanitize_csharp_hashmap_becomes_dictionary() {
3064        let input = "Holds HashMap<String, u32>.";
3065        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3066        // The `<` / `>` produced by Dictionary<K, V> must be XML-escaped.
3067        assert!(
3068            out.contains("Dictionary&lt;String, u32&gt;"),
3069            "HashMap -> Dictionary with XML-escaped brackets: {out}"
3070        );
3071    }
3072
3073    #[test]
3074    fn sanitize_csharp_none_to_null() {
3075        let input = "Returns None on miss.";
3076        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3077        assert!(out.contains("null"), "None -> null: {out}");
3078        assert!(!out.contains("None"), "None replaced: {out}");
3079    }
3080
3081    #[test]
3082    fn sanitize_csharp_escapes_raw_angle_brackets_and_amp() {
3083        // Unrecognised `<...>` constructs (e.g. trait objects, generic params on
3084        // unknown names) must still be XML-escaped so the result is valid inside
3085        // `<summary>`.
3086        let input = "Accepts Box<dyn Trait> and combines a & b.";
3087        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3088        // Box<T> wrapper is stripped to inner type, leaving `dyn Trait`.
3089        assert!(out.contains("dyn Trait"), "Box<T> stripped: {out}");
3090        assert!(out.contains("&amp;"), "ampersand escaped: {out}");
3091    }
3092
3093    #[test]
3094    fn sanitize_csharp_drops_rust_code_fence_entirely() {
3095        let input = "Intro.\n\n```rust\nlet x: Vec<u8> = vec![];\n```\n\nTrailer.";
3096        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3097        assert!(!out.contains("let x"), "code fence body dropped: {out}");
3098        assert!(!out.contains("```"), "fence markers dropped: {out}");
3099        assert!(out.contains("Intro."), "prose before fence kept: {out}");
3100        assert!(out.contains("Trailer."), "prose after fence kept: {out}");
3101    }
3102
3103    #[test]
3104    fn sanitize_csharp_keep_sections_does_not_drop_headings() {
3105        // The sections-preserving variant leaves heading lines alone so callers
3106        // that have already extracted sections can sanitise each body fragment.
3107        let input = "Summary.\n\n# Arguments\n\n* `name` - the value.";
3108        let out = sanitize_rust_idioms_keep_sections(input, DocTarget::CSharpDoc);
3109        assert!(out.contains("# Arguments"), "heading preserved: {out}");
3110        assert!(out.contains("name"), "body preserved: {out}");
3111    }
3112
3113    #[test]
3114    fn sanitize_csharp_idempotent() {
3115        let input = "Returns Option<String> or None.";
3116        let once = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3117        let twice = sanitize_rust_idioms(&once, DocTarget::CSharpDoc);
3118        assert_eq!(once, twice, "CSharpDoc sanitisation must be idempotent");
3119    }
3120
3121    #[test]
3122    fn sanitize_phpdoc_drops_unmarked_rust_code_fences() {
3123        // Regression test: unmarked code fences (```\n...\n```) in Rust docstrings
3124        // are treated as Rust code and should be dropped for PHP target.
3125        let input = "Detect language name from a file extension.\n\nReturns `None` for unrecognized extensions.\n\n```\nuse tree_sitter_language_pack::detect_language_from_extension;\nassert_eq!(detect_language_from_extension(\"py\"), Some(\"python\"));\nassert_eq!(detect_language_from_extension(\"RS\"), Some(\"rust\"));\nassert_eq!(detect_language_from_extension(\"xyz\"), None);\n```";
3126        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
3127        assert!(!out.contains("use tree_sitter_language_pack"), "Rust use stmt dropped: {out}");
3128        assert!(!out.contains("assert_eq!"), "Rust code dropped: {out}");
3129        assert!(!out.contains("```"), "fence markers dropped: {out}");
3130        assert!(out.contains("Detect language name"), "prose before fence kept: {out}");
3131        assert!(out.contains("unrecognized extensions"), "prose kept: {out}");
3132    }
3133
3134    #[test]
3135    fn sanitize_javadoc_drops_unmarked_rust_code_fences() {
3136        // Regression test: unmarked code fences in Rust docstrings should be dropped
3137        // for Java target as well.
3138        let input = "Process a file.\n\n```\nlet result = process(\"def hello(): pass\", &config).unwrap();\n```";
3139        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
3140        assert!(!out.contains("unwrap"), "Rust unwrap dropped: {out}");
3141        assert!(!out.contains("```"), "fence markers dropped: {out}");
3142        assert!(out.contains("Process a file"), "prose kept: {out}");
3143    }
3144
3145    #[test]
3146    fn sanitize_phpdoc_drops_explicit_rust_fences() {
3147        // Explicit ```rust fences should also be dropped for PHP.
3148        let input = "Summary.\n\n```rust\nuse std::path::PathBuf;\nlet p = PathBuf::from(\"/tmp\");\n```";
3149        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
3150        assert!(!out.contains("use std::"), "Rust code dropped: {out}");
3151        assert!(!out.contains("PathBuf"), "Rust types dropped: {out}");
3152        assert!(!out.contains("```"), "fence markers dropped: {out}");
3153        assert!(out.contains("Summary"), "prose kept: {out}");
3154    }
3155}