alef_codegen/
doc_emission.rs

1//! Language-native documentation comment emission.
2//! Provides standardized functions for emitting doc comments in different languages.
3
4/// Emit PHPDoc-style comments (/** ... */)
5/// Used for PHP classes, methods, and properties.
6///
7/// Sanitizes Rust-specific idioms before translating rustdoc sections
8/// (`# Arguments` → `@param`, `# Returns` → `@return`, `# Errors` → `@throws`,
9/// `# Example` → ` ```php ` fence) via [`render_phpdoc_sections`].
10///
11/// `exception_class` is the PHP exception class name to use in `@throws` tags.
12pub fn emit_phpdoc(out: &mut String, doc: &str, indent: &str, exception_class: &str) {
13    if doc.is_empty() {
14        return;
15    }
16    // Sanitize Rust-specific idioms before processing sections.
17    let sanitized = sanitize_rust_idioms(doc, DocTarget::PhpDoc);
18    let sections = parse_rustdoc_sections(&sanitized);
19    let any_section = sections.arguments.is_some()
20        || sections.returns.is_some()
21        || sections.errors.is_some()
22        || sections.example.is_some();
23    let body = if any_section {
24        render_phpdoc_sections(&sections, exception_class)
25    } else {
26        sanitized
27    };
28    out.push_str(indent);
29    out.push_str("/**\n");
30    for line in body.lines() {
31        out.push_str(indent);
32        out.push_str(" * ");
33        out.push_str(&escape_phpdoc_line(line));
34        out.push('\n');
35    }
36    out.push_str(indent);
37    out.push_str(" */\n");
38}
39
/// Defuse comment terminators in a single PHPDoc line.
///
/// Every `*/` is rewritten to `* /` so that body text cannot close the
/// surrounding `/** ... */` block early.
fn escape_phpdoc_line(s: &str) -> String {
    let pieces: Vec<&str> = s.split("*/").collect();
    pieces.join("* /")
}
44
45/// Emit C# XML documentation comments (/// <summary> ... </summary>)
46/// Used for C# classes, structs, methods, and properties.
47///
48/// Translates rustdoc sections (`# Arguments` → `<param>`,
49/// `# Returns` → `<returns>`, `# Errors` → `<exception>`,
50/// `# Example` → `<example><code>`) via [`render_csharp_xml_sections`].
51///
52/// `exception_class` is the C# exception class name to use in `<exception cref="...">` tags.
53pub fn emit_csharp_doc(out: &mut String, doc: &str, indent: &str, exception_class: &str) {
54    if doc.is_empty() {
55        return;
56    }
57    // Parse sections from the raw rustdoc first (so `# Examples` / `# Arguments`
58    // / `# Returns` / `# Errors` are routed into structured XML tags), then
59    // sanitise each section body to strip Rust idioms and XML-escape `<`/`>`/`&`.
60    let raw_sections = parse_rustdoc_sections(doc);
61    let sections = RustdocSections {
62        summary: sanitize_rust_idioms_keep_sections(&raw_sections.summary, DocTarget::CSharpDoc),
63        arguments: raw_sections
64            .arguments
65            .as_deref()
66            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
67        returns: raw_sections
68            .returns
69            .as_deref()
70            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
71        errors: raw_sections
72            .errors
73            .as_deref()
74            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
75        panics: raw_sections
76            .panics
77            .as_deref()
78            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
79        safety: raw_sections
80            .safety
81            .as_deref()
82            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
83        // Examples typically contain Rust code that doesn't compile as C#; drop the body
84        // entirely rather than risk leaking unparseable code into `<example>`.
85        example: None,
86    };
87    let any_section = sections.arguments.is_some()
88        || sections.returns.is_some()
89        || sections.errors.is_some()
90        || sections.example.is_some();
91    if !any_section {
92        // Backwards-compatible path: plain `<summary>` for prose-only docs.
93        out.push_str(indent);
94        out.push_str("/// <summary>\n");
95        for line in sections.summary.lines() {
96            out.push_str(indent);
97            out.push_str("/// ");
98            // Note: sanitise_rust_idioms_keep_sections already XML-escaped <, >, & for
99            // the CSharpDoc target. We deliberately do NOT call escape_csharp_doc_line
100            // here because that would double-encode (e.g. `&amp;` → `&amp;amp;`).
101            out.push_str(line);
102            out.push('\n');
103        }
104        out.push_str(indent);
105        out.push_str("/// </summary>\n");
106        return;
107    }
108    let rendered = render_csharp_xml_sections(&sections, exception_class);
109    for line in rendered.lines() {
110        out.push_str(indent);
111        out.push_str("/// ");
112        // The rendered tags already contain the canonical chars; we only
113        // escape XML special chars that aren't part of our tag syntax. Since
114        // render_csharp_xml_sections produces well-formed XML, raw passthrough
115        // is correct.
116        out.push_str(line);
117        out.push('\n');
118    }
119}
120
121/// Emit Elixir documentation comments (@doc)
122/// Used for Elixir modules and functions.
123pub fn emit_elixir_doc(out: &mut String, doc: &str) {
124    if doc.is_empty() {
125        return;
126    }
127    out.push_str("@doc \"\"\"\n");
128    for line in doc.lines() {
129        out.push_str(&escape_elixir_doc_line(line));
130        out.push('\n');
131    }
132    out.push_str("\"\"\"\n");
133}
134
/// Write `doc` to `out` as Rust `///` documentation comments.
///
/// Used by alef backends that emit Rust source (e.g., the Rustler NIF crate,
/// the swift-bridge wrapper crate, the FRB Dart bridge crate). The output
/// syntax is the same as `emit_swift_doc`; the two differ only in intent.
///
/// Each line of `doc` becomes `indent` + `/// ` + the line. An empty `doc`
/// has no lines, so nothing is written.
pub fn emit_rustdoc(out: &mut String, doc: &str, indent: &str) {
    for line in doc.lines() {
        out.extend([indent, "/// ", line, "\n"]);
    }
}
151
/// Defuse heredoc terminators in a single Elixir doc line.
///
/// Every `"""` is rewritten to `"" "` so that body text cannot close the
/// surrounding `@doc """ ... """` heredoc early.
fn escape_elixir_doc_line(s: &str) -> String {
    let pieces: Vec<&str> = s.split("\"\"\"").collect();
    pieces.join("\"\" \"")
}
156
/// Write `doc` to `out` as R roxygen2 comments (`#' ` per line).
///
/// Intended for R functions. No indentation is added. An empty `doc` has no
/// lines, so nothing is written.
pub fn emit_roxygen(out: &mut String, doc: &str) {
    for line in doc.lines() {
        out.extend(["#' ", line, "\n"]);
    }
}
169
/// Write `doc` to `out` as Swift documentation comments (`///`).
///
/// Intended for Swift structs, enums, and functions. Each line of `doc`
/// becomes `indent` + `/// ` + the line. An empty `doc` has no lines, so
/// nothing is written.
pub fn emit_swift_doc(out: &mut String, doc: &str, indent: &str) {
    doc.lines().for_each(|line| {
        out.push_str(indent);
        out.push_str("/// ");
        out.push_str(line);
        out.push('\n');
    });
}
183
184/// Emit Javadoc-style documentation comments (/** ... */)
185/// Used for Java classes, methods, and fields.
186/// Handles XML escaping and Javadoc tag formatting.
187pub fn emit_javadoc(out: &mut String, doc: &str, indent: &str) {
188    if doc.is_empty() {
189        return;
190    }
191    out.push_str(indent);
192    out.push_str("/**\n");
193    for line in doc.lines() {
194        let escaped = escape_javadoc_line(line);
195        let trimmed = escaped.trim_end();
196        if trimmed.is_empty() {
197            out.push_str(indent);
198            out.push_str(" *\n");
199        } else {
200            out.push_str(indent);
201            out.push_str(" * ");
202            out.push_str(trimmed);
203            out.push('\n');
204        }
205    }
206    out.push_str(indent);
207    out.push_str(" */\n");
208}
209
/// Write `doc` to `out` as a multi-line KDoc block (`/** ... */`).
///
/// Intended for Kotlin classes, methods, and properties. An empty `doc`
/// writes nothing. Trailing whitespace is stripped from each line, and a
/// blank line is written as a bare ` *`. Every emitted line starts with
/// `indent`.
pub fn emit_kdoc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    out.extend([indent, "/**\n"]);
    for line in doc.lines() {
        out.push_str(indent);
        match line.trim_end() {
            "" => out.push_str(" *\n"),
            content => out.extend([" * ", content, "\n"]),
        }
    }
    out.extend([indent, " */\n"]);
}
233
/// Emit KDoc-style documentation comments in ktfmt-canonical format.
///
/// ktfmt collapses short KDoc comments to single-line format (`/** ... */`)
/// when they fit within the 100-character line width limit. This function
/// generates KDoc in that canonical form to avoid unnecessary formatting
/// diffs when the generated code is passed through ktfmt.
///
/// - A one-line `doc` whose rendered form (`indent` + `/** ` + text + ` */`)
///   is at most 100 characters is emitted on a single line.
/// - Anything else is emitted as a block with ` * ` prefixes; trailing
///   whitespace is stripped and blank lines become a bare ` *`.
/// - An empty `doc` writes nothing.
///
/// The width is measured in characters, not UTF-8 bytes, so non-ASCII text
/// that fits within the limit is still collapsed to one line.
pub fn emit_kdoc_ktfmt_canonical(out: &mut String, doc: &str, indent: &str) {
    const KTFMT_LINE_WIDTH: usize = 100;
    const OPEN: &str = "/** ";
    const CLOSE: &str = " */";

    if doc.is_empty() {
        return;
    }

    let lines: Vec<&str> = doc.lines().collect();

    // Exactly one line: candidate for the collapsed single-line form.
    if let [only] = lines.as_slice() {
        let content = only.trim();
        let rendered_width =
            indent.chars().count() + OPEN.len() + content.chars().count() + CLOSE.len();
        if rendered_width <= KTFMT_LINE_WIDTH {
            out.push_str(indent);
            out.push_str(OPEN);
            out.push_str(content);
            out.push_str(CLOSE);
            out.push('\n');
            return;
        }
    }

    // Block form for multi-line docs and for single lines that are too long.
    out.push_str(indent);
    out.push_str("/**\n");
    for line in lines {
        let content = line.trim_end();
        out.push_str(indent);
        out.push_str(" *");
        if !content.is_empty() {
            out.push(' ');
            out.push_str(content);
        }
        out.push('\n');
    }
    out.push_str(indent);
    out.push_str(" */\n");
}
288
/// Write `doc` to `out` as Dartdoc comments (`///`).
///
/// Intended for Dart classes, methods, and properties. Each line of `doc`
/// becomes `indent` + `/// ` + the line. An empty `doc` has no lines, so
/// nothing is written.
pub fn emit_dartdoc(out: &mut String, doc: &str, indent: &str) {
    out.extend(doc.lines().flat_map(|line| [indent, "/// ", line, "\n"]));
}
302
/// Write `doc` to `out` as Gleam documentation comments (`///`).
///
/// Intended for Gleam functions and types. Each line of `doc` becomes
/// `indent` + `/// ` + the line. An empty `doc` writes nothing.
pub fn emit_gleam_doc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    // The per-line prefix is the same for every line, so build it once.
    let prefix = format!("{}/// ", indent);
    for line in doc.lines() {
        out.push_str(&prefix);
        out.push_str(line);
        out.push('\n');
    }
}
316
317/// Emit Doxygen-style C documentation comments using `///`-prefixed lines.
318///
319/// Used by `alef-backend-ffi` above every `extern "C" fn`, the `*_len()`
320/// companion, opaque-handle typedef, and (post-cbindgen) the type/enum
321/// declarations cbindgen surfaces in the generated `.h`. cbindgen translates
322/// `///` source lines into a single `/** ... */` Doxygen block per item, so we
323/// only need to emit per-line `///` content here.
324///
325/// Translates rustdoc sections via [`render_doxygen_sections`]:
326///
327/// - `# Arguments` → `\param <name> <description>` (one per arg).
328/// - `# Returns`   → `\return <description>`.
329/// - `# Errors`    → `\note <description>` (Doxygen has no `\throws` for C;
330///   `\note` is the convention).
331/// - `# Safety`    → `\note SAFETY: <description>`.
332/// - `# Example`   → `\code` ... `\endcode` block.
333///
334/// Markdown links (`[text](url)`) are flattened to `text (url)`. Body lines
335/// are word-wrapped at ~100 columns so the rendered `/** */` block stays
336/// readable in IDE tooltips and terminal viewers.
337pub fn emit_c_doxygen(out: &mut String, doc: &str, indent: &str) {
338    if doc.trim().is_empty() {
339        return;
340    }
341    let sections = parse_rustdoc_sections(doc);
342    let any_section = sections.arguments.is_some()
343        || sections.returns.is_some()
344        || sections.errors.is_some()
345        || sections.safety.is_some()
346        || sections.example.is_some();
347    let mut body = if any_section {
348        render_doxygen_sections_with_notes(&sections)
349    } else {
350        sections.summary.clone()
351    };
352    body = strip_markdown_links(&body);
353    let wrapped = word_wrap(&body, DOXYGEN_WRAP_WIDTH);
354    for line in wrapped.lines() {
355        out.push_str(indent);
356        out.push_str("/// ");
357        out.push_str(line);
358        out.push('\n');
359    }
360}
361
/// Line width passed to `word_wrap` when `emit_c_doxygen` wraps body text.
const DOXYGEN_WRAP_WIDTH: usize = 100;
363
364/// Render `RustdocSections` as a Doxygen body but route `# Errors` and
365/// `# Safety` to `\note` lines instead of plain prose. This is the variant
366/// `emit_c_doxygen` uses; the public `render_doxygen_sections` keeps its
367/// long-standing plain-prose semantics so existing callers don't shift.
368fn render_doxygen_sections_with_notes(sections: &RustdocSections) -> String {
369    let mut out = String::new();
370    if !sections.summary.is_empty() {
371        out.push_str(&sections.summary);
372    }
373    if let Some(args) = sections.arguments.as_deref() {
374        for (name, desc) in parse_arguments_bullets(args) {
375            if !out.is_empty() {
376                out.push('\n');
377            }
378            if desc.is_empty() {
379                out.push_str("\\param ");
380                out.push_str(&name);
381            } else {
382                out.push_str("\\param ");
383                out.push_str(&name);
384                out.push(' ');
385                out.push_str(&desc);
386            }
387        }
388    }
389    if let Some(ret) = sections.returns.as_deref() {
390        if !out.is_empty() {
391            out.push('\n');
392        }
393        out.push_str("\\return ");
394        out.push_str(ret.trim());
395    }
396    if let Some(err) = sections.errors.as_deref() {
397        if !out.is_empty() {
398            out.push('\n');
399        }
400        out.push_str("\\note ");
401        out.push_str(err.trim());
402    }
403    if let Some(safety) = sections.safety.as_deref() {
404        if !out.is_empty() {
405            out.push('\n');
406        }
407        out.push_str("\\note SAFETY: ");
408        out.push_str(safety.trim());
409    }
410    if let Some(example) = sections.example.as_deref() {
411        if !out.is_empty() {
412            out.push('\n');
413        }
414        out.push_str("\\code\n");
415        for line in example.lines() {
416            let t = line.trim_start();
417            if t.starts_with("```") {
418                continue;
419            }
420            out.push_str(line);
421            out.push('\n');
422        }
423        out.push_str("\\endcode");
424    }
425    out
426}
427
/// Flatten Markdown inline links `[text](url)` to `text (url)` so the rendered
/// Doxygen block stays readable when consumed without a Markdown filter.
///
/// A `[` starts a link only when the first `]` after it is immediately
/// followed by `(` and a closing `)` exists later in the input. Nested
/// brackets are not interpreted. Any `[` that does not start such a link is
/// copied through unchanged and scanning resumes right after it.
///
/// Text outside links is copied as whole string slices, so multi-byte UTF-8
/// characters are preserved exactly.
fn strip_markdown_links(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(open) = rest.find('[') {
        // Everything before the bracket is plain text.
        out.push_str(&rest[..open]);
        let after_open = &rest[open + 1..];
        let link = after_open.split_once(']').and_then(|(text, tail)| {
            let tail = tail.strip_prefix('(')?;
            let (url, remainder) = tail.split_once(')')?;
            Some((text, url, remainder))
        });
        match link {
            Some((text, url, remainder)) => {
                out.push_str(text);
                out.push_str(" (");
                out.push_str(url);
                out.push(')');
                rest = remainder;
            }
            None => {
                // Not a link: keep the bracket and continue just past it.
                out.push('[');
                rest = after_open;
            }
        }
    }
    out.push_str(rest);
    out
}
460
/// Word-wrap each input line at `width` columns.
///
/// Lines starting with `\code` or `\endcode`, lines between those markers,
/// and Markdown fence lines are passed through verbatim to preserve example
/// formatting. Lines that already fit are also passed through unchanged.
///
/// Width is measured in characters rather than UTF-8 bytes, so text with
/// non-ASCII characters is not wrapped earlier than `width` columns.
///
/// A line that is too long is rebuilt from its whitespace-separated words,
/// which collapses runs of whitespace and drops leading indentation on that
/// line. A single word longer than `width` is kept whole on its own line.
/// Trailing newlines are removed from the result.
fn word_wrap(input: &str, width: usize) -> String {
    let mut out = String::with_capacity(input.len());
    let mut in_code = false;
    for raw in input.lines() {
        let trimmed = raw.trim_start();
        if trimmed.starts_with("\\code") {
            in_code = true;
        } else if trimmed.starts_with("\\endcode") {
            in_code = false;
        } else if !in_code && !trimmed.starts_with("```") && raw.chars().count() > width {
            // Greedy fill: add words until the next one would exceed `width`.
            let mut current = String::with_capacity(raw.len());
            let mut current_cols = 0usize;
            for word in raw.split_whitespace() {
                let word_cols = word.chars().count();
                if current.is_empty() {
                    current.push_str(word);
                    current_cols = word_cols;
                } else if current_cols + 1 + word_cols > width {
                    out.push_str(&current);
                    out.push('\n');
                    current.clear();
                    current.push_str(word);
                    current_cols = word_cols;
                } else {
                    current.push(' ');
                    current.push_str(word);
                    current_cols += 1 + word_cols;
                }
            }
            if !current.is_empty() {
                out.push_str(&current);
                out.push('\n');
            }
            continue;
        }
        // Verbatim path: code markers, code content, fences, and short lines.
        out.push_str(raw);
        out.push('\n');
    }
    out.trim_end_matches('\n').to_string()
}
512
/// Write `doc` to `out` as Zig documentation comments (`///`).
///
/// Intended for Zig functions, types, and declarations. Each line of `doc`
/// becomes `indent` + `/// ` + the line. An empty `doc` has no lines, so
/// nothing is written.
pub fn emit_zig_doc(out: &mut String, doc: &str, indent: &str) {
    for line in doc.lines() {
        let rendered = [indent, "/// ", line, "\n"].concat();
        out.push_str(&rendered);
    }
}
526
527/// Emit YARD documentation comments for Ruby.
528/// Used for Ruby classes, methods, and attributes.
529///
530/// YARD syntax: each line prefixed with `# ` (with space). Translates rustdoc
531/// sections (`# Arguments` → `@param`, `# Returns` → `@return`, `# Errors` → `@raise`)
532/// via [`render_yard_sections`].
533pub fn emit_yard_doc(out: &mut String, doc: &str, indent: &str) {
534    if doc.is_empty() {
535        return;
536    }
537    let sections = parse_rustdoc_sections(doc);
538    let any_section = sections.arguments.is_some()
539        || sections.returns.is_some()
540        || sections.errors.is_some()
541        || sections.example.is_some();
542    let body = if any_section {
543        render_yard_sections(&sections)
544    } else {
545        doc.to_string()
546    };
547    for line in body.lines() {
548        out.push_str(indent);
549        out.push_str("# ");
550        out.push_str(line);
551        out.push('\n');
552    }
553}
554
555/// Render `RustdocSections` as YARD documentation comment body.
556///
557/// - `# Arguments` → `@param name desc` (one per arg)
558/// - `# Returns`   → `@return desc`
559/// - `# Errors`    → `@raise desc`
560/// - `# Example`   → `@example` block.
561///
562/// Output is a plain string with `\n` separators; the emitter wraps each line
563/// in `# ` itself.
564pub fn render_yard_sections(sections: &RustdocSections) -> String {
565    let mut out = String::new();
566    if !sections.summary.is_empty() {
567        out.push_str(&sections.summary);
568    }
569    if let Some(args) = sections.arguments.as_deref() {
570        for (name, desc) in parse_arguments_bullets(args) {
571            if !out.is_empty() {
572                out.push('\n');
573            }
574            if desc.is_empty() {
575                out.push_str("@param ");
576                out.push_str(&name);
577            } else {
578                out.push_str("@param ");
579                out.push_str(&name);
580                out.push(' ');
581                out.push_str(&desc);
582            }
583        }
584    }
585    if let Some(ret) = sections.returns.as_deref() {
586        if !out.is_empty() {
587            out.push('\n');
588        }
589        out.push_str("@return ");
590        out.push_str(ret.trim());
591    }
592    if let Some(err) = sections.errors.as_deref() {
593        if !out.is_empty() {
594            out.push('\n');
595        }
596        out.push_str("@raise ");
597        out.push_str(err.trim());
598    }
599    if let Some(example) = sections.example.as_deref() {
600        if let Some(body) = example_for_target(example, "ruby") {
601            if !out.is_empty() {
602                out.push('\n');
603            }
604            out.push_str("@example\n");
605            out.push_str(&body);
606        }
607    }
608    out
609}
610
611/// Escape Javadoc line: handle XML special chars and backtick code blocks.
612///
613/// HTML entities (`<`, `>`, `&`) are also escaped *inside* `{@code …}` blocks.
614/// Without that, content like `` `<pre><code>` `` would emit raw `<pre>`
615/// inside the Javadoc tag — Eclipse-formatter Spotless then treats it as a
616/// real `<pre>` block element and shatters the line across multiple `* `
617/// rows, breaking `alef-verify`'s embedded hash. Escaped content is
618/// rendered identically by Javadoc readers (the `{@code}` tag shows literal
619/// characters) and is stable under any post-formatter pass.
620fn escape_javadoc_line(s: &str) -> String {
621    let mut result = String::with_capacity(s.len());
622    let mut chars = s.chars().peekable();
623    while let Some(ch) = chars.next() {
624        if ch == '`' {
625            let mut code = String::new();
626            for c in chars.by_ref() {
627                if c == '`' {
628                    break;
629                }
630                code.push(c);
631            }
632            result.push_str("{@code ");
633            result.push_str(&escape_javadoc_html_entities(&code));
634            result.push('}');
635        } else if ch == '<' {
636            result.push_str("&lt;");
637        } else if ch == '>' {
638            result.push_str("&gt;");
639        } else if ch == '&' {
640            result.push_str("&amp;");
641        } else {
642            result.push(ch);
643        }
644    }
645    result
646}
647
/// Replace `<`, `>` and `&` with `&lt;`, `&gt;` and `&amp;`.
///
/// Only these three characters are touched; they are the ones downstream
/// Javadoc/Eclipse formatters would otherwise parse as block-level HTML
/// (e.g. `<pre>`).
fn escape_javadoc_html_entities(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            match ch {
                '<' => escaped.push_str("&lt;"),
                '>' => escaped.push_str("&gt;"),
                '&' => escaped.push_str("&amp;"),
                _ => escaped.push(ch),
            }
            escaped
        })
}
662
/// A parsed rustdoc comment broken out into the sections binding emitters
/// care about.
///
/// `summary` is the leading prose paragraph(s) before any `# Heading`.
/// Sections are stored verbatim (without the `# Heading` line itself);
/// each binding is responsible for translating bullet lists and code
/// fences into its host-native conventions.
///
/// Trailing/leading whitespace inside each field is trimmed so emitters
/// can concatenate without producing `* ` lines containing only spaces.
///
/// As produced by [`parse_rustdoc_sections`], an optional field is `None`
/// both when its heading is absent and when the section body is empty after
/// trimming. If the same heading appears more than once, the last non-empty
/// body is the one kept.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct RustdocSections {
    /// Prose before the first recognised `# Section` heading; empty when
    /// there is none.
    pub summary: String,
    /// Body of the `# Arguments` (or `# Args`) section, if present.
    pub arguments: Option<String>,
    /// Body of the `# Returns` section, if present.
    pub returns: Option<String>,
    /// Body of the `# Errors` section, if present.
    pub errors: Option<String>,
    /// Body of the `# Panics` section, if present.
    pub panics: Option<String>,
    /// Body of the `# Safety` section, if present.
    pub safety: Option<String>,
    /// Body of the `# Example` / `# Examples` section, if present. Code
    /// fence lines are kept as part of the body.
    pub example: Option<String>,
}
690
691/// Parse a rustdoc string into [`RustdocSections`].
692///
693/// Recognises level-1 ATX headings whose name matches one of the standard
694/// rustdoc section names (`Arguments`, `Returns`, `Errors`, `Panics`,
695/// `Safety`, `Example`, `Examples`). Anything before the first heading
696/// becomes `summary`. Unrecognised headings are folded into the
697/// preceding section verbatim, so unconventional rustdoc isn't lost.
698///
699/// The input is expected to already have rustdoc-hidden lines stripped
700/// and intra-doc-link syntax rewritten by
701/// [`crate::extractor::helpers::normalize_rustdoc`].
702pub fn parse_rustdoc_sections(doc: &str) -> RustdocSections {
703    if doc.trim().is_empty() {
704        return RustdocSections::default();
705    }
706    let mut summary = String::new();
707    let mut arguments: Option<String> = None;
708    let mut returns: Option<String> = None;
709    let mut errors: Option<String> = None;
710    let mut panics: Option<String> = None;
711    let mut safety: Option<String> = None;
712    let mut example: Option<String> = None;
713    let mut current: Option<&'static str> = None;
714    let mut buf = String::new();
715    let mut in_fence = false;
716    let flush = |target: Option<&'static str>,
717                 buf: &mut String,
718                 summary: &mut String,
719                 arguments: &mut Option<String>,
720                 returns: &mut Option<String>,
721                 errors: &mut Option<String>,
722                 panics: &mut Option<String>,
723                 safety: &mut Option<String>,
724                 example: &mut Option<String>| {
725        let body = std::mem::take(buf).trim().to_string();
726        if body.is_empty() {
727            return;
728        }
729        match target {
730            None => {
731                if !summary.is_empty() {
732                    summary.push('\n');
733                }
734                summary.push_str(&body);
735            }
736            Some("arguments") => *arguments = Some(body),
737            Some("returns") => *returns = Some(body),
738            Some("errors") => *errors = Some(body),
739            Some("panics") => *panics = Some(body),
740            Some("safety") => *safety = Some(body),
741            Some("example") => *example = Some(body),
742            _ => {}
743        }
744    };
745    for line in doc.lines() {
746        let trimmed = line.trim_start();
747        if trimmed.starts_with("```") {
748            in_fence = !in_fence;
749            buf.push_str(line);
750            buf.push('\n');
751            continue;
752        }
753        if !in_fence {
754            if let Some(rest) = trimmed.strip_prefix("# ") {
755                let head = rest.trim().to_ascii_lowercase();
756                let target = match head.as_str() {
757                    "arguments" | "args" => Some("arguments"),
758                    "returns" => Some("returns"),
759                    "errors" => Some("errors"),
760                    "panics" => Some("panics"),
761                    "safety" => Some("safety"),
762                    "example" | "examples" => Some("example"),
763                    _ => None,
764                };
765                if target.is_some() {
766                    flush(
767                        current,
768                        &mut buf,
769                        &mut summary,
770                        &mut arguments,
771                        &mut returns,
772                        &mut errors,
773                        &mut panics,
774                        &mut safety,
775                        &mut example,
776                    );
777                    current = target;
778                    continue;
779                }
780            }
781        }
782        buf.push_str(line);
783        buf.push('\n');
784    }
785    flush(
786        current,
787        &mut buf,
788        &mut summary,
789        &mut arguments,
790        &mut returns,
791        &mut errors,
792        &mut panics,
793        &mut safety,
794        &mut example,
795    );
796    RustdocSections {
797        summary,
798        arguments,
799        returns,
800        errors,
801        panics,
802        safety,
803        example,
804    }
805}
806
/// Parse an `# Arguments` body into `(name, description)` pairs.
///
/// A bullet is a line that, after trimming, starts with `* ` or `- `. The
/// rest of the bullet is split at the first separator found, tried in this
/// order over the whole line: ` - `, then `: `, then a single space. With no
/// separator the whole bullet is the name and the description is empty.
/// Surrounding backticks and asterisks are stripped from the name.
///
/// Non-empty lines that are not bullets are continuation lines: they are
/// appended (space-separated) to the previous entry's description, or
/// ignored when no entry exists yet.
///
/// Used by emitters that translate to per-parameter documentation tags
/// (`@param`, `<param>`, `\param`).
pub fn parse_arguments_bullets(body: &str) -> Vec<(String, String)> {
    let mut entries: Vec<(String, String)> = Vec::new();
    for line in body.lines().map(str::trim) {
        let bullet = line
            .strip_prefix("* ")
            .or_else(|| line.strip_prefix("- "));
        match bullet {
            Some(item) => {
                let (name, desc) = item
                    .split_once(" - ")
                    .or_else(|| item.split_once(": "))
                    .or_else(|| item.split_once(' '))
                    .unwrap_or((item, ""));
                let name = name.trim().trim_matches('`').trim_matches('*');
                entries.push((name.to_string(), desc.trim().to_string()));
            }
            None if line.is_empty() => {}
            None => {
                if let Some((_, desc)) = entries.last_mut() {
                    if !desc.is_empty() {
                        desc.push(' ');
                    }
                    desc.push_str(line);
                }
            }
        }
    }
    entries
}
847
/// Return `true` if `tag` (the first comma-separated token after the opening
/// ` ``` ` of a code fence) identifies a Rust code block.
///
/// Accepted tags:
/// - the empty string — rustdoc treats unlabelled fences as Rust by default
/// - `"rust"` — explicit Rust
/// - anything starting with `"rust,"` — Rust with trailing attributes
/// - the rustdoc test attributes `no_run`, `ignore`, `should_panic` and
///   `compile_fail` — only meaningful to rustdoc, so they imply Rust even
///   when `rust` itself is omitted
/// - anything starting with `"edition"` (`edition2018`, `edition2021`, …)
///
/// Matching is case-sensitive.
fn is_rust_fence_tag(tag: &str) -> bool {
    match tag {
        "" | "rust" | "no_run" | "ignore" | "should_panic" | "compile_fail" => true,
        other => other.starts_with("rust,") || other.starts_with("edition"),
    }
}
867
868/// Detect the language tag on the first code fence in `body`.
869///
870/// Scans `body` for the first line that starts with ` ``` ` and returns the
871/// tag that follows (e.g. `"rust"`, `"php"`, `"typescript"`). A bare ` ``` `
872/// with no tag returns `"rust"` because rustdoc treats unlabelled fences as
873/// Rust by default. Returns `"rust"` when no fence is found at all.
874fn detect_first_fence_lang(body: &str) -> &str {
875    for line in body.lines() {
876        let trimmed = line.trim_start();
877        if let Some(rest) = trimmed.strip_prefix("```") {
878            let tag = rest.split(',').next().unwrap_or("").trim();
879            return if tag.is_empty() || is_rust_fence_tag(tag) { "rust" } else { tag };
880        }
881    }
882    "rust"
883}
884
885/// Return `Some(transformed_example)` if the example should be emitted for
886/// `target_lang`, or `None` when the example is Rust source that would be
887/// meaningless in the foreign language.
888///
889/// When the original fence language is `rust` (including bare ` ``` ` which
890/// rustdoc defaults to Rust) and the target is not `rust`, the example is
891/// suppressed entirely — better absent than misleading. Cross-language
892/// transliteration of example bodies is intentionally out of scope.
893pub fn example_for_target(example: &str, target_lang: &str) -> Option<String> {
894    let trimmed = example.trim();
895    let source_lang = detect_first_fence_lang(trimmed);
896    if source_lang == "rust" && target_lang != "rust" {
897        None
898    } else {
899        Some(replace_fence_lang(trimmed, target_lang))
900    }
901}
902
/// Rewrite the language tag of every *opening* code fence in `body` to
/// `lang_replacement`.
///
/// For an opening ` ```tag[,attrs] ` line the tag (everything up to the first
/// comma or the end of the line) is replaced, while the line's indentation and
/// any trailing `,attrs` are preserved. The matching closing fence and all
/// other lines are copied unchanged — a closing fence must stay a bare
/// ` ``` `, because a fence line carrying an info string does not terminate a
/// Markdown code block.
///
/// Fences are paired in order of appearance: the first fence line opens a
/// block, the next one closes it, and so on. When no fence is present the body
/// is returned unchanged. Trailing newlines are removed from the result.
///
/// Used by emitters that need to convert ` ```rust ` examples into
/// ` ```typescript ` / ` ```python ` / ` ```swift ` etc.
pub fn replace_fence_lang(body: &str, lang_replacement: &str) -> String {
    let mut out = String::with_capacity(body.len() + lang_replacement.len());
    let mut inside_fence = false;
    for line in body.lines() {
        let trimmed = line.trim_start();
        match trimmed.strip_prefix("```") {
            Some(rest) if !inside_fence => {
                // Opening fence: swap the tag, keep indentation and `,attrs`.
                let indent = &line[..line.len() - trimmed.len()];
                let attrs = rest.find(',').map_or("", |i| &rest[i..]);
                out.push_str(indent);
                out.push_str("```");
                out.push_str(lang_replacement);
                out.push_str(attrs);
                inside_fence = true;
            }
            Some(_) => {
                // Closing fence: emit untouched so it still terminates the block.
                out.push_str(line);
                inside_fence = false;
            }
            None => out.push_str(line),
        }
        out.push('\n');
    }
    let kept = out.trim_end_matches('\n').len();
    out.truncate(kept);
    out
}
931
932/// Render `RustdocSections` as a JSDoc comment body (without the `/**` /
933/// ` */` wrappers — those are added by the caller's emitter, which knows
934/// the indent/escape conventions).
935///
936/// - `# Arguments` → `@param name - desc`
937/// - `# Returns`   → `@returns desc`
938/// - `# Errors`    → `@throws desc`
939/// - `# Example`   → `@example` block. Replaces ` ```rust ` fences with
940///   ` ```typescript ` so the example highlights properly in TypeDoc.
941///
942/// Output is a plain string with `\n` separators; emitters wrap each line
943/// in ` * ` themselves.
944pub fn render_jsdoc_sections(sections: &RustdocSections) -> String {
945    let mut out = String::new();
946    if !sections.summary.is_empty() {
947        out.push_str(&sections.summary);
948    }
949    if let Some(args) = sections.arguments.as_deref() {
950        for (name, desc) in parse_arguments_bullets(args) {
951            if !out.is_empty() {
952                out.push('\n');
953            }
954            if desc.is_empty() {
955                out.push_str(&crate::template_env::render(
956                    "doc_jsdoc_param.jinja",
957                    minijinja::context! { name => &name },
958                ));
959            } else {
960                out.push_str(&crate::template_env::render(
961                    "doc_jsdoc_param_desc.jinja",
962                    minijinja::context! { name => &name, desc => &desc },
963                ));
964            }
965        }
966    }
967    if let Some(ret) = sections.returns.as_deref() {
968        if !out.is_empty() {
969            out.push('\n');
970        }
971        out.push_str(&crate::template_env::render(
972            "doc_jsdoc_returns.jinja",
973            minijinja::context! { content => ret.trim() },
974        ));
975    }
976    if let Some(err) = sections.errors.as_deref() {
977        if !out.is_empty() {
978            out.push('\n');
979        }
980        out.push_str(&crate::template_env::render(
981            "doc_jsdoc_throws.jinja",
982            minijinja::context! { content => err.trim() },
983        ));
984    }
985    if let Some(example) = sections.example.as_deref() {
986        if let Some(body) = example_for_target(example, "typescript") {
987            if !out.is_empty() {
988                out.push('\n');
989            }
990            out.push_str("@example\n");
991            out.push_str(&body);
992        }
993    }
994    out
995}
996
997/// Render `RustdocSections` as a JavaDoc comment body.
998///
999/// - `# Arguments` → `@param name desc` (one per param)
1000/// - `# Returns`   → `@return desc`
1001/// - `# Errors`    → `@throws KreuzbergRsException desc`
1002/// - `# Example`   → `<pre>{@code ...}</pre>` block.
1003///
1004/// `throws_class` is the FQN/simple name of the exception class to use in
1005/// the `@throws` tag (e.g. `"KreuzbergRsException"`).
1006pub fn render_javadoc_sections(sections: &RustdocSections, throws_class: &str) -> String {
1007    let mut out = String::new();
1008    if !sections.summary.is_empty() {
1009        out.push_str(&sections.summary);
1010    }
1011    if let Some(args) = sections.arguments.as_deref() {
1012        for (name, desc) in parse_arguments_bullets(args) {
1013            if !out.is_empty() {
1014                out.push('\n');
1015            }
1016            if desc.is_empty() {
1017                out.push_str(&crate::template_env::render(
1018                    "doc_javadoc_param.jinja",
1019                    minijinja::context! { name => &name },
1020                ));
1021            } else {
1022                out.push_str(&crate::template_env::render(
1023                    "doc_javadoc_param_desc.jinja",
1024                    minijinja::context! { name => &name, desc => &desc },
1025                ));
1026            }
1027        }
1028    }
1029    if let Some(ret) = sections.returns.as_deref() {
1030        if !out.is_empty() {
1031            out.push('\n');
1032        }
1033        out.push_str(&crate::template_env::render(
1034            "doc_javadoc_return.jinja",
1035            minijinja::context! { content => ret.trim() },
1036        ));
1037    }
1038    if let Some(err) = sections.errors.as_deref() {
1039        if !out.is_empty() {
1040            out.push('\n');
1041        }
1042        out.push_str(&crate::template_env::render(
1043            "doc_javadoc_throws.jinja",
1044            minijinja::context! { throws_class => throws_class, content => err.trim() },
1045        ));
1046    }
1047    out
1048}
1049
1050/// Render `RustdocSections` as a C# XML doc comment body (without the
1051/// `/// ` line prefixes — the emitter adds those).
1052///
1053/// - summary  → `<summary>...</summary>`
1054/// - args     → `<param name="x">desc</param>` (one per arg)
1055/// - returns  → `<returns>desc</returns>`
1056/// - errors   → `<exception cref="KreuzbergException">desc</exception>`
1057/// - example  → `<example><code language="csharp">...</code></example>`
1058pub fn render_csharp_xml_sections(sections: &RustdocSections, exception_class: &str) -> String {
1059    let mut out = String::new();
1060    out.push_str("<summary>\n");
1061    let summary = if sections.summary.is_empty() {
1062        ""
1063    } else {
1064        sections.summary.as_str()
1065    };
1066    for line in summary.lines() {
1067        out.push_str(line);
1068        out.push('\n');
1069    }
1070    out.push_str("</summary>");
1071    if let Some(args) = sections.arguments.as_deref() {
1072        for (name, desc) in parse_arguments_bullets(args) {
1073            out.push('\n');
1074            if desc.is_empty() {
1075                out.push_str(&crate::template_env::render(
1076                    "doc_csharp_param.jinja",
1077                    minijinja::context! { name => &name },
1078                ));
1079            } else {
1080                out.push_str(&crate::template_env::render(
1081                    "doc_csharp_param_desc.jinja",
1082                    minijinja::context! { name => &name, desc => &desc },
1083                ));
1084            }
1085        }
1086    }
1087    if let Some(ret) = sections.returns.as_deref() {
1088        out.push('\n');
1089        out.push_str(&crate::template_env::render(
1090            "doc_csharp_returns.jinja",
1091            minijinja::context! { content => ret.trim() },
1092        ));
1093    }
1094    if let Some(err) = sections.errors.as_deref() {
1095        out.push('\n');
1096        out.push_str(&crate::template_env::render(
1097            "doc_csharp_exception.jinja",
1098            minijinja::context! {
1099                exception_class => exception_class,
1100                content => err.trim(),
1101            },
1102        ));
1103    }
1104    if let Some(example) = sections.example.as_deref() {
1105        out.push('\n');
1106        out.push_str("<example><code language=\"csharp\">\n");
1107        // Drop fence markers, keep code.
1108        for line in example.lines() {
1109            let t = line.trim_start();
1110            if t.starts_with("```") {
1111                continue;
1112            }
1113            out.push_str(line);
1114            out.push('\n');
1115        }
1116        out.push_str("</code></example>");
1117    }
1118    out
1119}
1120
1121/// Render `RustdocSections` as a PHPDoc comment body.
1122///
1123/// - `# Arguments` → `@param mixed $name desc`
1124/// - `# Returns`   → `@return desc`
1125/// - `# Errors`    → `@throws KreuzbergException desc`
1126/// - `# Example`   → ` ```php ` fence (replaces ` ```rust `).
1127pub fn render_phpdoc_sections(sections: &RustdocSections, throws_class: &str) -> String {
1128    let mut out = String::new();
1129    if !sections.summary.is_empty() {
1130        out.push_str(&sections.summary);
1131    }
1132    if let Some(args) = sections.arguments.as_deref() {
1133        for (name, desc) in parse_arguments_bullets(args) {
1134            if !out.is_empty() {
1135                out.push('\n');
1136            }
1137            if desc.is_empty() {
1138                out.push_str(&crate::template_env::render(
1139                    "doc_phpdoc_param.jinja",
1140                    minijinja::context! { name => &name },
1141                ));
1142            } else {
1143                out.push_str(&crate::template_env::render(
1144                    "doc_phpdoc_param_desc.jinja",
1145                    minijinja::context! { name => &name, desc => &desc },
1146                ));
1147            }
1148        }
1149    }
1150    if let Some(ret) = sections.returns.as_deref() {
1151        if !out.is_empty() {
1152            out.push('\n');
1153        }
1154        out.push_str(&crate::template_env::render(
1155            "doc_phpdoc_return.jinja",
1156            minijinja::context! { content => ret.trim() },
1157        ));
1158    }
1159    if let Some(err) = sections.errors.as_deref() {
1160        if !out.is_empty() {
1161            out.push('\n');
1162        }
1163        out.push_str(&crate::template_env::render(
1164            "doc_phpdoc_throws.jinja",
1165            minijinja::context! { throws_class => throws_class, content => err.trim() },
1166        ));
1167    }
1168    if let Some(example) = sections.example.as_deref() {
1169        if let Some(body) = example_for_target(example, "php") {
1170            if !out.is_empty() {
1171                out.push('\n');
1172            }
1173            out.push_str(&body);
1174        }
1175    }
1176    out
1177}
1178
1179/// Render `RustdocSections` as a Doxygen comment body for the C header.
1180///
1181/// - args    → `\param name desc`
1182/// - returns → `\return desc`
1183/// - errors  → prose paragraph (Doxygen has no semantic tag for FFI errors)
1184/// - example → `\code` ... `\endcode`
1185pub fn render_doxygen_sections(sections: &RustdocSections) -> String {
1186    let mut out = String::new();
1187    if !sections.summary.is_empty() {
1188        out.push_str(&sections.summary);
1189    }
1190    if let Some(args) = sections.arguments.as_deref() {
1191        for (name, desc) in parse_arguments_bullets(args) {
1192            if !out.is_empty() {
1193                out.push('\n');
1194            }
1195            if desc.is_empty() {
1196                out.push_str(&crate::template_env::render(
1197                    "doc_doxygen_param.jinja",
1198                    minijinja::context! { name => &name },
1199                ));
1200            } else {
1201                out.push_str(&crate::template_env::render(
1202                    "doc_doxygen_param_desc.jinja",
1203                    minijinja::context! { name => &name, desc => &desc },
1204                ));
1205            }
1206        }
1207    }
1208    if let Some(ret) = sections.returns.as_deref() {
1209        if !out.is_empty() {
1210            out.push('\n');
1211        }
1212        out.push_str(&crate::template_env::render(
1213            "doc_doxygen_return.jinja",
1214            minijinja::context! { content => ret.trim() },
1215        ));
1216    }
1217    if let Some(err) = sections.errors.as_deref() {
1218        if !out.is_empty() {
1219            out.push('\n');
1220        }
1221        out.push_str(&crate::template_env::render(
1222            "doc_doxygen_errors.jinja",
1223            minijinja::context! { content => err.trim() },
1224        ));
1225    }
1226    if let Some(example) = sections.example.as_deref() {
1227        if !out.is_empty() {
1228            out.push('\n');
1229        }
1230        out.push_str("\\code\n");
1231        for line in example.lines() {
1232            let t = line.trim_start();
1233            if t.starts_with("```") {
1234                continue;
1235            }
1236            out.push_str(line);
1237            out.push('\n');
1238        }
1239        out.push_str("\\endcode");
1240    }
1241    out
1242}
1243
/// Return the first paragraph of a doc comment as a single joined line.
///
/// Blank (whitespace-only) lines at the very start are skipped; the lines
/// that follow are collected up to the next blank line, trimmed, and joined
/// with a single space. This handles wrapped sentences like:
///
/// ```text
/// Convert HTML to Markdown, returning
/// a `ConversionResult`.
/// ```
///
/// which would otherwise be truncated at the comma when callers use
/// `.lines().next()`.
///
/// Returns an empty string when `doc` contains no non-blank line.
pub fn doc_first_paragraph_joined(doc: &str) -> String {
    doc.lines()
        .map(str::trim)
        // Without this, a doc that begins with a blank line would yield "".
        .skip_while(|line| line.is_empty())
        .take_while(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
1263
/// Target documentation dialect for [`sanitize_rust_idioms`] and
/// [`sanitize_rust_idioms_keep_sections`].
///
/// Each variant selects the idiomatic mapping for Rust constructs that do not
/// translate directly to foreign-language doc syntax. Rust code fences are
/// dropped for every variant; only [`DocTarget::CSharpDoc`] receives extra
/// whole-text treatment (see its documentation).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocTarget {
    /// PHPDoc (`/** ... */`), e.g. phpstan-typed prose.
    PhpDoc,
    /// Javadoc (`/** ... */`), e.g. OpenJDK-style annotations.
    JavaDoc,
    /// TSDoc (`/** ... */`), TypeScript variant of JSDoc.
    TsDoc,
    /// JSDoc (`/** ... */`), JavaScript variant.
    JsDoc,
    /// C# XML doc (`/// <summary>...</summary>`).
    ///
    /// Strips Rust code fences and XML-escapes any remaining `<` / `>` / `&`
    /// so the result is safe to embed inside a `<summary>` element. Via
    /// [`sanitize_rust_idioms`], the first rustdoc section heading
    /// (`# Examples`, `# Arguments`, `# Returns`, etc.) and everything after
    /// it are dropped as well; [`sanitize_rust_idioms_keep_sections`] leaves
    /// such headings in place.
    CSharpDoc,
}
1286
/// Sanitize Rust-specific idioms in a prose string for the given foreign-language
/// documentation target.
///
/// Most transformations are applied **outside** backtick spans and code fences
/// only, so inline code examples and fenced blocks are not otherwise mutated.
/// The exceptions are:
///
/// - ` ```rust ` fences and unmarked ` ``` ` code blocks are dropped entirely
///   for all targets ([`DocTarget::TsDoc`], [`DocTarget::JsDoc`],
///   [`DocTarget::PhpDoc`], [`DocTarget::JavaDoc`], and [`DocTarget::CSharpDoc`]);
/// - intra-doc link rewriting, `::` → `.`, and `.unwrap()` / `.expect()`
///   stripping run on the whole prose line before it is split into backtick
///   spans, so they also affect inline code.
///
/// For [`DocTarget::CSharpDoc`] the text is additionally cut at the first
/// rustdoc section heading (the heading and everything after it are dropped)
/// and the result is XML-escaped (`<`, `>`, `&`).
///
/// # Transformations
///
/// - Intra-doc links `` [`Type::method`] `` → `` `Type.method` ``.
/// - `` [`Foo`] `` (backtick inside square brackets) → `` `Foo` ``.
/// - `None` (word boundary) → `null` (PHP/Java) or `undefined` (TS/JS).
/// - `Some(x)` → `the value (x)`.
/// - `Option<T>` → `T?` (PHP) / `T | null` (Java) / `T | undefined` (TS/JS).
/// - `Vec<u8>` → `string` (PHP) / `byte[]` (Java) / `Uint8Array` (TS/JS).
/// - `Vec<T>` → `T[]` (all targets).
/// - `HashMap<K, V>` → `array<K, V>` (PHP) / `Map<K, V>` (Java) / `Record<K, V>` (TS/JS).
/// - `Arc<T>`, `Box<T>`, `Mutex<T>`, `RwLock<T>`, `Rc<T>`, `Cell<T>`, `RefCell<T>` → `T`.
/// - `Send + Sync`, `Send`, `Sync`, `'static` → stripped.
/// - Standalone `::` between identifiers → `.`.
/// - `pub fn `, `crate::`, `&self`, `&mut self` → stripped.
/// - `#[…]` attribute macros on their own line or inline → stripped.
/// - `.unwrap()`, `.expect("…")` → stripped.
/// - ` ```rust ` and unmarked ` ``` ` code fences → dropped entirely.
pub fn sanitize_rust_idioms(text: &str, target: DocTarget) -> String {
    // For C# XML doc the default is to drop rustdoc section headings
    // (`# Examples`, `# Arguments`, …) and the remainder of the comment,
    // because those bodies routinely contain content that cannot be embedded
    // safely inside `<summary>`. Callers that have already extracted sections
    // (`emit_csharp_doc`) sanitise each section body via [`sanitize_rust_idioms_keep_sections`].
    // The `true` flag enables that truncation; it has no effect for other targets.
    sanitize_rust_idioms_inner(text, target, true)
}
1321
/// Same as [`sanitize_rust_idioms`] but never drops rustdoc section headings.
///
/// The only behavioural difference is for [`DocTarget::CSharpDoc`]: the text
/// is not cut at the first `# Arguments` / `# Returns` / … heading. All other
/// processing (Rust fence removal, prose transformations, C# XML escaping) is
/// shared with [`sanitize_rust_idioms`].
///
/// Used by emitters that have already split the doc into sections and need to
/// sanitise each body fragment independently (e.g. C# XML doc emission with
/// per-section `<param>` / `<returns>` / `<exception>` tags).
pub fn sanitize_rust_idioms_keep_sections(text: &str, target: DocTarget) -> String {
    // `false` disables the C#-only "stop at the first section heading" rule.
    sanitize_rust_idioms_inner(text, target, false)
}
1330
1331fn sanitize_rust_idioms_inner(text: &str, target: DocTarget, drop_csharp_sections: bool) -> String {
1332    let mut out = String::with_capacity(text.len());
1333    let mut in_rust_fence = false;
1334    let mut in_other_fence = false;
1335    // For C# XML doc: once a `# Examples` / `# Arguments` / etc. heading is
1336    // encountered, drop the entire remainder of the comment. Rustdoc section
1337    // headings cannot be safely embedded inside `<summary>` and the per-section
1338    // content (code fences, intra-doc links, generics) is the leading cause
1339    // of CS1002/CS1519 leakage. The plain `<summary>` path collapses to the
1340    // top-level prose only.
1341    let mut csharp_section_dropped = false;
1342
1343    for line in text.lines() {
1344        if csharp_section_dropped {
1345            continue;
1346        }
1347        let trimmed = line.trim_start();
1348        if drop_csharp_sections
1349            && matches!(target, DocTarget::CSharpDoc)
1350            && !in_rust_fence
1351            && !in_other_fence
1352            && is_rustdoc_section_heading(trimmed)
1353        {
1354            csharp_section_dropped = true;
1355            continue;
1356        }
1357
1358        // Detect code fence boundaries.
1359        if let Some(rest) = trimmed.strip_prefix("```") {
1360            if in_rust_fence {
1361                // Closing fence of a rust block.
1362                in_rust_fence = false;
1363                match target {
1364                    DocTarget::TsDoc
1365                    | DocTarget::JsDoc
1366                    | DocTarget::CSharpDoc
1367                    | DocTarget::PhpDoc
1368                    | DocTarget::JavaDoc => {
1369                        // Entire rust block dropped — don't emit closing fence.
1370                    }
1371                }
1372                continue;
1373            }
1374            if in_other_fence {
1375                // Closing fence of a non-rust block.
1376                in_other_fence = false;
1377                out.push_str(line);
1378                out.push('\n');
1379                continue;
1380            }
1381            // Opening fence — determine language.
1382            let lang = rest.split(',').next().unwrap_or("").trim();
1383            let is_rust = is_rust_fence_tag(lang);
1384            if is_rust {
1385                in_rust_fence = true;
1386                match target {
1387                    DocTarget::TsDoc
1388                    | DocTarget::JsDoc
1389                    | DocTarget::CSharpDoc
1390                    | DocTarget::PhpDoc
1391                    | DocTarget::JavaDoc => {
1392                        // Drop the entire rust fence block — skip opening line.
1393                        // Rust code examples are not portable to any of the target languages.
1394                    }
1395                }
1396                continue;
1397            }
1398            // Non-rust fence: pass through verbatim.
1399            in_other_fence = true;
1400            out.push_str(line);
1401            out.push('\n');
1402            continue;
1403        }
1404
1405        // Inside a rust fence.
1406        if in_rust_fence {
1407            match target {
1408                DocTarget::TsDoc | DocTarget::JsDoc | DocTarget::CSharpDoc | DocTarget::PhpDoc | DocTarget::JavaDoc => {
1409                    // Drop content of rust fences — all targets filter out Rust code examples.
1410                }
1411            }
1412            continue;
1413        }
1414
1415        // Inside a non-rust fence: pass through verbatim.
1416        if in_other_fence {
1417            out.push_str(line);
1418            out.push('\n');
1419            continue;
1420        }
1421
1422        // Check if this line is a bare `#[...]` attribute line.
1423        let stripped_indent = line.trim_start();
1424        if stripped_indent.starts_with("#[") && stripped_indent.ends_with(']') {
1425            // Attribute-only line — drop entirely.
1426            continue;
1427        }
1428
1429        // Normal prose line: apply token-level transformations.
1430        let sanitized = apply_prose_transforms(line, target);
1431        out.push_str(&sanitized);
1432        out.push('\n');
1433    }
1434
1435    // Trim trailing newline added by the loop (preserve internal newlines).
1436    if out.ends_with('\n') && !text.ends_with('\n') {
1437        out.pop();
1438    }
1439
1440    // For C# XML doc, escape any remaining `<`, `>`, `&` so the result is
1441    // safe to embed inside `<summary>...</summary>`. By this point the
1442    // Rust-idiom substitutions have replaced `Vec<T>` / `Option<T>` /
1443    // `HashMap<K, V>` / `Result<T, E>` with their idiomatic forms, but
1444    // unrecognised generic constructs (e.g. trait-object references) may
1445    // still contain raw angle brackets that would break C# XML parsing.
1446    if matches!(target, DocTarget::CSharpDoc) {
1447        out = xml_escape_for_csharp(&out);
1448    }
1449
1450    out
1451}
1452
/// Tell whether an already left-trimmed line is a rustdoc section heading.
///
/// The line must begin with `# ` (a single hash followed by a space) and the
/// remainder, ignoring surrounding whitespace and ASCII case, must be one of
/// `arguments`, `args`, `returns`, `errors`, `panics`, `safety`, `example`
/// or `examples`.
fn is_rustdoc_section_heading(trimmed: &str) -> bool {
    const HEADINGS: [&str; 8] = [
        "arguments",
        "args",
        "returns",
        "errors",
        "panics",
        "safety",
        "example",
        "examples",
    ];
    match trimmed.strip_prefix("# ") {
        Some(rest) => {
            let title = rest.trim();
            HEADINGS.iter().any(|known| title.eq_ignore_ascii_case(known))
        }
        None => false,
    }
}
1466
/// Entity-escape the XML-significant characters `&`, `<` and `>` so the text
/// can be embedded inside a C# `<summary>` element.
///
/// Angle brackets can legitimately remain in prose after the Rust-idiom
/// substitutions (e.g. `Dictionary<K, V>`), yet they must still be escaped
/// for the surrounding XML to stay well-formed.
fn xml_escape_for_csharp(s: &str) -> String {
    // `&` must go first: the later replacements introduce ampersands of
    // their own, which must not be escaped a second time.
    s.replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
}
1485
1486/// Apply prose-level Rust-idiom transformations to a single line.
1487///
1488/// Some transformations span or precede backtick boundaries and must be applied
1489/// to the full line before tokenisation:
1490///
1491/// 1. Intra-doc links (`` [`...`] ``) — they wrap a backtick pair.
1492/// 2. `::` path separator — even inside backtick spans it should become `.`
1493///    for all foreign-language targets, since the target language uses `.` for
1494///    member access and package paths in code examples too.
1495///
1496/// All remaining transformations are applied only to literal (non-code) segments
1497/// after tokenisation.
1498fn apply_prose_transforms(line: &str, target: DocTarget) -> String {
1499    // Step 1: replace intra-doc links before tokenisation (they span backtick pairs).
1500    let line = replace_intradoc_links(line, target);
1501
1502    // Step 2: replace :: everywhere (including inside backtick spans).
1503    // All targets use `.` as the member/package separator, so this is always safe.
1504    let line = replace_path_separator(&line);
1505
1506    // Step 3: strip .unwrap() and .expect() everywhere, including inside backtick spans,
1507    // since these Rust error-handling idioms are meaningless in all target languages.
1508    let line = strip_unwrap_expect(&line);
1509
1510    // Step 4: tokenise and apply remaining transforms only to literal segments.
1511    let segments = tokenize_backtick_spans(&line);
1512    let mut result = String::with_capacity(line.len());
1513    for (is_code, span) in segments {
1514        if is_code {
1515            result.push('`');
1516            result.push_str(span);
1517            result.push('`');
1518        } else {
1519            result.push_str(&transform_prose_segment(span, target));
1520        }
1521    }
1522    result
1523}
1524
/// Break a line into literal and inline-code segments.
///
/// Each returned `(is_code, text)` pair is either the content between a
/// matched pair of backticks (`is_code == true`, backticks excluded, possibly
/// empty) or a run of literal text (`is_code == false`). A backtick without a
/// closing partner is not a code span: it and the rest of the line form one
/// final literal segment. Empty literal runs are not emitted.
fn tokenize_backtick_spans(line: &str) -> Vec<(bool, &str)> {
    let mut segments = Vec::new();
    let mut rest = line;

    while let Some(open) = rest.find('`') {
        if open > 0 {
            // Literal text preceding the backtick.
            segments.push((false, &rest[..open]));
        }
        let after_open = &rest[open + 1..];
        match after_open.find('`') {
            Some(close) => {
                segments.push((true, &after_open[..close]));
                rest = &after_open[close + 1..];
            }
            None => {
                // Unmatched backtick: the remainder (backtick included) is literal.
                segments.push((false, &rest[open..]));
                return segments;
            }
        }
    }

    if !rest.is_empty() {
        segments.push((false, rest));
    }
    segments
}
1565
1566/// Apply all prose-level Rust substitutions to a literal text segment.
1567///
1568/// Intra-doc links have already been replaced by `apply_prose_transforms`
1569/// before tokenisation; this function handles the remaining transformations.
1570fn transform_prose_segment(text: &str, target: DocTarget) -> String {
1571    let mut s = text.to_string();
1572
1573    // 1. Strip #[derive(...)] and other inline attribute-style references.
1574    s = strip_inline_attributes(&s);
1575
1576    // 2. Strip pub fn, crate::, &self, &mut self.
1577    s = s.replace("pub fn ", "");
1578    s = s.replace("crate::", "");
1579    s = s.replace("&mut self", "");
1580    s = s.replace("&self", "");
1581
1582    // 3. Strip lifetime and bound markers.
1583    s = strip_lifetime_and_bounds(&s);
1584
1585    // 4. Type substitutions (order matters — most specific first).
1586    s = replace_type_wrappers(&s, target);
1587
1588    // 5. Some(x) -> the value (x).
1589    s = replace_some_calls(&s);
1590
1591    // 5b. Bare "Some <lowercase>" in prose -> drop "Some ".
1592    s = replace_some_keyword_in_prose(&s);
1593
1594    // 6. None -> null / undefined (word boundary, uppercase only).
1595    s = replace_none_keyword(&s, target);
1596
1597    // Note: :: -> . and .unwrap()/.expect() stripping are applied to the full
1598    // line before tokenisation in apply_prose_transforms and therefore do not
1599    // need to be repeated here.
1600
1601    s
1602}
1603
/// Copy the character starting at byte offset `i` of `s` into `out` and
/// return the byte offset immediately after it.
///
/// The byte-crawling helpers in this file only test for ASCII bytes; whenever
/// nothing matches they must step over a whole character rather than a single
/// byte, otherwise a multi-byte UTF-8 sequence would be split.
///
/// # Panics
///
/// Panics if `i` is not a char boundary of `s`, or if `i == s.len()` (there
/// is no character left to copy).
#[inline]
fn advance_char(s: &str, out: &mut String, i: usize) -> usize {
    // Callers only reach this from positions found by scanning for ASCII
    // bytes, which always lie on char boundaries.
    let tail = &s[i..];
    let ch = tail.chars().next().expect("valid UTF-8 position");
    let next = i + ch.len_utf8();
    out.push(ch);
    next
}
1619
1620/// Replace `` [`Type::method()`] `` and `` [`Foo`] `` intra-doc links with
1621/// backtick-wrapped identifiers, converting `::` to `.`.
1622fn replace_intradoc_links(s: &str, _target: DocTarget) -> String {
1623    let mut out = String::with_capacity(s.len());
1624    let bytes = s.as_bytes();
1625    let mut i = 0;
1626    while i < bytes.len() {
1627        // Look for [`
1628        if i + 1 < bytes.len() && bytes[i] == b'[' && bytes[i + 1] == b'`' {
1629            // Find closing `]
1630            let search_start = i + 2;
1631            let mut found = false;
1632            let mut j = search_start;
1633            while j + 1 < bytes.len() {
1634                if bytes[j] == b'`' && bytes[j + 1] == b']' {
1635                    let inner = &s[search_start..j];
1636                    // Convert :: to . in the inner part.
1637                    let converted = inner.replace("::", ".");
1638                    out.push('`');
1639                    out.push_str(&converted);
1640                    out.push('`');
1641                    i = j + 2;
1642                    found = true;
1643                    break;
1644                }
1645                j += 1;
1646            }
1647            if !found {
1648                i = advance_char(s, &mut out, i);
1649            }
1650        } else {
1651            i = advance_char(s, &mut out, i);
1652        }
1653    }
1654    out
1655}
1656
1657/// Strip inline `#[...]` attribute references (not on their own line — those
1658/// are handled as full-line drops in the main loop).
1659fn strip_inline_attributes(s: &str) -> String {
1660    let mut out = String::with_capacity(s.len());
1661    let bytes = s.as_bytes();
1662    let mut i = 0;
1663    while i < bytes.len() {
1664        if bytes[i] == b'#' && i + 1 < bytes.len() && bytes[i + 1] == b'[' {
1665            // Skip until matching ']', handling nesting.
1666            let mut depth = 0usize;
1667            let mut j = i + 1;
1668            while j < bytes.len() {
1669                if bytes[j] == b'[' {
1670                    depth += 1;
1671                } else if bytes[j] == b']' {
1672                    depth -= 1;
1673                    if depth == 0 {
1674                        i = j + 1;
1675                        break;
1676                    }
1677                }
1678                j += 1;
1679            }
1680            if depth != 0 {
1681                // Unmatched bracket: emit literally.
1682                i = advance_char(s, &mut out, i);
1683            }
1684        } else {
1685            i = advance_char(s, &mut out, i);
1686        }
1687    }
1688    out
1689}
1690
1691/// Strip `'static`, `Send + Sync`, `Send`, `Sync` from prose text.
1692fn strip_lifetime_and_bounds(s: &str) -> String {
1693    // Order matters: match compound forms before simple forms.
1694    let mut out = s.to_string();
1695    // Strip `Send + Sync` (with optional spaces around `+`).
1696    out = regex_replace_all(&out, r"Send\s*\+\s*Sync", "");
1697    out = regex_replace_all(&out, r"Sync\s*\+\s*Send", "");
1698    // Strip standalone Send/Sync only at word boundaries.
1699    out = regex_replace_word_boundary(&out, "Send", "");
1700    out = regex_replace_word_boundary(&out, "Sync", "");
1701    // Strip 'static lifetime markers.
1702    out = regex_replace_all(&out, r"'\s*static\b", "");
1703    out
1704}
1705
1706/// Replace occurrences of `pattern` (treated as a simple substring pattern
1707/// with `\s*` only, no full regex) with `replacement` in `s`.
1708///
1709/// This is a lightweight regex-free replacement for simple patterns that
1710/// only need literal text or `\s*` between tokens.
1711fn regex_replace_all(s: &str, pattern: &str, replacement: &str) -> String {
1712    // Inline tiny pattern compiler for the three patterns we actually use.
1713    match pattern {
1714        r"Send\s*\+\s*Sync" => replace_with_optional_spaces(s, "Send", "+", "Sync", replacement),
1715        r"Sync\s*\+\s*Send" => replace_with_optional_spaces(s, "Sync", "+", "Send", replacement),
1716        r"'\s*static\b" => replace_static_lifetime(s, replacement),
1717        _ => s.replace(pattern, replacement),
1718    }
1719}
1720
1721/// Replace `word_boundary(keyword)` occurrences in `s` with `replacement`.
1722fn regex_replace_word_boundary(s: &str, keyword: &str, replacement: &str) -> String {
1723    let mut out = String::with_capacity(s.len());
1724    let klen = keyword.len();
1725    let bytes = s.as_bytes();
1726    let kbytes = keyword.as_bytes();
1727    if klen == 0 || klen > bytes.len() {
1728        return s.to_string();
1729    }
1730    let mut i = 0;
1731    while i + klen <= bytes.len() {
1732        if &bytes[i..i + klen] == kbytes {
1733            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1734            let after_ok =
1735                i + klen >= bytes.len() || !bytes[i + klen].is_ascii_alphanumeric() && bytes[i + klen] != b'_';
1736            if before_ok && after_ok {
1737                out.push_str(replacement);
1738                i += klen;
1739                continue;
1740            }
1741        }
1742        i = advance_char(s, &mut out, i);
1743    }
1744    if i < bytes.len() {
1745        out.push_str(&s[i..]);
1746    }
1747    out
1748}
1749
/// Replace every `a <spaces> op <spaces> b` triplet in `s` with
/// `replacement`.
///
/// "Spaces" means zero or more ASCII space characters (`' '`); tabs and other
/// whitespace do not count. Text that does not form a complete triplet is
/// copied through unchanged.
///
/// The scan works directly on string slices, so it allocates nothing per
/// input character. A candidate match that would consume no input (possible
/// only when `a`, `op` and `b` are all empty) is ignored so the scan always
/// makes progress.
fn replace_with_optional_spaces(s: &str, a: &str, op: &str, b: &str, replacement: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(ch) = rest.chars().next() {
        // Try to peel `a`, optional spaces, `op`, optional spaces, `b` off the front.
        let after_triplet = rest
            .strip_prefix(a)
            .and_then(|tail| tail.trim_start_matches(' ').strip_prefix(op))
            .and_then(|tail| tail.trim_start_matches(' ').strip_prefix(b))
            // Reject zero-length matches so the loop cannot spin in place.
            .filter(|tail| tail.len() < rest.len());
        match after_triplet {
            Some(tail) => {
                out.push_str(replacement);
                rest = tail;
            }
            None => {
                out.push(ch);
                rest = &rest[ch.len_utf8()..];
            }
        }
    }
    out
}
1789
1790/// Replace `'static` lifetime markers (with optional spaces after `'`).
1791fn replace_static_lifetime(s: &str, replacement: &str) -> String {
1792    let mut out = String::with_capacity(s.len());
1793    let bytes = s.as_bytes();
1794    let mut i = 0;
1795    while i < bytes.len() {
1796        if bytes[i] == b'\'' {
1797            // Peek ahead skipping spaces.
1798            let mut j = i + 1;
1799            while j < bytes.len() && bytes[j] == b' ' {
1800                j += 1;
1801            }
1802            let keyword = b"static";
1803            if bytes[j..].starts_with(keyword) {
1804                let end = j + keyword.len();
1805                // Must be followed by non-identifier char or end.
1806                let after_ok = end >= bytes.len() || !bytes[end].is_ascii_alphanumeric() && bytes[end] != b'_';
1807                if after_ok {
1808                    out.push_str(replacement);
1809                    i = end;
1810                    continue;
1811                }
1812            }
1813        }
1814        i = advance_char(s, &mut out, i);
1815    }
1816    out
1817}
1818
1819/// Replace Rust generic type wrappers in prose.
1820fn replace_type_wrappers(s: &str, target: DocTarget) -> String {
1821    // Order: most specific patterns first.
1822    let mut out = s.to_string();
1823
1824    // Vec<u8> — must come before Vec<T>.
1825    let vec_u8_replacement = match target {
1826        DocTarget::PhpDoc => "string",
1827        DocTarget::JavaDoc => "byte[]",
1828        DocTarget::TsDoc | DocTarget::JsDoc => "Uint8Array",
1829        DocTarget::CSharpDoc => "byte[]",
1830    };
1831    out = replace_generic1(&out, "Vec", "u8", vec_u8_replacement);
1832
1833    // HashMap<K, V> — must come before Vec<T> to avoid order-dependency issues.
1834    let map_replacement_fn = |k: &str, v: &str| match target {
1835        DocTarget::PhpDoc => format!("array<{k}, {v}>"),
1836        DocTarget::JavaDoc => format!("Map<{k}, {v}>"),
1837        DocTarget::TsDoc | DocTarget::JsDoc => format!("Record<{k}, {v}>"),
1838        DocTarget::CSharpDoc => format!("Dictionary<{k}, {v}>"),
1839    };
1840    out = replace_generic2(&out, "HashMap", &map_replacement_fn);
1841
1842    // Vec<T> — generic.
1843    out = replace_generic1_passthrough(&out, "Vec", |inner| format!("{inner}[]"));
1844
1845    // Option<T>.
1846    let option_replacement_fn = |inner: &str| match target {
1847        DocTarget::PhpDoc => format!("{inner}?"),
1848        DocTarget::JavaDoc => format!("{inner} | null"),
1849        DocTarget::TsDoc | DocTarget::JsDoc => format!("{inner} | undefined"),
1850        DocTarget::CSharpDoc => format!("{inner}?"),
1851    };
1852    out = replace_generic1_passthrough(&out, "Option", option_replacement_fn);
1853
1854    // Result<T, E> — drop the error type, keep the success type.
1855    // C# has no Result type; the binding throws exceptions, so just the success type
1856    // is meaningful in prose. We do this for C# only; other targets historically left
1857    // `Result<T, E>` unchanged (their tests assert nothing about it).
1858    if matches!(target, DocTarget::CSharpDoc) {
1859        out = replace_generic2(&out, "Result", &|t: &str, _e: &str| t.to_string());
1860    }
1861
1862    // Smart pointer wrappers: strip to inner type.
1863    for wrapper in &["Arc", "Box", "Mutex", "RwLock", "Rc", "Cell", "RefCell"] {
1864        out = replace_generic1_passthrough(&out, wrapper, |inner| inner.to_string());
1865    }
1866
1867    out
1868}
1869
/// Replace the exact text `name<arg>` (for example `Vec<u8>`) with
/// `replacement` wherever it occurs in `s`.
///
/// This is a plain substring replacement; no word-boundary check is applied.
fn replace_generic1(s: &str, name: &str, arg: &str, replacement: &str) -> String {
    let needle = [name, "<", arg, ">"].concat();
    s.replace(needle.as_str(), replacement)
}
1875
1876/// Replace `Name<T>` → `f(T)` for an arbitrary inner type expression.
1877///
1878/// Handles nested generics by counting angle-bracket depth.
1879fn replace_generic1_passthrough<F>(s: &str, name: &str, f: F) -> String
1880where
1881    F: Fn(&str) -> String,
1882{
1883    let mut out = String::with_capacity(s.len());
1884    let mut i = 0;
1885    let prefix = format!("{name}<");
1886    let pbytes = prefix.as_bytes();
1887    let bytes = s.as_bytes();
1888
1889    while i < bytes.len() {
1890        if bytes[i..].starts_with(pbytes) {
1891            // Check that the char before is not alphanumeric (word boundary).
1892            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1893            if before_ok {
1894                let inner_start = i + pbytes.len();
1895                // Find the matching '>'.
1896                let mut depth = 1usize;
1897                let mut j = inner_start;
1898                while j < bytes.len() {
1899                    match bytes[j] {
1900                        b'<' => depth += 1,
1901                        b'>' => {
1902                            depth -= 1;
1903                            if depth == 0 {
1904                                break;
1905                            }
1906                        }
1907                        _ => {}
1908                    }
1909                    j += 1;
1910                }
1911                if depth == 0 && j < bytes.len() {
1912                    let inner = &s[inner_start..j];
1913                    out.push_str(&f(inner));
1914                    i = j + 1;
1915                    continue;
1916                }
1917            }
1918        }
1919        i = advance_char(s, &mut out, i);
1920    }
1921    out
1922}
1923
1924/// Replace `Name<K, V>` → `f(K, V)` for two-argument generics (e.g. `HashMap`).
1925fn replace_generic2<F>(s: &str, name: &str, f: &F) -> String
1926where
1927    F: Fn(&str, &str) -> String,
1928{
1929    let mut out = String::with_capacity(s.len());
1930    let mut i = 0;
1931    let prefix = format!("{name}<");
1932    let pbytes = prefix.as_bytes();
1933    let bytes = s.as_bytes();
1934
1935    while i < bytes.len() {
1936        if bytes[i..].starts_with(pbytes) {
1937            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1938            if before_ok {
1939                let inner_start = i + pbytes.len();
1940                // Find the matching '>' respecting nesting.
1941                let mut depth = 1usize;
1942                let mut j = inner_start;
1943                while j < bytes.len() {
1944                    match bytes[j] {
1945                        b'<' => depth += 1,
1946                        b'>' => {
1947                            depth -= 1;
1948                            if depth == 0 {
1949                                break;
1950                            }
1951                        }
1952                        _ => {}
1953                    }
1954                    j += 1;
1955                }
1956                if depth == 0 && j < bytes.len() {
1957                    let inner = &s[inner_start..j];
1958                    // Split on the first ',' at depth 0.
1959                    let split = split_on_comma_at_top_level(inner);
1960                    if let Some((k, v)) = split {
1961                        out.push_str(&f(k.trim(), v.trim()));
1962                        i = j + 1;
1963                        continue;
1964                    }
1965                }
1966            }
1967        }
1968        i = advance_char(s, &mut out, i);
1969    }
1970    out
1971}
1972
/// Split `s` at its first comma that lies at angle-bracket depth 0.
///
/// Returns the text before and after that comma (the comma itself is
/// dropped, nothing is trimmed), or `None` when no such comma exists.
fn split_on_comma_at_top_level(s: &str) -> Option<(&str, &str)> {
    let mut nesting = 0i32;
    // `<`, `>` and `,` are ASCII, so scanning bytes finds the same offsets as
    // scanning characters.
    let comma = s.bytes().position(|b| {
        match b {
            b'<' => nesting += 1,
            b'>' => nesting -= 1,
            _ => {}
        }
        b == b',' && nesting == 0
    })?;
    Some((&s[..comma], &s[comma + 1..]))
}
1986
1987/// Replace `Some(x)` in prose with `the value (x)`.
1988fn replace_some_calls(s: &str) -> String {
1989    let mut out = String::with_capacity(s.len());
1990    let bytes = s.as_bytes();
1991    let prefix = b"Some(";
1992    let mut i = 0;
1993
1994    while i < bytes.len() {
1995        if bytes[i..].starts_with(prefix) {
1996            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1997            if before_ok {
1998                let arg_start = i + prefix.len();
1999                // Find matching ')' respecting nesting.
2000                let mut depth = 1usize;
2001                let mut j = arg_start;
2002                while j < bytes.len() {
2003                    match bytes[j] {
2004                        b'(' => depth += 1,
2005                        b')' => {
2006                            depth -= 1;
2007                            if depth == 0 {
2008                                break;
2009                            }
2010                        }
2011                        _ => {}
2012                    }
2013                    j += 1;
2014                }
2015                if depth == 0 && j < bytes.len() {
2016                    let arg = &s[arg_start..j];
2017                    out.push_str("the value (");
2018                    out.push_str(arg);
2019                    out.push(')');
2020                    i = j + 1;
2021                    continue;
2022                }
2023            }
2024        }
2025        i = advance_char(s, &mut out, i);
2026    }
2027    out
2028}
2029
2030/// Drop bare `Some ` when it appears as a Rust-idiom modifier in prose
2031/// ("(Some values)", "Some keys leave the previous", etc.). The `Some(...)`
2032/// call form is handled separately by [`replace_some_calls`].
2033///
2034/// Match shape: word-boundary `Some` + single ASCII space + ASCII-lowercase
2035/// letter. The "Some " prefix is dropped; the following word is preserved.
2036/// `SomeType`, `Some.method()`, `Some(x)`, and sentence-initial `Some `
2037/// followed by an uppercase noun stay untouched.
2038fn replace_some_keyword_in_prose(s: &str) -> String {
2039    let keyword = b"Some ";
2040    let klen = keyword.len();
2041    let bytes = s.as_bytes();
2042    if klen >= bytes.len() {
2043        return s.to_string();
2044    }
2045    let mut out = String::with_capacity(s.len());
2046    let mut i = 0;
2047    while i + klen < bytes.len() {
2048        if &bytes[i..i + klen] == keyword {
2049            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
2050            let after_ok = bytes[i + klen].is_ascii_lowercase();
2051            if before_ok && after_ok {
2052                i += klen;
2053                continue;
2054            }
2055        }
2056        i = advance_char(s, &mut out, i);
2057    }
2058    if i < bytes.len() {
2059        out.push_str(&s[i..]);
2060    }
2061    out
2062}
2063
2064/// Replace `None` (at word boundaries, uppercase) with the target-appropriate nil.
2065fn replace_none_keyword(s: &str, target: DocTarget) -> String {
2066    let replacement = match target {
2067        DocTarget::PhpDoc | DocTarget::JavaDoc | DocTarget::CSharpDoc => "null",
2068        DocTarget::TsDoc | DocTarget::JsDoc => "undefined",
2069    };
2070    let keyword = b"None";
2071    let klen = keyword.len();
2072    let mut out = String::with_capacity(s.len());
2073    let bytes = s.as_bytes();
2074    if klen > bytes.len() {
2075        return s.to_string();
2076    }
2077    let mut i = 0;
2078
2079    while i + klen <= bytes.len() {
2080        if &bytes[i..i + klen] == keyword {
2081            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
2082            let after_ok =
2083                i + klen >= bytes.len() || !bytes[i + klen].is_ascii_alphanumeric() && bytes[i + klen] != b'_';
2084            if before_ok && after_ok {
2085                out.push_str(replacement);
2086                i += klen;
2087                continue;
2088            }
2089        }
2090        i = advance_char(s, &mut out, i);
2091    }
2092    if i < bytes.len() {
2093        out.push_str(&s[i..]);
2094    }
2095    out
2096}
2097
2098/// Replace standalone `::` between identifiers with `.`.
2099fn replace_path_separator(s: &str) -> String {
2100    let mut out = String::with_capacity(s.len());
2101    let bytes = s.as_bytes();
2102    let mut i = 0;
2103
2104    while i < bytes.len() {
2105        if i + 1 < bytes.len() && bytes[i] == b':' && bytes[i + 1] == b':' {
2106            // Only replace if surrounded by identifier characters or end/start of string.
2107            let before_ok = i > 0 && (bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_');
2108            let after_ok = i + 2 < bytes.len() && (bytes[i + 2].is_ascii_alphanumeric() || bytes[i + 2] == b'_');
2109            if before_ok || after_ok {
2110                out.push('.');
2111                i += 2;
2112                continue;
2113            }
2114        }
2115        i = advance_char(s, &mut out, i);
2116    }
2117    out
2118}
2119
2120/// Strip `.unwrap()` and `.expect("...")` calls from prose.
2121fn strip_unwrap_expect(s: &str) -> String {
2122    let mut out = String::with_capacity(s.len());
2123    let bytes = s.as_bytes();
2124    let mut i = 0;
2125
2126    while i < bytes.len() {
2127        // Match .unwrap().
2128        if bytes[i..].starts_with(b".unwrap()") {
2129            i += b".unwrap()".len();
2130            continue;
2131        }
2132        // Match .expect(...).
2133        if bytes[i..].starts_with(b".expect(") {
2134            let arg_start = i + b".expect(".len();
2135            let mut depth = 1usize;
2136            let mut j = arg_start;
2137            while j < bytes.len() {
2138                match bytes[j] {
2139                    b'(' => depth += 1,
2140                    b')' => {
2141                        depth -= 1;
2142                        if depth == 0 {
2143                            break;
2144                        }
2145                    }
2146                    _ => {}
2147                }
2148                j += 1;
2149            }
2150            if depth == 0 {
2151                i = j + 1;
2152                continue;
2153            }
2154        }
2155        i = advance_char(s, &mut out, i);
2156    }
2157    out
2158}
2159
2160#[cfg(test)]
2161mod tests {
2162    use super::*;
2163
2164    #[test]
2165    fn test_emit_phpdoc() {
2166        let mut out = String::new();
2167        emit_phpdoc(&mut out, "Simple documentation", "    ", "TestException");
2168        assert!(out.contains("/**"));
2169        assert!(out.contains("Simple documentation"));
2170        assert!(out.contains("*/"));
2171    }
2172
2173    #[test]
2174    fn test_phpdoc_escaping() {
2175        let mut out = String::new();
2176        emit_phpdoc(&mut out, "Handle */ sequences", "", "TestException");
2177        assert!(out.contains("Handle * / sequences"));
2178    }
2179
2180    #[test]
2181    fn test_emit_csharp_doc() {
2182        let mut out = String::new();
2183        emit_csharp_doc(&mut out, "C# documentation", "    ", "TestException");
2184        assert!(out.contains("<summary>"));
2185        assert!(out.contains("C# documentation"));
2186        assert!(out.contains("</summary>"));
2187    }
2188
2189    #[test]
2190    fn test_csharp_xml_escaping() {
2191        let mut out = String::new();
2192        emit_csharp_doc(&mut out, "foo < bar & baz > qux", "", "TestException");
2193        assert!(out.contains("foo &lt; bar &amp; baz &gt; qux"));
2194    }
2195
2196    #[test]
2197    fn test_emit_elixir_doc() {
2198        let mut out = String::new();
2199        emit_elixir_doc(&mut out, "Elixir documentation");
2200        assert!(out.contains("@doc \"\"\""));
2201        assert!(out.contains("Elixir documentation"));
2202        assert!(out.contains("\"\"\""));
2203    }
2204
2205    #[test]
2206    fn test_elixir_heredoc_escaping() {
2207        let mut out = String::new();
2208        emit_elixir_doc(&mut out, "Handle \"\"\" sequences");
2209        assert!(out.contains("Handle \"\" \" sequences"));
2210    }
2211
2212    #[test]
2213    fn test_emit_roxygen() {
2214        let mut out = String::new();
2215        emit_roxygen(&mut out, "R documentation");
2216        assert!(out.contains("#' R documentation"));
2217    }
2218
2219    #[test]
2220    fn test_emit_swift_doc() {
2221        let mut out = String::new();
2222        emit_swift_doc(&mut out, "Swift documentation", "    ");
2223        assert!(out.contains("/// Swift documentation"));
2224    }
2225
2226    #[test]
2227    fn test_emit_javadoc() {
2228        let mut out = String::new();
2229        emit_javadoc(&mut out, "Java documentation", "    ");
2230        assert!(out.contains("/**"));
2231        assert!(out.contains("Java documentation"));
2232        assert!(out.contains("*/"));
2233    }
2234
2235    #[test]
2236    fn test_emit_kdoc() {
2237        let mut out = String::new();
2238        emit_kdoc(&mut out, "Kotlin documentation", "    ");
2239        assert!(out.contains("/**"));
2240        assert!(out.contains("Kotlin documentation"));
2241        assert!(out.contains("*/"));
2242    }
2243
2244    #[test]
2245    fn test_emit_dartdoc() {
2246        let mut out = String::new();
2247        emit_dartdoc(&mut out, "Dart documentation", "    ");
2248        assert!(out.contains("/// Dart documentation"));
2249    }
2250
2251    #[test]
2252    fn test_emit_gleam_doc() {
2253        let mut out = String::new();
2254        emit_gleam_doc(&mut out, "Gleam documentation", "    ");
2255        assert!(out.contains("/// Gleam documentation"));
2256    }
2257
2258    #[test]
2259    fn test_emit_zig_doc() {
2260        let mut out = String::new();
2261        emit_zig_doc(&mut out, "Zig documentation", "    ");
2262        assert!(out.contains("/// Zig documentation"));
2263    }
2264
2265    #[test]
2266    fn test_empty_doc_skipped() {
2267        let mut out = String::new();
2268        emit_phpdoc(&mut out, "", "", "TestException");
2269        emit_csharp_doc(&mut out, "", "", "TestException");
2270        emit_elixir_doc(&mut out, "");
2271        emit_roxygen(&mut out, "");
2272        emit_kdoc(&mut out, "", "");
2273        emit_dartdoc(&mut out, "", "");
2274        emit_gleam_doc(&mut out, "", "");
2275        emit_zig_doc(&mut out, "", "");
2276        assert!(out.is_empty());
2277    }
2278
2279    #[test]
2280    fn test_doc_first_paragraph_joined_single_line() {
2281        assert_eq!(doc_first_paragraph_joined("Simple doc."), "Simple doc.");
2282    }
2283
2284    #[test]
2285    fn test_doc_first_paragraph_joined_wrapped_sentence() {
2286        // Simulates a docstring like convert's: "Convert HTML to Markdown,\nreturning a result."
2287        let doc = "Convert HTML to Markdown,\nreturning a result.";
2288        assert_eq!(
2289            doc_first_paragraph_joined(doc),
2290            "Convert HTML to Markdown, returning a result."
2291        );
2292    }
2293
2294    #[test]
2295    fn test_doc_first_paragraph_joined_stops_at_blank_line() {
2296        let doc = "First paragraph.\nStill first.\n\nSecond paragraph.";
2297        assert_eq!(doc_first_paragraph_joined(doc), "First paragraph. Still first.");
2298    }
2299
2300    #[test]
2301    fn test_doc_first_paragraph_joined_empty() {
2302        assert_eq!(doc_first_paragraph_joined(""), "");
2303    }
2304
2305    #[test]
2306    fn test_parse_rustdoc_sections_basic() {
2307        let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n\n# Returns\n\nThe extracted text.\n\n# Errors\n\nReturns `KreuzbergError` on failure.";
2308        let sections = parse_rustdoc_sections(doc);
2309        assert_eq!(sections.summary, "Extracts text from a file.");
2310        assert_eq!(sections.arguments.as_deref(), Some("* `path` - The file path."));
2311        assert_eq!(sections.returns.as_deref(), Some("The extracted text."));
2312        assert_eq!(sections.errors.as_deref(), Some("Returns `KreuzbergError` on failure."));
2313        assert!(sections.panics.is_none());
2314    }
2315
2316    #[test]
2317    fn test_parse_rustdoc_sections_example_with_fence() {
2318        let doc = "Run the thing.\n\n# Example\n\n```rust\nlet x = run();\n```";
2319        let sections = parse_rustdoc_sections(doc);
2320        assert_eq!(sections.summary, "Run the thing.");
2321        assert!(sections.example.as_ref().unwrap().contains("```rust"));
2322        assert!(sections.example.as_ref().unwrap().contains("let x = run();"));
2323    }
2324
2325    #[test]
2326    fn test_parse_rustdoc_sections_pound_inside_fence_is_not_a_heading() {
2327        // Even though we get rustdoc-hidden lines pre-stripped, a literal
2328        // `# foo` inside a non-rust fence (e.g. shell example) must not
2329        // start a new section.
2330        let doc = "Summary.\n\n# Example\n\n```bash\n# install deps\nrun --foo\n```";
2331        let sections = parse_rustdoc_sections(doc);
2332        assert_eq!(sections.summary, "Summary.");
2333        assert!(sections.example.as_ref().unwrap().contains("# install deps"));
2334    }
2335
2336    #[test]
2337    fn test_parse_arguments_bullets_dash_separator() {
2338        let body = "* `path` - The file path.\n* `config` - Optional configuration.";
2339        let pairs = parse_arguments_bullets(body);
2340        assert_eq!(pairs.len(), 2);
2341        assert_eq!(pairs[0], ("path".to_string(), "The file path.".to_string()));
2342        assert_eq!(pairs[1], ("config".to_string(), "Optional configuration.".to_string()));
2343    }
2344
2345    #[test]
2346    fn test_parse_arguments_bullets_continuation_line() {
2347        let body = "* `path` - The file path,\n  resolved relative to cwd.\n* `mode` - Open mode.";
2348        let pairs = parse_arguments_bullets(body);
2349        assert_eq!(pairs.len(), 2);
2350        assert_eq!(pairs[0].1, "The file path, resolved relative to cwd.");
2351    }
2352
2353    #[test]
2354    fn test_replace_fence_lang_rust_to_typescript() {
2355        let body = "```rust\nlet x = run();\n```";
2356        let out = replace_fence_lang(body, "typescript");
2357        assert!(out.starts_with("```typescript"));
2358        assert!(out.contains("let x = run();"));
2359    }
2360
2361    #[test]
2362    fn test_replace_fence_lang_preserves_attrs() {
2363        let body = "```rust,no_run\nlet x = run();\n```";
2364        let out = replace_fence_lang(body, "typescript");
2365        assert!(out.starts_with("```typescript,no_run"));
2366    }
2367
2368    #[test]
2369    fn test_replace_fence_lang_no_fence_unchanged() {
2370        let body = "Plain prose with `inline code`.";
2371        let out = replace_fence_lang(body, "typescript");
2372        assert_eq!(out, "Plain prose with `inline code`.");
2373    }
2374
2375    fn fixture_sections() -> RustdocSections {
2376        let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n* `config` - Optional configuration.\n\n# Returns\n\nThe extracted text and metadata.\n\n# Errors\n\nReturns an error when the file is unreadable.\n\n# Example\n\n```rust\nlet result = extract(\"file.pdf\")?;\n```";
2377        parse_rustdoc_sections(doc)
2378    }
2379
2380    #[test]
2381    fn test_render_jsdoc_sections() {
2382        let sections = fixture_sections();
2383        let out = render_jsdoc_sections(&sections);
2384        assert!(out.starts_with("Extracts text from a file."));
2385        assert!(out.contains("@param path - The file path."));
2386        assert!(out.contains("@param config - Optional configuration."));
2387        assert!(out.contains("@returns The extracted text and metadata."));
2388        assert!(out.contains("@throws Returns an error when the file is unreadable."));
2389        // fixture example is ```rust — stripped when target is TypeScript
2390        assert!(!out.contains("@example"), "Rust example must not appear in TSDoc");
2391        assert!(!out.contains("```typescript"));
2392        assert!(!out.contains("```rust"));
2393    }
2394
2395    #[test]
2396    fn test_render_jsdoc_sections_preserves_typescript_example() {
2397        let doc = "Do something.\n\n# Example\n\n```typescript\nconst x = doSomething();\n```";
2398        let sections = parse_rustdoc_sections(doc);
2399        let out = render_jsdoc_sections(&sections);
2400        assert!(out.contains("@example"), "TypeScript example must be preserved");
2401        assert!(out.contains("```typescript"));
2402    }
2403
2404    #[test]
2405    fn test_render_javadoc_sections() {
2406        let sections = fixture_sections();
2407        let out = render_javadoc_sections(&sections, "KreuzbergRsException");
2408        assert!(out.contains("@param path The file path."));
2409        assert!(out.contains("@return The extracted text and metadata."));
2410        assert!(out.contains("@throws KreuzbergRsException Returns an error when the file is unreadable."));
2411        // Java rendering omits the example block (handled separately by emit_javadoc which
2412        // wraps code in `<pre>{@code}</pre>`); we just confirm summary survives.
2413        assert!(out.starts_with("Extracts text from a file."));
2414    }
2415
2416    #[test]
2417    fn test_render_csharp_xml_sections() {
2418        let sections = fixture_sections();
2419        let out = render_csharp_xml_sections(&sections, "KreuzbergException");
2420        assert!(out.contains("<summary>\nExtracts text from a file.\n</summary>"));
2421        assert!(out.contains("<param name=\"path\">The file path.</param>"));
2422        assert!(out.contains("<returns>The extracted text and metadata.</returns>"));
2423        assert!(out.contains("<exception cref=\"KreuzbergException\">"));
2424        assert!(out.contains("<example><code language=\"csharp\">"));
2425        assert!(out.contains("let result = extract"));
2426    }
2427
2428    #[test]
2429    fn test_render_phpdoc_sections() {
2430        let sections = fixture_sections();
2431        let out = render_phpdoc_sections(&sections, "KreuzbergException");
2432        assert!(out.contains("@param mixed $path The file path."));
2433        assert!(out.contains("@return The extracted text and metadata."));
2434        assert!(out.contains("@throws KreuzbergException"));
2435        // fixture example is ```rust — stripped when target is PHP
2436        assert!(!out.contains("```php"), "Rust example must not appear in PHPDoc");
2437        assert!(!out.contains("```rust"));
2438    }
2439
2440    #[test]
2441    fn test_render_phpdoc_sections_preserves_php_example() {
2442        let doc = "Do something.\n\n# Example\n\n```php\n$x = doSomething();\n```";
2443        let sections = parse_rustdoc_sections(doc);
2444        let out = render_phpdoc_sections(&sections, "MyException");
2445        assert!(out.contains("```php"), "PHP example must be preserved");
2446    }
2447
2448    #[test]
2449    fn test_render_doxygen_sections() {
2450        let sections = fixture_sections();
2451        let out = render_doxygen_sections(&sections);
2452        assert!(out.contains("\\param path The file path."));
2453        assert!(out.contains("\\return The extracted text and metadata."));
2454        assert!(out.contains("\\code"));
2455        assert!(out.contains("\\endcode"));
2456    }
2457
2458    #[test]
2459    fn test_emit_yard_doc_simple() {
2460        let mut out = String::new();
2461        emit_yard_doc(&mut out, "Simple Ruby documentation", "    ");
2462        assert!(out.contains("# Simple Ruby documentation"));
2463    }
2464
2465    #[test]
2466    fn test_emit_yard_doc_empty() {
2467        let mut out = String::new();
2468        emit_yard_doc(&mut out, "", "    ");
2469        assert!(out.is_empty());
2470    }
2471
2472    #[test]
2473    fn test_emit_yard_doc_with_sections() {
2474        let mut out = String::new();
2475        let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n\n# Returns\n\nThe extracted text.\n\n# Errors\n\nReturns error on failure.";
2476        emit_yard_doc(&mut out, doc, "  ");
2477        assert!(out.contains("# Extracts text from a file."));
2478        assert!(out.contains("# @param path The file path."));
2479        assert!(out.contains("# @return The extracted text."));
2480        assert!(out.contains("# @raise Returns error on failure."));
2481    }
2482
2483    #[test]
2484    fn test_emit_c_doxygen_simple_prose() {
2485        let mut out = String::new();
2486        emit_c_doxygen(&mut out, "Free a string.", "");
2487        assert!(out.contains("/// Free a string."), "got: {out}");
2488    }
2489
2490    #[test]
2491    fn test_emit_c_doxygen_with_sections() {
2492        let mut out = String::new();
2493        let doc = "Extract content from a file.\n\n# Arguments\n\n* `path` - Path to the file.\n* `mode` - Read mode.\n\n# Returns\n\nA newly allocated string the caller owns.\n\n# Errors\n\nReturns null when the file is unreadable.";
2494        emit_c_doxygen(&mut out, doc, "");
2495        assert!(out.contains("/// Extract content from a file."));
2496        assert!(out.contains("/// \\param path Path to the file."));
2497        assert!(out.contains("/// \\param mode Read mode."));
2498        assert!(out.contains("/// \\return A newly allocated string the caller owns."));
2499        assert!(out.contains("/// \\note Returns null when the file is unreadable."));
2500    }
2501
2502    #[test]
2503    fn test_emit_c_doxygen_safety_section_maps_to_note() {
2504        let mut out = String::new();
2505        let doc = "Free a buffer.\n\n# Safety\n\nPointer must have been returned by this library.";
2506        emit_c_doxygen(&mut out, doc, "");
2507        assert!(out.contains("/// \\note SAFETY: Pointer must have been returned by this library."));
2508    }
2509
2510    #[test]
2511    fn test_emit_c_doxygen_example_renders_code_fence() {
2512        let mut out = String::new();
2513        let doc = "Demo.\n\n# Example\n\n```rust\nlet x = run();\n```";
2514        emit_c_doxygen(&mut out, doc, "");
2515        assert!(out.contains("/// \\code"));
2516        assert!(out.contains("/// \\endcode"));
2517        assert!(out.contains("let x = run();"));
2518    }
2519
2520    #[test]
2521    fn test_emit_c_doxygen_strips_markdown_links() {
2522        let mut out = String::new();
2523        let doc = "See [the docs](https://example.com/x) for details.";
2524        emit_c_doxygen(&mut out, doc, "");
2525        assert!(
2526            out.contains("the docs (https://example.com/x)"),
2527            "expected flattened link, got: {out}"
2528        );
2529        assert!(!out.contains("](https://"));
2530    }
2531
2532    #[test]
2533    fn test_emit_c_doxygen_word_wraps_long_lines() {
2534        let mut out = String::new();
2535        let long = "a ".repeat(80);
2536        emit_c_doxygen(&mut out, long.trim(), "");
2537        for line in out.lines() {
2538            // Each emitted prefix is "/// " (4 chars); the body after that
2539            // should be ≤ 100 chars per `DOXYGEN_WRAP_WIDTH`.
2540            let body = line.trim_start_matches("/// ");
2541            assert!(body.len() <= 100, "line too long ({}): {line}", body.len());
2542        }
2543    }
2544
2545    #[test]
2546    fn test_emit_c_doxygen_empty_input_is_noop() {
2547        let mut out = String::new();
2548        emit_c_doxygen(&mut out, "", "");
2549        emit_c_doxygen(&mut out, "   \n\t  ", "");
2550        assert!(out.is_empty());
2551    }
2552
2553    #[test]
2554    fn test_emit_c_doxygen_indent_applied() {
2555        let mut out = String::new();
2556        emit_c_doxygen(&mut out, "Hello.", "    ");
2557        assert!(out.starts_with("    /// Hello."));
2558    }
2559
2560    #[test]
2561    fn test_render_yard_sections() {
2562        let sections = fixture_sections();
2563        let out = render_yard_sections(&sections);
2564        assert!(out.contains("@param path The file path."));
2565        assert!(out.contains("@return The extracted text and metadata."));
2566        assert!(out.contains("@raise Returns an error when the file is unreadable."));
2567        // fixture example is ```rust — stripped when target is Ruby
2568        assert!(!out.contains("@example"), "Rust example must not appear in YARD");
2569        assert!(!out.contains("```ruby"));
2570        assert!(!out.contains("```rust"));
2571    }
2572
2573    #[test]
2574    fn test_render_yard_sections_preserves_ruby_example() {
2575        let doc = "Do something.\n\n# Example\n\n```ruby\nputs :hi\n```";
2576        let sections = parse_rustdoc_sections(doc);
2577        let out = render_yard_sections(&sections);
2578        assert!(out.contains("@example"), "Ruby example must be preserved");
2579        assert!(out.contains("```ruby"));
2580    }
2581
2582    // --- M1: example_for_target unit tests ---
2583
2584    #[test]
2585    fn example_for_target_rust_fenced_suppressed_for_php() {
2586        let example = "```rust\nlet x = 1;\n```";
2587        assert_eq!(
2588            example_for_target(example, "php"),
2589            None,
2590            "rust-fenced example must be omitted for PHP target"
2591        );
2592    }
2593
2594    #[test]
2595    fn example_for_target_bare_fence_defaults_to_rust_suppressed_for_ruby() {
2596        let example = "```\nlet x = 1;\n```";
2597        assert_eq!(
2598            example_for_target(example, "ruby"),
2599            None,
2600            "bare fence is treated as Rust and must be omitted for Ruby target"
2601        );
2602    }
2603
2604    #[test]
2605    fn example_for_target_php_example_preserved_for_php() {
2606        let example = "```php\n$x = 1;\n```";
2607        let result = example_for_target(example, "php");
2608        assert!(result.is_some(), "PHP example must be preserved for PHP target");
2609        assert!(result.unwrap().contains("```php"));
2610    }
2611
2612    #[test]
2613    fn example_for_target_ruby_example_preserved_for_ruby() {
2614        let example = "```ruby\nputs :hi\n```";
2615        let result = example_for_target(example, "ruby");
2616        assert!(result.is_some(), "Ruby example must be preserved for Ruby target");
2617        assert!(result.unwrap().contains("```ruby"));
2618    }
2619
2620    #[test]
2621    fn render_phpdoc_sections_with_rust_example_emits_no_at_example_block() {
2622        let doc = "Convert HTML.\n\n# Arguments\n\n* `html` - The HTML input.\n\n# Example\n\n```rust\nlet result = convert(html, None)?;\n```";
2623        let sections = parse_rustdoc_sections(doc);
2624        let out = render_phpdoc_sections(&sections, "HtmlToMarkdownException");
2625        assert!(!out.contains("```php"), "no PHP @example block for Rust source");
2626        assert!(!out.contains("```rust"), "raw Rust must not leak into PHPDoc");
2627        assert!(out.contains("@param"), "other sections must still be emitted");
2628    }
2629
2630    // --- KDoc ktfmt-canonical format tests ---
2631
2632    #[test]
2633    fn test_emit_kdoc_ktfmt_canonical_short_single_line() {
2634        let mut out = String::new();
2635        emit_kdoc_ktfmt_canonical(&mut out, "Simple doc.", "");
2636        assert_eq!(
2637            out, "/** Simple doc. */\n",
2638            "short single-line comment should collapse to canonical format"
2639        );
2640    }
2641
2642    #[test]
2643    fn test_emit_kdoc_ktfmt_canonical_short_with_indent() {
2644        let mut out = String::new();
2645        emit_kdoc_ktfmt_canonical(&mut out, "Text node (most frequent - 100+ per document)", "    ");
2646        assert_eq!(out, "    /** Text node (most frequent - 100+ per document) */\n");
2647    }
2648
2649    #[test]
2650    fn test_emit_kdoc_ktfmt_canonical_long_comment_uses_multiline() {
2651        let mut out = String::new();
2652        let long_text = "This is a very long documentation comment that exceeds the 100-character line width limit and should therefore be emitted in multi-line format";
2653        emit_kdoc_ktfmt_canonical(&mut out, long_text, "");
2654        assert!(out.contains("/**\n"), "long comment should start with newline");
2655        assert!(out.contains(" * "), "long comment should use multi-line format");
2656        assert!(out.contains(" */\n"), "long comment should end with newline");
2657    }
2658
2659    #[test]
2660    fn test_emit_kdoc_ktfmt_canonical_multiline_comment() {
2661        let mut out = String::new();
2662        let doc = "First line.\n\nSecond paragraph.";
2663        emit_kdoc_ktfmt_canonical(&mut out, doc, "");
2664        assert!(out.contains("/**\n"), "multi-paragraph should use multi-line format");
2665        assert!(out.contains(" * First line."), "first paragraph preserved");
2666        assert!(out.contains(" *\n"), "blank line preserved");
2667        assert!(out.contains(" * Second paragraph."), "second paragraph preserved");
2668    }
2669
2670    #[test]
2671    fn test_emit_kdoc_ktfmt_canonical_empty_doc() {
2672        let mut out = String::new();
2673        emit_kdoc_ktfmt_canonical(&mut out, "", "");
2674        assert!(out.is_empty(), "empty doc should produce no output");
2675    }
2676
2677    #[test]
2678    fn test_emit_kdoc_ktfmt_canonical_fits_within_100_chars() {
2679        let mut out = String::new();
2680        // Construct exactly at the boundary: indent(0) + "/** " + content + " */" = 100 chars
2681        // "/** " = 4 chars, " */" = 3 chars, so content can be 93 chars
2682        let content = "a".repeat(93);
2683        emit_kdoc_ktfmt_canonical(&mut out, &content, "");
2684        let line = out.lines().next().unwrap();
2685        assert_eq!(
2686            line.len(),
2687            100,
2688            "should fit exactly at 100 chars and use single-line format"
2689        );
2690        assert!(out.starts_with("/**"), "should use single-line format");
2691    }
2692
2693    #[test]
2694    fn test_emit_kdoc_ktfmt_canonical_exceeds_100_chars() {
2695        let mut out = String::new();
2696        // Exceed 100 chars: content of 94 chars with "/** " + " */" = 101 chars
2697        let content = "a".repeat(94);
2698        emit_kdoc_ktfmt_canonical(&mut out, &content, "");
2699        assert!(
2700            out.contains("/**\n"),
2701            "should use multi-line format when exceeding 100 chars"
2702        );
2703        assert!(out.contains(" * "), "multi-line format with ` * ` prefix");
2704    }
2705
2706    #[test]
2707    fn test_emit_kdoc_ktfmt_canonical_respects_indent() {
2708        let mut out = String::new();
2709        // With 4-char indent, max content is 89 chars (4 + 4 + 89 + 3 = 100)
2710        let content = "a".repeat(89);
2711        emit_kdoc_ktfmt_canonical(&mut out, &content, "    ");
2712        let line = out.lines().next().unwrap();
2713        assert_eq!(line.len(), 100, "should respect indent in 100-char calculation");
2714        assert!(line.starts_with("    /** "), "should include indent");
2715    }
2716
2717    #[test]
2718    fn test_emit_kdoc_ktfmt_canonical_real_world_enum_variant() {
2719        let mut out = String::new();
2720        emit_kdoc_ktfmt_canonical(&mut out, "Text node (most frequent - 100+ per document)", "    ");
2721        // This is from NodeType enum; should collapse to single-line
2722        assert!(out.starts_with("    /** "), "should preserve 4-space indent");
2723        assert!(out.contains(" */\n"), "should end with newline");
2724        // Verify it's single-line format
2725        let line_count = out.lines().count();
2726        assert_eq!(line_count, 1, "should be single-line format");
2727    }
2728
2729    #[test]
2730    fn test_emit_kdoc_ktfmt_canonical_real_world_data_class_field() {
2731        let mut out = String::new();
2732        let doc = "Heading style to use in Markdown output (ATX `#` or Setext underline).";
2733        emit_kdoc_ktfmt_canonical(&mut out, doc, "    ");
2734        // This is from ConversionOptions data class; should collapse to single-line
2735        let line_count = out.lines().count();
2736        assert_eq!(line_count, 1, "should be single-line format");
2737        assert!(out.starts_with("    /** "), "should have correct indent");
2738    }
2739
2740    // --- sanitize_rust_idioms tests ---
2741
2742    #[test]
2743    fn sanitize_intradoc_link_with_path_separator_java() {
2744        let input = "See [`ConversionOptions::builder()`] for details.";
2745        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2746        assert!(out.contains("`ConversionOptions.builder()`"), "got: {out}");
2747        assert!(!out.contains("[`"), "brackets must be removed, got: {out}");
2748    }
2749
2750    #[test]
2751    fn sanitize_intradoc_link_simple_type_php() {
2752        let input = "Returns a [`ConversionResult`].";
2753        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
2754        assert!(out.contains("`ConversionResult`"), "got: {out}");
2755        assert!(!out.contains("[`"), "got: {out}");
2756    }
2757
2758    #[test]
2759    fn sanitize_none_to_null_javadoc() {
2760        let input = "Returns None when no value is found.";
2761        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2762        assert!(out.contains("null"), "got: {out}");
2763        assert!(!out.contains("None"), "got: {out}");
2764    }
2765
2766    #[test]
2767    fn sanitize_none_to_undefined_tsdoc() {
2768        let input = "Returns None if absent.";
2769        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2770        assert!(out.contains("undefined"), "got: {out}");
2771        assert!(!out.contains("None"), "got: {out}");
2772    }
2773
2774    #[test]
2775    fn sanitize_some_x_to_the_value_x() {
2776        let input = "Pass Some(value) to enable.";
2777        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2778        assert!(out.contains("the value (value)"), "got: {out}");
2779        assert!(!out.contains("Some("), "got: {out}");
2780    }
2781
2782    #[test]
2783    fn sanitize_bare_some_followed_by_lowercase_noun_is_dropped() {
2784        // Real leak from html-to-markdown PreprocessingOptionsUpdate.java:16.
2785        let input =
2786            "Only specified fields (Some values) will override existing options; None values leave the previous";
2787        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2788        assert!(
2789            out.contains("(values)"),
2790            "bare `Some ` before lowercase noun must be stripped; got: {out}"
2791        );
2792        assert!(
2793            out.contains("null values"),
2794            "bare `None ` must also be replaced; got: {out}"
2795        );
2796        assert!(!out.contains("Some "), "Some prefix must not survive; got: {out}");
2797    }
2798
2799    #[test]
2800    fn sanitize_bare_some_does_not_touch_identifiers_or_uppercase_followers() {
2801        // SomeType, Some.method(), Some(x), and "Some Title" (proper noun) all preserved.
2802        let cases = [
2803            "SomeType lives on.",
2804            "Some.method() returns Self.",
2805            "Some Title",
2806            "Some(x) is a value.",
2807        ];
2808        for case in cases {
2809            let out = sanitize_rust_idioms(case, DocTarget::JavaDoc);
2810            // For the Some(x) case, replace_some_calls (run earlier) converts to "the value (x)"
2811            // so "Some" itself is gone — that's expected; everything else preserves "Some".
2812            if case.starts_with("Some(") {
2813                assert!(out.contains("the value (x)"), "got: {out}");
2814            } else {
2815                assert!(out.contains("Some"), "Some must survive in {case:?}; got: {out}");
2816            }
2817        }
2818    }
2819
2820    #[test]
2821    fn sanitize_option_t_to_nullable_php() {
2822        let input = "The result is Option<String>.";
2823        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
2824        assert!(out.contains("String?"), "got: {out}");
2825        assert!(!out.contains("Option<"), "got: {out}");
2826    }
2827
2828    #[test]
2829    fn sanitize_option_t_to_or_null_java() {
2830        let input = "The result is Option<String>.";
2831        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2832        assert!(out.contains("String | null"), "got: {out}");
2833    }
2834
2835    #[test]
2836    fn sanitize_option_t_to_or_undefined_tsdoc() {
2837        let input = "The result is Option<String>.";
2838        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2839        assert!(out.contains("String | undefined"), "got: {out}");
2840    }
2841
2842    #[test]
2843    fn sanitize_vec_u8_per_target() {
2844        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::PhpDoc).contains("string"));
2845        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::JavaDoc).contains("byte[]"));
2846        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::TsDoc).contains("Uint8Array"));
2847        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::JsDoc).contains("Uint8Array"));
2848    }
2849
2850    #[test]
2851    fn sanitize_vec_t_to_array() {
2852        let input = "Returns Vec<String>.";
2853        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2854        assert!(out.contains("String[]"), "got: {out}");
2855        assert!(!out.contains("Vec<"), "got: {out}");
2856    }
2857
2858    #[test]
2859    fn sanitize_hashmap_per_target() {
2860        let input = "Uses HashMap<String, u32>.";
2861        assert!(sanitize_rust_idioms(input, DocTarget::PhpDoc).contains("array<String, u32>"));
2862        assert!(sanitize_rust_idioms(input, DocTarget::JavaDoc).contains("Map<String, u32>"));
2863        assert!(sanitize_rust_idioms(input, DocTarget::TsDoc).contains("Record<String, u32>"));
2864    }
2865
2866    #[test]
2867    fn sanitize_arc_wrapper_stripped() {
2868        let input = "Holds Arc<Config>.";
2869        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2870        assert!(out.contains("Config"), "got: {out}");
2871        assert!(!out.contains("Arc<"), "got: {out}");
2872    }
2873
2874    #[test]
2875    fn sanitize_box_mutex_rwlock_rc_cell_refcell_stripped() {
2876        for wrapper in &["Box", "Mutex", "RwLock", "Rc", "Cell", "RefCell"] {
2877            let input = format!("Contains {wrapper}<Inner>.");
2878            let out = sanitize_rust_idioms(&input, DocTarget::JavaDoc);
2879            assert!(out.contains("Inner"), "wrapper {wrapper} not stripped, got: {out}");
2880            assert!(
2881                !out.contains(&format!("{wrapper}<")),
2882                "wrapper {wrapper} still present, got: {out}"
2883            );
2884        }
2885    }
2886
2887    #[test]
2888    fn sanitize_send_sync_stripped() {
2889        let input = "The type is Send + Sync.";
2890        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2891        assert!(!out.contains("Send"), "got: {out}");
2892        assert!(!out.contains("Sync"), "got: {out}");
2893    }
2894
2895    #[test]
2896    fn sanitize_static_lifetime_stripped() {
2897        let input = "Requires 'static lifetime.";
2898        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2899        assert!(!out.contains("'static"), "got: {out}");
2900    }
2901
2902    #[test]
2903    fn sanitize_pub_fn_stripped() {
2904        let input = "Calls pub fn convert().";
2905        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
2906        assert!(!out.contains("pub fn"), "got: {out}");
2907        assert!(out.contains("convert()"), "got: {out}");
2908    }
2909
2910    #[test]
2911    fn sanitize_crate_prefix_stripped() {
2912        let input = "See crate::error::ConversionError.";
2913        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2914        assert!(!out.contains("crate::"), "got: {out}");
2915        assert!(out.contains("error.ConversionError"), "got: {out}");
2916    }
2917
2918    #[test]
2919    fn sanitize_unwrap_expect_stripped() {
2920        let input = "Call result.unwrap() or result.expect(\"msg\").";
2921        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2922        assert!(!out.contains(".unwrap()"), "got: {out}");
2923        assert!(!out.contains(".expect("), "got: {out}");
2924    }
2925
2926    #[test]
2927    fn sanitize_no_mutation_inside_backticks() {
2928        // None inside backtick span must not be replaced.
2929        let input = "Use `None` as the argument.";
2930        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2931        assert!(out.contains("`None`"), "backtick span must be preserved, got: {out}");
2932    }
2933
2934    #[test]
2935    fn sanitize_rust_fence_dropped_for_tsdoc() {
2936        let input = "Intro.\n\n```rust\nlet x = 1;\n```\n\nTrailer.";
2937        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2938        assert!(
2939            !out.contains("let x = 1;"),
2940            "rust fence content must be dropped, got: {out}"
2941        );
2942        assert!(!out.contains("```rust"), "got: {out}");
2943        assert!(out.contains("Trailer."), "text after fence must survive, got: {out}");
2944    }
2945
2946    #[test]
2947    fn sanitize_rust_fence_dropped_for_java() {
2948        let input = "Intro.\n\n```rust\nlet x = 1;\n```\n\nTrailer.";
2949        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2950        // Rust fences are now dropped entirely for Java (Rust code is not portable).
2951        assert!(
2952            !out.contains("let x = 1;"),
2953            "fence content must be dropped for Java, got: {out}"
2954        );
2955        assert!(!out.contains("```"), "fence markers must be dropped, got: {out}");
2956        assert!(out.contains("Intro."), "prose before fence kept: {out}");
2957        assert!(out.contains("Trailer."), "prose after fence kept: {out}");
2958    }
2959
2960    #[test]
2961    fn sanitize_non_rust_fence_passed_through() {
2962        let input = "Example:\n\n```typescript\nconst x = 1;\n```";
2963        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2964        assert!(out.contains("```typescript"), "non-rust fence must survive, got: {out}");
2965        assert!(out.contains("const x = 1;"), "got: {out}");
2966    }
2967
2968    #[test]
2969    fn sanitize_backtick_code_span_not_mutated_option() {
2970        // Option<T> inside backtick span must not be replaced.
2971        let input = "The type is `Option<String>`.";
2972        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2973        // The backtick-protected span should be preserved verbatim.
2974        assert!(
2975            out.contains("`Option<String>`"),
2976            "code span must be preserved, got: {out}"
2977        );
2978    }
2979
2980    #[test]
2981    fn sanitize_idempotent() {
2982        // Running twice should produce the same result as running once.
2983        let input = "Returns None when Vec<String> is empty.";
2984        let once = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2985        let twice = sanitize_rust_idioms(&once, DocTarget::JavaDoc);
2986        assert_eq!(once, twice, "sanitize_rust_idioms should be idempotent");
2987    }
2988
2989    #[test]
2990    fn sanitize_multiline_prose() {
2991        let input = "Convert HTML to Markdown.\n\nReturns None on failure.\nUse Option<String> for the result.";
2992        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2993        assert!(out.contains("null"), "None must be replaced on line 2, got: {out}");
2994        assert!(
2995            out.contains("String | null"),
2996            "Option<String> must be replaced on line 3, got: {out}"
2997        );
2998    }
2999
3000    #[test]
3001    fn sanitize_attribute_line_dropped() {
3002        let input = "#[derive(Debug, Clone)]\nSome documentation.";
3003        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
3004        assert!(!out.contains("#[derive("), "attribute line must be dropped, got: {out}");
3005        // Prose survives, though bare "Some " before a lowercase noun is stripped
3006        // by `replace_some_keyword_in_prose`, so accept either form.
3007        assert!(out.contains("documentation."), "prose must survive, got: {out}");
3008    }
3009
3010    #[test]
3011    fn sanitize_path_separator_in_prose() {
3012        let input = "See std::collections::HashMap for details.";
3013        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
3014        assert!(out.contains("std.collections.HashMap"), ":: must become ., got: {out}");
3015    }
3016
3017    #[test]
3018    fn sanitize_none_not_replaced_inside_identifier() {
3019        // "NoneType" must not be replaced.
3020        let input = "Unlike NoneType in Python.";
3021        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
3022        assert!(out.contains("NoneType"), "NoneType must not be replaced, got: {out}");
3023    }
3024
3025    // --- CSharpDoc target tests ---
3026
3027    #[test]
3028    fn sanitize_csharp_drops_rust_section_headings_and_example_body() {
3029        // The GraphQLErrorException case: `# Examples` heading followed by a
3030        // ```ignore code fence containing `Self::error_code`, `Result<T, E>`,
3031        // intra-doc links — all of which previously leaked into `<summary>`.
3032        let input = "Convert error to HTTP status code\n\n\
3033            Maps GraphQL error types to status codes.\n\n\
3034            # Examples\n\n\
3035            ```ignore\n\
3036            use spikard_graphql::error::GraphQLError;\n\
3037            let error = GraphQLError::AuthenticationError(\"Invalid token\".to_string());\n\
3038            assert_eq!(error.status_code(), 401);\n\
3039            ```\n";
3040        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3041        assert!(
3042            out.contains("Convert error to HTTP status code"),
3043            "summary preserved: {out}"
3044        );
3045        assert!(out.contains("Maps GraphQL error types"), "prose preserved: {out}");
3046        assert!(!out.contains("# Examples"), "heading dropped: {out}");
3047        assert!(!out.contains("```"), "code fence dropped: {out}");
3048        assert!(!out.contains("Self::error_code"), "Self::method dropped: {out}");
3049        assert!(
3050            !out.contains("GraphQLError::AuthenticationError"),
3051            "rust path dropped: {out}"
3052        );
3053    }
3054
3055    #[test]
3056    fn sanitize_csharp_intradoc_link_with_path_separator() {
3057        let input = "See [`Self::error_code`] for the variant codes.";
3058        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3059        assert!(out.contains("`Self.error_code`"), "intra-doc link normalised: {out}");
3060        assert!(!out.contains("[`"), "square brackets removed: {out}");
3061        assert!(!out.contains("::"), ":: replaced with .: {out}");
3062    }
3063
3064    #[test]
3065    fn sanitize_csharp_result_type_keeps_success_drops_error() {
3066        let input = "Returns Result<String, ConversionError> on failure.";
3067        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3068        assert!(out.contains("String"), "success type kept: {out}");
3069        assert!(!out.contains("Result<"), "Result wrapper dropped: {out}");
3070        assert!(!out.contains("ConversionError"), "error type dropped: {out}");
3071    }
3072
3073    #[test]
3074    fn sanitize_csharp_option_becomes_nullable() {
3075        let input = "Returns Option<String>.";
3076        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3077        // After XML-escaping, the `?` survives but any surviving `<`/`>` get escaped.
3078        assert!(out.contains("String?"), "Option<T> -> T?: {out}");
3079        assert!(!out.contains("Option<"), "Option dropped: {out}");
3080    }
3081
3082    #[test]
3083    fn sanitize_csharp_vec_u8_becomes_byte_array() {
3084        let input = "Accepts Vec<u8>.";
3085        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3086        // `byte[]` survives — the `[` is not XML-significant.
3087        assert!(out.contains("byte[]"), "Vec<u8> -> byte[]: {out}");
3088    }
3089
3090    #[test]
3091    fn sanitize_csharp_hashmap_becomes_dictionary() {
3092        let input = "Holds HashMap<String, u32>.";
3093        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3094        // The `<` / `>` produced by Dictionary<K, V> must be XML-escaped.
3095        assert!(
3096            out.contains("Dictionary&lt;String, u32&gt;"),
3097            "HashMap -> Dictionary with XML-escaped brackets: {out}"
3098        );
3099    }
3100
3101    #[test]
3102    fn sanitize_csharp_none_to_null() {
3103        let input = "Returns None on miss.";
3104        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3105        assert!(out.contains("null"), "None -> null: {out}");
3106        assert!(!out.contains("None"), "None replaced: {out}");
3107    }
3108
3109    #[test]
3110    fn sanitize_csharp_escapes_raw_angle_brackets_and_amp() {
3111        // Unrecognised `<...>` constructs (e.g. trait objects, generic params on
3112        // unknown names) must still be XML-escaped so the result is valid inside
3113        // `<summary>`.
3114        let input = "Accepts Box<dyn Trait> and combines a & b.";
3115        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3116        // Box<T> wrapper is stripped to inner type, leaving `dyn Trait`.
3117        assert!(out.contains("dyn Trait"), "Box<T> stripped: {out}");
3118        assert!(out.contains("&amp;"), "ampersand escaped: {out}");
3119    }
3120
3121    #[test]
3122    fn sanitize_csharp_drops_rust_code_fence_entirely() {
3123        let input = "Intro.\n\n```rust\nlet x: Vec<u8> = vec![];\n```\n\nTrailer.";
3124        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3125        assert!(!out.contains("let x"), "code fence body dropped: {out}");
3126        assert!(!out.contains("```"), "fence markers dropped: {out}");
3127        assert!(out.contains("Intro."), "prose before fence kept: {out}");
3128        assert!(out.contains("Trailer."), "prose after fence kept: {out}");
3129    }
3130
3131    #[test]
3132    fn sanitize_csharp_keep_sections_does_not_drop_headings() {
3133        // The sections-preserving variant leaves heading lines alone so callers
3134        // that have already extracted sections can sanitise each body fragment.
3135        let input = "Summary.\n\n# Arguments\n\n* `name` - the value.";
3136        let out = sanitize_rust_idioms_keep_sections(input, DocTarget::CSharpDoc);
3137        assert!(out.contains("# Arguments"), "heading preserved: {out}");
3138        assert!(out.contains("name"), "body preserved: {out}");
3139    }
3140
3141    #[test]
3142    fn sanitize_csharp_idempotent() {
3143        let input = "Returns Option<String> or None.";
3144        let once = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3145        let twice = sanitize_rust_idioms(&once, DocTarget::CSharpDoc);
3146        assert_eq!(once, twice, "CSharpDoc sanitisation must be idempotent");
3147    }
3148
3149    #[test]
3150    fn sanitize_phpdoc_drops_unmarked_rust_code_fences() {
3151        // Regression test: unmarked code fences (```\n...\n```) in Rust docstrings
3152        // are treated as Rust code and should be dropped for PHP target.
3153        let input = "Detect language name from a file extension.\n\nReturns `None` for unrecognized extensions.\n\n```\nuse tree_sitter_language_pack::detect_language_from_extension;\nassert_eq!(detect_language_from_extension(\"py\"), Some(\"python\"));\nassert_eq!(detect_language_from_extension(\"RS\"), Some(\"rust\"));\nassert_eq!(detect_language_from_extension(\"xyz\"), None);\n```";
3154        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
3155        assert!(
3156            !out.contains("use tree_sitter_language_pack"),
3157            "Rust use stmt dropped: {out}"
3158        );
3159        assert!(!out.contains("assert_eq!"), "Rust code dropped: {out}");
3160        assert!(!out.contains("```"), "fence markers dropped: {out}");
3161        assert!(out.contains("Detect language name"), "prose before fence kept: {out}");
3162        assert!(out.contains("unrecognized extensions"), "prose kept: {out}");
3163    }
3164
3165    #[test]
3166    fn sanitize_javadoc_drops_unmarked_rust_code_fences() {
3167        // Regression test: unmarked code fences in Rust docstrings should be dropped
3168        // for Java target as well.
3169        let input = "Process a file.\n\n```\nlet result = process(\"def hello(): pass\", &config).unwrap();\n```";
3170        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
3171        assert!(!out.contains("unwrap"), "Rust unwrap dropped: {out}");
3172        assert!(!out.contains("```"), "fence markers dropped: {out}");
3173        assert!(out.contains("Process a file"), "prose kept: {out}");
3174    }
3175
3176    #[test]
3177    fn sanitize_phpdoc_drops_explicit_rust_fences() {
3178        // Explicit ```rust fences should also be dropped for PHP.
3179        let input = "Summary.\n\n```rust\nuse std::path::PathBuf;\nlet p = PathBuf::from(\"/tmp\");\n```";
3180        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
3181        assert!(!out.contains("use std::"), "Rust code dropped: {out}");
3182        assert!(!out.contains("PathBuf"), "Rust types dropped: {out}");
3183        assert!(!out.contains("```"), "fence markers dropped: {out}");
3184        assert!(out.contains("Summary"), "prose kept: {out}");
3185    }
3186
3187    // --- rustdoc test-attribute fence tests ---
3188
3189    #[test]
3190    fn sanitize_no_run_fence_dropped_for_tsdoc() {
3191        let input = "Intro.\n\n```no_run\nuse foo::bar;\nbar::init();\n```\n\nTrailer.";
3192        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
3193        assert!(!out.contains("use foo::bar"), "no_run fence body dropped: {out}");
3194        assert!(!out.contains("```"), "fence markers dropped: {out}");
3195        assert!(out.contains("Intro."), "prose before fence kept: {out}");
3196        assert!(out.contains("Trailer."), "prose after fence kept: {out}");
3197    }
3198
3199    #[test]
3200    fn sanitize_ignore_fence_dropped_for_phpdoc() {
3201        let input = "Summary.\n\n```ignore\nlet x = 1;\n// this would not compile\n```";
3202        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
3203        assert!(!out.contains("let x = 1"), "ignore fence body dropped: {out}");
3204        assert!(!out.contains("```"), "fence markers dropped: {out}");
3205        assert!(out.contains("Summary"), "prose kept: {out}");
3206    }
3207
3208    #[test]
3209    fn sanitize_should_panic_fence_dropped_for_javadoc() {
3210        let input = "Panics on null.\n\n```should_panic\nlet _ = parse(null);\n```";
3211        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
3212        assert!(!out.contains("parse(null)"), "should_panic fence body dropped: {out}");
3213        assert!(!out.contains("```"), "fence markers dropped: {out}");
3214        assert!(out.contains("Panics on null"), "prose kept: {out}");
3215    }
3216
3217    #[test]
3218    fn sanitize_compile_fail_fence_dropped_for_csharp() {
3219        let input = "Type safety demo.\n\n```compile_fail\nlet x: u32 = \"hello\";\n```";
3220        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3221        assert!(!out.contains("let x:"), "compile_fail fence body dropped: {out}");
3222        assert!(!out.contains("```"), "fence markers dropped: {out}");
3223        assert!(out.contains("Type safety demo"), "prose kept: {out}");
3224    }
3225
3226    #[test]
3227    fn sanitize_edition_fence_dropped_for_tsdoc() {
3228        let input = "Edition example.\n\n```edition2021\nuse std::fmt;\n```\n\nSee also edition2018.";
3229        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
3230        assert!(!out.contains("use std::fmt"), "edition2021 fence body dropped: {out}");
3231        assert!(!out.contains("```"), "fence markers dropped: {out}");
3232        assert!(out.contains("Edition example"), "prose kept: {out}");
3233    }
3234
3235    #[test]
3236    fn sanitize_python_fence_preserved_for_tsdoc() {
3237        // Python fences are not Rust — they must pass through unchanged.
3238        let input = "Example:\n\n```python\nimport foo\nfoo.bar()\n```";
3239        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
3240        assert!(out.contains("```python"), "python fence preserved: {out}");
3241        assert!(out.contains("import foo"), "python body preserved: {out}");
3242    }
3243
3244    #[test]
3245    fn sanitize_javascript_fence_preserved_for_phpdoc() {
3246        let input = "Usage:\n\n```javascript\nconst x = require('foo');\n```";
3247        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
3248        assert!(out.contains("```javascript"), "javascript fence preserved: {out}");
3249        assert!(out.contains("require('foo')"), "javascript body preserved: {out}");
3250    }
3251
3252    #[test]
3253    fn example_for_target_no_run_fence_suppressed_for_typescript() {
3254        let example = "```no_run\nuse tree_sitter_language_pack::available_languages;\nlet langs = available_languages();\n```";
3255        assert_eq!(
3256            example_for_target(example, "typescript"),
3257            None,
3258            "no_run fence must be treated as Rust and suppressed for TypeScript"
3259        );
3260    }
3261
3262    #[test]
3263    fn example_for_target_ignore_fence_suppressed_for_php() {
3264        let example = "```ignore\nlet x = 1;\n```";
3265        assert_eq!(
3266            example_for_target(example, "php"),
3267            None,
3268            "ignore fence must be treated as Rust and suppressed for PHP"
3269        );
3270    }
3271
3272    #[test]
3273    fn example_for_target_compile_fail_fence_suppressed_for_java() {
3274        let example = "```compile_fail\nlet x: u32 = \"wrong\";\n```";
3275        assert_eq!(
3276            example_for_target(example, "java"),
3277            None,
3278            "compile_fail fence must be treated as Rust and suppressed for Java"
3279        );
3280    }
3281
3282    #[test]
3283    fn example_for_target_should_panic_fence_suppressed_for_ruby() {
3284        let example = "```should_panic\nlet _ = parse(None);\n```";
3285        assert_eq!(
3286            example_for_target(example, "ruby"),
3287            None,
3288            "should_panic fence must be treated as Rust and suppressed for Ruby"
3289        );
3290    }
3291
3292    #[test]
3293    fn example_for_target_edition_fence_suppressed_for_php() {
3294        let example = "```edition2021\nuse std::fmt;\n```";
3295        assert_eq!(
3296            example_for_target(example, "php"),
3297            None,
3298            "edition2021 fence must be treated as Rust and suppressed for PHP"
3299        );
3300    }
3301
3302    #[test]
3303    fn example_for_target_python_fence_preserved() {
3304        let example = "```python\nimport foo\n```";
3305        let result = example_for_target(example, "php");
3306        assert!(result.is_some(), "python fence must be preserved for PHP target");
3307    }
3308}
alef_codegen/doc_emission.rs

alef_codegen/
doc_emission.rs