alef_codegen/
doc_emission.rs

1//! Language-native documentation comment emission.
2//! Provides standardized functions for emitting doc comments in different languages.
3
4/// Emit PHPDoc-style comments (/** ... */)
5/// Used for PHP classes, methods, and properties.
6///
7/// Sanitizes Rust-specific idioms before translating rustdoc sections
8/// (`# Arguments` → `@param`, `# Returns` → `@return`, `# Errors` → `@throws`,
9/// `# Example` → ` ```php ` fence) via [`render_phpdoc_sections`].
10///
11/// `exception_class` is the PHP exception class name to use in `@throws` tags.
12pub fn emit_phpdoc(out: &mut String, doc: &str, indent: &str, exception_class: &str) {
13    if doc.is_empty() {
14        return;
15    }
16    // Sanitize Rust-specific idioms before processing sections.
17    let sanitized = sanitize_rust_idioms(doc, DocTarget::PhpDoc);
18    let sections = parse_rustdoc_sections(&sanitized);
19    let any_section = sections.arguments.is_some()
20        || sections.returns.is_some()
21        || sections.errors.is_some()
22        || sections.example.is_some();
23    let body = if any_section {
24        render_phpdoc_sections(&sections, exception_class)
25    } else {
26        sanitized
27    };
28    out.push_str(indent);
29    out.push_str("/**\n");
30    for line in body.lines() {
31        out.push_str(indent);
32        out.push_str(" * ");
33        out.push_str(&escape_phpdoc_line(line));
34        out.push('\n');
35    }
36    out.push_str(indent);
37    out.push_str(" */\n");
38}
39
/// Rewrite every `*/` in `s` as `* /` so the text cannot terminate the
/// surrounding `/** ... */` block comment.
fn escape_phpdoc_line(s: &str) -> String {
    let pieces: Vec<&str> = s.split("*/").collect();
    pieces.join("* /")
}
44
45/// Emit C# XML documentation comments (/// <summary> ... </summary>)
46/// Used for C# classes, structs, methods, and properties.
47///
48/// Translates rustdoc sections (`# Arguments` → `<param>`,
49/// `# Returns` → `<returns>`, `# Errors` → `<exception>`,
50/// `# Example` → `<example><code>`) via [`render_csharp_xml_sections`].
51///
52/// `exception_class` is the C# exception class name to use in `<exception cref="...">` tags.
53pub fn emit_csharp_doc(out: &mut String, doc: &str, indent: &str, exception_class: &str) {
54    if doc.is_empty() {
55        return;
56    }
57    // Parse sections from the raw rustdoc first (so `# Examples` / `# Arguments`
58    // / `# Returns` / `# Errors` are routed into structured XML tags), then
59    // sanitise each section body to strip Rust idioms and XML-escape `<`/`>`/`&`.
60    let raw_sections = parse_rustdoc_sections(doc);
61    let sections = RustdocSections {
62        summary: sanitize_rust_idioms_keep_sections(&raw_sections.summary, DocTarget::CSharpDoc),
63        arguments: raw_sections
64            .arguments
65            .as_deref()
66            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
67        returns: raw_sections
68            .returns
69            .as_deref()
70            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
71        errors: raw_sections
72            .errors
73            .as_deref()
74            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
75        panics: raw_sections
76            .panics
77            .as_deref()
78            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
79        safety: raw_sections
80            .safety
81            .as_deref()
82            .map(|s| sanitize_rust_idioms_keep_sections(s, DocTarget::CSharpDoc)),
83        // Examples typically contain Rust code that doesn't compile as C#; drop the body
84        // entirely rather than risk leaking unparseable code into `<example>`.
85        example: None,
86    };
87    let any_section = sections.arguments.is_some()
88        || sections.returns.is_some()
89        || sections.errors.is_some()
90        || sections.example.is_some();
91    if !any_section {
92        // Backwards-compatible path: plain `<summary>` for prose-only docs.
93        out.push_str(indent);
94        out.push_str("/// <summary>\n");
95        for line in sections.summary.lines() {
96            out.push_str(indent);
97            out.push_str("/// ");
98            // Note: sanitise_rust_idioms_keep_sections already XML-escaped <, >, & for
99            // the CSharpDoc target. We deliberately do NOT call escape_csharp_doc_line
100            // here because that would double-encode (e.g. `&amp;` → `&amp;amp;`).
101            out.push_str(line);
102            out.push('\n');
103        }
104        out.push_str(indent);
105        out.push_str("/// </summary>\n");
106        return;
107    }
108    let rendered = render_csharp_xml_sections(&sections, exception_class);
109    for line in rendered.lines() {
110        out.push_str(indent);
111        out.push_str("/// ");
112        // The rendered tags already contain the canonical chars; we only
113        // escape XML special chars that aren't part of our tag syntax. Since
114        // render_csharp_xml_sections produces well-formed XML, raw passthrough
115        // is correct.
116        out.push_str(line);
117        out.push('\n');
118    }
119}
120
121/// Emit Elixir documentation comments (@doc)
122/// Used for Elixir modules and functions.
123pub fn emit_elixir_doc(out: &mut String, doc: &str) {
124    if doc.is_empty() {
125        return;
126    }
127    out.push_str("@doc \"\"\"\n");
128    for line in doc.lines() {
129        out.push_str(&escape_elixir_doc_line(line));
130        out.push('\n');
131    }
132    out.push_str("\"\"\"\n");
133}
134
/// Write `doc` to `out` as Rust `///` doc comments, one per input line.
///
/// Each line is emitted as `indent`, then `/// `, then the line text and a
/// newline; blank input lines therefore become `/// ` with a trailing space.
/// Intended for backends that generate Rust source. The syntax matches
/// `emit_swift_doc`; the two differ only by intent.
pub fn emit_rustdoc(out: &mut String, doc: &str, indent: &str) {
    // `lines()` yields nothing for an empty `doc`, so no output is produced.
    for text in doc.lines() {
        out.extend([indent, "/// ", text, "\n"]);
    }
}
151
/// Escape an Elixir doc line so it never contains a `"""` sequence that
/// could close the surrounding heredoc early.
///
/// A space is inserted after every second consecutive double quote whenever
/// a third one follows: `"""` becomes `"" "` and `""""` becomes `"" ""`.
/// Unlike a single `replace` pass, this also holds for runs of five or more
/// quotes (`"""""` becomes `"" "" "`), where the replacement text and the
/// leftover quotes would otherwise join up into a fresh `"""`.
fn escape_elixir_doc_line(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    // Number of double quotes emitted back-to-back at the end of `escaped`.
    let mut quote_run = 0usize;
    for ch in s.chars() {
        if ch == '"' {
            if quote_run == 2 {
                // A third quote in a row would form `"""`: break the run.
                escaped.push(' ');
                quote_run = 0;
            }
            quote_run += 1;
        } else {
            quote_run = 0;
        }
        escaped.push(ch);
    }
    escaped
}
156
/// Write `doc` to `out` as roxygen2 comments for R: every input line is
/// prefixed with `#' ` and terminated with a newline.
///
/// No indentation is added. An empty `doc` produces no output.
pub fn emit_roxygen(out: &mut String, doc: &str) {
    out.extend(doc.lines().flat_map(|text| ["#' ", text, "\n"]));
}
169
/// Write `doc` to `out` as Swift `///` documentation comments.
///
/// Each input line becomes `indent`, then `/// `, then the line text and a
/// newline. An empty `doc` produces no output.
pub fn emit_swift_doc(out: &mut String, doc: &str, indent: &str) {
    doc.lines().for_each(|text| {
        out.push_str(indent);
        out.push_str("/// ");
        out.push_str(text);
        out.push('\n');
    });
}
183
184/// Emit Javadoc-style documentation comments (/** ... */)
185/// Used for Java classes, methods, and fields.
186/// Handles XML escaping and Javadoc tag formatting.
187pub fn emit_javadoc(out: &mut String, doc: &str, indent: &str) {
188    if doc.is_empty() {
189        return;
190    }
191    out.push_str(indent);
192    out.push_str("/**\n");
193    for line in doc.lines() {
194        let escaped = escape_javadoc_line(line);
195        let trimmed = escaped.trim_end();
196        if trimmed.is_empty() {
197            out.push_str(indent);
198            out.push_str(" *\n");
199        } else {
200            out.push_str(indent);
201            out.push_str(" * ");
202            out.push_str(trimmed);
203            out.push('\n');
204        }
205    }
206    out.push_str(indent);
207    out.push_str(" */\n");
208}
209
/// Write `doc` to `out` as a KDoc block (`/** ... */`) for Kotlin.
///
/// Trailing whitespace is trimmed from every line, and blank lines are
/// emitted as a bare ` *`. `indent` prefixes every emitted line. Nothing is
/// written when `doc` is empty.
///
/// Comment delimiters inside the text are defused: `*/` becomes `* /` and
/// `/*` becomes `/ *`. Kotlin block comments nest, so a stray `/*` would open
/// an inner comment and a stray `*/` would close the KDoc block early; either
/// one leaves the generated file uncompilable.
pub fn emit_kdoc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    out.push_str(indent);
    out.push_str("/**\n");
    for line in doc.lines() {
        // Closers first: once no `*/` remains, splitting `/*` cannot create one.
        let safe = line.trim_end().replace("*/", "* /").replace("/*", "/ *");
        out.push_str(indent);
        if safe.is_empty() {
            out.push_str(" *\n");
        } else {
            out.push_str(" * ");
            out.push_str(&safe);
            out.push('\n');
        }
    }
    out.push_str(indent);
    out.push_str(" */\n");
}
233
/// Write `doc` to `out` as KDoc in the shape ktfmt would produce.
///
/// ktfmt collapses a short KDoc comment onto one line (`/** ... */`) when it
/// fits within 100 columns. Emitting that shape directly avoids formatting
/// diffs when the generated code is later run through ktfmt.
///
/// - A single-line `doc` whose `indent + "/** " + text + " */"` is at most
///   100 characters wide is emitted on one line.
/// - Anything longer, or with more than one line, is emitted as a multi-line
///   block with ` * ` prefixes (blank lines become a bare ` *`).
///
/// Width is measured in characters, not bytes, so short non-ASCII text is
/// not pushed into the multi-line form. Comment delimiters inside the text
/// are defused (`*/` → `* /`, `/*` → `/ *`) because Kotlin block comments
/// nest and either sequence would break the generated file. Nothing is
/// written when `doc` is empty.
pub fn emit_kdoc_ktfmt_canonical(out: &mut String, doc: &str, indent: &str) {
    const KTFMT_LINE_WIDTH: usize = 100;
    const OPENER: &str = "/** ";
    const CLOSER: &str = " */";

    // Break up comment delimiters; closers first so splitting `/*` afterwards
    // cannot create a new `*/`.
    fn defuse(line: &str) -> String {
        line.replace("*/", "* /").replace("/*", "/ *")
    }

    if doc.is_empty() {
        return;
    }

    let lines: Vec<String> = doc.lines().map(defuse).collect();

    // Only a one-line doc is a candidate for the collapsed form.
    if let [only] = lines.as_slice() {
        let content = only.trim();
        let width =
            indent.chars().count() + OPENER.len() + content.chars().count() + CLOSER.len();
        if width <= KTFMT_LINE_WIDTH {
            out.push_str(indent);
            out.push_str(OPENER);
            out.push_str(content);
            out.push_str(CLOSER);
            out.push('\n');
            return;
        }
    }

    // Multi-line form for long or multi-line comments.
    out.push_str(indent);
    out.push_str("/**\n");
    for line in &lines {
        let trimmed = line.trim_end();
        out.push_str(indent);
        if trimmed.is_empty() {
            out.push_str(" *\n");
        } else {
            out.push_str(" * ");
            out.push_str(trimmed);
            out.push('\n');
        }
    }
    out.push_str(indent);
    out.push_str(" */\n");
}
288
/// Write `doc` to `out` as Dart `///` documentation comments.
///
/// Each input line becomes `indent`, then `/// `, then the line text and a
/// newline. Nothing is written when `doc` is empty.
pub fn emit_dartdoc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    // Build the per-line prefix once instead of pushing its two parts per line.
    let prefix = format!("{}/// ", indent);
    for text in doc.lines() {
        out.push_str(&prefix);
        out.push_str(text);
        out.push('\n');
    }
}
302
/// Write `doc` to `out` as Gleam `///` documentation comments.
///
/// Each input line becomes `indent`, then `/// `, then the line text and a
/// newline. An empty `doc` produces no output.
pub fn emit_gleam_doc(out: &mut String, doc: &str, indent: &str) {
    let block: String = doc
        .lines()
        .map(|text| format!("{}/// {}\n", indent, text))
        .collect();
    out.push_str(&block);
}
316
317/// Emit Doxygen-style C documentation comments using `///`-prefixed lines.
318///
319/// Used by `alef-backend-ffi` above every `extern "C" fn`, the `*_len()`
320/// companion, opaque-handle typedef, and (post-cbindgen) the type/enum
321/// declarations cbindgen surfaces in the generated `.h`. cbindgen translates
322/// `///` source lines into a single `/** ... */` Doxygen block per item, so we
323/// only need to emit per-line `///` content here.
324///
325/// Translates rustdoc sections via [`render_doxygen_sections`]:
326///
327/// - `# Arguments` → `\param <name> <description>` (one per arg).
328/// - `# Returns`   → `\return <description>`.
329/// - `# Errors`    → `\note <description>` (Doxygen has no `\throws` for C;
330///   `\note` is the convention).
331/// - `# Safety`    → `\note SAFETY: <description>`.
332/// - `# Example`   → `\code` ... `\endcode` block.
333///
334/// Markdown links (`[text](url)`) are flattened to `text (url)`. Body lines
335/// are word-wrapped at ~100 columns so the rendered `/** */` block stays
336/// readable in IDE tooltips and terminal viewers.
337pub fn emit_c_doxygen(out: &mut String, doc: &str, indent: &str) {
338    if doc.trim().is_empty() {
339        return;
340    }
341    let sections = parse_rustdoc_sections(doc);
342    let any_section = sections.arguments.is_some()
343        || sections.returns.is_some()
344        || sections.errors.is_some()
345        || sections.safety.is_some()
346        || sections.example.is_some();
347    let mut body = if any_section {
348        render_doxygen_sections_with_notes(&sections)
349    } else {
350        sections.summary.clone()
351    };
352    body = strip_markdown_links(&body);
353    let wrapped = word_wrap(&body, DOXYGEN_WRAP_WIDTH);
354    for line in wrapped.lines() {
355        out.push_str(indent);
356        out.push_str("/// ");
357        out.push_str(line);
358        out.push('\n');
359    }
360}
361
362const DOXYGEN_WRAP_WIDTH: usize = 100;
363
364/// Render `RustdocSections` as a Doxygen body but route `# Errors` and
365/// `# Safety` to `\note` lines instead of plain prose. This is the variant
366/// `emit_c_doxygen` uses; the public `render_doxygen_sections` keeps its
367/// long-standing plain-prose semantics so existing callers don't shift.
368fn render_doxygen_sections_with_notes(sections: &RustdocSections) -> String {
369    let mut out = String::new();
370    if !sections.summary.is_empty() {
371        out.push_str(&sections.summary);
372    }
373    if let Some(args) = sections.arguments.as_deref() {
374        for (name, desc) in parse_arguments_bullets(args) {
375            if !out.is_empty() {
376                out.push('\n');
377            }
378            if desc.is_empty() {
379                out.push_str("\\param ");
380                out.push_str(&name);
381            } else {
382                out.push_str("\\param ");
383                out.push_str(&name);
384                out.push(' ');
385                out.push_str(&desc);
386            }
387        }
388    }
389    if let Some(ret) = sections.returns.as_deref() {
390        if !out.is_empty() {
391            out.push('\n');
392        }
393        out.push_str("\\return ");
394        out.push_str(ret.trim());
395    }
396    if let Some(err) = sections.errors.as_deref() {
397        if !out.is_empty() {
398            out.push('\n');
399        }
400        out.push_str("\\note ");
401        out.push_str(err.trim());
402    }
403    if let Some(safety) = sections.safety.as_deref() {
404        if !out.is_empty() {
405            out.push('\n');
406        }
407        out.push_str("\\note SAFETY: ");
408        out.push_str(safety.trim());
409    }
410    if let Some(example) = sections.example.as_deref() {
411        if !out.is_empty() {
412            out.push('\n');
413        }
414        out.push_str("\\code\n");
415        for line in example.lines() {
416            let t = line.trim_start();
417            if t.starts_with("```") {
418                continue;
419            }
420            out.push_str(line);
421            out.push('\n');
422        }
423        out.push_str("\\endcode");
424    }
425    out
426}
427
/// Flatten Markdown inline links `[text](url)` to `text (url)` so the
/// rendered Doxygen block stays readable without a Markdown filter.
///
/// Matching is deliberately simple: a `[` is paired with the next `]`, and
/// the pair is treated as a link only when `(` follows immediately and a `)`
/// occurs somewhere after it. Brackets that do not form a link are copied
/// through unchanged.
///
/// The input is processed as string slices split at ASCII delimiters, so
/// multi-byte UTF-8 text is copied intact. Pushing individual bytes as
/// `char`s would turn every non-ASCII character into mojibake.
fn strip_markdown_links(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(open) = rest.find('[') {
        // Everything before the bracket is plain text.
        out.push_str(&rest[..open]);
        let after_open = &rest[open + 1..];
        // Try to read `text](url)` starting right after the `[`.
        let link = after_open.find(']').and_then(|close| {
            let target = after_open[close + 1..].strip_prefix('(')?;
            let end = target.find(')')?;
            Some((&after_open[..close], &target[..end], &target[end + 1..]))
        });
        match link {
            Some((text, url, tail)) => {
                out.push_str(text);
                out.push_str(" (");
                out.push_str(url);
                out.push(')');
                rest = tail;
            }
            None => {
                // Not a link: keep the bracket and continue after it.
                out.push('[');
                rest = after_open;
            }
        }
    }
    out.push_str(rest);
    out
}
460
/// Word-wrap each input line at `width` columns.
///
/// Lines that start with `\code` or `\endcode`, lines between those markers,
/// and lines that start with a Markdown fence are copied through verbatim so
/// example formatting is preserved. Lines no wider than `width` are also
/// copied unchanged. Wider lines are split on whitespace and re-joined with
/// single spaces (leading indentation is not kept); a single word wider than
/// `width` stays on its own line.
///
/// Width is counted in characters rather than bytes, so non-ASCII text is
/// not wrapped earlier than ASCII text of the same visible length. Trailing
/// newlines are removed from the result.
fn word_wrap(input: &str, width: usize) -> String {
    let mut out = String::with_capacity(input.len());
    let mut in_code = false;
    for raw in input.lines() {
        let trimmed = raw.trim_start();
        let opens_code = trimmed.starts_with("\\code");
        let closes_code = !opens_code && trimmed.starts_with("\\endcode");
        if opens_code {
            in_code = true;
        } else if closes_code {
            in_code = false;
        }
        let verbatim = opens_code
            || closes_code
            || in_code
            || trimmed.starts_with("```")
            || raw.chars().count() <= width;
        if verbatim {
            out.push_str(raw);
            out.push('\n');
            continue;
        }
        // Characters already written on the current output line; 0 means the
        // line has not been started yet.
        let mut column = 0usize;
        for word in raw.split_whitespace() {
            let word_width = word.chars().count();
            if column == 0 {
                out.push_str(word);
                column = word_width;
            } else if column + 1 + word_width > width {
                out.push('\n');
                out.push_str(word);
                column = word_width;
            } else {
                out.push(' ');
                out.push_str(word);
                column += 1 + word_width;
            }
        }
        if column > 0 {
            out.push('\n');
        }
    }
    out.trim_end_matches('\n').to_string()
}
512
/// Write `doc` to `out` as Zig `///` documentation comments.
///
/// Each input line becomes `indent`, then `/// `, then the line text and a
/// newline. An empty `doc` produces no output.
pub fn emit_zig_doc(out: &mut String, doc: &str, indent: &str) {
    let pieces = doc
        .lines()
        .flat_map(|text| [indent, "/// ", text, "\n"]);
    for piece in pieces {
        out.push_str(piece);
    }
}
526
527/// Emit YARD documentation comments for Ruby.
528/// Used for Ruby classes, methods, and attributes.
529///
530/// YARD syntax: each line prefixed with `# ` (with space). Translates rustdoc
531/// sections (`# Arguments` → `@param`, `# Returns` → `@return`, `# Errors` → `@raise`)
532/// via [`render_yard_sections`].
533pub fn emit_yard_doc(out: &mut String, doc: &str, indent: &str) {
534    if doc.is_empty() {
535        return;
536    }
537    let sections = parse_rustdoc_sections(doc);
538    let any_section = sections.arguments.is_some()
539        || sections.returns.is_some()
540        || sections.errors.is_some()
541        || sections.example.is_some();
542    let body = if any_section {
543        render_yard_sections(&sections)
544    } else {
545        doc.to_string()
546    };
547    for line in body.lines() {
548        out.push_str(indent);
549        out.push_str("# ");
550        out.push_str(line);
551        out.push('\n');
552    }
553}
554
555/// Render `RustdocSections` as YARD documentation comment body.
556///
557/// - `# Arguments` → `@param name desc` (one per arg)
558/// - `# Returns`   → `@return desc`
559/// - `# Errors`    → `@raise desc`
560/// - `# Example`   → `@example` block.
561///
562/// Output is a plain string with `\n` separators; the emitter wraps each line
563/// in `# ` itself.
564pub fn render_yard_sections(sections: &RustdocSections) -> String {
565    let mut out = String::new();
566    if !sections.summary.is_empty() {
567        out.push_str(&sections.summary);
568    }
569    if let Some(args) = sections.arguments.as_deref() {
570        for (name, desc) in parse_arguments_bullets(args) {
571            if !out.is_empty() {
572                out.push('\n');
573            }
574            if desc.is_empty() {
575                out.push_str("@param ");
576                out.push_str(&name);
577            } else {
578                out.push_str("@param ");
579                out.push_str(&name);
580                out.push(' ');
581                out.push_str(&desc);
582            }
583        }
584    }
585    if let Some(ret) = sections.returns.as_deref() {
586        if !out.is_empty() {
587            out.push('\n');
588        }
589        out.push_str("@return ");
590        out.push_str(ret.trim());
591    }
592    if let Some(err) = sections.errors.as_deref() {
593        if !out.is_empty() {
594            out.push('\n');
595        }
596        out.push_str("@raise ");
597        out.push_str(err.trim());
598    }
599    if let Some(example) = sections.example.as_deref() {
600        if let Some(body) = example_for_target(example, "ruby") {
601            if !out.is_empty() {
602                out.push('\n');
603            }
604            out.push_str("@example\n");
605            out.push_str(&body);
606        }
607    }
608    out
609}
610
611/// Escape Javadoc line: handle XML special chars and backtick code blocks.
612///
613/// HTML entities (`<`, `>`, `&`) are also escaped *inside* `{@code …}` blocks.
614/// Without that, content like `` `<pre><code>` `` would emit raw `<pre>`
615/// inside the Javadoc tag — Eclipse-formatter Spotless then treats it as a
616/// real `<pre>` block element and shatters the line across multiple `* `
617/// rows, breaking `alef-verify`'s embedded hash. Escaped content is
618/// rendered identically by Javadoc readers (the `{@code}` tag shows literal
619/// characters) and is stable under any post-formatter pass.
620fn escape_javadoc_line(s: &str) -> String {
621    let mut result = String::with_capacity(s.len());
622    let mut chars = s.chars().peekable();
623    while let Some(ch) = chars.next() {
624        if ch == '`' {
625            let mut code = String::new();
626            for c in chars.by_ref() {
627                if c == '`' {
628                    break;
629                }
630                code.push(c);
631            }
632            result.push_str("{@code ");
633            result.push_str(&escape_javadoc_html_entities(&code));
634            result.push('}');
635        } else if ch == '<' {
636            result.push_str("&lt;");
637        } else if ch == '>' {
638            result.push_str("&gt;");
639        } else if ch == '&' {
640            result.push_str("&amp;");
641        } else {
642            result.push(ch);
643        }
644    }
645    result
646}
647
/// Replace `&`, `<` and `>` with `&amp;`, `&lt;` and `&gt;` so downstream
/// Javadoc/Eclipse formatters do not parse the text as block-level HTML
/// (e.g. `<pre>`). All other characters are left as they are.
fn escape_javadoc_html_entities(s: &str) -> String {
    // `&` goes first so the ampersands of the entities added afterwards are
    // not escaped a second time.
    s.replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
}
662
/// A rustdoc comment split into the sections that binding emitters use.
///
/// `summary` holds the prose that comes before the first recognised
/// `# Heading`. Every other field holds the body of one section, stored
/// verbatim and without its heading line; it is `None` when the section is
/// absent. Translating bullet lists and code fences into host-language
/// conventions is left to each emitter.
///
/// Field contents are trimmed of leading and trailing whitespace so emitters
/// can concatenate them without producing comment lines that contain only
/// spaces.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct RustdocSections {
    /// Prose preceding the first recognised section heading.
    pub summary: String,
    /// `# Arguments` body, when the section exists.
    pub arguments: Option<String>,
    /// `# Returns` body, when the section exists.
    pub returns: Option<String>,
    /// `# Errors` body, when the section exists.
    pub errors: Option<String>,
    /// `# Panics` body, when the section exists.
    pub panics: Option<String>,
    /// `# Safety` body, when the section exists.
    pub safety: Option<String>,
    /// `# Example` / `# Examples` body, when the section exists.
    pub example: Option<String>,
}
690
691/// Parse a rustdoc string into [`RustdocSections`].
692///
693/// Recognises level-1 ATX headings whose name matches one of the standard
694/// rustdoc section names (`Arguments`, `Returns`, `Errors`, `Panics`,
695/// `Safety`, `Example`, `Examples`). Anything before the first heading
696/// becomes `summary`. Unrecognised headings are folded into the
697/// preceding section verbatim, so unconventional rustdoc isn't lost.
698///
699/// The input is expected to already have rustdoc-hidden lines stripped
700/// and intra-doc-link syntax rewritten by
701/// [`crate::extractor::helpers::normalize_rustdoc`].
702pub fn parse_rustdoc_sections(doc: &str) -> RustdocSections {
703    if doc.trim().is_empty() {
704        return RustdocSections::default();
705    }
706    let mut summary = String::new();
707    let mut arguments: Option<String> = None;
708    let mut returns: Option<String> = None;
709    let mut errors: Option<String> = None;
710    let mut panics: Option<String> = None;
711    let mut safety: Option<String> = None;
712    let mut example: Option<String> = None;
713    let mut current: Option<&'static str> = None;
714    let mut buf = String::new();
715    let mut in_fence = false;
716    let flush = |target: Option<&'static str>,
717                 buf: &mut String,
718                 summary: &mut String,
719                 arguments: &mut Option<String>,
720                 returns: &mut Option<String>,
721                 errors: &mut Option<String>,
722                 panics: &mut Option<String>,
723                 safety: &mut Option<String>,
724                 example: &mut Option<String>| {
725        let body = std::mem::take(buf).trim().to_string();
726        if body.is_empty() {
727            return;
728        }
729        match target {
730            None => {
731                if !summary.is_empty() {
732                    summary.push('\n');
733                }
734                summary.push_str(&body);
735            }
736            Some("arguments") => *arguments = Some(body),
737            Some("returns") => *returns = Some(body),
738            Some("errors") => *errors = Some(body),
739            Some("panics") => *panics = Some(body),
740            Some("safety") => *safety = Some(body),
741            Some("example") => *example = Some(body),
742            _ => {}
743        }
744    };
745    for line in doc.lines() {
746        let trimmed = line.trim_start();
747        if trimmed.starts_with("```") {
748            in_fence = !in_fence;
749            buf.push_str(line);
750            buf.push('\n');
751            continue;
752        }
753        if !in_fence {
754            if let Some(rest) = trimmed.strip_prefix("# ") {
755                let head = rest.trim().to_ascii_lowercase();
756                let target = match head.as_str() {
757                    "arguments" | "args" => Some("arguments"),
758                    "returns" => Some("returns"),
759                    "errors" => Some("errors"),
760                    "panics" => Some("panics"),
761                    "safety" => Some("safety"),
762                    "example" | "examples" => Some("example"),
763                    _ => None,
764                };
765                if target.is_some() {
766                    flush(
767                        current,
768                        &mut buf,
769                        &mut summary,
770                        &mut arguments,
771                        &mut returns,
772                        &mut errors,
773                        &mut panics,
774                        &mut safety,
775                        &mut example,
776                    );
777                    current = target;
778                    continue;
779                }
780            }
781        }
782        buf.push_str(line);
783        buf.push('\n');
784    }
785    flush(
786        current,
787        &mut buf,
788        &mut summary,
789        &mut arguments,
790        &mut returns,
791        &mut errors,
792        &mut panics,
793        &mut safety,
794        &mut example,
795    );
796    RustdocSections {
797        summary,
798        arguments,
799        returns,
800        errors,
801        panics,
802        safety,
803        example,
804    }
805}
806
/// Parse an `# Arguments` body into `(name, description)` pairs.
///
/// A bullet is a line that, after trimming, starts with `* ` or `- `. The
/// name is separated from the description by ` - `, `: `, or (when neither
/// is present) the first space:
///
/// - `* `name` - description`
/// - `- name: description`
/// - `- name description`
///
/// When both ` - ` and `: ` occur, the one that comes first wins, so
/// `- timeout: value in ms - defaults to 30` yields the name `timeout`
/// rather than `timeout: value in ms`. If the bullet starts with a
/// backticked name, separators are searched only after the closing backtick,
/// so a name such as `` `a: b` `` is kept whole. Surrounding backticks and
/// `*` emphasis markers are stripped from the name.
///
/// Non-bullet, non-blank lines are continuation lines: they are appended,
/// separated by a space, to the previous entry's description. Such lines
/// before the first bullet are ignored.
///
/// Used by emitters that translate to per-parameter documentation tags
/// (`@param`, `<param>`, `\param`).
pub fn parse_arguments_bullets(body: &str) -> Vec<(String, String)> {
    let mut entries: Vec<(String, String)> = Vec::new();
    for raw in body.lines() {
        let trimmed = raw.trim();
        let bullet = trimmed
            .strip_prefix("* ")
            .or_else(|| trimmed.strip_prefix("- "));
        if let Some(after) = bullet {
            // Skip past a leading `` `name` `` so separators inside the
            // backticks are not mistaken for the name/description boundary.
            let scan_from = after
                .strip_prefix('`')
                .and_then(|inner| inner.find('`'))
                .map_or(0, |close| close + 2);
            let tail = &after[scan_from..];
            // (offset within `tail`, separator length)
            let dash = tail.find(" - ").map(|at| (at, 3));
            let colon = tail.find(": ").map(|at| (at, 2));
            let separator = match (dash, colon) {
                (Some(d), Some(c)) => Some(if c.0 < d.0 { c } else { d }),
                (d, c) => d.or(c),
            }
            .or_else(|| tail.find(' ').map(|at| (at, 1)));
            let (name, desc) = match separator {
                Some((offset, sep_len)) => {
                    let at = scan_from + offset;
                    (after[..at].trim(), after[at + sep_len..].trim())
                }
                None => (after.trim(), ""),
            };
            let name = name.trim_matches('`').trim_matches('*');
            entries.push((name.to_string(), desc.to_string()));
        } else if !trimmed.is_empty() {
            if let Some((_, desc)) = entries.last_mut() {
                if !desc.is_empty() {
                    desc.push(' ');
                }
                desc.push_str(trimmed);
            }
        }
    }
    entries
}
847
/// Report whether `tag` (the first comma-separated token after the opening
/// ` ``` ` of a code fence) marks the block as Rust code.
///
/// Returns `true` for:
/// - the empty string — rustdoc treats unlabelled fences as Rust by default
/// - `"rust"`, or anything starting with `"rust,"` (Rust plus attributes)
/// - the rustdoc test attributes `no_run`, `ignore`, `should_panic` and
///   `compile_fail`, which imply Rust even when `rust` itself is omitted
/// - anything starting with `"edition"` (e.g. `edition2018`, `edition2021`)
///
/// The comparison is case-sensitive.
fn is_rust_fence_tag(tag: &str) -> bool {
    match tag {
        "" | "rust" | "no_run" | "ignore" | "should_panic" | "compile_fail" => true,
        other => other.starts_with("rust,") || other.starts_with("edition"),
    }
}
867
868/// Detect the language tag on the first code fence in `body`.
869///
870/// Scans `body` for the first line that starts with ` ``` ` and returns the
871/// tag that follows (e.g. `"rust"`, `"php"`, `"typescript"`). A bare ` ``` `
872/// with no tag returns `"rust"` because rustdoc treats unlabelled fences as
873/// Rust by default. Returns `"rust"` when no fence is found at all.
874fn detect_first_fence_lang(body: &str) -> &str {
875    for line in body.lines() {
876        let trimmed = line.trim_start();
877        if let Some(rest) = trimmed.strip_prefix("```") {
878            let tag = rest.split(',').next().unwrap_or("").trim();
879            return if tag.is_empty() || is_rust_fence_tag(tag) {
880                "rust"
881            } else {
882                tag
883            };
884        }
885    }
886    "rust"
887}
888
889/// Return `Some(transformed_example)` if the example should be emitted for
890/// `target_lang`, or `None` when the example is Rust source that would be
891/// meaningless in the foreign language.
892///
893/// When the original fence language is `rust` (including bare ` ``` ` which
894/// rustdoc defaults to Rust) and the target is not `rust`, the example is
895/// suppressed entirely — better absent than misleading. Cross-language
896/// transliteration of example bodies is intentionally out of scope.
897pub fn example_for_target(example: &str, target_lang: &str) -> Option<String> {
898    let trimmed = example.trim();
899    let source_lang = detect_first_fence_lang(trimmed);
900    if source_lang == "rust" && target_lang != "rust" {
901        None
902    } else {
903        Some(replace_fence_lang(trimmed, target_lang))
904    }
905}
906
/// Retarget the language tag of the fenced code blocks in `body`.
///
/// Each *opening* fence line (first non-blank characters are ` ``` `) is
/// rewritten so that its language tag becomes `lang_replacement`. The
/// line's indentation is preserved, as is everything from the first comma
/// onwards (comma-separated fence attributes). The matching *closing* fence
/// and all other lines are copied through unchanged — a closing fence must
/// stay bare, otherwise Markdown renderers no longer recognise it as the end
/// of the block.
///
/// When no fence is present the body is returned unchanged apart from
/// trailing newlines, which are always trimmed from the result. Used by
/// emitters that need to convert ` ```rust ` examples into
/// ` ```typescript ` / ` ```python ` / ` ```swift ` etc.
pub fn replace_fence_lang(body: &str, lang_replacement: &str) -> String {
    let mut out = String::with_capacity(body.len() + lang_replacement.len());
    // Tracks whether the next fence line opens or closes a block.
    let mut inside_fence = false;
    for line in body.lines() {
        let trimmed = line.trim_start();
        match trimmed.strip_prefix("```") {
            Some(rest) if !inside_fence => {
                // Opening fence: swap the tag (everything up to the first
                // comma or end of line), keeping indentation and attributes.
                let indent = &line[..line.len() - trimmed.len()];
                let after_lang = rest.find(',').map_or("", |i| &rest[i..]);
                out.push_str(indent);
                out.push_str("```");
                out.push_str(lang_replacement);
                out.push_str(after_lang);
                inside_fence = true;
            }
            Some(_) => {
                // Closing fence: must be emitted untouched.
                out.push_str(line);
                inside_fence = false;
            }
            None => out.push_str(line),
        }
        out.push('\n');
    }
    out.trim_end_matches('\n').to_string()
}
935
936/// Render `RustdocSections` as a JSDoc comment body (without the `/**` /
937/// ` */` wrappers — those are added by the caller's emitter, which knows
938/// the indent/escape conventions).
939///
940/// - `# Arguments` → `@param name - desc`
941/// - `# Returns`   → `@returns desc`
942/// - `# Errors`    → `@throws desc`
943/// - `# Example`   → `@example` block. Replaces ` ```rust ` fences with
944///   ` ```typescript ` so the example highlights properly in TypeDoc.
945///
946/// Output is a plain string with `\n` separators; emitters wrap each line
947/// in ` * ` themselves.
948pub fn render_jsdoc_sections(sections: &RustdocSections) -> String {
949    let mut out = String::new();
950    if !sections.summary.is_empty() {
951        out.push_str(&sections.summary);
952    }
953    if let Some(args) = sections.arguments.as_deref() {
954        for (name, desc) in parse_arguments_bullets(args) {
955            if !out.is_empty() {
956                out.push('\n');
957            }
958            if desc.is_empty() {
959                out.push_str(&crate::template_env::render(
960                    "doc_jsdoc_param.jinja",
961                    minijinja::context! { name => &name },
962                ));
963            } else {
964                out.push_str(&crate::template_env::render(
965                    "doc_jsdoc_param_desc.jinja",
966                    minijinja::context! { name => &name, desc => &desc },
967                ));
968            }
969        }
970    }
971    if let Some(ret) = sections.returns.as_deref() {
972        if !out.is_empty() {
973            out.push('\n');
974        }
975        out.push_str(&crate::template_env::render(
976            "doc_jsdoc_returns.jinja",
977            minijinja::context! { content => ret.trim() },
978        ));
979    }
980    if let Some(err) = sections.errors.as_deref() {
981        if !out.is_empty() {
982            out.push('\n');
983        }
984        out.push_str(&crate::template_env::render(
985            "doc_jsdoc_throws.jinja",
986            minijinja::context! { content => err.trim() },
987        ));
988    }
989    if let Some(example) = sections.example.as_deref() {
990        if let Some(body) = example_for_target(example, "typescript") {
991            if !out.is_empty() {
992                out.push('\n');
993            }
994            out.push_str("@example\n");
995            out.push_str(&body);
996        }
997    }
998    out
999}
1000
1001/// Render `RustdocSections` as a JavaDoc comment body.
1002///
1003/// - `# Arguments` → `@param name desc` (one per param)
1004/// - `# Returns`   → `@return desc`
1005/// - `# Errors`    → `@throws KreuzbergRsException desc`
1006/// - `# Example`   → `<pre>{@code ...}</pre>` block.
1007///
1008/// `throws_class` is the FQN/simple name of the exception class to use in
1009/// the `@throws` tag (e.g. `"KreuzbergRsException"`).
1010pub fn render_javadoc_sections(sections: &RustdocSections, throws_class: &str) -> String {
1011    let mut out = String::new();
1012    if !sections.summary.is_empty() {
1013        out.push_str(&sections.summary);
1014    }
1015    if let Some(args) = sections.arguments.as_deref() {
1016        for (name, desc) in parse_arguments_bullets(args) {
1017            if !out.is_empty() {
1018                out.push('\n');
1019            }
1020            if desc.is_empty() {
1021                out.push_str(&crate::template_env::render(
1022                    "doc_javadoc_param.jinja",
1023                    minijinja::context! { name => &name },
1024                ));
1025            } else {
1026                out.push_str(&crate::template_env::render(
1027                    "doc_javadoc_param_desc.jinja",
1028                    minijinja::context! { name => &name, desc => &desc },
1029                ));
1030            }
1031        }
1032    }
1033    if let Some(ret) = sections.returns.as_deref() {
1034        if !out.is_empty() {
1035            out.push('\n');
1036        }
1037        out.push_str(&crate::template_env::render(
1038            "doc_javadoc_return.jinja",
1039            minijinja::context! { content => ret.trim() },
1040        ));
1041    }
1042    if let Some(err) = sections.errors.as_deref() {
1043        if !out.is_empty() {
1044            out.push('\n');
1045        }
1046        out.push_str(&crate::template_env::render(
1047            "doc_javadoc_throws.jinja",
1048            minijinja::context! { throws_class => throws_class, content => err.trim() },
1049        ));
1050    }
1051    out
1052}
1053
1054/// Render `RustdocSections` as a C# XML doc comment body (without the
1055/// `/// ` line prefixes — the emitter adds those).
1056///
1057/// - summary  → `<summary>...</summary>`
1058/// - args     → `<param name="x">desc</param>` (one per arg)
1059/// - returns  → `<returns>desc</returns>`
1060/// - errors   → `<exception cref="KreuzbergException">desc</exception>`
1061/// - example  → `<example><code language="csharp">...</code></example>`
1062pub fn render_csharp_xml_sections(sections: &RustdocSections, exception_class: &str) -> String {
1063    let mut out = String::new();
1064    out.push_str("<summary>\n");
1065    let summary = if sections.summary.is_empty() {
1066        ""
1067    } else {
1068        sections.summary.as_str()
1069    };
1070    for line in summary.lines() {
1071        out.push_str(line);
1072        out.push('\n');
1073    }
1074    out.push_str("</summary>");
1075    if let Some(args) = sections.arguments.as_deref() {
1076        for (name, desc) in parse_arguments_bullets(args) {
1077            out.push('\n');
1078            if desc.is_empty() {
1079                out.push_str(&crate::template_env::render(
1080                    "doc_csharp_param.jinja",
1081                    minijinja::context! { name => &name },
1082                ));
1083            } else {
1084                out.push_str(&crate::template_env::render(
1085                    "doc_csharp_param_desc.jinja",
1086                    minijinja::context! { name => &name, desc => &desc },
1087                ));
1088            }
1089        }
1090    }
1091    if let Some(ret) = sections.returns.as_deref() {
1092        out.push('\n');
1093        out.push_str(&crate::template_env::render(
1094            "doc_csharp_returns.jinja",
1095            minijinja::context! { content => ret.trim() },
1096        ));
1097    }
1098    if let Some(err) = sections.errors.as_deref() {
1099        out.push('\n');
1100        out.push_str(&crate::template_env::render(
1101            "doc_csharp_exception.jinja",
1102            minijinja::context! {
1103                exception_class => exception_class,
1104                content => err.trim(),
1105            },
1106        ));
1107    }
1108    if let Some(example) = sections.example.as_deref() {
1109        out.push('\n');
1110        out.push_str("<example><code language=\"csharp\">\n");
1111        // Drop fence markers, keep code.
1112        for line in example.lines() {
1113            let t = line.trim_start();
1114            if t.starts_with("```") {
1115                continue;
1116            }
1117            out.push_str(line);
1118            out.push('\n');
1119        }
1120        out.push_str("</code></example>");
1121    }
1122    out
1123}
1124
1125/// Render `RustdocSections` as a PHPDoc comment body.
1126///
1127/// - `# Arguments` → `@param mixed $name desc`
1128/// - `# Returns`   → `@return desc`
1129/// - `# Errors`    → `@throws KreuzbergException desc`
1130/// - `# Example`   → ` ```php ` fence (replaces ` ```rust `).
1131pub fn render_phpdoc_sections(sections: &RustdocSections, throws_class: &str) -> String {
1132    let mut out = String::new();
1133    if !sections.summary.is_empty() {
1134        out.push_str(&sections.summary);
1135    }
1136    if let Some(args) = sections.arguments.as_deref() {
1137        for (name, desc) in parse_arguments_bullets(args) {
1138            if !out.is_empty() {
1139                out.push('\n');
1140            }
1141            if desc.is_empty() {
1142                out.push_str(&crate::template_env::render(
1143                    "doc_phpdoc_param.jinja",
1144                    minijinja::context! { name => &name },
1145                ));
1146            } else {
1147                out.push_str(&crate::template_env::render(
1148                    "doc_phpdoc_param_desc.jinja",
1149                    minijinja::context! { name => &name, desc => &desc },
1150                ));
1151            }
1152        }
1153    }
1154    if let Some(ret) = sections.returns.as_deref() {
1155        if !out.is_empty() {
1156            out.push('\n');
1157        }
1158        out.push_str(&crate::template_env::render(
1159            "doc_phpdoc_return.jinja",
1160            minijinja::context! { content => ret.trim() },
1161        ));
1162    }
1163    if let Some(err) = sections.errors.as_deref() {
1164        if !out.is_empty() {
1165            out.push('\n');
1166        }
1167        out.push_str(&crate::template_env::render(
1168            "doc_phpdoc_throws.jinja",
1169            minijinja::context! { throws_class => throws_class, content => err.trim() },
1170        ));
1171    }
1172    if let Some(example) = sections.example.as_deref() {
1173        if let Some(body) = example_for_target(example, "php") {
1174            if !out.is_empty() {
1175                out.push('\n');
1176            }
1177            out.push_str(&body);
1178        }
1179    }
1180    out
1181}
1182
1183/// Render `RustdocSections` as a Doxygen comment body for the C header.
1184///
1185/// - args    → `\param name desc`
1186/// - returns → `\return desc`
1187/// - errors  → prose paragraph (Doxygen has no semantic tag for FFI errors)
1188/// - example → `\code` ... `\endcode`
1189pub fn render_doxygen_sections(sections: &RustdocSections) -> String {
1190    let mut out = String::new();
1191    if !sections.summary.is_empty() {
1192        out.push_str(&sections.summary);
1193    }
1194    if let Some(args) = sections.arguments.as_deref() {
1195        for (name, desc) in parse_arguments_bullets(args) {
1196            if !out.is_empty() {
1197                out.push('\n');
1198            }
1199            if desc.is_empty() {
1200                out.push_str(&crate::template_env::render(
1201                    "doc_doxygen_param.jinja",
1202                    minijinja::context! { name => &name },
1203                ));
1204            } else {
1205                out.push_str(&crate::template_env::render(
1206                    "doc_doxygen_param_desc.jinja",
1207                    minijinja::context! { name => &name, desc => &desc },
1208                ));
1209            }
1210        }
1211    }
1212    if let Some(ret) = sections.returns.as_deref() {
1213        if !out.is_empty() {
1214            out.push('\n');
1215        }
1216        out.push_str(&crate::template_env::render(
1217            "doc_doxygen_return.jinja",
1218            minijinja::context! { content => ret.trim() },
1219        ));
1220    }
1221    if let Some(err) = sections.errors.as_deref() {
1222        if !out.is_empty() {
1223            out.push('\n');
1224        }
1225        out.push_str(&crate::template_env::render(
1226            "doc_doxygen_errors.jinja",
1227            minijinja::context! { content => err.trim() },
1228        ));
1229    }
1230    if let Some(example) = sections.example.as_deref() {
1231        if !out.is_empty() {
1232            out.push('\n');
1233        }
1234        out.push_str("\\code\n");
1235        for line in example.lines() {
1236            let t = line.trim_start();
1237            if t.starts_with("```") {
1238                continue;
1239            }
1240            out.push_str(line);
1241            out.push('\n');
1242        }
1243        out.push_str("\\endcode");
1244    }
1245    out
1246}
1247
/// Return the first paragraph of a doc comment as a single joined line.
///
/// Leading blank (empty or whitespace-only) lines are skipped, then lines
/// are collected until the next blank line. Each collected line is trimmed
/// and the lines are joined with a single space. This handles wrapped
/// sentences like:
///
/// ```text
/// Convert HTML to Markdown, returning
/// a `ConversionResult`.
/// ```
///
/// which would otherwise be truncated at the comma when callers use
/// `.lines().next()`.
///
/// Returns an empty string when `doc` contains no non-blank line.
pub fn doc_first_paragraph_joined(doc: &str) -> String {
    doc.lines()
        .map(str::trim)
        // A doc string that starts with a blank line must still yield its
        // first real paragraph rather than an empty summary.
        .skip_while(|line| line.is_empty())
        .take_while(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
1267
/// Foreign-language documentation flavour that [`sanitize_rust_idioms`]
/// rewrites Rust prose for.
///
/// The variant chooses how Rust constructs without a direct counterpart in
/// the target's doc syntax are mapped.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DocTarget {
    /// PHPDoc block comments (`/** ... */`), e.g. phpstan-typed prose.
    PhpDoc,
    /// Javadoc block comments (`/** ... */`), e.g. OpenJDK-style annotations.
    JavaDoc,
    /// TSDoc block comments (`/** ... */`) — the TypeScript variant of JSDoc.
    TsDoc,
    /// JSDoc block comments (`/** ... */`) — the JavaScript variant.
    JsDoc,
    /// C# XML documentation (`/// <summary>...</summary>`).
    ///
    /// For this target the sanitizer can cut the comment at the first
    /// rustdoc section heading (`# Examples`, `# Arguments`, `# Returns`,
    /// etc.), drops Rust code fences, and XML-escapes any remaining
    /// `<` / `>` / `&` so the result is safe to embed inside a `<summary>`
    /// element.
    CSharpDoc,
}
1290
1291/// Sanitize Rust-specific idioms in a prose string for the given foreign-language
1292/// documentation target.
1293///
1294/// Transformations are applied **outside** backtick spans and code fences only,
1295/// so inline code examples and fenced blocks are never mutated (except that
1296/// ` ```rust ` fences and unmarked ` ``` ` code blocks are dropped entirely
1297/// for all targets [`DocTarget::TsDoc`], [`DocTarget::JsDoc`], [`DocTarget::PhpDoc`],
1298/// [`DocTarget::JavaDoc`], and [`DocTarget::CSharpDoc`]).
1299///
1300/// # Transformations
1301///
1302/// - Intra-doc links `` [`Type::method`] `` → `` `Type.method` ``.
1303/// - `[`Foo`]` (backtick inside square brackets) → `` `Foo` ``.
1304/// - `None` (word boundary) → `null` (PHP/Java) or `undefined` (TS/JS).
1305/// - `Some(x)` → `the value (x)`.
1306/// - `Option<T>` → `T?` (PHP) / `T | null` (Java) / `T | undefined` (TS/JS).
1307/// - `Vec<u8>` → `string` (PHP) / `byte[]` (Java) / `Uint8Array` (TS/JS).
1308/// - `Vec<T>` → `T[]` (all targets).
1309/// - `HashMap<K, V>` → `array<K, V>` (PHP) / `Map<K, V>` (Java) / `Record<K, V>` (TS/JS).
1310/// - `Arc<T>`, `Box<T>`, `Mutex<T>`, `RwLock<T>`, `Rc<T>`, `Cell<T>`, `RefCell<T>` → `T`.
1311/// - `Send + Sync`, `Send`, `Sync`, `'static` → stripped.
1312/// - Standalone `::` between identifiers → `.`.
1313/// - `pub fn `, `crate::`, `&self`, `&mut self` → stripped.
1314/// - `#[…]` attribute macros on their own line or inline → stripped.
1315/// - `.unwrap()`, `.expect("…")` → stripped.
1316/// - ` ```rust ` and unmarked ` ``` ` code fences → dropped entirely.
1317pub fn sanitize_rust_idioms(text: &str, target: DocTarget) -> String {
1318    // For C# XML doc the default is to drop rustdoc section headings
1319    // (`# Examples`, `# Arguments`, …) and the remainder of the comment,
1320    // because those bodies routinely contain content that cannot be embedded
1321    // safely inside `<summary>`. Callers that have already extracted sections
1322    // (`emit_csharp_doc`) sanitise each section body via [`sanitize_rust_idioms_keep_sections`].
1323    sanitize_rust_idioms_inner(text, target, true)
1324}
1325
1326/// Same as [`sanitize_rust_idioms`] but never drops rustdoc section headings.
1327///
1328/// Used by emitters that have already split the doc into sections and need to
1329/// sanitise each body fragment independently (e.g. C# XML doc emission with
1330/// per-section `<param>` / `<returns>` / `<exception>` tags).
1331pub fn sanitize_rust_idioms_keep_sections(text: &str, target: DocTarget) -> String {
1332    sanitize_rust_idioms_inner(text, target, false)
1333}
1334
1335fn sanitize_rust_idioms_inner(text: &str, target: DocTarget, drop_csharp_sections: bool) -> String {
1336    let mut out = String::with_capacity(text.len());
1337    let mut in_rust_fence = false;
1338    let mut in_other_fence = false;
1339    // For C# XML doc: once a `# Examples` / `# Arguments` / etc. heading is
1340    // encountered, drop the entire remainder of the comment. Rustdoc section
1341    // headings cannot be safely embedded inside `<summary>` and the per-section
1342    // content (code fences, intra-doc links, generics) is the leading cause
1343    // of CS1002/CS1519 leakage. The plain `<summary>` path collapses to the
1344    // top-level prose only.
1345    let mut csharp_section_dropped = false;
1346
1347    for line in text.lines() {
1348        if csharp_section_dropped {
1349            continue;
1350        }
1351        let trimmed = line.trim_start();
1352        if drop_csharp_sections
1353            && matches!(target, DocTarget::CSharpDoc)
1354            && !in_rust_fence
1355            && !in_other_fence
1356            && is_rustdoc_section_heading(trimmed)
1357        {
1358            csharp_section_dropped = true;
1359            continue;
1360        }
1361
1362        // Detect code fence boundaries.
1363        if let Some(rest) = trimmed.strip_prefix("```") {
1364            if in_rust_fence {
1365                // Closing fence of a rust block.
1366                in_rust_fence = false;
1367                match target {
1368                    DocTarget::TsDoc
1369                    | DocTarget::JsDoc
1370                    | DocTarget::CSharpDoc
1371                    | DocTarget::PhpDoc
1372                    | DocTarget::JavaDoc => {
1373                        // Entire rust block dropped — don't emit closing fence.
1374                    }
1375                }
1376                continue;
1377            }
1378            if in_other_fence {
1379                // Closing fence of a non-rust block.
1380                in_other_fence = false;
1381                out.push_str(line);
1382                out.push('\n');
1383                continue;
1384            }
1385            // Opening fence — determine language.
1386            let lang = rest.split(',').next().unwrap_or("").trim();
1387            let is_rust = is_rust_fence_tag(lang);
1388            if is_rust {
1389                in_rust_fence = true;
1390                match target {
1391                    DocTarget::TsDoc
1392                    | DocTarget::JsDoc
1393                    | DocTarget::CSharpDoc
1394                    | DocTarget::PhpDoc
1395                    | DocTarget::JavaDoc => {
1396                        // Drop the entire rust fence block — skip opening line.
1397                        // Rust code examples are not portable to any of the target languages.
1398                    }
1399                }
1400                continue;
1401            }
1402            // Non-rust fence: pass through verbatim.
1403            in_other_fence = true;
1404            out.push_str(line);
1405            out.push('\n');
1406            continue;
1407        }
1408
1409        // Inside a rust fence.
1410        if in_rust_fence {
1411            match target {
1412                DocTarget::TsDoc | DocTarget::JsDoc | DocTarget::CSharpDoc | DocTarget::PhpDoc | DocTarget::JavaDoc => {
1413                    // Drop content of rust fences — all targets filter out Rust code examples.
1414                }
1415            }
1416            continue;
1417        }
1418
1419        // Inside a non-rust fence: pass through verbatim.
1420        if in_other_fence {
1421            out.push_str(line);
1422            out.push('\n');
1423            continue;
1424        }
1425
1426        // Check if this line is a bare `#[...]` attribute line.
1427        let stripped_indent = line.trim_start();
1428        if stripped_indent.starts_with("#[") && stripped_indent.ends_with(']') {
1429            // Attribute-only line — drop entirely.
1430            continue;
1431        }
1432
1433        // Normal prose line: apply token-level transformations.
1434        let sanitized = apply_prose_transforms(line, target);
1435        out.push_str(&sanitized);
1436        out.push('\n');
1437    }
1438
1439    // Trim trailing newline added by the loop (preserve internal newlines).
1440    if out.ends_with('\n') && !text.ends_with('\n') {
1441        out.pop();
1442    }
1443
1444    // For C# XML doc, escape any remaining `<`, `>`, `&` so the result is
1445    // safe to embed inside `<summary>...</summary>`. By this point the
1446    // Rust-idiom substitutions have replaced `Vec<T>` / `Option<T>` /
1447    // `HashMap<K, V>` / `Result<T, E>` with their idiomatic forms, but
1448    // unrecognised generic constructs (e.g. trait-object references) may
1449    // still contain raw angle brackets that would break C# XML parsing.
1450    if matches!(target, DocTarget::CSharpDoc) {
1451        out = xml_escape_for_csharp(&out);
1452    }
1453
1454    out
1455}
1456
/// Tell whether an already left-trimmed line is one of the rustdoc section
/// headings this module recognises.
///
/// The line must start with `# ` (a single level-one heading marker); the
/// remaining text is trimmed and compared ASCII-case-insensitively against
/// `Arguments`, `Args`, `Returns`, `Errors`, `Panics`, `Safety`, `Example`
/// and `Examples`.
fn is_rustdoc_section_heading(trimmed: &str) -> bool {
    const HEADINGS: [&str; 8] = [
        "arguments",
        "args",
        "returns",
        "errors",
        "panics",
        "safety",
        "example",
        "examples",
    ];
    trimmed.strip_prefix("# ").map_or(false, |rest| {
        let title = rest.trim();
        HEADINGS.iter().any(|known| title.eq_ignore_ascii_case(known))
    })
}
1470
/// Entity-escape the XML-significant characters `&`, `<` and `>` so that `s`
/// can be embedded inside a C# `<summary>` element.
///
/// Angle brackets may legitimately remain in prose after the Rust-idiom
/// substitutions (e.g. `Dictionary<K, V>`); they are still markup to an XML
/// parser and therefore have to be written as entities.
fn xml_escape_for_csharp(s: &str) -> String {
    // `&` has to be handled first, otherwise the ampersands introduced by
    // `&lt;` / `&gt;` would be escaped a second time.
    s.replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
}
1489
1490/// Apply prose-level Rust-idiom transformations to a single line.
1491///
1492/// Some transformations span or precede backtick boundaries and must be applied
1493/// to the full line before tokenisation:
1494///
1495/// 1. Intra-doc links (`` [`...`] ``) — they wrap a backtick pair.
1496/// 2. `::` path separator — even inside backtick spans it should become `.`
1497///    for all foreign-language targets, since the target language uses `.` for
1498///    member access and package paths in code examples too.
1499///
1500/// All remaining transformations are applied only to literal (non-code) segments
1501/// after tokenisation.
1502fn apply_prose_transforms(line: &str, target: DocTarget) -> String {
1503    // Step 1: replace intra-doc links before tokenisation (they span backtick pairs).
1504    let line = replace_intradoc_links(line, target);
1505
1506    // Step 2: replace :: everywhere (including inside backtick spans).
1507    // All targets use `.` as the member/package separator, so this is always safe.
1508    let line = replace_path_separator(&line);
1509
1510    // Step 3: strip .unwrap() and .expect() everywhere, including inside backtick spans,
1511    // since these Rust error-handling idioms are meaningless in all target languages.
1512    let line = strip_unwrap_expect(&line);
1513
1514    // Step 4: tokenise and apply remaining transforms only to literal segments.
1515    let segments = tokenize_backtick_spans(&line);
1516    let mut result = String::with_capacity(line.len());
1517    for (is_code, span) in segments {
1518        if is_code {
1519            result.push('`');
1520            result.push_str(span);
1521            result.push('`');
1522        } else {
1523            result.push_str(&transform_prose_segment(span, target));
1524        }
1525    }
1526    result
1527}
1528
/// Break a line into literal and inline-code segments.
///
/// Each returned pair is `(is_code, text)`. `is_code` is `true` for the text
/// between a matched pair of backticks (the backticks themselves are not
/// included). A backtick without a closing partner is not special: it and
/// everything after it come back as one literal segment. Empty literal
/// segments are never produced; an empty code span (two adjacent backticks)
/// is.
fn tokenize_backtick_spans(line: &str) -> Vec<(bool, &str)> {
    let mut segments = Vec::new();
    let mut remaining = line;

    while let Some(open) = remaining.find('`') {
        // Literal text in front of the opening backtick, if any.
        if open > 0 {
            segments.push((false, &remaining[..open]));
        }
        let after_open = &remaining[open + 1..];
        match after_open.find('`') {
            Some(close) => {
                segments.push((true, &after_open[..close]));
                remaining = &after_open[close + 1..];
            }
            None => {
                // Unmatched backtick: the tail, backtick included, is literal.
                segments.push((false, &remaining[open..]));
                remaining = "";
            }
        }
    }

    if !remaining.is_empty() {
        segments.push((false, remaining));
    }
    segments
}
1569
1570/// Apply all prose-level Rust substitutions to a literal text segment.
1571///
1572/// Intra-doc links have already been replaced by `apply_prose_transforms`
1573/// before tokenisation; this function handles the remaining transformations.
1574fn transform_prose_segment(text: &str, target: DocTarget) -> String {
1575    let mut s = text.to_string();
1576
1577    // 1. Strip #[derive(...)] and other inline attribute-style references.
1578    s = strip_inline_attributes(&s);
1579
1580    // 2. Strip pub fn, crate::, &self, &mut self.
1581    s = s.replace("pub fn ", "");
1582    s = s.replace("crate::", "");
1583    s = s.replace("&mut self", "");
1584    s = s.replace("&self", "");
1585
1586    // 3. Strip lifetime and bound markers.
1587    s = strip_lifetime_and_bounds(&s);
1588
1589    // 4. Type substitutions (order matters — most specific first).
1590    s = replace_type_wrappers(&s, target);
1591
1592    // 5. Some(x) -> the value (x).
1593    s = replace_some_calls(&s);
1594
1595    // 5b. Bare "Some <lowercase>" in prose -> drop "Some ".
1596    s = replace_some_keyword_in_prose(&s);
1597
1598    // 6. None -> null / undefined (word boundary, uppercase only).
1599    s = replace_none_keyword(&s, target);
1600
1601    // Note: :: -> . and .unwrap()/.expect() stripping are applied to the full
1602    // line before tokenisation in apply_prose_transforms and therefore do not
1603    // need to be repeated here.
1604
1605    s
1606}
1607
/// Copy the character that starts at byte offset `i` of `s` into `out` and
/// return the byte offset just past it.
///
/// The byte-crawling helpers in this file only look for ASCII markers. When
/// none matches they call this to step over one whole character — never a
/// single byte — so multi-byte UTF-8 sequences are not split.
///
/// # Panics
///
/// Panics if `i` is not a char boundary inside `s` or if `i == s.len()`.
/// Callers only reach this from positions found by scanning for ASCII bytes,
/// which are always char boundaries.
#[inline]
fn advance_char(s: &str, out: &mut String, i: usize) -> usize {
    let mut tail = s[i..].chars();
    let current = tail.next().expect("valid UTF-8 position");
    out.push(current);
    // Whatever the iterator has not consumed starts at the next character.
    s.len() - tail.as_str().len()
}
1623
1624/// Replace `` [`Type::method()`] `` and `` [`Foo`] `` intra-doc links with
1625/// backtick-wrapped identifiers, converting `::` to `.`.
1626fn replace_intradoc_links(s: &str, _target: DocTarget) -> String {
1627    let mut out = String::with_capacity(s.len());
1628    let bytes = s.as_bytes();
1629    let mut i = 0;
1630    while i < bytes.len() {
1631        // Look for [`
1632        if i + 1 < bytes.len() && bytes[i] == b'[' && bytes[i + 1] == b'`' {
1633            // Find closing `]
1634            let search_start = i + 2;
1635            let mut found = false;
1636            let mut j = search_start;
1637            while j + 1 < bytes.len() {
1638                if bytes[j] == b'`' && bytes[j + 1] == b']' {
1639                    let inner = &s[search_start..j];
1640                    // Convert :: to . in the inner part.
1641                    let converted = inner.replace("::", ".");
1642                    out.push('`');
1643                    out.push_str(&converted);
1644                    out.push('`');
1645                    i = j + 2;
1646                    found = true;
1647                    break;
1648                }
1649                j += 1;
1650            }
1651            if !found {
1652                i = advance_char(s, &mut out, i);
1653            }
1654        } else {
1655            i = advance_char(s, &mut out, i);
1656        }
1657    }
1658    out
1659}
1660
1661/// Strip inline `#[...]` attribute references (not on their own line — those
1662/// are handled as full-line drops in the main loop).
1663fn strip_inline_attributes(s: &str) -> String {
1664    let mut out = String::with_capacity(s.len());
1665    let bytes = s.as_bytes();
1666    let mut i = 0;
1667    while i < bytes.len() {
1668        if bytes[i] == b'#' && i + 1 < bytes.len() && bytes[i + 1] == b'[' {
1669            // Skip until matching ']', handling nesting.
1670            let mut depth = 0usize;
1671            let mut j = i + 1;
1672            while j < bytes.len() {
1673                if bytes[j] == b'[' {
1674                    depth += 1;
1675                } else if bytes[j] == b']' {
1676                    depth -= 1;
1677                    if depth == 0 {
1678                        i = j + 1;
1679                        break;
1680                    }
1681                }
1682                j += 1;
1683            }
1684            if depth != 0 {
1685                // Unmatched bracket: emit literally.
1686                i = advance_char(s, &mut out, i);
1687            }
1688        } else {
1689            i = advance_char(s, &mut out, i);
1690        }
1691    }
1692    out
1693}
1694
1695/// Strip `'static`, `Send + Sync`, `Send`, `Sync` from prose text.
1696fn strip_lifetime_and_bounds(s: &str) -> String {
1697    // Order matters: match compound forms before simple forms.
1698    let mut out = s.to_string();
1699    // Strip `Send + Sync` (with optional spaces around `+`).
1700    out = regex_replace_all(&out, r"Send\s*\+\s*Sync", "");
1701    out = regex_replace_all(&out, r"Sync\s*\+\s*Send", "");
1702    // Strip standalone Send/Sync only at word boundaries.
1703    out = regex_replace_word_boundary(&out, "Send", "");
1704    out = regex_replace_word_boundary(&out, "Sync", "");
1705    // Strip 'static lifetime markers.
1706    out = regex_replace_all(&out, r"'\s*static\b", "");
1707    out
1708}
1709
1710/// Replace occurrences of `pattern` (treated as a simple substring pattern
1711/// with `\s*` only, no full regex) with `replacement` in `s`.
1712///
1713/// This is a lightweight regex-free replacement for simple patterns that
1714/// only need literal text or `\s*` between tokens.
1715fn regex_replace_all(s: &str, pattern: &str, replacement: &str) -> String {
1716    // Inline tiny pattern compiler for the three patterns we actually use.
1717    match pattern {
1718        r"Send\s*\+\s*Sync" => replace_with_optional_spaces(s, "Send", "+", "Sync", replacement),
1719        r"Sync\s*\+\s*Send" => replace_with_optional_spaces(s, "Sync", "+", "Send", replacement),
1720        r"'\s*static\b" => replace_static_lifetime(s, replacement),
1721        _ => s.replace(pattern, replacement),
1722    }
1723}
1724
1725/// Replace `word_boundary(keyword)` occurrences in `s` with `replacement`.
1726fn regex_replace_word_boundary(s: &str, keyword: &str, replacement: &str) -> String {
1727    let mut out = String::with_capacity(s.len());
1728    let klen = keyword.len();
1729    let bytes = s.as_bytes();
1730    let kbytes = keyword.as_bytes();
1731    if klen == 0 || klen > bytes.len() {
1732        return s.to_string();
1733    }
1734    let mut i = 0;
1735    while i + klen <= bytes.len() {
1736        if &bytes[i..i + klen] == kbytes {
1737            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1738            let after_ok =
1739                i + klen >= bytes.len() || !bytes[i + klen].is_ascii_alphanumeric() && bytes[i + klen] != b'_';
1740            if before_ok && after_ok {
1741                out.push_str(replacement);
1742                i += klen;
1743                continue;
1744            }
1745        }
1746        i = advance_char(s, &mut out, i);
1747    }
1748    if i < bytes.len() {
1749        out.push_str(&s[i..]);
1750    }
1751    out
1752}
1753
/// Replace every `a <spaces> op <spaces> b` triplet in `s` with `replacement`.
///
/// Zero or more space characters (`' '` only — tabs and other whitespace do
/// not count) may appear on either side of `op`. Text that does not form a
/// complete triplet is copied through unchanged.
///
/// The scan works directly on string slices, so no per-call or per-iteration
/// character buffers are allocated. A triplet that would match without
/// consuming any input (all of `a`, `op` and `b` empty, with no spaces at the
/// current position) is ignored, which guarantees the scan always advances.
fn replace_with_optional_spaces(s: &str, a: &str, op: &str, b: &str, replacement: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;

    while let Some(ch) = rest.chars().next() {
        // Text that follows a triplet starting exactly at the head of `rest`.
        let after_triplet = rest
            .strip_prefix(a)
            .map(|t| t.trim_start_matches(' '))
            .and_then(|t| t.strip_prefix(op))
            .map(|t| t.trim_start_matches(' '))
            .and_then(|t| t.strip_prefix(b))
            // Reject zero-width matches: accepting one would loop forever.
            .filter(|t| t.len() < rest.len());

        match after_triplet {
            Some(tail) => {
                out.push_str(replacement);
                rest = tail;
            }
            None => {
                out.push(ch);
                rest = &rest[ch.len_utf8()..];
            }
        }
    }
    out
}
1793
1794/// Replace `'static` lifetime markers (with optional spaces after `'`).
1795fn replace_static_lifetime(s: &str, replacement: &str) -> String {
1796    let mut out = String::with_capacity(s.len());
1797    let bytes = s.as_bytes();
1798    let mut i = 0;
1799    while i < bytes.len() {
1800        if bytes[i] == b'\'' {
1801            // Peek ahead skipping spaces.
1802            let mut j = i + 1;
1803            while j < bytes.len() && bytes[j] == b' ' {
1804                j += 1;
1805            }
1806            let keyword = b"static";
1807            if bytes[j..].starts_with(keyword) {
1808                let end = j + keyword.len();
1809                // Must be followed by non-identifier char or end.
1810                let after_ok = end >= bytes.len() || !bytes[end].is_ascii_alphanumeric() && bytes[end] != b'_';
1811                if after_ok {
1812                    out.push_str(replacement);
1813                    i = end;
1814                    continue;
1815                }
1816            }
1817        }
1818        i = advance_char(s, &mut out, i);
1819    }
1820    out
1821}
1822
1823/// Replace Rust generic type wrappers in prose.
1824fn replace_type_wrappers(s: &str, target: DocTarget) -> String {
1825    // Order: most specific patterns first.
1826    let mut out = s.to_string();
1827
1828    // Vec<u8> — must come before Vec<T>.
1829    let vec_u8_replacement = match target {
1830        DocTarget::PhpDoc => "string",
1831        DocTarget::JavaDoc => "byte[]",
1832        DocTarget::TsDoc | DocTarget::JsDoc => "Uint8Array",
1833        DocTarget::CSharpDoc => "byte[]",
1834    };
1835    out = replace_generic1(&out, "Vec", "u8", vec_u8_replacement);
1836
1837    // HashMap<K, V> — must come before Vec<T> to avoid order-dependency issues.
1838    let map_replacement_fn = |k: &str, v: &str| match target {
1839        DocTarget::PhpDoc => format!("array<{k}, {v}>"),
1840        DocTarget::JavaDoc => format!("Map<{k}, {v}>"),
1841        DocTarget::TsDoc | DocTarget::JsDoc => format!("Record<{k}, {v}>"),
1842        DocTarget::CSharpDoc => format!("Dictionary<{k}, {v}>"),
1843    };
1844    out = replace_generic2(&out, "HashMap", &map_replacement_fn);
1845
1846    // Vec<T> — generic.
1847    out = replace_generic1_passthrough(&out, "Vec", |inner| format!("{inner}[]"));
1848
1849    // Option<T>.
1850    let option_replacement_fn = |inner: &str| match target {
1851        DocTarget::PhpDoc => format!("{inner}?"),
1852        DocTarget::JavaDoc => format!("{inner} | null"),
1853        DocTarget::TsDoc | DocTarget::JsDoc => format!("{inner} | undefined"),
1854        DocTarget::CSharpDoc => format!("{inner}?"),
1855    };
1856    out = replace_generic1_passthrough(&out, "Option", option_replacement_fn);
1857
1858    // Result<T, E> — drop the error type, keep the success type.
1859    // C# has no Result type; the binding throws exceptions, so just the success type
1860    // is meaningful in prose. We do this for C# only; other targets historically left
1861    // `Result<T, E>` unchanged (their tests assert nothing about it).
1862    if matches!(target, DocTarget::CSharpDoc) {
1863        out = replace_generic2(&out, "Result", &|t: &str, _e: &str| t.to_string());
1864    }
1865
1866    // Smart pointer wrappers: strip to inner type.
1867    for wrapper in &["Arc", "Box", "Mutex", "RwLock", "Rc", "Cell", "RefCell"] {
1868        out = replace_generic1_passthrough(&out, wrapper, |inner| inner.to_string());
1869    }
1870
1871    out
1872}
1873
/// Replace `Name<Arg>` with `replacement`, where `Arg` is an exact literal
/// (for example `Vec<u8>`).
///
/// Like the other generic replacers in this file, the match is anchored at a
/// word boundary: an occurrence directly preceded by an ASCII letter, ASCII
/// digit or `_` (e.g. the `Vec<u8>` inside `SmallVec<u8>`) belongs to a
/// different type name and is left untouched.
fn replace_generic1(s: &str, name: &str, arg: &str, replacement: &str) -> String {
    let pattern = format!("{name}<{arg}>");
    let bytes = s.as_bytes();
    let mut out = String::with_capacity(s.len());
    // Start of the text that has not been copied to `out` yet.
    let mut copied = 0;

    for (start, matched) in s.match_indices(pattern.as_str()) {
        let at_boundary = start == 0
            || !(bytes[start - 1].is_ascii_alphanumeric() || bytes[start - 1] == b'_');
        if at_boundary {
            out.push_str(&s[copied..start]);
            out.push_str(replacement);
            copied = start + matched.len();
        }
    }
    out.push_str(&s[copied..]);
    out
}
1879
/// Replace `Name<T>` with `f(T)` for an arbitrary inner type expression `T`.
///
/// * The match must start at a word boundary: a `Name<` directly preceded by an
///   ASCII letter, ASCII digit or `_` is part of a longer identifier and is
///   skipped.
/// * Nested generics are balanced by counting angle brackets. The `>` of a
///   `->` return arrow (as in `Box<dyn Fn(u8) -> String>`) is not a bracket and
///   is ignored while counting.
/// * A `Name<` with no balancing `>` is copied through unchanged.
///
/// The text produced by `f` is not rescanned.
fn replace_generic1_passthrough<F>(s: &str, name: &str, f: F) -> String
where
    F: Fn(&str) -> String,
{
    let prefix = format!("{name}<");
    let bytes = s.as_bytes();
    let mut out = String::with_capacity(s.len());
    // Start of the text that has not been copied to `out` yet.
    let mut copied = 0;
    // Offset from which the next occurrence of `prefix` is searched.
    let mut search = 0;

    while let Some(found) = s[search..].find(prefix.as_str()) {
        let start = search + found;
        let inner_start = start + prefix.len();
        let at_boundary = start == 0
            || !(bytes[start - 1].is_ascii_alphanumeric() || bytes[start - 1] == b'_');

        // Offset of the `>` that balances the `<` ending `prefix`, if any.
        let mut close = None;
        if at_boundary {
            let mut depth = 1usize;
            for (offset, &b) in bytes[inner_start..].iter().enumerate() {
                let idx = inner_start + offset;
                match b {
                    b'<' => depth += 1,
                    // `->` is a return arrow, not a closing angle bracket.
                    b'>' if bytes[idx - 1] == b'-' => {}
                    b'>' => {
                        depth -= 1;
                        if depth == 0 {
                            close = Some(idx);
                            break;
                        }
                    }
                    _ => {}
                }
            }
        }

        match close {
            Some(end) => {
                out.push_str(&s[copied..start]);
                out.push_str(&f(&s[inner_start..end]));
                copied = end + 1;
                search = copied;
            }
            None => {
                // Not replaceable here; look again one character further on so
                // that a later (possibly nested) occurrence is still found.
                search = start + s[start..].chars().next().map_or(1, char::len_utf8);
            }
        }
    }
    out.push_str(&s[copied..]);
    out
}
1927
1928/// Replace `Name<K, V>` → `f(K, V)` for two-argument generics (e.g. `HashMap`).
1929fn replace_generic2<F>(s: &str, name: &str, f: &F) -> String
1930where
1931    F: Fn(&str, &str) -> String,
1932{
1933    let mut out = String::with_capacity(s.len());
1934    let mut i = 0;
1935    let prefix = format!("{name}<");
1936    let pbytes = prefix.as_bytes();
1937    let bytes = s.as_bytes();
1938
1939    while i < bytes.len() {
1940        if bytes[i..].starts_with(pbytes) {
1941            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1942            if before_ok {
1943                let inner_start = i + pbytes.len();
1944                // Find the matching '>' respecting nesting.
1945                let mut depth = 1usize;
1946                let mut j = inner_start;
1947                while j < bytes.len() {
1948                    match bytes[j] {
1949                        b'<' => depth += 1,
1950                        b'>' => {
1951                            depth -= 1;
1952                            if depth == 0 {
1953                                break;
1954                            }
1955                        }
1956                        _ => {}
1957                    }
1958                    j += 1;
1959                }
1960                if depth == 0 && j < bytes.len() {
1961                    let inner = &s[inner_start..j];
1962                    // Split on the first ',' at depth 0.
1963                    let split = split_on_comma_at_top_level(inner);
1964                    if let Some((k, v)) = split {
1965                        out.push_str(&f(k.trim(), v.trim()));
1966                        i = j + 1;
1967                        continue;
1968                    }
1969                }
1970            }
1971        }
1972        i = advance_char(s, &mut out, i);
1973    }
1974    out
1975}
1976
/// Split `s` at the first comma that sits at angle-bracket depth 0.
///
/// Returns the text before and after that comma (the comma itself is dropped,
/// surrounding whitespace is kept), or `None` when every comma is nested inside
/// `<...>` or there is no comma at all.
///
/// The `>` of a `->` return arrow does not count as a closing bracket, so
/// `Box<dyn Fn() -> A>, E` splits after the `Box<...>` argument.
fn split_on_comma_at_top_level(s: &str) -> Option<(&str, &str)> {
    let mut depth = 0i32;
    let mut prev = None;
    for (idx, ch) in s.char_indices() {
        match ch {
            '<' => depth += 1,
            // Part of `->`: leave the depth alone.
            '>' if prev == Some('-') => {}
            '>' => depth -= 1,
            // `,` is a single byte, so `idx + 1` is a valid char boundary.
            ',' if depth == 0 => return Some((&s[..idx], &s[idx + 1..])),
            _ => {}
        }
        prev = Some(ch);
    }
    None
}
1990
1991/// Replace `Some(x)` in prose with `the value (x)`.
1992fn replace_some_calls(s: &str) -> String {
1993    let mut out = String::with_capacity(s.len());
1994    let bytes = s.as_bytes();
1995    let prefix = b"Some(";
1996    let mut i = 0;
1997
1998    while i < bytes.len() {
1999        if bytes[i..].starts_with(prefix) {
2000            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
2001            if before_ok {
2002                let arg_start = i + prefix.len();
2003                // Find matching ')' respecting nesting.
2004                let mut depth = 1usize;
2005                let mut j = arg_start;
2006                while j < bytes.len() {
2007                    match bytes[j] {
2008                        b'(' => depth += 1,
2009                        b')' => {
2010                            depth -= 1;
2011                            if depth == 0 {
2012                                break;
2013                            }
2014                        }
2015                        _ => {}
2016                    }
2017                    j += 1;
2018                }
2019                if depth == 0 && j < bytes.len() {
2020                    let arg = &s[arg_start..j];
2021                    out.push_str("the value (");
2022                    out.push_str(arg);
2023                    out.push(')');
2024                    i = j + 1;
2025                    continue;
2026                }
2027            }
2028        }
2029        i = advance_char(s, &mut out, i);
2030    }
2031    out
2032}
2033
2034/// Drop bare `Some ` when it appears as a Rust-idiom modifier in prose
2035/// ("(Some values)", "Some keys leave the previous", etc.). The `Some(...)`
2036/// call form is handled separately by [`replace_some_calls`].
2037///
2038/// Match shape: word-boundary `Some` + single ASCII space + ASCII-lowercase
2039/// letter. The "Some " prefix is dropped; the following word is preserved.
2040/// `SomeType`, `Some.method()`, `Some(x)`, and sentence-initial `Some `
2041/// followed by an uppercase noun stay untouched.
2042fn replace_some_keyword_in_prose(s: &str) -> String {
2043    let keyword = b"Some ";
2044    let klen = keyword.len();
2045    let bytes = s.as_bytes();
2046    if klen >= bytes.len() {
2047        return s.to_string();
2048    }
2049    let mut out = String::with_capacity(s.len());
2050    let mut i = 0;
2051    while i + klen < bytes.len() {
2052        if &bytes[i..i + klen] == keyword {
2053            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
2054            let after_ok = bytes[i + klen].is_ascii_lowercase();
2055            if before_ok && after_ok {
2056                i += klen;
2057                continue;
2058            }
2059        }
2060        i = advance_char(s, &mut out, i);
2061    }
2062    if i < bytes.len() {
2063        out.push_str(&s[i..]);
2064    }
2065    out
2066}
2067
2068/// Replace `None` (at word boundaries, uppercase) with the target-appropriate nil.
2069fn replace_none_keyword(s: &str, target: DocTarget) -> String {
2070    let replacement = match target {
2071        DocTarget::PhpDoc | DocTarget::JavaDoc | DocTarget::CSharpDoc => "null",
2072        DocTarget::TsDoc | DocTarget::JsDoc => "undefined",
2073    };
2074    let keyword = b"None";
2075    let klen = keyword.len();
2076    let mut out = String::with_capacity(s.len());
2077    let bytes = s.as_bytes();
2078    if klen > bytes.len() {
2079        return s.to_string();
2080    }
2081    let mut i = 0;
2082
2083    while i + klen <= bytes.len() {
2084        if &bytes[i..i + klen] == keyword {
2085            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
2086            let after_ok =
2087                i + klen >= bytes.len() || !bytes[i + klen].is_ascii_alphanumeric() && bytes[i + klen] != b'_';
2088            if before_ok && after_ok {
2089                out.push_str(replacement);
2090                i += klen;
2091                continue;
2092            }
2093        }
2094        i = advance_char(s, &mut out, i);
2095    }
2096    if i < bytes.len() {
2097        out.push_str(&s[i..]);
2098    }
2099    out
2100}
2101
2102/// Replace standalone `::` between identifiers with `.`.
2103fn replace_path_separator(s: &str) -> String {
2104    let mut out = String::with_capacity(s.len());
2105    let bytes = s.as_bytes();
2106    let mut i = 0;
2107
2108    while i < bytes.len() {
2109        if i + 1 < bytes.len() && bytes[i] == b':' && bytes[i + 1] == b':' {
2110            // Only replace if surrounded by identifier characters or end/start of string.
2111            let before_ok = i > 0 && (bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_');
2112            let after_ok = i + 2 < bytes.len() && (bytes[i + 2].is_ascii_alphanumeric() || bytes[i + 2] == b'_');
2113            if before_ok || after_ok {
2114                out.push('.');
2115                i += 2;
2116                continue;
2117            }
2118        }
2119        i = advance_char(s, &mut out, i);
2120    }
2121    out
2122}
2123
2124/// Strip `.unwrap()` and `.expect("...")` calls from prose.
2125fn strip_unwrap_expect(s: &str) -> String {
2126    let mut out = String::with_capacity(s.len());
2127    let bytes = s.as_bytes();
2128    let mut i = 0;
2129
2130    while i < bytes.len() {
2131        // Match .unwrap().
2132        if bytes[i..].starts_with(b".unwrap()") {
2133            i += b".unwrap()".len();
2134            continue;
2135        }
2136        // Match .expect(...).
2137        if bytes[i..].starts_with(b".expect(") {
2138            let arg_start = i + b".expect(".len();
2139            let mut depth = 1usize;
2140            let mut j = arg_start;
2141            while j < bytes.len() {
2142                match bytes[j] {
2143                    b'(' => depth += 1,
2144                    b')' => {
2145                        depth -= 1;
2146                        if depth == 0 {
2147                            break;
2148                        }
2149                    }
2150                    _ => {}
2151                }
2152                j += 1;
2153            }
2154            if depth == 0 {
2155                i = j + 1;
2156                continue;
2157            }
2158        }
2159        i = advance_char(s, &mut out, i);
2160    }
2161    out
2162}
2163
2164#[cfg(test)]
2165mod tests {
2166    use super::*;
2167
2168    #[test]
2169    fn test_emit_phpdoc() {
2170        let mut out = String::new();
2171        emit_phpdoc(&mut out, "Simple documentation", "    ", "TestException");
2172        assert!(out.contains("/**"));
2173        assert!(out.contains("Simple documentation"));
2174        assert!(out.contains("*/"));
2175    }
2176
2177    #[test]
2178    fn test_phpdoc_escaping() {
2179        let mut out = String::new();
2180        emit_phpdoc(&mut out, "Handle */ sequences", "", "TestException");
2181        assert!(out.contains("Handle * / sequences"));
2182    }
2183
2184    #[test]
2185    fn test_emit_csharp_doc() {
2186        let mut out = String::new();
2187        emit_csharp_doc(&mut out, "C# documentation", "    ", "TestException");
2188        assert!(out.contains("<summary>"));
2189        assert!(out.contains("C# documentation"));
2190        assert!(out.contains("</summary>"));
2191    }
2192
2193    #[test]
2194    fn test_csharp_xml_escaping() {
2195        let mut out = String::new();
2196        emit_csharp_doc(&mut out, "foo < bar & baz > qux", "", "TestException");
2197        assert!(out.contains("foo &lt; bar &amp; baz &gt; qux"));
2198    }
2199
2200    #[test]
2201    fn test_emit_elixir_doc() {
2202        let mut out = String::new();
2203        emit_elixir_doc(&mut out, "Elixir documentation");
2204        assert!(out.contains("@doc \"\"\""));
2205        assert!(out.contains("Elixir documentation"));
2206        assert!(out.contains("\"\"\""));
2207    }
2208
2209    #[test]
2210    fn test_elixir_heredoc_escaping() {
2211        let mut out = String::new();
2212        emit_elixir_doc(&mut out, "Handle \"\"\" sequences");
2213        assert!(out.contains("Handle \"\" \" sequences"));
2214    }
2215
2216    #[test]
2217    fn test_emit_roxygen() {
2218        let mut out = String::new();
2219        emit_roxygen(&mut out, "R documentation");
2220        assert!(out.contains("#' R documentation"));
2221    }
2222
2223    #[test]
2224    fn test_emit_swift_doc() {
2225        let mut out = String::new();
2226        emit_swift_doc(&mut out, "Swift documentation", "    ");
2227        assert!(out.contains("/// Swift documentation"));
2228    }
2229
2230    #[test]
2231    fn test_emit_javadoc() {
2232        let mut out = String::new();
2233        emit_javadoc(&mut out, "Java documentation", "    ");
2234        assert!(out.contains("/**"));
2235        assert!(out.contains("Java documentation"));
2236        assert!(out.contains("*/"));
2237    }
2238
2239    #[test]
2240    fn test_emit_kdoc() {
2241        let mut out = String::new();
2242        emit_kdoc(&mut out, "Kotlin documentation", "    ");
2243        assert!(out.contains("/**"));
2244        assert!(out.contains("Kotlin documentation"));
2245        assert!(out.contains("*/"));
2246    }
2247
2248    #[test]
2249    fn test_emit_dartdoc() {
2250        let mut out = String::new();
2251        emit_dartdoc(&mut out, "Dart documentation", "    ");
2252        assert!(out.contains("/// Dart documentation"));
2253    }
2254
2255    #[test]
2256    fn test_emit_gleam_doc() {
2257        let mut out = String::new();
2258        emit_gleam_doc(&mut out, "Gleam documentation", "    ");
2259        assert!(out.contains("/// Gleam documentation"));
2260    }
2261
2262    #[test]
2263    fn test_emit_zig_doc() {
2264        let mut out = String::new();
2265        emit_zig_doc(&mut out, "Zig documentation", "    ");
2266        assert!(out.contains("/// Zig documentation"));
2267    }
2268
2269    #[test]
2270    fn test_empty_doc_skipped() {
2271        let mut out = String::new();
2272        emit_phpdoc(&mut out, "", "", "TestException");
2273        emit_csharp_doc(&mut out, "", "", "TestException");
2274        emit_elixir_doc(&mut out, "");
2275        emit_roxygen(&mut out, "");
2276        emit_kdoc(&mut out, "", "");
2277        emit_dartdoc(&mut out, "", "");
2278        emit_gleam_doc(&mut out, "", "");
2279        emit_zig_doc(&mut out, "", "");
2280        assert!(out.is_empty());
2281    }
2282
2283    #[test]
2284    fn test_doc_first_paragraph_joined_single_line() {
2285        assert_eq!(doc_first_paragraph_joined("Simple doc."), "Simple doc.");
2286    }
2287
2288    #[test]
2289    fn test_doc_first_paragraph_joined_wrapped_sentence() {
2290        // Simulates a docstring like convert's: "Convert HTML to Markdown,\nreturning a result."
2291        let doc = "Convert HTML to Markdown,\nreturning a result.";
2292        assert_eq!(
2293            doc_first_paragraph_joined(doc),
2294            "Convert HTML to Markdown, returning a result."
2295        );
2296    }
2297
2298    #[test]
2299    fn test_doc_first_paragraph_joined_stops_at_blank_line() {
2300        let doc = "First paragraph.\nStill first.\n\nSecond paragraph.";
2301        assert_eq!(doc_first_paragraph_joined(doc), "First paragraph. Still first.");
2302    }
2303
2304    #[test]
2305    fn test_doc_first_paragraph_joined_empty() {
2306        assert_eq!(doc_first_paragraph_joined(""), "");
2307    }
2308
2309    #[test]
2310    fn test_parse_rustdoc_sections_basic() {
2311        let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n\n# Returns\n\nThe extracted text.\n\n# Errors\n\nReturns `KreuzbergError` on failure.";
2312        let sections = parse_rustdoc_sections(doc);
2313        assert_eq!(sections.summary, "Extracts text from a file.");
2314        assert_eq!(sections.arguments.as_deref(), Some("* `path` - The file path."));
2315        assert_eq!(sections.returns.as_deref(), Some("The extracted text."));
2316        assert_eq!(sections.errors.as_deref(), Some("Returns `KreuzbergError` on failure."));
2317        assert!(sections.panics.is_none());
2318    }
2319
2320    #[test]
2321    fn test_parse_rustdoc_sections_example_with_fence() {
2322        let doc = "Run the thing.\n\n# Example\n\n```rust\nlet x = run();\n```";
2323        let sections = parse_rustdoc_sections(doc);
2324        assert_eq!(sections.summary, "Run the thing.");
2325        assert!(sections.example.as_ref().unwrap().contains("```rust"));
2326        assert!(sections.example.as_ref().unwrap().contains("let x = run();"));
2327    }
2328
2329    #[test]
2330    fn test_parse_rustdoc_sections_pound_inside_fence_is_not_a_heading() {
2331        // Even though we get rustdoc-hidden lines pre-stripped, a literal
2332        // `# foo` inside a non-rust fence (e.g. shell example) must not
2333        // start a new section.
2334        let doc = "Summary.\n\n# Example\n\n```bash\n# install deps\nrun --foo\n```";
2335        let sections = parse_rustdoc_sections(doc);
2336        assert_eq!(sections.summary, "Summary.");
2337        assert!(sections.example.as_ref().unwrap().contains("# install deps"));
2338    }
2339
2340    #[test]
2341    fn test_parse_arguments_bullets_dash_separator() {
2342        let body = "* `path` - The file path.\n* `config` - Optional configuration.";
2343        let pairs = parse_arguments_bullets(body);
2344        assert_eq!(pairs.len(), 2);
2345        assert_eq!(pairs[0], ("path".to_string(), "The file path.".to_string()));
2346        assert_eq!(pairs[1], ("config".to_string(), "Optional configuration.".to_string()));
2347    }
2348
2349    #[test]
2350    fn test_parse_arguments_bullets_continuation_line() {
2351        let body = "* `path` - The file path,\n  resolved relative to cwd.\n* `mode` - Open mode.";
2352        let pairs = parse_arguments_bullets(body);
2353        assert_eq!(pairs.len(), 2);
2354        assert_eq!(pairs[0].1, "The file path, resolved relative to cwd.");
2355    }
2356
2357    #[test]
2358    fn test_replace_fence_lang_rust_to_typescript() {
2359        let body = "```rust\nlet x = run();\n```";
2360        let out = replace_fence_lang(body, "typescript");
2361        assert!(out.starts_with("```typescript"));
2362        assert!(out.contains("let x = run();"));
2363    }
2364
2365    #[test]
2366    fn test_replace_fence_lang_preserves_attrs() {
2367        let body = "```rust,no_run\nlet x = run();\n```";
2368        let out = replace_fence_lang(body, "typescript");
2369        assert!(out.starts_with("```typescript,no_run"));
2370    }
2371
2372    #[test]
2373    fn test_replace_fence_lang_no_fence_unchanged() {
2374        let body = "Plain prose with `inline code`.";
2375        let out = replace_fence_lang(body, "typescript");
2376        assert_eq!(out, "Plain prose with `inline code`.");
2377    }
2378
2379    fn fixture_sections() -> RustdocSections {
2380        let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n* `config` - Optional configuration.\n\n# Returns\n\nThe extracted text and metadata.\n\n# Errors\n\nReturns an error when the file is unreadable.\n\n# Example\n\n```rust\nlet result = extract(\"file.pdf\")?;\n```";
2381        parse_rustdoc_sections(doc)
2382    }
2383
2384    #[test]
2385    fn test_render_jsdoc_sections() {
2386        let sections = fixture_sections();
2387        let out = render_jsdoc_sections(&sections);
2388        assert!(out.starts_with("Extracts text from a file."));
2389        assert!(out.contains("@param path - The file path."));
2390        assert!(out.contains("@param config - Optional configuration."));
2391        assert!(out.contains("@returns The extracted text and metadata."));
2392        assert!(out.contains("@throws Returns an error when the file is unreadable."));
2393        // fixture example is ```rust — stripped when target is TypeScript
2394        assert!(!out.contains("@example"), "Rust example must not appear in TSDoc");
2395        assert!(!out.contains("```typescript"));
2396        assert!(!out.contains("```rust"));
2397    }
2398
2399    #[test]
2400    fn test_render_jsdoc_sections_preserves_typescript_example() {
2401        let doc = "Do something.\n\n# Example\n\n```typescript\nconst x = doSomething();\n```";
2402        let sections = parse_rustdoc_sections(doc);
2403        let out = render_jsdoc_sections(&sections);
2404        assert!(out.contains("@example"), "TypeScript example must be preserved");
2405        assert!(out.contains("```typescript"));
2406    }
2407
2408    #[test]
2409    fn test_render_javadoc_sections() {
2410        let sections = fixture_sections();
2411        let out = render_javadoc_sections(&sections, "KreuzbergRsException");
2412        assert!(out.contains("@param path The file path."));
2413        assert!(out.contains("@return The extracted text and metadata."));
2414        assert!(out.contains("@throws KreuzbergRsException Returns an error when the file is unreadable."));
2415        // Java rendering omits the example block (handled separately by emit_javadoc which
2416        // wraps code in `<pre>{@code}</pre>`); we just confirm summary survives.
2417        assert!(out.starts_with("Extracts text from a file."));
2418    }
2419
2420    #[test]
2421    fn test_render_csharp_xml_sections() {
2422        let sections = fixture_sections();
2423        let out = render_csharp_xml_sections(&sections, "KreuzbergException");
2424        assert!(out.contains("<summary>\nExtracts text from a file.\n</summary>"));
2425        assert!(out.contains("<param name=\"path\">The file path.</param>"));
2426        assert!(out.contains("<returns>The extracted text and metadata.</returns>"));
2427        assert!(out.contains("<exception cref=\"KreuzbergException\">"));
2428        assert!(out.contains("<example><code language=\"csharp\">"));
2429        assert!(out.contains("let result = extract"));
2430    }
2431
2432    #[test]
2433    fn test_render_phpdoc_sections() {
2434        let sections = fixture_sections();
2435        let out = render_phpdoc_sections(&sections, "KreuzbergException");
2436        assert!(out.contains("@param mixed $path The file path."));
2437        assert!(out.contains("@return The extracted text and metadata."));
2438        assert!(out.contains("@throws KreuzbergException"));
2439        // fixture example is ```rust — stripped when target is PHP
2440        assert!(!out.contains("```php"), "Rust example must not appear in PHPDoc");
2441        assert!(!out.contains("```rust"));
2442    }
2443
2444    #[test]
2445    fn test_render_phpdoc_sections_preserves_php_example() {
2446        let doc = "Do something.\n\n# Example\n\n```php\n$x = doSomething();\n```";
2447        let sections = parse_rustdoc_sections(doc);
2448        let out = render_phpdoc_sections(&sections, "MyException");
2449        assert!(out.contains("```php"), "PHP example must be preserved");
2450    }
2451
2452    #[test]
2453    fn test_render_doxygen_sections() {
2454        let sections = fixture_sections();
2455        let out = render_doxygen_sections(&sections);
2456        assert!(out.contains("\\param path The file path."));
2457        assert!(out.contains("\\return The extracted text and metadata."));
2458        assert!(out.contains("\\code"));
2459        assert!(out.contains("\\endcode"));
2460    }
2461
2462    #[test]
2463    fn test_emit_yard_doc_simple() {
2464        let mut out = String::new();
2465        emit_yard_doc(&mut out, "Simple Ruby documentation", "    ");
2466        assert!(out.contains("# Simple Ruby documentation"));
2467    }
2468
2469    #[test]
2470    fn test_emit_yard_doc_empty() {
2471        let mut out = String::new();
2472        emit_yard_doc(&mut out, "", "    ");
2473        assert!(out.is_empty());
2474    }
2475
2476    #[test]
2477    fn test_emit_yard_doc_with_sections() {
2478        let mut out = String::new();
2479        let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n\n# Returns\n\nThe extracted text.\n\n# Errors\n\nReturns error on failure.";
2480        emit_yard_doc(&mut out, doc, "  ");
2481        assert!(out.contains("# Extracts text from a file."));
2482        assert!(out.contains("# @param path The file path."));
2483        assert!(out.contains("# @return The extracted text."));
2484        assert!(out.contains("# @raise Returns error on failure."));
2485    }
2486
2487    #[test]
2488    fn test_emit_c_doxygen_simple_prose() {
2489        let mut out = String::new();
2490        emit_c_doxygen(&mut out, "Free a string.", "");
2491        assert!(out.contains("/// Free a string."), "got: {out}");
2492    }
2493
2494    #[test]
2495    fn test_emit_c_doxygen_with_sections() {
2496        let mut out = String::new();
2497        let doc = "Extract content from a file.\n\n# Arguments\n\n* `path` - Path to the file.\n* `mode` - Read mode.\n\n# Returns\n\nA newly allocated string the caller owns.\n\n# Errors\n\nReturns null when the file is unreadable.";
2498        emit_c_doxygen(&mut out, doc, "");
2499        assert!(out.contains("/// Extract content from a file."));
2500        assert!(out.contains("/// \\param path Path to the file."));
2501        assert!(out.contains("/// \\param mode Read mode."));
2502        assert!(out.contains("/// \\return A newly allocated string the caller owns."));
2503        assert!(out.contains("/// \\note Returns null when the file is unreadable."));
2504    }
2505
2506    #[test]
2507    fn test_emit_c_doxygen_safety_section_maps_to_note() {
2508        let mut out = String::new();
2509        let doc = "Free a buffer.\n\n# Safety\n\nPointer must have been returned by this library.";
2510        emit_c_doxygen(&mut out, doc, "");
2511        assert!(out.contains("/// \\note SAFETY: Pointer must have been returned by this library."));
2512    }
2513
2514    #[test]
2515    fn test_emit_c_doxygen_example_renders_code_fence() {
2516        let mut out = String::new();
2517        let doc = "Demo.\n\n# Example\n\n```rust\nlet x = run();\n```";
2518        emit_c_doxygen(&mut out, doc, "");
2519        assert!(out.contains("/// \\code"));
2520        assert!(out.contains("/// \\endcode"));
2521        assert!(out.contains("let x = run();"));
2522    }
2523
2524    #[test]
2525    fn test_emit_c_doxygen_strips_markdown_links() {
2526        let mut out = String::new();
2527        let doc = "See [the docs](https://example.com/x) for details.";
2528        emit_c_doxygen(&mut out, doc, "");
2529        assert!(
2530            out.contains("the docs (https://example.com/x)"),
2531            "expected flattened link, got: {out}"
2532        );
2533        assert!(!out.contains("](https://"));
2534    }
2535
2536    #[test]
2537    fn test_emit_c_doxygen_word_wraps_long_lines() {
2538        let mut out = String::new();
2539        let long = "a ".repeat(80);
2540        emit_c_doxygen(&mut out, long.trim(), "");
2541        for line in out.lines() {
2542            // Each emitted prefix is "/// " (4 chars); the body after that
2543            // should be ≤ 100 chars per `DOXYGEN_WRAP_WIDTH`.
2544            let body = line.trim_start_matches("/// ");
2545            assert!(body.len() <= 100, "line too long ({}): {line}", body.len());
2546        }
2547    }
2548
2549    #[test]
2550    fn test_emit_c_doxygen_empty_input_is_noop() {
2551        let mut out = String::new();
2552        emit_c_doxygen(&mut out, "", "");
2553        emit_c_doxygen(&mut out, "   \n\t  ", "");
2554        assert!(out.is_empty());
2555    }
2556
2557    #[test]
2558    fn test_emit_c_doxygen_indent_applied() {
2559        let mut out = String::new();
2560        emit_c_doxygen(&mut out, "Hello.", "    ");
2561        assert!(out.starts_with("    /// Hello."));
2562    }
2563
2564    #[test]
2565    fn test_render_yard_sections() {
2566        let sections = fixture_sections();
2567        let out = render_yard_sections(&sections);
2568        assert!(out.contains("@param path The file path."));
2569        assert!(out.contains("@return The extracted text and metadata."));
2570        assert!(out.contains("@raise Returns an error when the file is unreadable."));
2571        // fixture example is ```rust — stripped when target is Ruby
2572        assert!(!out.contains("@example"), "Rust example must not appear in YARD");
2573        assert!(!out.contains("```ruby"));
2574        assert!(!out.contains("```rust"));
2575    }
2576
2577    #[test]
2578    fn test_render_yard_sections_preserves_ruby_example() {
2579        let doc = "Do something.\n\n# Example\n\n```ruby\nputs :hi\n```";
2580        let sections = parse_rustdoc_sections(doc);
2581        let out = render_yard_sections(&sections);
2582        assert!(out.contains("@example"), "Ruby example must be preserved");
2583        assert!(out.contains("```ruby"));
2584    }
2585
2586    // --- M1: example_for_target unit tests ---
2587
2588    #[test]
2589    fn example_for_target_rust_fenced_suppressed_for_php() {
2590        let example = "```rust\nlet x = 1;\n```";
2591        assert_eq!(
2592            example_for_target(example, "php"),
2593            None,
2594            "rust-fenced example must be omitted for PHP target"
2595        );
2596    }
2597
2598    #[test]
2599    fn example_for_target_bare_fence_defaults_to_rust_suppressed_for_ruby() {
2600        let example = "```\nlet x = 1;\n```";
2601        assert_eq!(
2602            example_for_target(example, "ruby"),
2603            None,
2604            "bare fence is treated as Rust and must be omitted for Ruby target"
2605        );
2606    }
2607
2608    #[test]
2609    fn example_for_target_php_example_preserved_for_php() {
2610        let example = "```php\n$x = 1;\n```";
2611        let result = example_for_target(example, "php");
2612        assert!(result.is_some(), "PHP example must be preserved for PHP target");
2613        assert!(result.unwrap().contains("```php"));
2614    }
2615
2616    #[test]
2617    fn example_for_target_ruby_example_preserved_for_ruby() {
2618        let example = "```ruby\nputs :hi\n```";
2619        let result = example_for_target(example, "ruby");
2620        assert!(result.is_some(), "Ruby example must be preserved for Ruby target");
2621        assert!(result.unwrap().contains("```ruby"));
2622    }
2623
2624    #[test]
2625    fn render_phpdoc_sections_with_rust_example_emits_no_at_example_block() {
2626        let doc = "Convert HTML.\n\n# Arguments\n\n* `html` - The HTML input.\n\n# Example\n\n```rust\nlet result = convert(html, None)?;\n```";
2627        let sections = parse_rustdoc_sections(doc);
2628        let out = render_phpdoc_sections(&sections, "HtmlToMarkdownException");
2629        assert!(!out.contains("```php"), "no PHP @example block for Rust source");
2630        assert!(!out.contains("```rust"), "raw Rust must not leak into PHPDoc");
2631        assert!(out.contains("@param"), "other sections must still be emitted");
2632    }
2633
2634    // --- KDoc ktfmt-canonical format tests ---
2635
2636    #[test]
2637    fn test_emit_kdoc_ktfmt_canonical_short_single_line() {
2638        let mut out = String::new();
2639        emit_kdoc_ktfmt_canonical(&mut out, "Simple doc.", "");
2640        assert_eq!(
2641            out, "/** Simple doc. */\n",
2642            "short single-line comment should collapse to canonical format"
2643        );
2644    }
2645
2646    #[test]
2647    fn test_emit_kdoc_ktfmt_canonical_short_with_indent() {
2648        let mut out = String::new();
2649        emit_kdoc_ktfmt_canonical(&mut out, "Text node (most frequent - 100+ per document)", "    ");
2650        assert_eq!(out, "    /** Text node (most frequent - 100+ per document) */\n");
2651    }
2652
2653    #[test]
2654    fn test_emit_kdoc_ktfmt_canonical_long_comment_uses_multiline() {
2655        let mut out = String::new();
2656        let long_text = "This is a very long documentation comment that exceeds the 100-character line width limit and should therefore be emitted in multi-line format";
2657        emit_kdoc_ktfmt_canonical(&mut out, long_text, "");
2658        assert!(out.contains("/**\n"), "long comment should start with newline");
2659        assert!(out.contains(" * "), "long comment should use multi-line format");
2660        assert!(out.contains(" */\n"), "long comment should end with newline");
2661    }
2662
2663    #[test]
2664    fn test_emit_kdoc_ktfmt_canonical_multiline_comment() {
2665        let mut out = String::new();
2666        let doc = "First line.\n\nSecond paragraph.";
2667        emit_kdoc_ktfmt_canonical(&mut out, doc, "");
2668        assert!(out.contains("/**\n"), "multi-paragraph should use multi-line format");
2669        assert!(out.contains(" * First line."), "first paragraph preserved");
2670        assert!(out.contains(" *\n"), "blank line preserved");
2671        assert!(out.contains(" * Second paragraph."), "second paragraph preserved");
2672    }
2673
2674    #[test]
2675    fn test_emit_kdoc_ktfmt_canonical_empty_doc() {
2676        let mut out = String::new();
2677        emit_kdoc_ktfmt_canonical(&mut out, "", "");
2678        assert!(out.is_empty(), "empty doc should produce no output");
2679    }
2680
2681    #[test]
2682    fn test_emit_kdoc_ktfmt_canonical_fits_within_100_chars() {
2683        let mut out = String::new();
2684        // Construct exactly at the boundary: indent(0) + "/** " + content + " */" = 100 chars
2685        // "/** " = 4 chars, " */" = 3 chars, so content can be 93 chars
2686        let content = "a".repeat(93);
2687        emit_kdoc_ktfmt_canonical(&mut out, &content, "");
2688        let line = out.lines().next().unwrap();
2689        assert_eq!(
2690            line.len(),
2691            100,
2692            "should fit exactly at 100 chars and use single-line format"
2693        );
2694        assert!(out.starts_with("/**"), "should use single-line format");
2695    }
2696
2697    #[test]
2698    fn test_emit_kdoc_ktfmt_canonical_exceeds_100_chars() {
2699        let mut out = String::new();
2700        // Exceed 100 chars: content of 94 chars with "/** " + " */" = 101 chars
2701        let content = "a".repeat(94);
2702        emit_kdoc_ktfmt_canonical(&mut out, &content, "");
2703        assert!(
2704            out.contains("/**\n"),
2705            "should use multi-line format when exceeding 100 chars"
2706        );
2707        assert!(out.contains(" * "), "multi-line format with ` * ` prefix");
2708    }
2709
2710    #[test]
2711    fn test_emit_kdoc_ktfmt_canonical_respects_indent() {
2712        let mut out = String::new();
2713        // With 4-char indent, max content is 89 chars (4 + 4 + 89 + 3 = 100)
2714        let content = "a".repeat(89);
2715        emit_kdoc_ktfmt_canonical(&mut out, &content, "    ");
2716        let line = out.lines().next().unwrap();
2717        assert_eq!(line.len(), 100, "should respect indent in 100-char calculation");
2718        assert!(line.starts_with("    /** "), "should include indent");
2719    }
2720
2721    #[test]
2722    fn test_emit_kdoc_ktfmt_canonical_real_world_enum_variant() {
2723        let mut out = String::new();
2724        emit_kdoc_ktfmt_canonical(&mut out, "Text node (most frequent - 100+ per document)", "    ");
2725        // This is from NodeType enum; should collapse to single-line
2726        assert!(out.starts_with("    /** "), "should preserve 4-space indent");
2727        assert!(out.contains(" */\n"), "should end with newline");
2728        // Verify it's single-line format
2729        let line_count = out.lines().count();
2730        assert_eq!(line_count, 1, "should be single-line format");
2731    }
2732
2733    #[test]
2734    fn test_emit_kdoc_ktfmt_canonical_real_world_data_class_field() {
2735        let mut out = String::new();
2736        let doc = "Heading style to use in Markdown output (ATX `#` or Setext underline).";
2737        emit_kdoc_ktfmt_canonical(&mut out, doc, "    ");
2738        // This is from ConversionOptions data class; should collapse to single-line
2739        let line_count = out.lines().count();
2740        assert_eq!(line_count, 1, "should be single-line format");
2741        assert!(out.starts_with("    /** "), "should have correct indent");
2742    }
2743
2744    // --- sanitize_rust_idioms tests ---
2745
2746    #[test]
2747    fn sanitize_intradoc_link_with_path_separator_java() {
2748        let input = "See [`ConversionOptions::builder()`] for details.";
2749        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2750        assert!(out.contains("`ConversionOptions.builder()`"), "got: {out}");
2751        assert!(!out.contains("[`"), "brackets must be removed, got: {out}");
2752    }
2753
2754    #[test]
2755    fn sanitize_intradoc_link_simple_type_php() {
2756        let input = "Returns a [`ConversionResult`].";
2757        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
2758        assert!(out.contains("`ConversionResult`"), "got: {out}");
2759        assert!(!out.contains("[`"), "got: {out}");
2760    }
2761
2762    #[test]
2763    fn sanitize_none_to_null_javadoc() {
2764        let input = "Returns None when no value is found.";
2765        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2766        assert!(out.contains("null"), "got: {out}");
2767        assert!(!out.contains("None"), "got: {out}");
2768    }
2769
2770    #[test]
2771    fn sanitize_none_to_undefined_tsdoc() {
2772        let input = "Returns None if absent.";
2773        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2774        assert!(out.contains("undefined"), "got: {out}");
2775        assert!(!out.contains("None"), "got: {out}");
2776    }
2777
2778    #[test]
2779    fn sanitize_some_x_to_the_value_x() {
2780        let input = "Pass Some(value) to enable.";
2781        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2782        assert!(out.contains("the value (value)"), "got: {out}");
2783        assert!(!out.contains("Some("), "got: {out}");
2784    }
2785
2786    #[test]
2787    fn sanitize_bare_some_followed_by_lowercase_noun_is_dropped() {
2788        // Real leak from html-to-markdown PreprocessingOptionsUpdate.java:16.
2789        let input =
2790            "Only specified fields (Some values) will override existing options; None values leave the previous";
2791        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2792        assert!(
2793            out.contains("(values)"),
2794            "bare `Some ` before lowercase noun must be stripped; got: {out}"
2795        );
2796        assert!(
2797            out.contains("null values"),
2798            "bare `None ` must also be replaced; got: {out}"
2799        );
2800        assert!(!out.contains("Some "), "Some prefix must not survive; got: {out}");
2801    }
2802
2803    #[test]
2804    fn sanitize_bare_some_does_not_touch_identifiers_or_uppercase_followers() {
2805        // SomeType, Some.method(), Some(x), and "Some Title" (proper noun) all preserved.
2806        let cases = [
2807            "SomeType lives on.",
2808            "Some.method() returns Self.",
2809            "Some Title",
2810            "Some(x) is a value.",
2811        ];
2812        for case in cases {
2813            let out = sanitize_rust_idioms(case, DocTarget::JavaDoc);
2814            // For the Some(x) case, replace_some_calls (run earlier) converts to "the value (x)"
2815            // so "Some" itself is gone — that's expected; everything else preserves "Some".
2816            if case.starts_with("Some(") {
2817                assert!(out.contains("the value (x)"), "got: {out}");
2818            } else {
2819                assert!(out.contains("Some"), "Some must survive in {case:?}; got: {out}");
2820            }
2821        }
2822    }
2823
2824    #[test]
2825    fn sanitize_option_t_to_nullable_php() {
2826        let input = "The result is Option<String>.";
2827        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
2828        assert!(out.contains("String?"), "got: {out}");
2829        assert!(!out.contains("Option<"), "got: {out}");
2830    }
2831
2832    #[test]
2833    fn sanitize_option_t_to_or_null_java() {
2834        let input = "The result is Option<String>.";
2835        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2836        assert!(out.contains("String | null"), "got: {out}");
2837    }
2838
2839    #[test]
2840    fn sanitize_option_t_to_or_undefined_tsdoc() {
2841        let input = "The result is Option<String>.";
2842        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2843        assert!(out.contains("String | undefined"), "got: {out}");
2844    }
2845
2846    #[test]
2847    fn sanitize_vec_u8_per_target() {
2848        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::PhpDoc).contains("string"));
2849        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::JavaDoc).contains("byte[]"));
2850        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::TsDoc).contains("Uint8Array"));
2851        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::JsDoc).contains("Uint8Array"));
2852    }
2853
2854    #[test]
2855    fn sanitize_vec_t_to_array() {
2856        let input = "Returns Vec<String>.";
2857        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2858        assert!(out.contains("String[]"), "got: {out}");
2859        assert!(!out.contains("Vec<"), "got: {out}");
2860    }
2861
2862    #[test]
2863    fn sanitize_hashmap_per_target() {
2864        let input = "Uses HashMap<String, u32>.";
2865        assert!(sanitize_rust_idioms(input, DocTarget::PhpDoc).contains("array<String, u32>"));
2866        assert!(sanitize_rust_idioms(input, DocTarget::JavaDoc).contains("Map<String, u32>"));
2867        assert!(sanitize_rust_idioms(input, DocTarget::TsDoc).contains("Record<String, u32>"));
2868    }
2869
2870    #[test]
2871    fn sanitize_arc_wrapper_stripped() {
2872        let input = "Holds Arc<Config>.";
2873        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2874        assert!(out.contains("Config"), "got: {out}");
2875        assert!(!out.contains("Arc<"), "got: {out}");
2876    }
2877
2878    #[test]
2879    fn sanitize_box_mutex_rwlock_rc_cell_refcell_stripped() {
2880        for wrapper in &["Box", "Mutex", "RwLock", "Rc", "Cell", "RefCell"] {
2881            let input = format!("Contains {wrapper}<Inner>.");
2882            let out = sanitize_rust_idioms(&input, DocTarget::JavaDoc);
2883            assert!(out.contains("Inner"), "wrapper {wrapper} not stripped, got: {out}");
2884            assert!(
2885                !out.contains(&format!("{wrapper}<")),
2886                "wrapper {wrapper} still present, got: {out}"
2887            );
2888        }
2889    }
2890
2891    #[test]
2892    fn sanitize_send_sync_stripped() {
2893        let input = "The type is Send + Sync.";
2894        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2895        assert!(!out.contains("Send"), "got: {out}");
2896        assert!(!out.contains("Sync"), "got: {out}");
2897    }
2898
2899    #[test]
2900    fn sanitize_static_lifetime_stripped() {
2901        let input = "Requires 'static lifetime.";
2902        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2903        assert!(!out.contains("'static"), "got: {out}");
2904    }
2905
2906    #[test]
2907    fn sanitize_pub_fn_stripped() {
2908        let input = "Calls pub fn convert().";
2909        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
2910        assert!(!out.contains("pub fn"), "got: {out}");
2911        assert!(out.contains("convert()"), "got: {out}");
2912    }
2913
2914    #[test]
2915    fn sanitize_crate_prefix_stripped() {
2916        let input = "See crate::error::ConversionError.";
2917        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2918        assert!(!out.contains("crate::"), "got: {out}");
2919        assert!(out.contains("error.ConversionError"), "got: {out}");
2920    }
2921
2922    #[test]
2923    fn sanitize_unwrap_expect_stripped() {
2924        let input = "Call result.unwrap() or result.expect(\"msg\").";
2925        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2926        assert!(!out.contains(".unwrap()"), "got: {out}");
2927        assert!(!out.contains(".expect("), "got: {out}");
2928    }
2929
2930    #[test]
2931    fn sanitize_no_mutation_inside_backticks() {
2932        // None inside backtick span must not be replaced.
2933        let input = "Use `None` as the argument.";
2934        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2935        assert!(out.contains("`None`"), "backtick span must be preserved, got: {out}");
2936    }
2937
2938    #[test]
2939    fn sanitize_rust_fence_dropped_for_tsdoc() {
2940        let input = "Intro.\n\n```rust\nlet x = 1;\n```\n\nTrailer.";
2941        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2942        assert!(
2943            !out.contains("let x = 1;"),
2944            "rust fence content must be dropped, got: {out}"
2945        );
2946        assert!(!out.contains("```rust"), "got: {out}");
2947        assert!(out.contains("Trailer."), "text after fence must survive, got: {out}");
2948    }
2949
2950    #[test]
2951    fn sanitize_rust_fence_dropped_for_java() {
2952        let input = "Intro.\n\n```rust\nlet x = 1;\n```\n\nTrailer.";
2953        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2954        // Rust fences are now dropped entirely for Java (Rust code is not portable).
2955        assert!(
2956            !out.contains("let x = 1;"),
2957            "fence content must be dropped for Java, got: {out}"
2958        );
2959        assert!(!out.contains("```"), "fence markers must be dropped, got: {out}");
2960        assert!(out.contains("Intro."), "prose before fence kept: {out}");
2961        assert!(out.contains("Trailer."), "prose after fence kept: {out}");
2962    }
2963
2964    #[test]
2965    fn sanitize_non_rust_fence_passed_through() {
2966        let input = "Example:\n\n```typescript\nconst x = 1;\n```";
2967        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2968        assert!(out.contains("```typescript"), "non-rust fence must survive, got: {out}");
2969        assert!(out.contains("const x = 1;"), "got: {out}");
2970    }
2971
2972    #[test]
2973    fn sanitize_backtick_code_span_not_mutated_option() {
2974        // Option<T> inside backtick span must not be replaced.
2975        let input = "The type is `Option<String>`.";
2976        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2977        // The backtick-protected span should be preserved verbatim.
2978        assert!(
2979            out.contains("`Option<String>`"),
2980            "code span must be preserved, got: {out}"
2981        );
2982    }
2983
2984    #[test]
2985    fn sanitize_idempotent() {
2986        // Running twice should produce the same result as running once.
2987        let input = "Returns None when Vec<String> is empty.";
2988        let once = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2989        let twice = sanitize_rust_idioms(&once, DocTarget::JavaDoc);
2990        assert_eq!(once, twice, "sanitize_rust_idioms should be idempotent");
2991    }
2992
2993    #[test]
2994    fn sanitize_multiline_prose() {
2995        let input = "Convert HTML to Markdown.\n\nReturns None on failure.\nUse Option<String> for the result.";
2996        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2997        assert!(out.contains("null"), "None must be replaced on line 2, got: {out}");
2998        assert!(
2999            out.contains("String | null"),
3000            "Option<String> must be replaced on line 3, got: {out}"
3001        );
3002    }
3003
3004    #[test]
3005    fn sanitize_attribute_line_dropped() {
3006        let input = "#[derive(Debug, Clone)]\nSome documentation.";
3007        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
3008        assert!(!out.contains("#[derive("), "attribute line must be dropped, got: {out}");
3009        // Prose survives, though bare "Some " before a lowercase noun is stripped
3010        // by `replace_some_keyword_in_prose`, so accept either form.
3011        assert!(out.contains("documentation."), "prose must survive, got: {out}");
3012    }
3013
3014    #[test]
3015    fn sanitize_path_separator_in_prose() {
3016        let input = "See std::collections::HashMap for details.";
3017        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
3018        assert!(out.contains("std.collections.HashMap"), ":: must become ., got: {out}");
3019    }
3020
3021    #[test]
3022    fn sanitize_none_not_replaced_inside_identifier() {
3023        // "NoneType" must not be replaced.
3024        let input = "Unlike NoneType in Python.";
3025        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
3026        assert!(out.contains("NoneType"), "NoneType must not be replaced, got: {out}");
3027    }
3028
3029    // --- CSharpDoc target tests ---
3030
3031    #[test]
3032    fn sanitize_csharp_drops_rust_section_headings_and_example_body() {
3033        // The GraphQLErrorException case: `# Examples` heading followed by a
3034        // ```ignore code fence containing `Self::error_code`, `Result<T, E>`,
3035        // intra-doc links — all of which previously leaked into `<summary>`.
3036        let input = "Convert error to HTTP status code\n\n\
3037            Maps GraphQL error types to status codes.\n\n\
3038            # Examples\n\n\
3039            ```ignore\n\
3040            use spikard_graphql::error::GraphQLError;\n\
3041            let error = GraphQLError::AuthenticationError(\"Invalid token\".to_string());\n\
3042            assert_eq!(error.status_code(), 401);\n\
3043            ```\n";
3044        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3045        assert!(
3046            out.contains("Convert error to HTTP status code"),
3047            "summary preserved: {out}"
3048        );
3049        assert!(out.contains("Maps GraphQL error types"), "prose preserved: {out}");
3050        assert!(!out.contains("# Examples"), "heading dropped: {out}");
3051        assert!(!out.contains("```"), "code fence dropped: {out}");
3052        assert!(!out.contains("Self::error_code"), "Self::method dropped: {out}");
3053        assert!(
3054            !out.contains("GraphQLError::AuthenticationError"),
3055            "rust path dropped: {out}"
3056        );
3057    }
3058
3059    #[test]
3060    fn sanitize_csharp_intradoc_link_with_path_separator() {
3061        let input = "See [`Self::error_code`] for the variant codes.";
3062        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3063        assert!(out.contains("`Self.error_code`"), "intra-doc link normalised: {out}");
3064        assert!(!out.contains("[`"), "square brackets removed: {out}");
3065        assert!(!out.contains("::"), ":: replaced with .: {out}");
3066    }
3067
3068    #[test]
3069    fn sanitize_csharp_result_type_keeps_success_drops_error() {
3070        let input = "Returns Result<String, ConversionError> on failure.";
3071        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3072        assert!(out.contains("String"), "success type kept: {out}");
3073        assert!(!out.contains("Result<"), "Result wrapper dropped: {out}");
3074        assert!(!out.contains("ConversionError"), "error type dropped: {out}");
3075    }
3076
3077    #[test]
3078    fn sanitize_csharp_option_becomes_nullable() {
3079        let input = "Returns Option<String>.";
3080        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3081        // After XML-escaping, the `?` survives but any surviving `<`/`>` get escaped.
3082        assert!(out.contains("String?"), "Option<T> -> T?: {out}");
3083        assert!(!out.contains("Option<"), "Option dropped: {out}");
3084    }
3085
3086    #[test]
3087    fn sanitize_csharp_vec_u8_becomes_byte_array() {
3088        let input = "Accepts Vec<u8>.";
3089        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3090        // `byte[]` survives — the `[` is not XML-significant.
3091        assert!(out.contains("byte[]"), "Vec<u8> -> byte[]: {out}");
3092    }
3093
3094    #[test]
3095    fn sanitize_csharp_hashmap_becomes_dictionary() {
3096        let input = "Holds HashMap<String, u32>.";
3097        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3098        // The `<` / `>` produced by Dictionary<K, V> must be XML-escaped.
3099        assert!(
3100            out.contains("Dictionary&lt;String, u32&gt;"),
3101            "HashMap -> Dictionary with XML-escaped brackets: {out}"
3102        );
3103    }
3104
3105    #[test]
3106    fn sanitize_csharp_none_to_null() {
3107        let input = "Returns None on miss.";
3108        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3109        assert!(out.contains("null"), "None -> null: {out}");
3110        assert!(!out.contains("None"), "None replaced: {out}");
3111    }
3112
3113    #[test]
3114    fn sanitize_csharp_escapes_raw_angle_brackets_and_amp() {
3115        // Unrecognised `<...>` constructs (e.g. trait objects, generic params on
3116        // unknown names) must still be XML-escaped so the result is valid inside
3117        // `<summary>`.
3118        let input = "Accepts Box<dyn Trait> and combines a & b.";
3119        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3120        // Box<T> wrapper is stripped to inner type, leaving `dyn Trait`.
3121        assert!(out.contains("dyn Trait"), "Box<T> stripped: {out}");
3122        assert!(out.contains("&amp;"), "ampersand escaped: {out}");
3123    }
3124
3125    #[test]
3126    fn sanitize_csharp_drops_rust_code_fence_entirely() {
3127        let input = "Intro.\n\n```rust\nlet x: Vec<u8> = vec![];\n```\n\nTrailer.";
3128        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3129        assert!(!out.contains("let x"), "code fence body dropped: {out}");
3130        assert!(!out.contains("```"), "fence markers dropped: {out}");
3131        assert!(out.contains("Intro."), "prose before fence kept: {out}");
3132        assert!(out.contains("Trailer."), "prose after fence kept: {out}");
3133    }
3134
3135    #[test]
3136    fn sanitize_csharp_keep_sections_does_not_drop_headings() {
3137        // The sections-preserving variant leaves heading lines alone so callers
3138        // that have already extracted sections can sanitise each body fragment.
3139        let input = "Summary.\n\n# Arguments\n\n* `name` - the value.";
3140        let out = sanitize_rust_idioms_keep_sections(input, DocTarget::CSharpDoc);
3141        assert!(out.contains("# Arguments"), "heading preserved: {out}");
3142        assert!(out.contains("name"), "body preserved: {out}");
3143    }
3144
3145    #[test]
3146    fn sanitize_csharp_idempotent() {
3147        let input = "Returns Option<String> or None.";
3148        let once = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3149        let twice = sanitize_rust_idioms(&once, DocTarget::CSharpDoc);
3150        assert_eq!(once, twice, "CSharpDoc sanitisation must be idempotent");
3151    }
3152
3153    #[test]
3154    fn sanitize_phpdoc_drops_unmarked_rust_code_fences() {
3155        // Regression test: unmarked code fences (```\n...\n```) in Rust docstrings
3156        // are treated as Rust code and should be dropped for PHP target.
3157        let input = "Detect language name from a file extension.\n\nReturns `None` for unrecognized extensions.\n\n```\nuse tree_sitter_language_pack::detect_language_from_extension;\nassert_eq!(detect_language_from_extension(\"py\"), Some(\"python\"));\nassert_eq!(detect_language_from_extension(\"RS\"), Some(\"rust\"));\nassert_eq!(detect_language_from_extension(\"xyz\"), None);\n```";
3158        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
3159        assert!(
3160            !out.contains("use tree_sitter_language_pack"),
3161            "Rust use stmt dropped: {out}"
3162        );
3163        assert!(!out.contains("assert_eq!"), "Rust code dropped: {out}");
3164        assert!(!out.contains("```"), "fence markers dropped: {out}");
3165        assert!(out.contains("Detect language name"), "prose before fence kept: {out}");
3166        assert!(out.contains("unrecognized extensions"), "prose kept: {out}");
3167    }
3168
3169    #[test]
3170    fn sanitize_javadoc_drops_unmarked_rust_code_fences() {
3171        // Regression test: unmarked code fences in Rust docstrings should be dropped
3172        // for Java target as well.
3173        let input = "Process a file.\n\n```\nlet result = process(\"def hello(): pass\", &config).unwrap();\n```";
3174        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
3175        assert!(!out.contains("unwrap"), "Rust unwrap dropped: {out}");
3176        assert!(!out.contains("```"), "fence markers dropped: {out}");
3177        assert!(out.contains("Process a file"), "prose kept: {out}");
3178    }
3179
3180    #[test]
3181    fn sanitize_phpdoc_drops_explicit_rust_fences() {
3182        // Explicit ```rust fences should also be dropped for PHP.
3183        let input = "Summary.\n\n```rust\nuse std::path::PathBuf;\nlet p = PathBuf::from(\"/tmp\");\n```";
3184        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
3185        assert!(!out.contains("use std::"), "Rust code dropped: {out}");
3186        assert!(!out.contains("PathBuf"), "Rust types dropped: {out}");
3187        assert!(!out.contains("```"), "fence markers dropped: {out}");
3188        assert!(out.contains("Summary"), "prose kept: {out}");
3189    }
3190
3191    // --- rustdoc test-attribute fence tests ---
3192
3193    #[test]
3194    fn sanitize_no_run_fence_dropped_for_tsdoc() {
3195        let input = "Intro.\n\n```no_run\nuse foo::bar;\nbar::init();\n```\n\nTrailer.";
3196        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
3197        assert!(!out.contains("use foo::bar"), "no_run fence body dropped: {out}");
3198        assert!(!out.contains("```"), "fence markers dropped: {out}");
3199        assert!(out.contains("Intro."), "prose before fence kept: {out}");
3200        assert!(out.contains("Trailer."), "prose after fence kept: {out}");
3201    }
3202
3203    #[test]
3204    fn sanitize_ignore_fence_dropped_for_phpdoc() {
3205        let input = "Summary.\n\n```ignore\nlet x = 1;\n// this would not compile\n```";
3206        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
3207        assert!(!out.contains("let x = 1"), "ignore fence body dropped: {out}");
3208        assert!(!out.contains("```"), "fence markers dropped: {out}");
3209        assert!(out.contains("Summary"), "prose kept: {out}");
3210    }
3211
3212    #[test]
3213    fn sanitize_should_panic_fence_dropped_for_javadoc() {
3214        let input = "Panics on null.\n\n```should_panic\nlet _ = parse(null);\n```";
3215        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
3216        assert!(!out.contains("parse(null)"), "should_panic fence body dropped: {out}");
3217        assert!(!out.contains("```"), "fence markers dropped: {out}");
3218        assert!(out.contains("Panics on null"), "prose kept: {out}");
3219    }
3220
3221    #[test]
3222    fn sanitize_compile_fail_fence_dropped_for_csharp() {
3223        let input = "Type safety demo.\n\n```compile_fail\nlet x: u32 = \"hello\";\n```";
3224        let out = sanitize_rust_idioms(input, DocTarget::CSharpDoc);
3225        assert!(!out.contains("let x:"), "compile_fail fence body dropped: {out}");
3226        assert!(!out.contains("```"), "fence markers dropped: {out}");
3227        assert!(out.contains("Type safety demo"), "prose kept: {out}");
3228    }
3229
3230    #[test]
3231    fn sanitize_edition_fence_dropped_for_tsdoc() {
3232        let input = "Edition example.\n\n```edition2021\nuse std::fmt;\n```\n\nSee also edition2018.";
3233        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
3234        assert!(!out.contains("use std::fmt"), "edition2021 fence body dropped: {out}");
3235        assert!(!out.contains("```"), "fence markers dropped: {out}");
3236        assert!(out.contains("Edition example"), "prose kept: {out}");
3237    }
3238
3239    #[test]
3240    fn sanitize_python_fence_preserved_for_tsdoc() {
3241        // Python fences are not Rust — they must pass through unchanged.
3242        let input = "Example:\n\n```python\nimport foo\nfoo.bar()\n```";
3243        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
3244        assert!(out.contains("```python"), "python fence preserved: {out}");
3245        assert!(out.contains("import foo"), "python body preserved: {out}");
3246    }
3247
3248    #[test]
3249    fn sanitize_javascript_fence_preserved_for_phpdoc() {
3250        let input = "Usage:\n\n```javascript\nconst x = require('foo');\n```";
3251        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
3252        assert!(out.contains("```javascript"), "javascript fence preserved: {out}");
3253        assert!(out.contains("require('foo')"), "javascript body preserved: {out}");
3254    }
3255
3256    #[test]
3257    fn example_for_target_no_run_fence_suppressed_for_typescript() {
3258        let example =
3259            "```no_run\nuse tree_sitter_language_pack::available_languages;\nlet langs = available_languages();\n```";
3260        assert_eq!(
3261            example_for_target(example, "typescript"),
3262            None,
3263            "no_run fence must be treated as Rust and suppressed for TypeScript"
3264        );
3265    }
3266
3267    #[test]
3268    fn example_for_target_ignore_fence_suppressed_for_php() {
3269        let example = "```ignore\nlet x = 1;\n```";
3270        assert_eq!(
3271            example_for_target(example, "php"),
3272            None,
3273            "ignore fence must be treated as Rust and suppressed for PHP"
3274        );
3275    }
3276
3277    #[test]
3278    fn example_for_target_compile_fail_fence_suppressed_for_java() {
3279        let example = "```compile_fail\nlet x: u32 = \"wrong\";\n```";
3280        assert_eq!(
3281            example_for_target(example, "java"),
3282            None,
3283            "compile_fail fence must be treated as Rust and suppressed for Java"
3284        );
3285    }
3286
3287    #[test]
3288    fn example_for_target_should_panic_fence_suppressed_for_ruby() {
3289        let example = "```should_panic\nlet _ = parse(None);\n```";
3290        assert_eq!(
3291            example_for_target(example, "ruby"),
3292            None,
3293            "should_panic fence must be treated as Rust and suppressed for Ruby"
3294        );
3295    }
3296
3297    #[test]
3298    fn example_for_target_edition_fence_suppressed_for_php() {
3299        let example = "```edition2021\nuse std::fmt;\n```";
3300        assert_eq!(
3301            example_for_target(example, "php"),
3302            None,
3303            "edition2021 fence must be treated as Rust and suppressed for PHP"
3304        );
3305    }
3306
3307    #[test]
3308    fn example_for_target_python_fence_preserved() {
3309        let example = "```python\nimport foo\n```";
3310        let result = example_for_target(example, "php");
3311        assert!(result.is_some(), "python fence must be preserved for PHP target");
3312    }
3313}
alef_codegen/doc_emission.rs

alef_codegen/
doc_emission.rs