alef_codegen/
doc_emission.rs

1//! Language-native documentation comment emission.
2//! Provides standardized functions for emitting doc comments in different languages.
3
4/// Emit PHPDoc-style comments (/** ... */)
5/// Used for PHP classes, methods, and properties.
6///
7/// Sanitizes Rust-specific idioms before translating rustdoc sections
8/// (`# Arguments` → `@param`, `# Returns` → `@return`, `# Errors` → `@throws`,
9/// `# Example` → ` ```php ` fence) via [`render_phpdoc_sections`].
10///
11/// `exception_class` is the PHP exception class name to use in `@throws` tags.
12pub fn emit_phpdoc(out: &mut String, doc: &str, indent: &str, exception_class: &str) {
13    if doc.is_empty() {
14        return;
15    }
16    // Sanitize Rust-specific idioms before processing sections.
17    let sanitized = sanitize_rust_idioms(doc, DocTarget::PhpDoc);
18    let sections = parse_rustdoc_sections(&sanitized);
19    let any_section = sections.arguments.is_some()
20        || sections.returns.is_some()
21        || sections.errors.is_some()
22        || sections.example.is_some();
23    let body = if any_section {
24        render_phpdoc_sections(&sections, exception_class)
25    } else {
26        sanitized
27    };
28    out.push_str(indent);
29    out.push_str("/**\n");
30    for line in body.lines() {
31        out.push_str(indent);
32        out.push_str(" * ");
33        out.push_str(&escape_phpdoc_line(line));
34        out.push('\n');
35    }
36    out.push_str(indent);
37    out.push_str(" */\n");
38}
39
/// Neutralise `*/` inside a PHPDoc line so it cannot close the surrounding
/// comment early: every occurrence becomes `* /`.
fn escape_phpdoc_line(s: &str) -> String {
    // Splitting on the terminator and re-joining with the spaced form is the
    // same left-to-right, non-overlapping substitution as a plain replace.
    s.split("*/").collect::<Vec<_>>().join("* /")
}
44
45/// Emit C# XML documentation comments (/// <summary> ... </summary>)
46/// Used for C# classes, structs, methods, and properties.
47///
48/// Translates rustdoc sections (`# Arguments` → `<param>`,
49/// `# Returns` → `<returns>`, `# Errors` → `<exception>`,
50/// `# Example` → `<example><code>`) via [`render_csharp_xml_sections`].
51///
52/// `exception_class` is the C# exception class name to use in `<exception cref="...">` tags.
53pub fn emit_csharp_doc(out: &mut String, doc: &str, indent: &str, exception_class: &str) {
54    if doc.is_empty() {
55        return;
56    }
57    let sections = parse_rustdoc_sections(doc);
58    let any_section = sections.arguments.is_some()
59        || sections.returns.is_some()
60        || sections.errors.is_some()
61        || sections.example.is_some();
62    if !any_section {
63        // Backwards-compatible path: plain `<summary>` for prose-only docs.
64        out.push_str(indent);
65        out.push_str("/// <summary>\n");
66        for line in doc.lines() {
67            out.push_str(indent);
68            out.push_str("/// ");
69            out.push_str(&escape_csharp_doc_line(line));
70            out.push('\n');
71        }
72        out.push_str(indent);
73        out.push_str("/// </summary>\n");
74        return;
75    }
76    let rendered = render_csharp_xml_sections(&sections, exception_class);
77    for line in rendered.lines() {
78        out.push_str(indent);
79        out.push_str("/// ");
80        // The rendered tags already contain the canonical chars; we only
81        // escape XML special chars that aren't part of our tag syntax. Since
82        // render_csharp_xml_sections produces well-formed XML, raw passthrough
83        // is correct.
84        out.push_str(line);
85        out.push('\n');
86    }
87}
88
/// Escape the XML special characters `&`, `<`, and `>` in a C# doc line.
fn escape_csharp_doc_line(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
93
94/// Emit Elixir documentation comments (@doc)
95/// Used for Elixir modules and functions.
96pub fn emit_elixir_doc(out: &mut String, doc: &str) {
97    if doc.is_empty() {
98        return;
99    }
100    out.push_str("@doc \"\"\"\n");
101    for line in doc.lines() {
102        out.push_str(&escape_elixir_doc_line(line));
103        out.push('\n');
104    }
105    out.push_str("\"\"\"\n");
106}
107
/// Write Rust `///` documentation comments, one per line of `doc`.
///
/// Used by alef backends that emit Rust source (e.g., the Rustler NIF crate,
/// the swift-bridge wrapper crate, the FRB Dart bridge crate). The output
/// syntax is the same as `emit_swift_doc`; the two differ only by intent.
pub fn emit_rustdoc(out: &mut String, doc: &str, indent: &str) {
    // An empty doc yields no lines, so nothing is written for it.
    for text in doc.lines() {
        out.extend([indent, "/// ", text, "\n"]);
    }
}
124
/// Break up `"""` inside an Elixir doc line so it cannot close the heredoc
/// early: every triple quote becomes `"" "`.
fn escape_elixir_doc_line(s: &str) -> String {
    const TRIPLE_QUOTE: &str = "\"\"\"";
    const BROKEN_UP: &str = "\"\" \"";
    s.split(TRIPLE_QUOTE).collect::<Vec<_>>().join(BROKEN_UP)
}
129
/// Write R roxygen2 documentation comments (`#' ` per line) for R functions.
pub fn emit_roxygen(out: &mut String, doc: &str) {
    // An empty doc yields no lines, so nothing is written for it.
    for text in doc.lines() {
        out.extend(["#' ", text, "\n"]);
    }
}
142
/// Write Swift documentation comments (`/// ` per line) for structs, enums,
/// and functions.
pub fn emit_swift_doc(out: &mut String, doc: &str, indent: &str) {
    // An empty doc yields no lines, so nothing is written for it.
    for text in doc.lines() {
        out.extend([indent, "/// ", text, "\n"]);
    }
}
156
157/// Emit Javadoc-style documentation comments (/** ... */)
158/// Used for Java classes, methods, and fields.
159/// Handles XML escaping and Javadoc tag formatting.
160pub fn emit_javadoc(out: &mut String, doc: &str, indent: &str) {
161    if doc.is_empty() {
162        return;
163    }
164    out.push_str(indent);
165    out.push_str("/**\n");
166    for line in doc.lines() {
167        let escaped = escape_javadoc_line(line);
168        let trimmed = escaped.trim_end();
169        if trimmed.is_empty() {
170            out.push_str(indent);
171            out.push_str(" *\n");
172        } else {
173            out.push_str(indent);
174            out.push_str(" * ");
175            out.push_str(trimmed);
176            out.push('\n');
177        }
178    }
179    out.push_str(indent);
180    out.push_str(" */\n");
181}
182
/// Write a KDoc block (`/** ... */`) for Kotlin classes, methods, and
/// properties.
///
/// Trailing whitespace is stripped from every line; lines that end up empty
/// are emitted as a bare ` *`. Nothing is written when `doc` is empty.
pub fn emit_kdoc(out: &mut String, doc: &str, indent: &str) {
    if doc.is_empty() {
        return;
    }
    out.extend([indent, "/**\n"]);
    for raw in doc.lines() {
        out.push_str(indent);
        match raw.trim_end() {
            "" => out.push_str(" *\n"),
            text => out.extend([" * ", text, "\n"]),
        }
    }
    out.extend([indent, " */\n"]);
}
206
/// Write KDoc in the shape ktfmt would produce, to avoid formatting diffs when
/// the generated code is later passed through ktfmt.
///
/// - A doc consisting of exactly one line is emitted as `/** content */` when
///   `indent` + `/** ` + trimmed content + ` */` is at most 100 bytes long.
/// - Everything else (several lines, or one line that is too long) is emitted
///   as a multi-line block with ` * ` prefixes, trailing whitespace stripped,
///   and empty lines rendered as a bare ` *`.
///
/// Nothing is written when `doc` is empty.
pub fn emit_kdoc_ktfmt_canonical(out: &mut String, doc: &str, indent: &str) {
    const KTFMT_LINE_WIDTH: usize = 100;
    const OPENER: &str = "/** ";
    const CLOSER: &str = " */";

    if doc.is_empty() {
        return;
    }

    let rows: Vec<&str> = doc.lines().collect();

    // Collapsed form: only attempted for a single-line doc that fits the width.
    if let [only] = rows.as_slice() {
        let content = only.trim();
        let collapsed_len = indent.len() + OPENER.len() + content.len() + CLOSER.len();
        if collapsed_len <= KTFMT_LINE_WIDTH {
            out.extend([indent, OPENER, content, CLOSER, "\n"]);
            return;
        }
    }

    // Expanded form for long or multi-line docs.
    out.extend([indent, "/**\n"]);
    for row in rows {
        out.push_str(indent);
        match row.trim_end() {
            "" => out.push_str(" *\n"),
            text => out.extend([" * ", text, "\n"]),
        }
    }
    out.extend([indent, " */\n"]);
}
261
/// Write Dartdoc comments (`/// ` per line) for Dart classes, methods, and
/// properties.
pub fn emit_dartdoc(out: &mut String, doc: &str, indent: &str) {
    // An empty doc yields no lines, so nothing is written for it.
    for text in doc.lines() {
        out.extend([indent, "/// ", text, "\n"]);
    }
}
275
/// Write Gleam documentation comments (`/// ` per line) for functions and
/// types.
pub fn emit_gleam_doc(out: &mut String, doc: &str, indent: &str) {
    // An empty doc yields no lines, so nothing is written for it.
    for text in doc.lines() {
        out.extend([indent, "/// ", text, "\n"]);
    }
}
289
290/// Emit Doxygen-style C documentation comments using `///`-prefixed lines.
291///
292/// Used by `alef-backend-ffi` above every `extern "C" fn`, the `*_len()`
293/// companion, opaque-handle typedef, and (post-cbindgen) the type/enum
294/// declarations cbindgen surfaces in the generated `.h`. cbindgen translates
295/// `///` source lines into a single `/** ... */` Doxygen block per item, so we
296/// only need to emit per-line `///` content here.
297///
298/// Translates rustdoc sections via [`render_doxygen_sections`]:
299///
300/// - `# Arguments` → `\param <name> <description>` (one per arg).
301/// - `# Returns`   → `\return <description>`.
302/// - `# Errors`    → `\note <description>` (Doxygen has no `\throws` for C;
303///   `\note` is the convention).
304/// - `# Safety`    → `\note SAFETY: <description>`.
305/// - `# Example`   → `\code` ... `\endcode` block.
306///
307/// Markdown links (`[text](url)`) are flattened to `text (url)`. Body lines
308/// are word-wrapped at ~100 columns so the rendered `/** */` block stays
309/// readable in IDE tooltips and terminal viewers.
310pub fn emit_c_doxygen(out: &mut String, doc: &str, indent: &str) {
311    if doc.trim().is_empty() {
312        return;
313    }
314    let sections = parse_rustdoc_sections(doc);
315    let any_section = sections.arguments.is_some()
316        || sections.returns.is_some()
317        || sections.errors.is_some()
318        || sections.safety.is_some()
319        || sections.example.is_some();
320    let mut body = if any_section {
321        render_doxygen_sections_with_notes(&sections)
322    } else {
323        sections.summary.clone()
324    };
325    body = strip_markdown_links(&body);
326    let wrapped = word_wrap(&body, DOXYGEN_WRAP_WIDTH);
327    for line in wrapped.lines() {
328        out.push_str(indent);
329        out.push_str("/// ");
330        out.push_str(line);
331        out.push('\n');
332    }
333}
334
335const DOXYGEN_WRAP_WIDTH: usize = 100;
336
337/// Render `RustdocSections` as a Doxygen body but route `# Errors` and
338/// `# Safety` to `\note` lines instead of plain prose. This is the variant
339/// `emit_c_doxygen` uses; the public `render_doxygen_sections` keeps its
340/// long-standing plain-prose semantics so existing callers don't shift.
341fn render_doxygen_sections_with_notes(sections: &RustdocSections) -> String {
342    let mut out = String::new();
343    if !sections.summary.is_empty() {
344        out.push_str(&sections.summary);
345    }
346    if let Some(args) = sections.arguments.as_deref() {
347        for (name, desc) in parse_arguments_bullets(args) {
348            if !out.is_empty() {
349                out.push('\n');
350            }
351            if desc.is_empty() {
352                out.push_str("\\param ");
353                out.push_str(&name);
354            } else {
355                out.push_str("\\param ");
356                out.push_str(&name);
357                out.push(' ');
358                out.push_str(&desc);
359            }
360        }
361    }
362    if let Some(ret) = sections.returns.as_deref() {
363        if !out.is_empty() {
364            out.push('\n');
365        }
366        out.push_str("\\return ");
367        out.push_str(ret.trim());
368    }
369    if let Some(err) = sections.errors.as_deref() {
370        if !out.is_empty() {
371            out.push('\n');
372        }
373        out.push_str("\\note ");
374        out.push_str(err.trim());
375    }
376    if let Some(safety) = sections.safety.as_deref() {
377        if !out.is_empty() {
378            out.push('\n');
379        }
380        out.push_str("\\note SAFETY: ");
381        out.push_str(safety.trim());
382    }
383    if let Some(example) = sections.example.as_deref() {
384        if !out.is_empty() {
385            out.push('\n');
386        }
387        out.push_str("\\code\n");
388        for line in example.lines() {
389            let t = line.trim_start();
390            if t.starts_with("```") {
391                continue;
392            }
393            out.push_str(line);
394            out.push('\n');
395        }
396        out.push_str("\\endcode");
397    }
398    out
399}
400
/// Flatten Markdown inline links `[text](url)` to `text (url)` so the rendered
/// Doxygen block stays readable when consumed without a Markdown filter.
///
/// A link is recognised only when the first `]` after a `[` is immediately
/// followed by `(` and a closing `)` exists later in the input (nested
/// brackets are not supported). Anything else, including a lone `[`, is copied
/// through unchanged.
///
/// The input is processed as string slices cut at ASCII delimiters, so
/// multi-byte UTF-8 characters are preserved intact.
fn strip_markdown_links(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;

    while let Some(open) = rest.find('[') {
        // Everything before the bracket is plain text.
        out.push_str(&rest[..open]);
        let after_open = &rest[open + 1..];

        // Try to read `text](url)` starting right after the '['.
        let link = after_open.find(']').and_then(|close| {
            let text = &after_open[..close];
            let tail = after_open[close + 1..].strip_prefix('(')?;
            let paren_close = tail.find(')')?;
            Some((text, &tail[..paren_close], &tail[paren_close + 1..]))
        });

        match link {
            Some((text, url, remaining)) => {
                out.push_str(text);
                out.push_str(" (");
                out.push_str(url);
                out.push(')');
                rest = remaining;
            }
            None => {
                // Not a link: keep the bracket and continue scanning after it.
                out.push('[');
                rest = after_open;
            }
        }
    }

    out.push_str(rest);
    out
}
433
/// Word-wrap each input line at `width` columns (measured in bytes via
/// `str::len`).
///
/// The following lines are passed through verbatim to preserve example
/// formatting:
///
/// - `\code` / `\endcode` marker lines and everything between them;
/// - Markdown fence marker lines (```` ``` ````) and everything between an
///   opening and a closing fence;
/// - any line that already fits within `width`.
///
/// Longer prose lines are re-flowed word by word: runs of whitespace collapse
/// to a single space and leading indentation is dropped. Trailing newlines are
/// removed from the result.
fn word_wrap(input: &str, width: usize) -> String {
    let mut out = String::with_capacity(input.len());
    let mut in_code = false; // between `\code` and `\endcode`
    let mut in_fence = false; // between an opening and closing Markdown fence

    for raw in input.lines() {
        let trimmed = raw.trim_start();
        let is_fence_marker = trimmed.starts_with("```");

        let verbatim = if in_fence {
            // Inside a fence only the closing marker changes state.
            if is_fence_marker {
                in_fence = false;
            }
            true
        } else if trimmed.starts_with("\\code") {
            in_code = true;
            true
        } else if trimmed.starts_with("\\endcode") {
            in_code = false;
            true
        } else if in_code {
            true
        } else if is_fence_marker {
            in_fence = true;
            true
        } else {
            raw.len() <= width
        };

        if verbatim {
            out.push_str(raw);
            out.push('\n');
            continue;
        }

        // Greedy fill: start a new output line whenever the next word would
        // push the current one past `width`.
        let mut current = String::with_capacity(width);
        for word in raw.split_whitespace() {
            if current.is_empty() {
                current.push_str(word);
            } else if current.len() + 1 + word.len() > width {
                out.push_str(&current);
                out.push('\n');
                current.clear();
                current.push_str(word);
            } else {
                current.push(' ');
                current.push_str(word);
            }
        }
        if !current.is_empty() {
            out.push_str(&current);
            out.push('\n');
        }
    }

    let kept = out.trim_end_matches('\n').len();
    out.truncate(kept);
    out
}
485
/// Write Zig documentation comments (`/// ` per line) for functions, types,
/// and declarations.
pub fn emit_zig_doc(out: &mut String, doc: &str, indent: &str) {
    // An empty doc yields no lines, so nothing is written for it.
    for text in doc.lines() {
        out.extend([indent, "/// ", text, "\n"]);
    }
}
499
500/// Emit YARD documentation comments for Ruby.
501/// Used for Ruby classes, methods, and attributes.
502///
503/// YARD syntax: each line prefixed with `# ` (with space). Translates rustdoc
504/// sections (`# Arguments` → `@param`, `# Returns` → `@return`, `# Errors` → `@raise`)
505/// via [`render_yard_sections`].
506pub fn emit_yard_doc(out: &mut String, doc: &str, indent: &str) {
507    if doc.is_empty() {
508        return;
509    }
510    let sections = parse_rustdoc_sections(doc);
511    let any_section = sections.arguments.is_some()
512        || sections.returns.is_some()
513        || sections.errors.is_some()
514        || sections.example.is_some();
515    let body = if any_section {
516        render_yard_sections(&sections)
517    } else {
518        doc.to_string()
519    };
520    for line in body.lines() {
521        out.push_str(indent);
522        out.push_str("# ");
523        out.push_str(line);
524        out.push('\n');
525    }
526}
527
528/// Render `RustdocSections` as YARD documentation comment body.
529///
530/// - `# Arguments` → `@param name desc` (one per arg)
531/// - `# Returns`   → `@return desc`
532/// - `# Errors`    → `@raise desc`
533/// - `# Example`   → `@example` block.
534///
535/// Output is a plain string with `\n` separators; the emitter wraps each line
536/// in `# ` itself.
537pub fn render_yard_sections(sections: &RustdocSections) -> String {
538    let mut out = String::new();
539    if !sections.summary.is_empty() {
540        out.push_str(&sections.summary);
541    }
542    if let Some(args) = sections.arguments.as_deref() {
543        for (name, desc) in parse_arguments_bullets(args) {
544            if !out.is_empty() {
545                out.push('\n');
546            }
547            if desc.is_empty() {
548                out.push_str("@param ");
549                out.push_str(&name);
550            } else {
551                out.push_str("@param ");
552                out.push_str(&name);
553                out.push(' ');
554                out.push_str(&desc);
555            }
556        }
557    }
558    if let Some(ret) = sections.returns.as_deref() {
559        if !out.is_empty() {
560            out.push('\n');
561        }
562        out.push_str("@return ");
563        out.push_str(ret.trim());
564    }
565    if let Some(err) = sections.errors.as_deref() {
566        if !out.is_empty() {
567            out.push('\n');
568        }
569        out.push_str("@raise ");
570        out.push_str(err.trim());
571    }
572    if let Some(example) = sections.example.as_deref() {
573        if let Some(body) = example_for_target(example, "ruby") {
574            if !out.is_empty() {
575                out.push('\n');
576            }
577            out.push_str("@example\n");
578            out.push_str(&body);
579        }
580    }
581    out
582}
583
584/// Escape Javadoc line: handle XML special chars and backtick code blocks.
585///
586/// HTML entities (`<`, `>`, `&`) are also escaped *inside* `{@code …}` blocks.
587/// Without that, content like `` `<pre><code>` `` would emit raw `<pre>`
588/// inside the Javadoc tag — Eclipse-formatter Spotless then treats it as a
589/// real `<pre>` block element and shatters the line across multiple `* `
590/// rows, breaking `alef-verify`'s embedded hash. Escaped content is
591/// rendered identically by Javadoc readers (the `{@code}` tag shows literal
592/// characters) and is stable under any post-formatter pass.
593fn escape_javadoc_line(s: &str) -> String {
594    let mut result = String::with_capacity(s.len());
595    let mut chars = s.chars().peekable();
596    while let Some(ch) = chars.next() {
597        if ch == '`' {
598            let mut code = String::new();
599            for c in chars.by_ref() {
600                if c == '`' {
601                    break;
602                }
603                code.push(c);
604            }
605            result.push_str("{@code ");
606            result.push_str(&escape_javadoc_html_entities(&code));
607            result.push('}');
608        } else if ch == '<' {
609            result.push_str("&lt;");
610        } else if ch == '>' {
611            result.push_str("&gt;");
612        } else if ch == '&' {
613            result.push_str("&amp;");
614        } else {
615            result.push(ch);
616        }
617    }
618    result
619}
620
/// Replace the HTML special characters `&`, `<`, and `>` with their entities
/// so downstream Javadoc/Eclipse formatters do not parse them as block-level
/// HTML (e.g. `<pre>`).
fn escape_javadoc_html_entities(s: &str) -> String {
    // `&` goes first so the ampersands introduced by the other two
    // replacements are not escaped a second time.
    s.replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
}
635
/// A rustdoc comment split into the sections binding emitters care about.
///
/// `summary` holds the leading prose before any recognised `# Heading`.
/// Section bodies are stored verbatim (without the heading line itself);
/// translating bullet lists and code fences into host-native conventions is
/// left to each binding.
///
/// Every field is trimmed of leading/trailing whitespace so emitters can
/// concatenate without producing `* ` lines containing only spaces.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct RustdocSections {
    /// Prose before the first `# Section` heading.
    pub summary: String,
    /// Body of the `# Arguments` section, if present.
    pub arguments: Option<String>,
    /// Body of the `# Returns` section, if present.
    pub returns: Option<String>,
    /// Body of the `# Errors` section, if present.
    pub errors: Option<String>,
    /// Body of the `# Panics` section, if present.
    pub panics: Option<String>,
    /// Body of the `# Safety` section, if present.
    pub safety: Option<String>,
    /// Body of the `# Example` / `# Examples` section, if present.
    pub example: Option<String>,
}

/// Split a rustdoc string into [`RustdocSections`].
///
/// Level-1 ATX headings (`# Name`) are recognised, case-insensitively, when
/// the name is one of `Arguments` / `Args`, `Returns`, `Errors`, `Panics`,
/// `Safety`, `Example`, or `Examples`. Text before the first such heading
/// becomes `summary`. Unrecognised headings stay in the section they appear
/// in, so unconventional rustdoc isn't lost. Lines inside a ```` ``` ```` code
/// fence are never treated as headings. A section whose body is empty after
/// trimming is left as `None`.
///
/// The input is expected to already have rustdoc-hidden lines stripped
/// and intra-doc-link syntax rewritten by
/// [`crate::extractor::helpers::normalize_rustdoc`].
pub fn parse_rustdoc_sections(doc: &str) -> RustdocSections {
    // Destination of the text currently being accumulated.
    #[derive(Clone, Copy)]
    enum Slot {
        Summary,
        Arguments,
        Returns,
        Errors,
        Panics,
        Safety,
        Example,
    }

    // Map a line to the section it opens, if it is a recognised heading.
    fn heading_slot(line: &str) -> Option<Slot> {
        let name = line
            .trim_start()
            .strip_prefix("# ")?
            .trim()
            .to_ascii_lowercase();
        match name.as_str() {
            "arguments" | "args" => Some(Slot::Arguments),
            "returns" => Some(Slot::Returns),
            "errors" => Some(Slot::Errors),
            "panics" => Some(Slot::Panics),
            "safety" => Some(Slot::Safety),
            "example" | "examples" => Some(Slot::Example),
            _ => None,
        }
    }

    // Move the pending text into its slot. The buffer is always emptied, but
    // a body that is blank after trimming leaves the slot untouched.
    fn commit(parsed: &mut RustdocSections, slot: Slot, pending: &mut String) {
        let body = std::mem::take(pending).trim().to_string();
        if body.is_empty() {
            return;
        }
        match slot {
            Slot::Summary => {
                if !parsed.summary.is_empty() {
                    parsed.summary.push('\n');
                }
                parsed.summary.push_str(&body);
            }
            Slot::Arguments => parsed.arguments = Some(body),
            Slot::Returns => parsed.returns = Some(body),
            Slot::Errors => parsed.errors = Some(body),
            Slot::Panics => parsed.panics = Some(body),
            Slot::Safety => parsed.safety = Some(body),
            Slot::Example => parsed.example = Some(body),
        }
    }

    let mut parsed = RustdocSections::default();
    if doc.trim().is_empty() {
        return parsed;
    }

    let mut slot = Slot::Summary;
    let mut pending = String::new();
    let mut in_fence = false;
    for line in doc.lines() {
        if line.trim_start().starts_with("```") {
            // Fence markers toggle code mode and belong to the section body.
            in_fence = !in_fence;
        } else if !in_fence {
            if let Some(next) = heading_slot(line) {
                commit(&mut parsed, slot, &mut pending);
                slot = next;
                // The heading line itself is not part of any body.
                continue;
            }
        }
        pending.push_str(line);
        pending.push('\n');
    }
    commit(&mut parsed, slot, &mut pending);
    parsed
}
779
/// Turn the body of an `# Arguments` section into `(name, description)` pairs.
///
/// A line is a bullet when, after trimming, it starts with `* ` or `- `. The
/// rest of the bullet is split at the first ` - `; failing that at the first
/// `: `; failing that at the first space; otherwise the whole bullet is the
/// name and the description is empty. Surrounding backticks and asterisks are
/// removed from the name.
///
/// Non-empty lines that are not bullets are treated as continuations and are
/// appended (space-separated) to the previous entry's description; such lines
/// are dropped when no bullet precedes them.
///
/// Used by emitters that translate to per-parameter documentation tags
/// (`@param`, `<param>`, `\param`).
pub fn parse_arguments_bullets(body: &str) -> Vec<(String, String)> {
    // Separators in priority order, not in order of appearance on the line.
    const SEPARATORS: [&str; 3] = [" - ", ": ", " "];

    let mut entries: Vec<(String, String)> = Vec::new();
    for row in body.lines() {
        let content = row.trim();
        let bullet = content
            .strip_prefix("* ")
            .or_else(|| content.strip_prefix("- "));

        match bullet {
            Some(item) => {
                let (raw_name, raw_desc) = SEPARATORS
                    .iter()
                    .find_map(|sep| item.split_once(*sep))
                    .unwrap_or((item, ""));
                let name = raw_name.trim().trim_matches('`').trim_matches('*');
                entries.push((name.to_string(), raw_desc.trim().to_string()));
            }
            None if content.is_empty() => {}
            None => {
                if let Some((_, desc)) = entries.last_mut() {
                    if !desc.is_empty() {
                        desc.push(' ');
                    }
                    desc.push_str(content);
                }
            }
        }
    }
    entries
}
820
/// Detect the language of the first code fence in `body`.
///
/// Scans `body` for the first line that starts (after leading whitespace) with
/// ```` ``` ```` and inspects the first comma-separated item of its info
/// string (e.g. `"rust"`, `"php"`, `"typescript"`).
///
/// Returns `"rust"` when:
///
/// - no fence is found at all;
/// - the fence carries no tag (rustdoc treats unlabelled fences as Rust);
/// - the tag is a rustdoc doctest attribute rather than a language
///   (`ignore`, `ignore-<target>`, `should_panic`, `no_run`, `compile_fail`,
///   `edition<NNNN>`), since rustdoc also treats such fences as Rust code.
///
/// Any other tag is returned as written.
fn detect_first_fence_lang(body: &str) -> &str {
    // True for info-string items that configure a Rust doctest instead of
    // naming a language.
    fn is_rustdoc_attribute(tag: &str) -> bool {
        matches!(tag, "ignore" | "should_panic" | "no_run" | "compile_fail")
            || tag.starts_with("ignore-")
            || tag.strip_prefix("edition").map_or(false, |year| {
                !year.is_empty() && year.bytes().all(|b| b.is_ascii_digit())
            })
    }

    body.lines()
        .find_map(|line| line.trim_start().strip_prefix("```"))
        .map(|info| info.split(',').next().unwrap_or("").trim())
        .filter(|tag| !tag.is_empty() && !is_rustdoc_attribute(tag))
        .unwrap_or("rust")
}
837
838/// Return `Some(transformed_example)` if the example should be emitted for
839/// `target_lang`, or `None` when the example is Rust source that would be
840/// meaningless in the foreign language.
841///
842/// When the original fence language is `rust` (including bare ` ``` ` which
843/// rustdoc defaults to Rust) and the target is not `rust`, the example is
844/// suppressed entirely — better absent than misleading. Cross-language
845/// transliteration of example bodies is intentionally out of scope.
846pub fn example_for_target(example: &str, target_lang: &str) -> Option<String> {
847    let trimmed = example.trim();
848    let source_lang = detect_first_fence_lang(trimmed);
849    if source_lang == "rust" && target_lang != "rust" {
850        None
851    } else {
852        Some(replace_fence_lang(trimmed, target_lang))
853    }
854}
855
/// Rewrite the language tag of every *opening* code fence in `body` to
/// `lang_replacement`.
///
/// For an opening fence the leading indentation is preserved, the language
/// tag (everything up to the first comma or end of line) is replaced, and any
/// `,attr` suffix is kept. Closing fences and all other lines are copied
/// verbatim — a closing fence must stay a bare ` ``` `, otherwise Markdown
/// renderers never terminate the block.
///
/// Fences are paired by simple alternation: the first ` ``` ` line opens a
/// block, the next one closes it, and so on. When no fence is present the
/// body is returned unchanged (apart from trailing newlines, which are always
/// trimmed). Used by emitters that need to convert ` ```rust ` examples into
/// ` ```typescript ` / ` ```python ` / ` ```swift ` etc.
pub fn replace_fence_lang(body: &str, lang_replacement: &str) -> String {
    let mut out = String::with_capacity(body.len() + lang_replacement.len());
    let mut inside_fence = false;
    for line in body.lines() {
        let trimmed = line.trim_start();
        match trimmed.strip_prefix("```") {
            Some(info) if !inside_fence => {
                // Opening fence: swap the tag, keep indentation and attributes.
                inside_fence = true;
                let indent = &line[..line.len() - trimmed.len()];
                let attrs = info.find(',').map_or("", |comma| &info[comma..]);
                out.push_str(indent);
                out.push_str("```");
                out.push_str(lang_replacement);
                out.push_str(attrs);
            }
            Some(_) => {
                // Closing fence: must not receive a language tag.
                inside_fence = false;
                out.push_str(line);
            }
            None => out.push_str(line),
        }
        out.push('\n');
    }
    out.trim_end_matches('\n').to_string()
}
884
885/// Render `RustdocSections` as a JSDoc comment body (without the `/**` /
886/// ` */` wrappers — those are added by the caller's emitter, which knows
887/// the indent/escape conventions).
888///
889/// - `# Arguments` → `@param name - desc`
890/// - `# Returns`   → `@returns desc`
891/// - `# Errors`    → `@throws desc`
892/// - `# Example`   → `@example` block. Replaces ` ```rust ` fences with
893///   ` ```typescript ` so the example highlights properly in TypeDoc.
894///
895/// Output is a plain string with `\n` separators; emitters wrap each line
896/// in ` * ` themselves.
897pub fn render_jsdoc_sections(sections: &RustdocSections) -> String {
898    let mut out = String::new();
899    if !sections.summary.is_empty() {
900        out.push_str(&sections.summary);
901    }
902    if let Some(args) = sections.arguments.as_deref() {
903        for (name, desc) in parse_arguments_bullets(args) {
904            if !out.is_empty() {
905                out.push('\n');
906            }
907            if desc.is_empty() {
908                out.push_str(&crate::template_env::render(
909                    "doc_jsdoc_param.jinja",
910                    minijinja::context! { name => &name },
911                ));
912            } else {
913                out.push_str(&crate::template_env::render(
914                    "doc_jsdoc_param_desc.jinja",
915                    minijinja::context! { name => &name, desc => &desc },
916                ));
917            }
918        }
919    }
920    if let Some(ret) = sections.returns.as_deref() {
921        if !out.is_empty() {
922            out.push('\n');
923        }
924        out.push_str(&crate::template_env::render(
925            "doc_jsdoc_returns.jinja",
926            minijinja::context! { content => ret.trim() },
927        ));
928    }
929    if let Some(err) = sections.errors.as_deref() {
930        if !out.is_empty() {
931            out.push('\n');
932        }
933        out.push_str(&crate::template_env::render(
934            "doc_jsdoc_throws.jinja",
935            minijinja::context! { content => err.trim() },
936        ));
937    }
938    if let Some(example) = sections.example.as_deref() {
939        if let Some(body) = example_for_target(example, "typescript") {
940            if !out.is_empty() {
941                out.push('\n');
942            }
943            out.push_str("@example\n");
944            out.push_str(&body);
945        }
946    }
947    out
948}
949
950/// Render `RustdocSections` as a JavaDoc comment body.
951///
952/// - `# Arguments` → `@param name desc` (one per param)
953/// - `# Returns`   → `@return desc`
954/// - `# Errors`    → `@throws KreuzbergRsException desc`
955/// - `# Example`   → `<pre>{@code ...}</pre>` block.
956///
957/// `throws_class` is the FQN/simple name of the exception class to use in
958/// the `@throws` tag (e.g. `"KreuzbergRsException"`).
959pub fn render_javadoc_sections(sections: &RustdocSections, throws_class: &str) -> String {
960    let mut out = String::new();
961    if !sections.summary.is_empty() {
962        out.push_str(&sections.summary);
963    }
964    if let Some(args) = sections.arguments.as_deref() {
965        for (name, desc) in parse_arguments_bullets(args) {
966            if !out.is_empty() {
967                out.push('\n');
968            }
969            if desc.is_empty() {
970                out.push_str(&crate::template_env::render(
971                    "doc_javadoc_param.jinja",
972                    minijinja::context! { name => &name },
973                ));
974            } else {
975                out.push_str(&crate::template_env::render(
976                    "doc_javadoc_param_desc.jinja",
977                    minijinja::context! { name => &name, desc => &desc },
978                ));
979            }
980        }
981    }
982    if let Some(ret) = sections.returns.as_deref() {
983        if !out.is_empty() {
984            out.push('\n');
985        }
986        out.push_str(&crate::template_env::render(
987            "doc_javadoc_return.jinja",
988            minijinja::context! { content => ret.trim() },
989        ));
990    }
991    if let Some(err) = sections.errors.as_deref() {
992        if !out.is_empty() {
993            out.push('\n');
994        }
995        out.push_str(&crate::template_env::render(
996            "doc_javadoc_throws.jinja",
997            minijinja::context! { throws_class => throws_class, content => err.trim() },
998        ));
999    }
1000    out
1001}
1002
1003/// Render `RustdocSections` as a C# XML doc comment body (without the
1004/// `/// ` line prefixes — the emitter adds those).
1005///
1006/// - summary  → `<summary>...</summary>`
1007/// - args     → `<param name="x">desc</param>` (one per arg)
1008/// - returns  → `<returns>desc</returns>`
1009/// - errors   → `<exception cref="KreuzbergException">desc</exception>`
1010/// - example  → `<example><code language="csharp">...</code></example>`
1011pub fn render_csharp_xml_sections(sections: &RustdocSections, exception_class: &str) -> String {
1012    let mut out = String::new();
1013    out.push_str("<summary>\n");
1014    let summary = if sections.summary.is_empty() {
1015        ""
1016    } else {
1017        sections.summary.as_str()
1018    };
1019    for line in summary.lines() {
1020        out.push_str(line);
1021        out.push('\n');
1022    }
1023    out.push_str("</summary>");
1024    if let Some(args) = sections.arguments.as_deref() {
1025        for (name, desc) in parse_arguments_bullets(args) {
1026            out.push('\n');
1027            if desc.is_empty() {
1028                out.push_str(&crate::template_env::render(
1029                    "doc_csharp_param.jinja",
1030                    minijinja::context! { name => &name },
1031                ));
1032            } else {
1033                out.push_str(&crate::template_env::render(
1034                    "doc_csharp_param_desc.jinja",
1035                    minijinja::context! { name => &name, desc => &desc },
1036                ));
1037            }
1038        }
1039    }
1040    if let Some(ret) = sections.returns.as_deref() {
1041        out.push('\n');
1042        out.push_str(&crate::template_env::render(
1043            "doc_csharp_returns.jinja",
1044            minijinja::context! { content => ret.trim() },
1045        ));
1046    }
1047    if let Some(err) = sections.errors.as_deref() {
1048        out.push('\n');
1049        out.push_str(&crate::template_env::render(
1050            "doc_csharp_exception.jinja",
1051            minijinja::context! {
1052                exception_class => exception_class,
1053                content => err.trim(),
1054            },
1055        ));
1056    }
1057    if let Some(example) = sections.example.as_deref() {
1058        out.push('\n');
1059        out.push_str("<example><code language=\"csharp\">\n");
1060        // Drop fence markers, keep code.
1061        for line in example.lines() {
1062            let t = line.trim_start();
1063            if t.starts_with("```") {
1064                continue;
1065            }
1066            out.push_str(line);
1067            out.push('\n');
1068        }
1069        out.push_str("</code></example>");
1070    }
1071    out
1072}
1073
1074/// Render `RustdocSections` as a PHPDoc comment body.
1075///
1076/// - `# Arguments` → `@param mixed $name desc`
1077/// - `# Returns`   → `@return desc`
1078/// - `# Errors`    → `@throws KreuzbergException desc`
1079/// - `# Example`   → ` ```php ` fence (replaces ` ```rust `).
1080pub fn render_phpdoc_sections(sections: &RustdocSections, throws_class: &str) -> String {
1081    let mut out = String::new();
1082    if !sections.summary.is_empty() {
1083        out.push_str(&sections.summary);
1084    }
1085    if let Some(args) = sections.arguments.as_deref() {
1086        for (name, desc) in parse_arguments_bullets(args) {
1087            if !out.is_empty() {
1088                out.push('\n');
1089            }
1090            if desc.is_empty() {
1091                out.push_str(&crate::template_env::render(
1092                    "doc_phpdoc_param.jinja",
1093                    minijinja::context! { name => &name },
1094                ));
1095            } else {
1096                out.push_str(&crate::template_env::render(
1097                    "doc_phpdoc_param_desc.jinja",
1098                    minijinja::context! { name => &name, desc => &desc },
1099                ));
1100            }
1101        }
1102    }
1103    if let Some(ret) = sections.returns.as_deref() {
1104        if !out.is_empty() {
1105            out.push('\n');
1106        }
1107        out.push_str(&crate::template_env::render(
1108            "doc_phpdoc_return.jinja",
1109            minijinja::context! { content => ret.trim() },
1110        ));
1111    }
1112    if let Some(err) = sections.errors.as_deref() {
1113        if !out.is_empty() {
1114            out.push('\n');
1115        }
1116        out.push_str(&crate::template_env::render(
1117            "doc_phpdoc_throws.jinja",
1118            minijinja::context! { throws_class => throws_class, content => err.trim() },
1119        ));
1120    }
1121    if let Some(example) = sections.example.as_deref() {
1122        if let Some(body) = example_for_target(example, "php") {
1123            if !out.is_empty() {
1124                out.push('\n');
1125            }
1126            out.push_str(&body);
1127        }
1128    }
1129    out
1130}
1131
1132/// Render `RustdocSections` as a Doxygen comment body for the C header.
1133///
1134/// - args    → `\param name desc`
1135/// - returns → `\return desc`
1136/// - errors  → prose paragraph (Doxygen has no semantic tag for FFI errors)
1137/// - example → `\code` ... `\endcode`
1138pub fn render_doxygen_sections(sections: &RustdocSections) -> String {
1139    let mut out = String::new();
1140    if !sections.summary.is_empty() {
1141        out.push_str(&sections.summary);
1142    }
1143    if let Some(args) = sections.arguments.as_deref() {
1144        for (name, desc) in parse_arguments_bullets(args) {
1145            if !out.is_empty() {
1146                out.push('\n');
1147            }
1148            if desc.is_empty() {
1149                out.push_str(&crate::template_env::render(
1150                    "doc_doxygen_param.jinja",
1151                    minijinja::context! { name => &name },
1152                ));
1153            } else {
1154                out.push_str(&crate::template_env::render(
1155                    "doc_doxygen_param_desc.jinja",
1156                    minijinja::context! { name => &name, desc => &desc },
1157                ));
1158            }
1159        }
1160    }
1161    if let Some(ret) = sections.returns.as_deref() {
1162        if !out.is_empty() {
1163            out.push('\n');
1164        }
1165        out.push_str(&crate::template_env::render(
1166            "doc_doxygen_return.jinja",
1167            minijinja::context! { content => ret.trim() },
1168        ));
1169    }
1170    if let Some(err) = sections.errors.as_deref() {
1171        if !out.is_empty() {
1172            out.push('\n');
1173        }
1174        out.push_str(&crate::template_env::render(
1175            "doc_doxygen_errors.jinja",
1176            minijinja::context! { content => err.trim() },
1177        ));
1178    }
1179    if let Some(example) = sections.example.as_deref() {
1180        if !out.is_empty() {
1181            out.push('\n');
1182        }
1183        out.push_str("\\code\n");
1184        for line in example.lines() {
1185            let t = line.trim_start();
1186            if t.starts_with("```") {
1187                continue;
1188            }
1189            out.push_str(line);
1190            out.push('\n');
1191        }
1192        out.push_str("\\endcode");
1193    }
1194    out
1195}
1196
/// Collapse the first paragraph of a doc comment into one line.
///
/// Lines are consumed up to (not including) the first blank or
/// whitespace-only line; each is trimmed and the pieces are joined with a
/// single space. A doc that begins with a blank line therefore yields an
/// empty string.
///
/// This keeps wrapped sentences intact, e.g.
///
/// ```text
/// Convert HTML to Markdown, returning
/// a `ConversionResult`.
/// ```
///
/// becomes ``Convert HTML to Markdown, returning a `ConversionResult`.``,
/// whereas `.lines().next()` would cut the sentence off after the comma.
pub fn doc_first_paragraph_joined(doc: &str) -> String {
    let mut paragraph = String::new();
    for piece in doc.lines().map(str::trim) {
        if piece.is_empty() {
            break;
        }
        if !paragraph.is_empty() {
            paragraph.push(' ');
        }
        paragraph.push_str(piece);
    }
    paragraph
}
1216
/// Target documentation dialect for [`sanitize_rust_idioms`].
///
/// Each variant selects the idiomatic mapping for Rust constructs that do not
/// translate directly to foreign-language doc syntax. The variants fall into
/// two groups with respect to Rust code fences: [`DocTarget::PhpDoc`] and
/// [`DocTarget::JavaDoc`] keep the block but lose the `rust` tag, while
/// [`DocTarget::TsDoc`] and [`DocTarget::JsDoc`] drop the block entirely.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocTarget {
    /// PHPDoc (`/** ... */`), e.g. phpstan-typed prose.
    PhpDoc,
    /// Javadoc (`/** ... */`), e.g. OpenJDK-style annotations.
    JavaDoc,
    /// TSDoc (`/** ... */`), TypeScript variant of JSDoc.
    TsDoc,
    /// JSDoc (`/** ... */`), JavaScript variant.
    JsDoc,
}
1232
1233/// Sanitize Rust-specific idioms in a prose string for the given foreign-language
1234/// documentation target.
1235///
1236/// Transformations are applied **outside** backtick spans and code fences only,
1237/// so inline code examples and fenced blocks are never mutated (except that
1238/// ` ```rust ` fences are dropped entirely for [`DocTarget::TsDoc`] /
1239/// [`DocTarget::JsDoc`] and have their language tag stripped for
1240/// [`DocTarget::PhpDoc`] / [`DocTarget::JavaDoc`]).
1241///
1242/// # Transformations
1243///
1244/// - Intra-doc links `` [`Type::method`] `` → `` `Type.method` ``.
1245/// - `[`Foo`]` (backtick inside square brackets) → `` `Foo` ``.
1246/// - `None` (word boundary) → `null` (PHP/Java) or `undefined` (TS/JS).
1247/// - `Some(x)` → `the value (x)`.
1248/// - `Option<T>` → `T?` (PHP) / `T | null` (Java) / `T | undefined` (TS/JS).
1249/// - `Vec<u8>` → `string` (PHP) / `byte[]` (Java) / `Uint8Array` (TS/JS).
1250/// - `Vec<T>` → `T[]` (all targets).
1251/// - `HashMap<K, V>` → `array<K, V>` (PHP) / `Map<K, V>` (Java) / `Record<K, V>` (TS/JS).
1252/// - `Arc<T>`, `Box<T>`, `Mutex<T>`, `RwLock<T>`, `Rc<T>`, `Cell<T>`, `RefCell<T>` → `T`.
1253/// - `Send + Sync`, `Send`, `Sync`, `'static` → stripped.
1254/// - Standalone `::` between identifiers → `.`.
1255/// - `pub fn `, `crate::`, `&self`, `&mut self` → stripped.
1256/// - `#[…]` attribute macros on their own line or inline → stripped.
1257/// - `.unwrap()`, `.expect("…")` → stripped.
1258/// - ` ```rust ` fences → dropped entirely (TS/JS) or tag removed (PHP/Java).
1259pub fn sanitize_rust_idioms(text: &str, target: DocTarget) -> String {
1260    let mut out = String::with_capacity(text.len());
1261    let mut in_rust_fence = false;
1262    let mut in_other_fence = false;
1263
1264    for line in text.lines() {
1265        let trimmed = line.trim_start();
1266
1267        // Detect code fence boundaries.
1268        if let Some(rest) = trimmed.strip_prefix("```") {
1269            if in_rust_fence {
1270                // Closing fence of a rust block.
1271                in_rust_fence = false;
1272                match target {
1273                    DocTarget::TsDoc | DocTarget::JsDoc => {
1274                        // Entire rust block dropped — don't emit closing fence.
1275                    }
1276                    DocTarget::PhpDoc | DocTarget::JavaDoc => {
1277                        out.push_str(line);
1278                        out.push('\n');
1279                    }
1280                }
1281                continue;
1282            }
1283            if in_other_fence {
1284                // Closing fence of a non-rust block.
1285                in_other_fence = false;
1286                out.push_str(line);
1287                out.push('\n');
1288                continue;
1289            }
1290            // Opening fence — determine language.
1291            let lang = rest.split(',').next().unwrap_or("").trim();
1292            let is_rust = lang.is_empty() || lang == "rust" || lang.starts_with("rust,");
1293            if is_rust {
1294                in_rust_fence = true;
1295                match target {
1296                    DocTarget::TsDoc | DocTarget::JsDoc => {
1297                        // Drop the entire rust fence block — skip opening line.
1298                    }
1299                    DocTarget::PhpDoc | DocTarget::JavaDoc => {
1300                        // Emit fence without language tag.
1301                        let indent = &line[..line.len() - trimmed.len()];
1302                        let after_lang = rest.find(',').map(|i| &rest[i..]).unwrap_or("");
1303                        out.push_str(indent);
1304                        out.push_str("```");
1305                        out.push_str(after_lang);
1306                        out.push('\n');
1307                    }
1308                }
1309                continue;
1310            }
1311            // Non-rust fence: pass through verbatim.
1312            in_other_fence = true;
1313            out.push_str(line);
1314            out.push('\n');
1315            continue;
1316        }
1317
1318        // Inside a rust fence.
1319        if in_rust_fence {
1320            match target {
1321                DocTarget::TsDoc | DocTarget::JsDoc => {
1322                    // Drop content of rust fences.
1323                }
1324                DocTarget::PhpDoc | DocTarget::JavaDoc => {
1325                    out.push_str(line);
1326                    out.push('\n');
1327                }
1328            }
1329            continue;
1330        }
1331
1332        // Inside a non-rust fence: pass through verbatim.
1333        if in_other_fence {
1334            out.push_str(line);
1335            out.push('\n');
1336            continue;
1337        }
1338
1339        // Check if this line is a bare `#[...]` attribute line.
1340        let stripped_indent = line.trim_start();
1341        if stripped_indent.starts_with("#[") && stripped_indent.ends_with(']') {
1342            // Attribute-only line — drop entirely.
1343            continue;
1344        }
1345
1346        // Normal prose line: apply token-level transformations.
1347        let sanitized = apply_prose_transforms(line, target);
1348        out.push_str(&sanitized);
1349        out.push('\n');
1350    }
1351
1352    // Trim trailing newline added by the loop (preserve internal newlines).
1353    if out.ends_with('\n') && !text.ends_with('\n') {
1354        out.pop();
1355    }
1356    out
1357}
1358
1359/// Apply prose-level Rust-idiom transformations to a single line.
1360///
1361/// Some transformations span or precede backtick boundaries and must be applied
1362/// to the full line before tokenisation:
1363///
1364/// 1. Intra-doc links (`` [`...`] ``) — they wrap a backtick pair.
1365/// 2. `::` path separator — even inside backtick spans it should become `.`
1366///    for all foreign-language targets, since the target language uses `.` for
1367///    member access and package paths in code examples too.
1368///
1369/// All remaining transformations are applied only to literal (non-code) segments
1370/// after tokenisation.
1371fn apply_prose_transforms(line: &str, target: DocTarget) -> String {
1372    // Step 1: replace intra-doc links before tokenisation (they span backtick pairs).
1373    let line = replace_intradoc_links(line, target);
1374
1375    // Step 2: replace :: everywhere (including inside backtick spans).
1376    // All targets use `.` as the member/package separator, so this is always safe.
1377    let line = replace_path_separator(&line);
1378
1379    // Step 3: strip .unwrap() and .expect() everywhere, including inside backtick spans,
1380    // since these Rust error-handling idioms are meaningless in all target languages.
1381    let line = strip_unwrap_expect(&line);
1382
1383    // Step 4: tokenise and apply remaining transforms only to literal segments.
1384    let segments = tokenize_backtick_spans(&line);
1385    let mut result = String::with_capacity(line.len());
1386    for (is_code, span) in segments {
1387        if is_code {
1388            result.push('`');
1389            result.push_str(span);
1390            result.push('`');
1391        } else {
1392            result.push_str(&transform_prose_segment(span, target));
1393        }
1394    }
1395    result
1396}
1397
/// Split a line into literal and inline-code segments.
///
/// Returns `Vec<(is_code, &str)>`. `is_code` is `true` for the text between
/// a matched pair of backticks (the backticks themselves are not included;
/// an empty pair yields an empty code segment). Empty literal segments are
/// never emitted. A backtick without a closing partner is not special: it
/// and everything after it form one final literal segment.
fn tokenize_backtick_spans(line: &str) -> Vec<(bool, &str)> {
    let mut spans = Vec::new();
    let mut rest = line;

    while let Some(open) = rest.find('`') {
        if open > 0 {
            spans.push((false, &rest[..open]));
        }
        let after_open = &rest[open + 1..];
        match after_open.find('`') {
            Some(close) => {
                spans.push((true, &after_open[..close]));
                rest = &after_open[close + 1..];
            }
            None => {
                // Unmatched backtick: the remainder, backtick included, is literal.
                spans.push((false, &rest[open..]));
                return spans;
            }
        }
    }

    if !rest.is_empty() {
        spans.push((false, rest));
    }
    spans
}
1438
1439/// Apply all prose-level Rust substitutions to a literal text segment.
1440///
1441/// Intra-doc links have already been replaced by `apply_prose_transforms`
1442/// before tokenisation; this function handles the remaining transformations.
1443fn transform_prose_segment(text: &str, target: DocTarget) -> String {
1444    let mut s = text.to_string();
1445
1446    // 1. Strip #[derive(...)] and other inline attribute-style references.
1447    s = strip_inline_attributes(&s);
1448
1449    // 2. Strip pub fn, crate::, &self, &mut self.
1450    s = s.replace("pub fn ", "");
1451    s = s.replace("crate::", "");
1452    s = s.replace("&mut self", "");
1453    s = s.replace("&self", "");
1454
1455    // 3. Strip lifetime and bound markers.
1456    s = strip_lifetime_and_bounds(&s);
1457
1458    // 4. Type substitutions (order matters — most specific first).
1459    s = replace_type_wrappers(&s, target);
1460
1461    // 5. Some(x) -> the value (x).
1462    s = replace_some_calls(&s);
1463
1464    // 5b. Bare "Some <lowercase>" in prose -> drop "Some ".
1465    s = replace_some_keyword_in_prose(&s);
1466
1467    // 6. None -> null / undefined (word boundary, uppercase only).
1468    s = replace_none_keyword(&s, target);
1469
1470    // Note: :: -> . and .unwrap()/.expect() stripping are applied to the full
1471    // line before tokenisation in apply_prose_transforms and therefore do not
1472    // need to be repeated here.
1473
1474    s
1475}
1476
/// Copy the character starting at byte offset `i` of `s` into `out` and
/// return the byte offset just past it.
///
/// The byte-crawling helpers in this file only look for ASCII markers. When
/// nothing matches they must step by one whole character rather than one
/// byte, otherwise a multi-byte UTF-8 sequence would be split.
///
/// # Panics
///
/// Panics if `i` is not a char boundary of `s` or if `i >= s.len()`. Callers
/// uphold this because they only branch on ASCII bytes, which are always
/// single-byte char boundaries.
#[inline]
fn advance_char(s: &str, out: &mut String, i: usize) -> usize {
    let tail = &s[i..];
    let current = tail.chars().next().expect("valid UTF-8 position");
    out.push(current);
    i + current.len_utf8()
}
1492
1493/// Replace `` [`Type::method()`] `` and `` [`Foo`] `` intra-doc links with
1494/// backtick-wrapped identifiers, converting `::` to `.`.
1495fn replace_intradoc_links(s: &str, _target: DocTarget) -> String {
1496    let mut out = String::with_capacity(s.len());
1497    let bytes = s.as_bytes();
1498    let mut i = 0;
1499    while i < bytes.len() {
1500        // Look for [`
1501        if i + 1 < bytes.len() && bytes[i] == b'[' && bytes[i + 1] == b'`' {
1502            // Find closing `]
1503            let search_start = i + 2;
1504            let mut found = false;
1505            let mut j = search_start;
1506            while j + 1 < bytes.len() {
1507                if bytes[j] == b'`' && bytes[j + 1] == b']' {
1508                    let inner = &s[search_start..j];
1509                    // Convert :: to . in the inner part.
1510                    let converted = inner.replace("::", ".");
1511                    out.push('`');
1512                    out.push_str(&converted);
1513                    out.push('`');
1514                    i = j + 2;
1515                    found = true;
1516                    break;
1517                }
1518                j += 1;
1519            }
1520            if !found {
1521                i = advance_char(s, &mut out, i);
1522            }
1523        } else {
1524            i = advance_char(s, &mut out, i);
1525        }
1526    }
1527    out
1528}
1529
/// Remove inline `#[...]` attribute references from a prose segment.
///
/// An attribute starts at `#[` and ends at the `]` that balances it, so
/// nested brackets such as `#[cfg(feature = "a[0]")]` are removed as a unit.
/// If the brackets never balance, the `#` is kept as literal text and
/// scanning resumes right after it. Attribute-only *lines* are dropped
/// earlier, by the main loop of `sanitize_rust_idioms`.
fn strip_inline_attributes(s: &str) -> String {
    let mut kept = String::with_capacity(s.len());
    let mut rest = s;

    while let Some(hash) = rest.find("#[") {
        kept.push_str(&rest[..hash]);
        let candidate = &rest[hash..];

        // Walk the bytes after `#`, tracking bracket depth. The first byte
        // visited is the opening `[`, so `depth` is at least 1 whenever a `]`
        // is seen and the decrement cannot underflow.
        let mut depth = 0usize;
        let mut attr_len = None;
        for (idx, byte) in candidate.bytes().enumerate().skip(1) {
            match byte {
                b'[' => depth += 1,
                b']' => {
                    depth -= 1;
                    if depth == 0 {
                        attr_len = Some(idx + 1);
                        break;
                    }
                }
                _ => {}
            }
        }

        match attr_len {
            Some(len) => rest = &candidate[len..],
            None => {
                // Unbalanced: keep the `#` and continue after it.
                kept.push('#');
                rest = &candidate[1..];
            }
        }
    }

    kept.push_str(rest);
    kept
}
1563
1564/// Strip `'static`, `Send + Sync`, `Send`, `Sync` from prose text.
1565fn strip_lifetime_and_bounds(s: &str) -> String {
1566    // Order matters: match compound forms before simple forms.
1567    let mut out = s.to_string();
1568    // Strip `Send + Sync` (with optional spaces around `+`).
1569    out = regex_replace_all(&out, r"Send\s*\+\s*Sync", "");
1570    out = regex_replace_all(&out, r"Sync\s*\+\s*Send", "");
1571    // Strip standalone Send/Sync only at word boundaries.
1572    out = regex_replace_word_boundary(&out, "Send", "");
1573    out = regex_replace_word_boundary(&out, "Sync", "");
1574    // Strip 'static lifetime markers.
1575    out = regex_replace_all(&out, r"'\s*static\b", "");
1576    out
1577}
1578
1579/// Replace occurrences of `pattern` (treated as a simple substring pattern
1580/// with `\s*` only, no full regex) with `replacement` in `s`.
1581///
1582/// This is a lightweight regex-free replacement for simple patterns that
1583/// only need literal text or `\s*` between tokens.
1584fn regex_replace_all(s: &str, pattern: &str, replacement: &str) -> String {
1585    // Inline tiny pattern compiler for the three patterns we actually use.
1586    match pattern {
1587        r"Send\s*\+\s*Sync" => replace_with_optional_spaces(s, "Send", "+", "Sync", replacement),
1588        r"Sync\s*\+\s*Send" => replace_with_optional_spaces(s, "Sync", "+", "Send", replacement),
1589        r"'\s*static\b" => replace_static_lifetime(s, replacement),
1590        _ => s.replace(pattern, replacement),
1591    }
1592}
1593
1594/// Replace `word_boundary(keyword)` occurrences in `s` with `replacement`.
1595fn regex_replace_word_boundary(s: &str, keyword: &str, replacement: &str) -> String {
1596    let mut out = String::with_capacity(s.len());
1597    let klen = keyword.len();
1598    let bytes = s.as_bytes();
1599    let kbytes = keyword.as_bytes();
1600    if klen == 0 || klen > bytes.len() {
1601        return s.to_string();
1602    }
1603    let mut i = 0;
1604    while i + klen <= bytes.len() {
1605        if &bytes[i..i + klen] == kbytes {
1606            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1607            let after_ok =
1608                i + klen >= bytes.len() || !bytes[i + klen].is_ascii_alphanumeric() && bytes[i + klen] != b'_';
1609            if before_ok && after_ok {
1610                out.push_str(replacement);
1611                i += klen;
1612                continue;
1613            }
1614        }
1615        i = advance_char(s, &mut out, i);
1616    }
1617    if i < bytes.len() {
1618        out.push_str(&s[i..]);
1619    }
1620    out
1621}
1622
/// Replace every `a <spaces> op <spaces> b` triplet in `s` with `replacement`.
///
/// `<spaces>` is zero or more ASCII space characters (`' '`); tabs and other
/// whitespace do not count. Matching is left-to-right and non-overlapping,
/// and text that does not form a complete triplet is copied through
/// unchanged.
///
/// The scan works directly on string slices, so nothing is allocated per
/// character. A degenerate pattern where `a`, `op` and `b` are all empty
/// matches nothing and returns `s` unchanged.
fn replace_with_optional_spaces(s: &str, a: &str, op: &str, b: &str, replacement: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;

    while let Some(ch) = rest.chars().next() {
        let after_match = rest
            .strip_prefix(a)
            .and_then(|tail| tail.trim_start_matches(' ').strip_prefix(op))
            .and_then(|tail| tail.trim_start_matches(' ').strip_prefix(b))
            // A match that consumes nothing would never advance the scan.
            .filter(|tail| tail.len() < rest.len());

        match after_match {
            Some(tail) => {
                out.push_str(replacement);
                rest = tail;
            }
            None => {
                out.push(ch);
                rest = &rest[ch.len_utf8()..];
            }
        }
    }
    out
}
1662
1663/// Replace `'static` lifetime markers (with optional spaces after `'`).
1664fn replace_static_lifetime(s: &str, replacement: &str) -> String {
1665    let mut out = String::with_capacity(s.len());
1666    let bytes = s.as_bytes();
1667    let mut i = 0;
1668    while i < bytes.len() {
1669        if bytes[i] == b'\'' {
1670            // Peek ahead skipping spaces.
1671            let mut j = i + 1;
1672            while j < bytes.len() && bytes[j] == b' ' {
1673                j += 1;
1674            }
1675            let keyword = b"static";
1676            if bytes[j..].starts_with(keyword) {
1677                let end = j + keyword.len();
1678                // Must be followed by non-identifier char or end.
1679                let after_ok = end >= bytes.len() || !bytes[end].is_ascii_alphanumeric() && bytes[end] != b'_';
1680                if after_ok {
1681                    out.push_str(replacement);
1682                    i = end;
1683                    continue;
1684                }
1685            }
1686        }
1687        i = advance_char(s, &mut out, i);
1688    }
1689    out
1690}
1691
1692/// Replace Rust generic type wrappers in prose.
1693fn replace_type_wrappers(s: &str, target: DocTarget) -> String {
1694    // Order: most specific patterns first.
1695    let mut out = s.to_string();
1696
1697    // Vec<u8> — must come before Vec<T>.
1698    let vec_u8_replacement = match target {
1699        DocTarget::PhpDoc => "string",
1700        DocTarget::JavaDoc => "byte[]",
1701        DocTarget::TsDoc | DocTarget::JsDoc => "Uint8Array",
1702    };
1703    out = replace_generic1(&out, "Vec", "u8", vec_u8_replacement);
1704
1705    // HashMap<K, V> — must come before Vec<T> to avoid order-dependency issues.
1706    let map_replacement_fn = |k: &str, v: &str| match target {
1707        DocTarget::PhpDoc => format!("array<{k}, {v}>"),
1708        DocTarget::JavaDoc => format!("Map<{k}, {v}>"),
1709        DocTarget::TsDoc | DocTarget::JsDoc => format!("Record<{k}, {v}>"),
1710    };
1711    out = replace_generic2(&out, "HashMap", &map_replacement_fn);
1712
1713    // Vec<T> — generic.
1714    out = replace_generic1_passthrough(&out, "Vec", |inner| format!("{inner}[]"));
1715
1716    // Option<T>.
1717    let option_replacement_fn = |inner: &str| match target {
1718        DocTarget::PhpDoc => format!("{inner}?"),
1719        DocTarget::JavaDoc => format!("{inner} | null"),
1720        DocTarget::TsDoc | DocTarget::JsDoc => format!("{inner} | undefined"),
1721    };
1722    out = replace_generic1_passthrough(&out, "Option", option_replacement_fn);
1723
1724    // Smart pointer wrappers: strip to inner type.
1725    for wrapper in &["Arc", "Box", "Mutex", "RwLock", "Rc", "Cell", "RefCell"] {
1726        out = replace_generic1_passthrough(&out, wrapper, |inner| inner.to_string());
1727    }
1728
1729    out
1730}
1731
/// Replace `Name<SingleArg>` where SingleArg is an exact literal (e.g. `Vec<u8>`).
///
/// Only occurrences that start at a word boundary are rewritten: the byte
/// before `Name` must not be an ASCII alphanumeric or `_`. This keeps longer
/// identifiers that merely end in `Name` (such as `SmallVec<u8>`) intact,
/// matching the boundary rule used by the other generic rewriters in this file.
fn replace_generic1(s: &str, name: &str, arg: &str, replacement: &str) -> String {
    let pattern = format!("{name}<{arg}>");
    let bytes = s.as_bytes();
    let mut out = String::with_capacity(s.len());
    // Byte offset up to which `s` has already been copied into `out`.
    let mut copied_to = 0;

    for (start, matched) in s.match_indices(pattern.as_str()) {
        let at_word_start =
            start == 0 || !(bytes[start - 1].is_ascii_alphanumeric() || bytes[start - 1] == b'_');
        if !at_word_start {
            // Part of a longer identifier: leave it for the final tail copy.
            continue;
        }
        out.push_str(&s[copied_to..start]);
        out.push_str(replacement);
        copied_to = start + matched.len();
    }
    out.push_str(&s[copied_to..]);
    out
}
1737
1738/// Replace `Name<T>` → `f(T)` for an arbitrary inner type expression.
1739///
1740/// Handles nested generics by counting angle-bracket depth.
1741fn replace_generic1_passthrough<F>(s: &str, name: &str, f: F) -> String
1742where
1743    F: Fn(&str) -> String,
1744{
1745    let mut out = String::with_capacity(s.len());
1746    let mut i = 0;
1747    let prefix = format!("{name}<");
1748    let pbytes = prefix.as_bytes();
1749    let bytes = s.as_bytes();
1750
1751    while i < bytes.len() {
1752        if bytes[i..].starts_with(pbytes) {
1753            // Check that the char before is not alphanumeric (word boundary).
1754            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1755            if before_ok {
1756                let inner_start = i + pbytes.len();
1757                // Find the matching '>'.
1758                let mut depth = 1usize;
1759                let mut j = inner_start;
1760                while j < bytes.len() {
1761                    match bytes[j] {
1762                        b'<' => depth += 1,
1763                        b'>' => {
1764                            depth -= 1;
1765                            if depth == 0 {
1766                                break;
1767                            }
1768                        }
1769                        _ => {}
1770                    }
1771                    j += 1;
1772                }
1773                if depth == 0 && j < bytes.len() {
1774                    let inner = &s[inner_start..j];
1775                    out.push_str(&f(inner));
1776                    i = j + 1;
1777                    continue;
1778                }
1779            }
1780        }
1781        i = advance_char(s, &mut out, i);
1782    }
1783    out
1784}
1785
1786/// Replace `Name<K, V>` → `f(K, V)` for two-argument generics (e.g. `HashMap`).
1787fn replace_generic2<F>(s: &str, name: &str, f: &F) -> String
1788where
1789    F: Fn(&str, &str) -> String,
1790{
1791    let mut out = String::with_capacity(s.len());
1792    let mut i = 0;
1793    let prefix = format!("{name}<");
1794    let pbytes = prefix.as_bytes();
1795    let bytes = s.as_bytes();
1796
1797    while i < bytes.len() {
1798        if bytes[i..].starts_with(pbytes) {
1799            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1800            if before_ok {
1801                let inner_start = i + pbytes.len();
1802                // Find the matching '>' respecting nesting.
1803                let mut depth = 1usize;
1804                let mut j = inner_start;
1805                while j < bytes.len() {
1806                    match bytes[j] {
1807                        b'<' => depth += 1,
1808                        b'>' => {
1809                            depth -= 1;
1810                            if depth == 0 {
1811                                break;
1812                            }
1813                        }
1814                        _ => {}
1815                    }
1816                    j += 1;
1817                }
1818                if depth == 0 && j < bytes.len() {
1819                    let inner = &s[inner_start..j];
1820                    // Split on the first ',' at depth 0.
1821                    let split = split_on_comma_at_top_level(inner);
1822                    if let Some((k, v)) = split {
1823                        out.push_str(&f(k.trim(), v.trim()));
1824                        i = j + 1;
1825                        continue;
1826                    }
1827                }
1828            }
1829        }
1830        i = advance_char(s, &mut out, i);
1831    }
1832    out
1833}
1834
/// Split `s` on the first comma that is not nested inside brackets.
///
/// Nesting is tracked for angle brackets, parentheses and square brackets, so
/// the commas in `Vec<(A, B)>`, `(A, B)` or `[u8; 4]`-style arguments are
/// skipped. Returns the text before and after the comma (the comma itself is
/// dropped, surrounding whitespace is kept), or `None` when there is no
/// top-level comma.
fn split_on_comma_at_top_level(s: &str) -> Option<(&str, &str)> {
    // Signed so stray closing brackets push the depth below zero rather than
    // wrapping around.
    let mut depth = 0i32;
    for (idx, ch) in s.char_indices() {
        match ch {
            '<' | '(' | '[' => depth += 1,
            '>' | ')' | ']' => depth -= 1,
            ',' if depth == 0 => return Some((&s[..idx], &s[idx + 1..])),
            _ => {}
        }
    }
    None
}
1848
1849/// Replace `Some(x)` in prose with `the value (x)`.
1850fn replace_some_calls(s: &str) -> String {
1851    let mut out = String::with_capacity(s.len());
1852    let bytes = s.as_bytes();
1853    let prefix = b"Some(";
1854    let mut i = 0;
1855
1856    while i < bytes.len() {
1857        if bytes[i..].starts_with(prefix) {
1858            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1859            if before_ok {
1860                let arg_start = i + prefix.len();
1861                // Find matching ')' respecting nesting.
1862                let mut depth = 1usize;
1863                let mut j = arg_start;
1864                while j < bytes.len() {
1865                    match bytes[j] {
1866                        b'(' => depth += 1,
1867                        b')' => {
1868                            depth -= 1;
1869                            if depth == 0 {
1870                                break;
1871                            }
1872                        }
1873                        _ => {}
1874                    }
1875                    j += 1;
1876                }
1877                if depth == 0 && j < bytes.len() {
1878                    let arg = &s[arg_start..j];
1879                    out.push_str("the value (");
1880                    out.push_str(arg);
1881                    out.push(')');
1882                    i = j + 1;
1883                    continue;
1884                }
1885            }
1886        }
1887        i = advance_char(s, &mut out, i);
1888    }
1889    out
1890}
1891
1892/// Drop bare `Some ` when it appears as a Rust-idiom modifier in prose
1893/// ("(Some values)", "Some keys leave the previous", etc.). The `Some(...)`
1894/// call form is handled separately by [`replace_some_calls`].
1895///
1896/// Match shape: word-boundary `Some` + single ASCII space + ASCII-lowercase
1897/// letter. The "Some " prefix is dropped; the following word is preserved.
1898/// `SomeType`, `Some.method()`, `Some(x)`, and sentence-initial `Some `
1899/// followed by an uppercase noun stay untouched.
1900fn replace_some_keyword_in_prose(s: &str) -> String {
1901    let keyword = b"Some ";
1902    let klen = keyword.len();
1903    let bytes = s.as_bytes();
1904    if klen >= bytes.len() {
1905        return s.to_string();
1906    }
1907    let mut out = String::with_capacity(s.len());
1908    let mut i = 0;
1909    while i + klen < bytes.len() {
1910        if &bytes[i..i + klen] == keyword {
1911            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1912            let after_ok = bytes[i + klen].is_ascii_lowercase();
1913            if before_ok && after_ok {
1914                i += klen;
1915                continue;
1916            }
1917        }
1918        i = advance_char(s, &mut out, i);
1919    }
1920    if i < bytes.len() {
1921        out.push_str(&s[i..]);
1922    }
1923    out
1924}
1925
1926/// Replace `None` (at word boundaries, uppercase) with the target-appropriate nil.
1927fn replace_none_keyword(s: &str, target: DocTarget) -> String {
1928    let replacement = match target {
1929        DocTarget::PhpDoc | DocTarget::JavaDoc => "null",
1930        DocTarget::TsDoc | DocTarget::JsDoc => "undefined",
1931    };
1932    let keyword = b"None";
1933    let klen = keyword.len();
1934    let mut out = String::with_capacity(s.len());
1935    let bytes = s.as_bytes();
1936    if klen > bytes.len() {
1937        return s.to_string();
1938    }
1939    let mut i = 0;
1940
1941    while i + klen <= bytes.len() {
1942        if &bytes[i..i + klen] == keyword {
1943            let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_';
1944            let after_ok =
1945                i + klen >= bytes.len() || !bytes[i + klen].is_ascii_alphanumeric() && bytes[i + klen] != b'_';
1946            if before_ok && after_ok {
1947                out.push_str(replacement);
1948                i += klen;
1949                continue;
1950            }
1951        }
1952        i = advance_char(s, &mut out, i);
1953    }
1954    if i < bytes.len() {
1955        out.push_str(&s[i..]);
1956    }
1957    out
1958}
1959
1960/// Replace standalone `::` between identifiers with `.`.
1961fn replace_path_separator(s: &str) -> String {
1962    let mut out = String::with_capacity(s.len());
1963    let bytes = s.as_bytes();
1964    let mut i = 0;
1965
1966    while i < bytes.len() {
1967        if i + 1 < bytes.len() && bytes[i] == b':' && bytes[i + 1] == b':' {
1968            // Only replace if surrounded by identifier characters or end/start of string.
1969            let before_ok = i > 0 && (bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_');
1970            let after_ok = i + 2 < bytes.len() && (bytes[i + 2].is_ascii_alphanumeric() || bytes[i + 2] == b'_');
1971            if before_ok || after_ok {
1972                out.push('.');
1973                i += 2;
1974                continue;
1975            }
1976        }
1977        i = advance_char(s, &mut out, i);
1978    }
1979    out
1980}
1981
1982/// Strip `.unwrap()` and `.expect("...")` calls from prose.
1983fn strip_unwrap_expect(s: &str) -> String {
1984    let mut out = String::with_capacity(s.len());
1985    let bytes = s.as_bytes();
1986    let mut i = 0;
1987
1988    while i < bytes.len() {
1989        // Match .unwrap().
1990        if bytes[i..].starts_with(b".unwrap()") {
1991            i += b".unwrap()".len();
1992            continue;
1993        }
1994        // Match .expect(...).
1995        if bytes[i..].starts_with(b".expect(") {
1996            let arg_start = i + b".expect(".len();
1997            let mut depth = 1usize;
1998            let mut j = arg_start;
1999            while j < bytes.len() {
2000                match bytes[j] {
2001                    b'(' => depth += 1,
2002                    b')' => {
2003                        depth -= 1;
2004                        if depth == 0 {
2005                            break;
2006                        }
2007                    }
2008                    _ => {}
2009                }
2010                j += 1;
2011            }
2012            if depth == 0 {
2013                i = j + 1;
2014                continue;
2015            }
2016        }
2017        i = advance_char(s, &mut out, i);
2018    }
2019    out
2020}
2021
2022#[cfg(test)]
2023mod tests {
2024    use super::*;
2025
2026    #[test]
2027    fn test_emit_phpdoc() {
2028        let mut out = String::new();
2029        emit_phpdoc(&mut out, "Simple documentation", "    ", "TestException");
2030        assert!(out.contains("/**"));
2031        assert!(out.contains("Simple documentation"));
2032        assert!(out.contains("*/"));
2033    }
2034
2035    #[test]
2036    fn test_phpdoc_escaping() {
2037        let mut out = String::new();
2038        emit_phpdoc(&mut out, "Handle */ sequences", "", "TestException");
2039        assert!(out.contains("Handle * / sequences"));
2040    }
2041
2042    #[test]
2043    fn test_emit_csharp_doc() {
2044        let mut out = String::new();
2045        emit_csharp_doc(&mut out, "C# documentation", "    ", "TestException");
2046        assert!(out.contains("<summary>"));
2047        assert!(out.contains("C# documentation"));
2048        assert!(out.contains("</summary>"));
2049    }
2050
2051    #[test]
2052    fn test_csharp_xml_escaping() {
2053        let mut out = String::new();
2054        emit_csharp_doc(&mut out, "foo < bar & baz > qux", "", "TestException");
2055        assert!(out.contains("foo &lt; bar &amp; baz &gt; qux"));
2056    }
2057
2058    #[test]
2059    fn test_emit_elixir_doc() {
2060        let mut out = String::new();
2061        emit_elixir_doc(&mut out, "Elixir documentation");
2062        assert!(out.contains("@doc \"\"\""));
2063        assert!(out.contains("Elixir documentation"));
2064        assert!(out.contains("\"\"\""));
2065    }
2066
2067    #[test]
2068    fn test_elixir_heredoc_escaping() {
2069        let mut out = String::new();
2070        emit_elixir_doc(&mut out, "Handle \"\"\" sequences");
2071        assert!(out.contains("Handle \"\" \" sequences"));
2072    }
2073
2074    #[test]
2075    fn test_emit_roxygen() {
2076        let mut out = String::new();
2077        emit_roxygen(&mut out, "R documentation");
2078        assert!(out.contains("#' R documentation"));
2079    }
2080
2081    #[test]
2082    fn test_emit_swift_doc() {
2083        let mut out = String::new();
2084        emit_swift_doc(&mut out, "Swift documentation", "    ");
2085        assert!(out.contains("/// Swift documentation"));
2086    }
2087
2088    #[test]
2089    fn test_emit_javadoc() {
2090        let mut out = String::new();
2091        emit_javadoc(&mut out, "Java documentation", "    ");
2092        assert!(out.contains("/**"));
2093        assert!(out.contains("Java documentation"));
2094        assert!(out.contains("*/"));
2095    }
2096
2097    #[test]
2098    fn test_emit_kdoc() {
2099        let mut out = String::new();
2100        emit_kdoc(&mut out, "Kotlin documentation", "    ");
2101        assert!(out.contains("/**"));
2102        assert!(out.contains("Kotlin documentation"));
2103        assert!(out.contains("*/"));
2104    }
2105
2106    #[test]
2107    fn test_emit_dartdoc() {
2108        let mut out = String::new();
2109        emit_dartdoc(&mut out, "Dart documentation", "    ");
2110        assert!(out.contains("/// Dart documentation"));
2111    }
2112
2113    #[test]
2114    fn test_emit_gleam_doc() {
2115        let mut out = String::new();
2116        emit_gleam_doc(&mut out, "Gleam documentation", "    ");
2117        assert!(out.contains("/// Gleam documentation"));
2118    }
2119
2120    #[test]
2121    fn test_emit_zig_doc() {
2122        let mut out = String::new();
2123        emit_zig_doc(&mut out, "Zig documentation", "    ");
2124        assert!(out.contains("/// Zig documentation"));
2125    }
2126
2127    #[test]
2128    fn test_empty_doc_skipped() {
2129        let mut out = String::new();
2130        emit_phpdoc(&mut out, "", "", "TestException");
2131        emit_csharp_doc(&mut out, "", "", "TestException");
2132        emit_elixir_doc(&mut out, "");
2133        emit_roxygen(&mut out, "");
2134        emit_kdoc(&mut out, "", "");
2135        emit_dartdoc(&mut out, "", "");
2136        emit_gleam_doc(&mut out, "", "");
2137        emit_zig_doc(&mut out, "", "");
2138        assert!(out.is_empty());
2139    }
2140
2141    #[test]
2142    fn test_doc_first_paragraph_joined_single_line() {
2143        assert_eq!(doc_first_paragraph_joined("Simple doc."), "Simple doc.");
2144    }
2145
2146    #[test]
2147    fn test_doc_first_paragraph_joined_wrapped_sentence() {
2148        // Simulates a docstring like convert's: "Convert HTML to Markdown,\nreturning a result."
2149        let doc = "Convert HTML to Markdown,\nreturning a result.";
2150        assert_eq!(
2151            doc_first_paragraph_joined(doc),
2152            "Convert HTML to Markdown, returning a result."
2153        );
2154    }
2155
2156    #[test]
2157    fn test_doc_first_paragraph_joined_stops_at_blank_line() {
2158        let doc = "First paragraph.\nStill first.\n\nSecond paragraph.";
2159        assert_eq!(doc_first_paragraph_joined(doc), "First paragraph. Still first.");
2160    }
2161
2162    #[test]
2163    fn test_doc_first_paragraph_joined_empty() {
2164        assert_eq!(doc_first_paragraph_joined(""), "");
2165    }
2166
2167    #[test]
2168    fn test_parse_rustdoc_sections_basic() {
2169        let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n\n# Returns\n\nThe extracted text.\n\n# Errors\n\nReturns `KreuzbergError` on failure.";
2170        let sections = parse_rustdoc_sections(doc);
2171        assert_eq!(sections.summary, "Extracts text from a file.");
2172        assert_eq!(sections.arguments.as_deref(), Some("* `path` - The file path."));
2173        assert_eq!(sections.returns.as_deref(), Some("The extracted text."));
2174        assert_eq!(sections.errors.as_deref(), Some("Returns `KreuzbergError` on failure."));
2175        assert!(sections.panics.is_none());
2176    }
2177
2178    #[test]
2179    fn test_parse_rustdoc_sections_example_with_fence() {
2180        let doc = "Run the thing.\n\n# Example\n\n```rust\nlet x = run();\n```";
2181        let sections = parse_rustdoc_sections(doc);
2182        assert_eq!(sections.summary, "Run the thing.");
2183        assert!(sections.example.as_ref().unwrap().contains("```rust"));
2184        assert!(sections.example.as_ref().unwrap().contains("let x = run();"));
2185    }
2186
2187    #[test]
2188    fn test_parse_rustdoc_sections_pound_inside_fence_is_not_a_heading() {
2189        // Even though we get rustdoc-hidden lines pre-stripped, a literal
2190        // `# foo` inside a non-rust fence (e.g. shell example) must not
2191        // start a new section.
2192        let doc = "Summary.\n\n# Example\n\n```bash\n# install deps\nrun --foo\n```";
2193        let sections = parse_rustdoc_sections(doc);
2194        assert_eq!(sections.summary, "Summary.");
2195        assert!(sections.example.as_ref().unwrap().contains("# install deps"));
2196    }
2197
2198    #[test]
2199    fn test_parse_arguments_bullets_dash_separator() {
2200        let body = "* `path` - The file path.\n* `config` - Optional configuration.";
2201        let pairs = parse_arguments_bullets(body);
2202        assert_eq!(pairs.len(), 2);
2203        assert_eq!(pairs[0], ("path".to_string(), "The file path.".to_string()));
2204        assert_eq!(pairs[1], ("config".to_string(), "Optional configuration.".to_string()));
2205    }
2206
2207    #[test]
2208    fn test_parse_arguments_bullets_continuation_line() {
2209        let body = "* `path` - The file path,\n  resolved relative to cwd.\n* `mode` - Open mode.";
2210        let pairs = parse_arguments_bullets(body);
2211        assert_eq!(pairs.len(), 2);
2212        assert_eq!(pairs[0].1, "The file path, resolved relative to cwd.");
2213    }
2214
2215    #[test]
2216    fn test_replace_fence_lang_rust_to_typescript() {
2217        let body = "```rust\nlet x = run();\n```";
2218        let out = replace_fence_lang(body, "typescript");
2219        assert!(out.starts_with("```typescript"));
2220        assert!(out.contains("let x = run();"));
2221    }
2222
2223    #[test]
2224    fn test_replace_fence_lang_preserves_attrs() {
2225        let body = "```rust,no_run\nlet x = run();\n```";
2226        let out = replace_fence_lang(body, "typescript");
2227        assert!(out.starts_with("```typescript,no_run"));
2228    }
2229
2230    #[test]
2231    fn test_replace_fence_lang_no_fence_unchanged() {
2232        let body = "Plain prose with `inline code`.";
2233        let out = replace_fence_lang(body, "typescript");
2234        assert_eq!(out, "Plain prose with `inline code`.");
2235    }
2236
2237    fn fixture_sections() -> RustdocSections {
2238        let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n* `config` - Optional configuration.\n\n# Returns\n\nThe extracted text and metadata.\n\n# Errors\n\nReturns an error when the file is unreadable.\n\n# Example\n\n```rust\nlet result = extract(\"file.pdf\")?;\n```";
2239        parse_rustdoc_sections(doc)
2240    }
2241
2242    #[test]
2243    fn test_render_jsdoc_sections() {
2244        let sections = fixture_sections();
2245        let out = render_jsdoc_sections(&sections);
2246        assert!(out.starts_with("Extracts text from a file."));
2247        assert!(out.contains("@param path - The file path."));
2248        assert!(out.contains("@param config - Optional configuration."));
2249        assert!(out.contains("@returns The extracted text and metadata."));
2250        assert!(out.contains("@throws Returns an error when the file is unreadable."));
2251        // fixture example is ```rust — stripped when target is TypeScript
2252        assert!(!out.contains("@example"), "Rust example must not appear in TSDoc");
2253        assert!(!out.contains("```typescript"));
2254        assert!(!out.contains("```rust"));
2255    }
2256
2257    #[test]
2258    fn test_render_jsdoc_sections_preserves_typescript_example() {
2259        let doc = "Do something.\n\n# Example\n\n```typescript\nconst x = doSomething();\n```";
2260        let sections = parse_rustdoc_sections(doc);
2261        let out = render_jsdoc_sections(&sections);
2262        assert!(out.contains("@example"), "TypeScript example must be preserved");
2263        assert!(out.contains("```typescript"));
2264    }
2265
2266    #[test]
2267    fn test_render_javadoc_sections() {
2268        let sections = fixture_sections();
2269        let out = render_javadoc_sections(&sections, "KreuzbergRsException");
2270        assert!(out.contains("@param path The file path."));
2271        assert!(out.contains("@return The extracted text and metadata."));
2272        assert!(out.contains("@throws KreuzbergRsException Returns an error when the file is unreadable."));
2273        // Java rendering omits the example block (handled separately by emit_javadoc which
2274        // wraps code in `<pre>{@code}</pre>`); we just confirm summary survives.
2275        assert!(out.starts_with("Extracts text from a file."));
2276    }
2277
2278    #[test]
2279    fn test_render_csharp_xml_sections() {
2280        let sections = fixture_sections();
2281        let out = render_csharp_xml_sections(&sections, "KreuzbergException");
2282        assert!(out.contains("<summary>\nExtracts text from a file.\n</summary>"));
2283        assert!(out.contains("<param name=\"path\">The file path.</param>"));
2284        assert!(out.contains("<returns>The extracted text and metadata.</returns>"));
2285        assert!(out.contains("<exception cref=\"KreuzbergException\">"));
2286        assert!(out.contains("<example><code language=\"csharp\">"));
2287        assert!(out.contains("let result = extract"));
2288    }
2289
2290    #[test]
2291    fn test_render_phpdoc_sections() {
2292        let sections = fixture_sections();
2293        let out = render_phpdoc_sections(&sections, "KreuzbergException");
2294        assert!(out.contains("@param mixed $path The file path."));
2295        assert!(out.contains("@return The extracted text and metadata."));
2296        assert!(out.contains("@throws KreuzbergException"));
2297        // fixture example is ```rust — stripped when target is PHP
2298        assert!(!out.contains("```php"), "Rust example must not appear in PHPDoc");
2299        assert!(!out.contains("```rust"));
2300    }
2301
2302    #[test]
2303    fn test_render_phpdoc_sections_preserves_php_example() {
2304        let doc = "Do something.\n\n# Example\n\n```php\n$x = doSomething();\n```";
2305        let sections = parse_rustdoc_sections(doc);
2306        let out = render_phpdoc_sections(&sections, "MyException");
2307        assert!(out.contains("```php"), "PHP example must be preserved");
2308    }
2309
2310    #[test]
2311    fn test_render_doxygen_sections() {
2312        let sections = fixture_sections();
2313        let out = render_doxygen_sections(&sections);
2314        assert!(out.contains("\\param path The file path."));
2315        assert!(out.contains("\\return The extracted text and metadata."));
2316        assert!(out.contains("\\code"));
2317        assert!(out.contains("\\endcode"));
2318    }
2319
2320    #[test]
2321    fn test_emit_yard_doc_simple() {
2322        let mut out = String::new();
2323        emit_yard_doc(&mut out, "Simple Ruby documentation", "    ");
2324        assert!(out.contains("# Simple Ruby documentation"));
2325    }
2326
2327    #[test]
2328    fn test_emit_yard_doc_empty() {
2329        let mut out = String::new();
2330        emit_yard_doc(&mut out, "", "    ");
2331        assert!(out.is_empty());
2332    }
2333
2334    #[test]
2335    fn test_emit_yard_doc_with_sections() {
2336        let mut out = String::new();
2337        let doc = "Extracts text from a file.\n\n# Arguments\n\n* `path` - The file path.\n\n# Returns\n\nThe extracted text.\n\n# Errors\n\nReturns error on failure.";
2338        emit_yard_doc(&mut out, doc, "  ");
2339        assert!(out.contains("# Extracts text from a file."));
2340        assert!(out.contains("# @param path The file path."));
2341        assert!(out.contains("# @return The extracted text."));
2342        assert!(out.contains("# @raise Returns error on failure."));
2343    }
2344
2345    #[test]
2346    fn test_emit_c_doxygen_simple_prose() {
2347        let mut out = String::new();
2348        emit_c_doxygen(&mut out, "Free a string.", "");
2349        assert!(out.contains("/// Free a string."), "got: {out}");
2350    }
2351
2352    #[test]
2353    fn test_emit_c_doxygen_with_sections() {
2354        let mut out = String::new();
2355        let doc = "Extract content from a file.\n\n# Arguments\n\n* `path` - Path to the file.\n* `mode` - Read mode.\n\n# Returns\n\nA newly allocated string the caller owns.\n\n# Errors\n\nReturns null when the file is unreadable.";
2356        emit_c_doxygen(&mut out, doc, "");
2357        assert!(out.contains("/// Extract content from a file."));
2358        assert!(out.contains("/// \\param path Path to the file."));
2359        assert!(out.contains("/// \\param mode Read mode."));
2360        assert!(out.contains("/// \\return A newly allocated string the caller owns."));
2361        assert!(out.contains("/// \\note Returns null when the file is unreadable."));
2362    }
2363
2364    #[test]
2365    fn test_emit_c_doxygen_safety_section_maps_to_note() {
2366        let mut out = String::new();
2367        let doc = "Free a buffer.\n\n# Safety\n\nPointer must have been returned by this library.";
2368        emit_c_doxygen(&mut out, doc, "");
2369        assert!(out.contains("/// \\note SAFETY: Pointer must have been returned by this library."));
2370    }
2371
2372    #[test]
2373    fn test_emit_c_doxygen_example_renders_code_fence() {
2374        let mut out = String::new();
2375        let doc = "Demo.\n\n# Example\n\n```rust\nlet x = run();\n```";
2376        emit_c_doxygen(&mut out, doc, "");
2377        assert!(out.contains("/// \\code"));
2378        assert!(out.contains("/// \\endcode"));
2379        assert!(out.contains("let x = run();"));
2380    }
2381
2382    #[test]
2383    fn test_emit_c_doxygen_strips_markdown_links() {
2384        let mut out = String::new();
2385        let doc = "See [the docs](https://example.com/x) for details.";
2386        emit_c_doxygen(&mut out, doc, "");
2387        assert!(
2388            out.contains("the docs (https://example.com/x)"),
2389            "expected flattened link, got: {out}"
2390        );
2391        assert!(!out.contains("](https://"));
2392    }
2393
2394    #[test]
2395    fn test_emit_c_doxygen_word_wraps_long_lines() {
2396        let mut out = String::new();
2397        let long = "a ".repeat(80);
2398        emit_c_doxygen(&mut out, long.trim(), "");
2399        for line in out.lines() {
2400            // Each emitted prefix is "/// " (4 chars); the body after that
2401            // should be ≤ 100 chars per `DOXYGEN_WRAP_WIDTH`.
2402            let body = line.trim_start_matches("/// ");
2403            assert!(body.len() <= 100, "line too long ({}): {line}", body.len());
2404        }
2405    }
2406
2407    #[test]
2408    fn test_emit_c_doxygen_empty_input_is_noop() {
2409        let mut out = String::new();
2410        emit_c_doxygen(&mut out, "", "");
2411        emit_c_doxygen(&mut out, "   \n\t  ", "");
2412        assert!(out.is_empty());
2413    }
2414
2415    #[test]
2416    fn test_emit_c_doxygen_indent_applied() {
2417        let mut out = String::new();
2418        emit_c_doxygen(&mut out, "Hello.", "    ");
2419        assert!(out.starts_with("    /// Hello."));
2420    }
2421
2422    #[test]
2423    fn test_render_yard_sections() {
2424        let sections = fixture_sections();
2425        let out = render_yard_sections(&sections);
2426        assert!(out.contains("@param path The file path."));
2427        assert!(out.contains("@return The extracted text and metadata."));
2428        assert!(out.contains("@raise Returns an error when the file is unreadable."));
2429        // fixture example is ```rust — stripped when target is Ruby
2430        assert!(!out.contains("@example"), "Rust example must not appear in YARD");
2431        assert!(!out.contains("```ruby"));
2432        assert!(!out.contains("```rust"));
2433    }
2434
2435    #[test]
2436    fn test_render_yard_sections_preserves_ruby_example() {
2437        let doc = "Do something.\n\n# Example\n\n```ruby\nputs :hi\n```";
2438        let sections = parse_rustdoc_sections(doc);
2439        let out = render_yard_sections(&sections);
2440        assert!(out.contains("@example"), "Ruby example must be preserved");
2441        assert!(out.contains("```ruby"));
2442    }
2443
2444    // --- M1: example_for_target unit tests ---
2445
2446    #[test]
2447    fn example_for_target_rust_fenced_suppressed_for_php() {
2448        let example = "```rust\nlet x = 1;\n```";
2449        assert_eq!(
2450            example_for_target(example, "php"),
2451            None,
2452            "rust-fenced example must be omitted for PHP target"
2453        );
2454    }
2455
2456    #[test]
2457    fn example_for_target_bare_fence_defaults_to_rust_suppressed_for_ruby() {
2458        let example = "```\nlet x = 1;\n```";
2459        assert_eq!(
2460            example_for_target(example, "ruby"),
2461            None,
2462            "bare fence is treated as Rust and must be omitted for Ruby target"
2463        );
2464    }
2465
2466    #[test]
2467    fn example_for_target_php_example_preserved_for_php() {
2468        let example = "```php\n$x = 1;\n```";
2469        let result = example_for_target(example, "php");
2470        assert!(result.is_some(), "PHP example must be preserved for PHP target");
2471        assert!(result.unwrap().contains("```php"));
2472    }
2473
2474    #[test]
2475    fn example_for_target_ruby_example_preserved_for_ruby() {
2476        let example = "```ruby\nputs :hi\n```";
2477        let result = example_for_target(example, "ruby");
2478        assert!(result.is_some(), "Ruby example must be preserved for Ruby target");
2479        assert!(result.unwrap().contains("```ruby"));
2480    }
2481
2482    #[test]
2483    fn render_phpdoc_sections_with_rust_example_emits_no_at_example_block() {
2484        let doc = "Convert HTML.\n\n# Arguments\n\n* `html` - The HTML input.\n\n# Example\n\n```rust\nlet result = convert(html, None)?;\n```";
2485        let sections = parse_rustdoc_sections(doc);
2486        let out = render_phpdoc_sections(&sections, "HtmlToMarkdownException");
2487        assert!(!out.contains("```php"), "no PHP @example block for Rust source");
2488        assert!(!out.contains("```rust"), "raw Rust must not leak into PHPDoc");
2489        assert!(out.contains("@param"), "other sections must still be emitted");
2490    }
2491
2492    // --- KDoc ktfmt-canonical format tests ---
2493
2494    #[test]
2495    fn test_emit_kdoc_ktfmt_canonical_short_single_line() {
2496        let mut out = String::new();
2497        emit_kdoc_ktfmt_canonical(&mut out, "Simple doc.", "");
2498        assert_eq!(
2499            out, "/** Simple doc. */\n",
2500            "short single-line comment should collapse to canonical format"
2501        );
2502    }
2503
2504    #[test]
2505    fn test_emit_kdoc_ktfmt_canonical_short_with_indent() {
2506        let mut out = String::new();
2507        emit_kdoc_ktfmt_canonical(&mut out, "Text node (most frequent - 100+ per document)", "    ");
2508        assert_eq!(out, "    /** Text node (most frequent - 100+ per document) */\n");
2509    }
2510
2511    #[test]
2512    fn test_emit_kdoc_ktfmt_canonical_long_comment_uses_multiline() {
2513        let mut out = String::new();
2514        let long_text = "This is a very long documentation comment that exceeds the 100-character line width limit and should therefore be emitted in multi-line format";
2515        emit_kdoc_ktfmt_canonical(&mut out, long_text, "");
2516        assert!(out.contains("/**\n"), "long comment should start with newline");
2517        assert!(out.contains(" * "), "long comment should use multi-line format");
2518        assert!(out.contains(" */\n"), "long comment should end with newline");
2519    }
2520
2521    #[test]
2522    fn test_emit_kdoc_ktfmt_canonical_multiline_comment() {
2523        let mut out = String::new();
2524        let doc = "First line.\n\nSecond paragraph.";
2525        emit_kdoc_ktfmt_canonical(&mut out, doc, "");
2526        assert!(out.contains("/**\n"), "multi-paragraph should use multi-line format");
2527        assert!(out.contains(" * First line."), "first paragraph preserved");
2528        assert!(out.contains(" *\n"), "blank line preserved");
2529        assert!(out.contains(" * Second paragraph."), "second paragraph preserved");
2530    }
2531
2532    #[test]
2533    fn test_emit_kdoc_ktfmt_canonical_empty_doc() {
2534        let mut out = String::new();
2535        emit_kdoc_ktfmt_canonical(&mut out, "", "");
2536        assert!(out.is_empty(), "empty doc should produce no output");
2537    }
2538
2539    #[test]
2540    fn test_emit_kdoc_ktfmt_canonical_fits_within_100_chars() {
2541        let mut out = String::new();
2542        // Construct exactly at the boundary: indent(0) + "/** " + content + " */" = 100 chars
2543        // "/** " = 4 chars, " */" = 3 chars, so content can be 93 chars
2544        let content = "a".repeat(93);
2545        emit_kdoc_ktfmt_canonical(&mut out, &content, "");
2546        let line = out.lines().next().unwrap();
2547        assert_eq!(
2548            line.len(),
2549            100,
2550            "should fit exactly at 100 chars and use single-line format"
2551        );
2552        assert!(out.starts_with("/**"), "should use single-line format");
2553    }
2554
2555    #[test]
2556    fn test_emit_kdoc_ktfmt_canonical_exceeds_100_chars() {
2557        let mut out = String::new();
2558        // Exceed 100 chars: content of 94 chars with "/** " + " */" = 101 chars
2559        let content = "a".repeat(94);
2560        emit_kdoc_ktfmt_canonical(&mut out, &content, "");
2561        assert!(
2562            out.contains("/**\n"),
2563            "should use multi-line format when exceeding 100 chars"
2564        );
2565        assert!(out.contains(" * "), "multi-line format with ` * ` prefix");
2566    }
2567
2568    #[test]
2569    fn test_emit_kdoc_ktfmt_canonical_respects_indent() {
2570        let mut out = String::new();
2571        // With 4-char indent, max content is 89 chars (4 + 4 + 89 + 3 = 100)
2572        let content = "a".repeat(89);
2573        emit_kdoc_ktfmt_canonical(&mut out, &content, "    ");
2574        let line = out.lines().next().unwrap();
2575        assert_eq!(line.len(), 100, "should respect indent in 100-char calculation");
2576        assert!(line.starts_with("    /** "), "should include indent");
2577    }
2578
2579    #[test]
2580    fn test_emit_kdoc_ktfmt_canonical_real_world_enum_variant() {
2581        let mut out = String::new();
2582        emit_kdoc_ktfmt_canonical(&mut out, "Text node (most frequent - 100+ per document)", "    ");
2583        // This is from NodeType enum; should collapse to single-line
2584        assert!(out.starts_with("    /** "), "should preserve 4-space indent");
2585        assert!(out.contains(" */\n"), "should end with newline");
2586        // Verify it's single-line format
2587        let line_count = out.lines().count();
2588        assert_eq!(line_count, 1, "should be single-line format");
2589    }
2590
2591    #[test]
2592    fn test_emit_kdoc_ktfmt_canonical_real_world_data_class_field() {
2593        let mut out = String::new();
2594        let doc = "Heading style to use in Markdown output (ATX `#` or Setext underline).";
2595        emit_kdoc_ktfmt_canonical(&mut out, doc, "    ");
2596        // This is from ConversionOptions data class; should collapse to single-line
2597        let line_count = out.lines().count();
2598        assert_eq!(line_count, 1, "should be single-line format");
2599        assert!(out.starts_with("    /** "), "should have correct indent");
2600    }
2601
2602    // --- sanitize_rust_idioms tests ---
2603
2604    #[test]
2605    fn sanitize_intradoc_link_with_path_separator_java() {
2606        let input = "See [`ConversionOptions::builder()`] for details.";
2607        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2608        assert!(out.contains("`ConversionOptions.builder()`"), "got: {out}");
2609        assert!(!out.contains("[`"), "brackets must be removed, got: {out}");
2610    }
2611
2612    #[test]
2613    fn sanitize_intradoc_link_simple_type_php() {
2614        let input = "Returns a [`ConversionResult`].";
2615        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
2616        assert!(out.contains("`ConversionResult`"), "got: {out}");
2617        assert!(!out.contains("[`"), "got: {out}");
2618    }
2619
2620    #[test]
2621    fn sanitize_none_to_null_javadoc() {
2622        let input = "Returns None when no value is found.";
2623        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2624        assert!(out.contains("null"), "got: {out}");
2625        assert!(!out.contains("None"), "got: {out}");
2626    }
2627
2628    #[test]
2629    fn sanitize_none_to_undefined_tsdoc() {
2630        let input = "Returns None if absent.";
2631        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2632        assert!(out.contains("undefined"), "got: {out}");
2633        assert!(!out.contains("None"), "got: {out}");
2634    }
2635
2636    #[test]
2637    fn sanitize_some_x_to_the_value_x() {
2638        let input = "Pass Some(value) to enable.";
2639        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2640        assert!(out.contains("the value (value)"), "got: {out}");
2641        assert!(!out.contains("Some("), "got: {out}");
2642    }
2643
2644    #[test]
2645    fn sanitize_bare_some_followed_by_lowercase_noun_is_dropped() {
2646        // Real leak from html-to-markdown PreprocessingOptionsUpdate.java:16.
2647        let input =
2648            "Only specified fields (Some values) will override existing options; None values leave the previous";
2649        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2650        assert!(
2651            out.contains("(values)"),
2652            "bare `Some ` before lowercase noun must be stripped; got: {out}"
2653        );
2654        assert!(
2655            out.contains("null values"),
2656            "bare `None ` must also be replaced; got: {out}"
2657        );
2658        assert!(!out.contains("Some "), "Some prefix must not survive; got: {out}");
2659    }
2660
2661    #[test]
2662    fn sanitize_bare_some_does_not_touch_identifiers_or_uppercase_followers() {
2663        // SomeType, Some.method(), Some(x), and "Some Title" (proper noun) all preserved.
2664        let cases = [
2665            "SomeType lives on.",
2666            "Some.method() returns Self.",
2667            "Some Title",
2668            "Some(x) is a value.",
2669        ];
2670        for case in cases {
2671            let out = sanitize_rust_idioms(case, DocTarget::JavaDoc);
2672            // For the Some(x) case, replace_some_calls (run earlier) converts to "the value (x)"
2673            // so "Some" itself is gone — that's expected; everything else preserves "Some".
2674            if case.starts_with("Some(") {
2675                assert!(out.contains("the value (x)"), "got: {out}");
2676            } else {
2677                assert!(out.contains("Some"), "Some must survive in {case:?}; got: {out}");
2678            }
2679        }
2680    }
2681
2682    #[test]
2683    fn sanitize_option_t_to_nullable_php() {
2684        let input = "The result is Option<String>.";
2685        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
2686        assert!(out.contains("String?"), "got: {out}");
2687        assert!(!out.contains("Option<"), "got: {out}");
2688    }
2689
2690    #[test]
2691    fn sanitize_option_t_to_or_null_java() {
2692        let input = "The result is Option<String>.";
2693        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2694        assert!(out.contains("String | null"), "got: {out}");
2695    }
2696
2697    #[test]
2698    fn sanitize_option_t_to_or_undefined_tsdoc() {
2699        let input = "The result is Option<String>.";
2700        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2701        assert!(out.contains("String | undefined"), "got: {out}");
2702    }
2703
2704    #[test]
2705    fn sanitize_vec_u8_per_target() {
2706        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::PhpDoc).contains("string"));
2707        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::JavaDoc).contains("byte[]"));
2708        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::TsDoc).contains("Uint8Array"));
2709        assert!(sanitize_rust_idioms("Takes Vec<u8>.", DocTarget::JsDoc).contains("Uint8Array"));
2710    }
2711
2712    #[test]
2713    fn sanitize_vec_t_to_array() {
2714        let input = "Returns Vec<String>.";
2715        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2716        assert!(out.contains("String[]"), "got: {out}");
2717        assert!(!out.contains("Vec<"), "got: {out}");
2718    }
2719
2720    #[test]
2721    fn sanitize_hashmap_per_target() {
2722        let input = "Uses HashMap<String, u32>.";
2723        assert!(sanitize_rust_idioms(input, DocTarget::PhpDoc).contains("array<String, u32>"));
2724        assert!(sanitize_rust_idioms(input, DocTarget::JavaDoc).contains("Map<String, u32>"));
2725        assert!(sanitize_rust_idioms(input, DocTarget::TsDoc).contains("Record<String, u32>"));
2726    }
2727
2728    #[test]
2729    fn sanitize_arc_wrapper_stripped() {
2730        let input = "Holds Arc<Config>.";
2731        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2732        assert!(out.contains("Config"), "got: {out}");
2733        assert!(!out.contains("Arc<"), "got: {out}");
2734    }
2735
2736    #[test]
2737    fn sanitize_box_mutex_rwlock_rc_cell_refcell_stripped() {
2738        for wrapper in &["Box", "Mutex", "RwLock", "Rc", "Cell", "RefCell"] {
2739            let input = format!("Contains {wrapper}<Inner>.");
2740            let out = sanitize_rust_idioms(&input, DocTarget::JavaDoc);
2741            assert!(out.contains("Inner"), "wrapper {wrapper} not stripped, got: {out}");
2742            assert!(
2743                !out.contains(&format!("{wrapper}<")),
2744                "wrapper {wrapper} still present, got: {out}"
2745            );
2746        }
2747    }
2748
2749    #[test]
2750    fn sanitize_send_sync_stripped() {
2751        let input = "The type is Send + Sync.";
2752        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2753        assert!(!out.contains("Send"), "got: {out}");
2754        assert!(!out.contains("Sync"), "got: {out}");
2755    }
2756
2757    #[test]
2758    fn sanitize_static_lifetime_stripped() {
2759        let input = "Requires 'static lifetime.";
2760        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2761        assert!(!out.contains("'static"), "got: {out}");
2762    }
2763
2764    #[test]
2765    fn sanitize_pub_fn_stripped() {
2766        let input = "Calls pub fn convert().";
2767        let out = sanitize_rust_idioms(input, DocTarget::PhpDoc);
2768        assert!(!out.contains("pub fn"), "got: {out}");
2769        assert!(out.contains("convert()"), "got: {out}");
2770    }
2771
2772    #[test]
2773    fn sanitize_crate_prefix_stripped() {
2774        let input = "See crate::error::ConversionError.";
2775        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2776        assert!(!out.contains("crate::"), "got: {out}");
2777        assert!(out.contains("error.ConversionError"), "got: {out}");
2778    }
2779
2780    #[test]
2781    fn sanitize_unwrap_expect_stripped() {
2782        let input = "Call result.unwrap() or result.expect(\"msg\").";
2783        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2784        assert!(!out.contains(".unwrap()"), "got: {out}");
2785        assert!(!out.contains(".expect("), "got: {out}");
2786    }
2787
2788    #[test]
2789    fn sanitize_no_mutation_inside_backticks() {
2790        // None inside backtick span must not be replaced.
2791        let input = "Use `None` as the argument.";
2792        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2793        assert!(out.contains("`None`"), "backtick span must be preserved, got: {out}");
2794    }
2795
2796    #[test]
2797    fn sanitize_rust_fence_dropped_for_tsdoc() {
2798        let input = "Intro.\n\n```rust\nlet x = 1;\n```\n\nTrailer.";
2799        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2800        assert!(
2801            !out.contains("let x = 1;"),
2802            "rust fence content must be dropped, got: {out}"
2803        );
2804        assert!(!out.contains("```rust"), "got: {out}");
2805        assert!(out.contains("Trailer."), "text after fence must survive, got: {out}");
2806    }
2807
2808    #[test]
2809    fn sanitize_rust_fence_tag_stripped_for_java() {
2810        let input = "Intro.\n\n```rust\nlet x = 1;\n```\n\nTrailer.";
2811        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2812        // Language tag is stripped; content is kept.
2813        assert!(
2814            out.contains("let x = 1;"),
2815            "fence content must survive for Java, got: {out}"
2816        );
2817        assert!(!out.contains("```rust"), "rust tag must be stripped, got: {out}");
2818        assert!(out.contains("```\n"), "bare fence must be kept, got: {out}");
2819    }
2820
2821    #[test]
2822    fn sanitize_non_rust_fence_passed_through() {
2823        let input = "Example:\n\n```typescript\nconst x = 1;\n```";
2824        let out = sanitize_rust_idioms(input, DocTarget::TsDoc);
2825        assert!(out.contains("```typescript"), "non-rust fence must survive, got: {out}");
2826        assert!(out.contains("const x = 1;"), "got: {out}");
2827    }
2828
2829    #[test]
2830    fn sanitize_backtick_code_span_not_mutated_option() {
2831        // Option<T> inside backtick span must not be replaced.
2832        let input = "The type is `Option<String>`.";
2833        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2834        // The backtick-protected span should be preserved verbatim.
2835        assert!(
2836            out.contains("`Option<String>`"),
2837            "code span must be preserved, got: {out}"
2838        );
2839    }
2840
2841    #[test]
2842    fn sanitize_idempotent() {
2843        // Running twice should produce the same result as running once.
2844        let input = "Returns None when Vec<String> is empty.";
2845        let once = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2846        let twice = sanitize_rust_idioms(&once, DocTarget::JavaDoc);
2847        assert_eq!(once, twice, "sanitize_rust_idioms should be idempotent");
2848    }
2849
2850    #[test]
2851    fn sanitize_multiline_prose() {
2852        let input = "Convert HTML to Markdown.\n\nReturns None on failure.\nUse Option<String> for the result.";
2853        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2854        assert!(out.contains("null"), "None must be replaced on line 2, got: {out}");
2855        assert!(
2856            out.contains("String | null"),
2857            "Option<String> must be replaced on line 3, got: {out}"
2858        );
2859    }
2860
2861    #[test]
2862    fn sanitize_attribute_line_dropped() {
2863        let input = "#[derive(Debug, Clone)]\nSome documentation.";
2864        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2865        assert!(!out.contains("#[derive("), "attribute line must be dropped, got: {out}");
2866        // Prose survives, though bare "Some " before a lowercase noun is stripped
2867        // by `replace_some_keyword_in_prose`, so accept either form.
2868        assert!(out.contains("documentation."), "prose must survive, got: {out}");
2869    }
2870
2871    #[test]
2872    fn sanitize_path_separator_in_prose() {
2873        let input = "See std::collections::HashMap for details.";
2874        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2875        assert!(out.contains("std.collections.HashMap"), ":: must become ., got: {out}");
2876    }
2877
2878    #[test]
2879    fn sanitize_none_not_replaced_inside_identifier() {
2880        // "NoneType" must not be replaced.
2881        let input = "Unlike NoneType in Python.";
2882        let out = sanitize_rust_idioms(input, DocTarget::JavaDoc);
2883        assert!(out.contains("NoneType"), "NoneType must not be replaced, got: {out}");
2884    }
2885}
alef_codegen/doc_emission.rs

alef_codegen/
doc_emission.rs