Skip to main content

sqry_classpath/resolve/
source_jars.rs

1//! On-demand source JAR documentation extractor.
2//!
3//! Extracts Javadoc/KDoc/Scaladoc comments from source JARs when requested
4//! for hover information. Results are cached in an LRU cache to avoid
5//! re-reading JARs.
6//!
7//! # Usage
8//!
9//! ```rust,ignore
10//! use sqry_classpath::resolve::source_jars::SourceJarProvider;
11//! use sqry_classpath::resolve::ClasspathEntry;
12//!
13//! let provider = SourceJarProvider::new(&entries, 1000);
14//! let docs = provider.get_docs("com.google.common.collect.ImmutableList", jar_path);
15//! ```
16//!
17//! # Source file location
18//!
19//! Given FQN `com.example.MyClass`, the source file is located at
20//! `com/example/MyClass.java` within the source JAR. For Kotlin and Scala,
21//! `.kt` and `.scala` extensions are tried as fallbacks.
22//!
23//! # Doc comment extraction
24//!
25//! Extracts `/** ... */` Javadoc-style blocks and converts to plain text by:
26//! - Stripping leading `*` from each line
27//! - Converting HTML tags (`<p>`, `<code>`, `<pre>`) to plain text equivalents
28//! - Converting inline tags (`{@code ...}`, `{@link ...}`) to readable form
29//! - Formatting block tags (`@param`, `@return`, `@throws`) for display
30
31use std::collections::HashMap;
32use std::io::Read;
33use std::path::{Path, PathBuf};
34use std::sync::Mutex;
35
36use log::warn;
37
38use super::ClasspathEntry;
39
/// Number of documentation strings the LRU cache holds when the caller does
/// not ask for a specific capacity (i.e. passes `0`).
const DEFAULT_CACHE_SIZE: usize = 1_000;

/// Source-file extensions probed inside a source JAR, highest priority first:
/// Java, then Kotlin, then Scala.
const SOURCE_EXTENSIONS: &[&str] = &["java", "kt", "scala"];
45
46/// On-demand source JAR documentation extractor.
47///
48/// Extracts Javadoc/KDoc/Scaladoc comments from source JARs when requested.
49/// Results are cached in an LRU cache to avoid re-reading JARs.
50pub struct SourceJarProvider {
51    /// Map from binary JAR path to source JAR path.
52    source_jar_map: HashMap<PathBuf, PathBuf>,
53    /// LRU cache: cache key -> extracted documentation string.
54    /// Cache key format: `"{jar_path}::{fqn}"` or `"{jar_path}::{fqn}::{member}"`.
55    /// `None` values are cached to avoid re-reading JARs for missing docs.
56    cache: Mutex<lru::LruCache<String, Option<String>>>,
57}
58
59impl SourceJarProvider {
60    /// Create a new provider from classpath entries.
61    ///
62    /// Scans entries for `source_jar` paths and builds the mapping from
63    /// binary JAR path to source JAR path.
64    #[must_use]
65    pub fn new(entries: &[ClasspathEntry], cache_size: usize) -> Self {
66        let size = if cache_size == 0 {
67            DEFAULT_CACHE_SIZE
68        } else {
69            cache_size
70        };
71        let mut source_jar_map = HashMap::new();
72        for entry in entries {
73            if let Some(ref source_jar) = entry.source_jar {
74                source_jar_map.insert(entry.jar_path.clone(), source_jar.clone());
75            }
76        }
77        Self {
78            source_jar_map,
79            cache: Mutex::new(lru::LruCache::new(
80                std::num::NonZeroUsize::new(size).expect("cache size must be non-zero"),
81            )),
82        }
83    }
84
85    /// Create a new provider with the default cache size.
86    #[must_use]
87    pub fn with_defaults(entries: &[ClasspathEntry]) -> Self {
88        Self::new(entries, DEFAULT_CACHE_SIZE)
89    }
90
91    /// Extract documentation for a class by fully qualified name.
92    ///
93    /// Returns the doc comment as plain text, or `None` if:
94    /// - No source JAR is available for the given binary JAR
95    /// - The class source file is not found in the source JAR
96    /// - The source file has no doc comment before the class declaration
97    pub fn get_docs(&self, fqn: &str, jar_path: &Path) -> Option<String> {
98        let cache_key = format!("{}::{fqn}", jar_path.display());
99
100        // Check cache first.
101        {
102            let mut cache = self.cache.lock().expect("cache lock poisoned");
103            if let Some(cached) = cache.get(&cache_key) {
104                return cached.clone();
105            }
106        }
107
108        let result = self.extract_class_docs(fqn, jar_path);
109
110        // Cache the result (including None for missing docs).
111        {
112            let mut cache = self.cache.lock().expect("cache lock poisoned");
113            cache.put(cache_key, result.clone());
114        }
115
116        result
117    }
118
119    /// Extract documentation for a specific member (method/field) within a class.
120    ///
121    /// Searches the source file for a doc comment immediately preceding a
122    /// declaration that matches `member_name`.
123    pub fn get_member_docs(&self, fqn: &str, member_name: &str, jar_path: &Path) -> Option<String> {
124        let cache_key = format!("{}::{fqn}::{member_name}", jar_path.display());
125
126        // Check cache first.
127        {
128            let mut cache = self.cache.lock().expect("cache lock poisoned");
129            if let Some(cached) = cache.get(&cache_key) {
130                return cached.clone();
131            }
132        }
133
134        let result = self.extract_member_docs(fqn, member_name, jar_path);
135
136        // Cache the result (including None for missing docs).
137        {
138            let mut cache = self.cache.lock().expect("cache lock poisoned");
139            cache.put(cache_key, result.clone());
140        }
141
142        result
143    }
144
145    /// Read the source file content for a given FQN from the source JAR.
146    ///
147    /// Tries `.java`, `.kt`, and `.scala` extensions in order.
148    fn read_source_from_jar(&self, fqn: &str, jar_path: &Path) -> Option<String> {
149        let source_jar_path = self.source_jar_map.get(jar_path)?;
150
151        let file = match std::fs::File::open(source_jar_path) {
152            Ok(f) => f,
153            Err(e) => {
154                warn!(
155                    "Failed to open source JAR {}: {e}",
156                    source_jar_path.display()
157                );
158                return None;
159            }
160        };
161
162        let mut archive = match zip::ZipArchive::new(file) {
163            Ok(a) => a,
164            Err(e) => {
165                warn!(
166                    "Failed to read source JAR {} as ZIP: {e}",
167                    source_jar_path.display()
168                );
169                return None;
170            }
171        };
172
173        // Convert FQN to path: com.example.MyClass -> com/example/MyClass
174        let base_path = fqn.replace('.', "/");
175
176        for ext in SOURCE_EXTENSIONS {
177            let entry_path = format!("{base_path}.{ext}");
178            if let Ok(mut entry) = archive.by_name(&entry_path) {
179                let mut content = String::new();
180                if let Err(e) = entry.read_to_string(&mut content) {
181                    warn!("Failed to read {entry_path} from source JAR: {e}");
182                    return None;
183                }
184                return Some(content);
185            }
186        }
187
188        None
189    }
190
191    /// Extract the class-level doc comment from a source file.
192    fn extract_class_docs(&self, fqn: &str, jar_path: &Path) -> Option<String> {
193        let source = self.read_source_from_jar(fqn, jar_path)?;
194        let class_name = fqn.rsplit('.').next().unwrap_or(fqn);
195        extract_class_doc_comment(&source, class_name)
196    }
197
198    /// Extract a member-level doc comment from a source file.
199    fn extract_member_docs(&self, fqn: &str, member_name: &str, jar_path: &Path) -> Option<String> {
200        let source = self.read_source_from_jar(fqn, jar_path)?;
201        extract_member_doc_comment(&source, member_name)
202    }
203}
204
205/// Extract the doc comment for a class declaration from source text.
206///
207/// Finds the last `/** ... */` block that appears before the class/interface/object
208/// declaration matching `class_name`.
209fn extract_class_doc_comment(source: &str, class_name: &str) -> Option<String> {
210    // Find the class declaration line.
211    let class_patterns = [
212        format!("class {class_name}"),
213        format!("interface {class_name}"),
214        format!("enum {class_name}"),
215        format!("object {class_name}"),
216        format!("trait {class_name}"),
217        format!("record {class_name}"),
218    ];
219
220    let class_pos = class_patterns
221        .iter()
222        .filter_map(|pat| find_declaration_position(source, pat))
223        .min()?;
224
225    // Find the doc comment preceding this position.
226    extract_preceding_doc_comment(source, class_pos)
227}
228
229/// Extract the doc comment for a member (method/field) from source text.
230///
231/// Finds the `/** ... */` block immediately preceding the first occurrence
232/// of `member_name` in a declaration context (i.e., followed by `(` for methods
233/// or preceded by a type for fields).
234fn extract_member_doc_comment(source: &str, member_name: &str) -> Option<String> {
235    // Search for the member name in a declaration context.
236    // We look for patterns like: `memberName(`, `memberName =`, `memberName;`,
237    // or just the member name preceded by whitespace and a type.
238    let mut search_start = 0;
239    while search_start < source.len() {
240        let remaining = &source[search_start..];
241        let offset = remaining.find(member_name)?;
242        let abs_pos = search_start + offset;
243
244        // Verify this is a declaration context (not a reference/call inside a method body).
245        // The member name must be preceded by whitespace or declaration keywords.
246        if is_declaration_context(source, abs_pos, member_name)
247            && let Some(doc) = extract_preceding_doc_comment(source, abs_pos)
248        {
249            return Some(doc);
250        }
251
252        search_start = abs_pos + member_name.len();
253    }
254
255    None
256}
257
/// Check if the occurrence of `member_name` at byte offset `pos` is likely a
/// member declaration rather than a usage.
///
/// Heuristic:
/// - the name must not be the tail of a longer identifier (`ltrim` is not a
///   declaration of `trim`);
/// - the last non-whitespace character before the name must look like the end
///   of a type or keyword: a letter/digit, `>` (generics) or `]` (arrays);
/// - the first non-whitespace character after the name must be one of
///   `(`, `;`, `=`, `{` or `:`.
///
/// `pos` must be a char boundary inside `source` (it normally comes from
/// `str::find` / `str::match_indices`).
fn is_declaration_context(source: &str, pos: usize, member_name: &str) -> bool {
    let before = &source[..pos];

    // An identifier character glued to the front means we matched a suffix of
    // some other name.
    let glued_to_identifier = before
        .chars()
        .next_back()
        .is_some_and(|c| c.is_alphanumeric() || c == '_' || c == '$');
    if glued_to_identifier {
        return false;
    }

    // Also false when `before` is empty or all whitespace.
    let preceded_by_type = before
        .trim_end()
        .chars()
        .next_back()
        .is_some_and(|c| c.is_alphanumeric() || c == '>' || c == ']');
    if !preceded_by_type {
        return false;
    }

    let after = source.get(pos + member_name.len()..).unwrap_or("");
    matches!(
        after.trim_start().chars().next(),
        Some('(' | ';' | '=' | '{' | ':')
    )
}
292
/// Find the byte offset of the first whole-word occurrence of `pattern`
/// (e.g. `"class Foo"`) in `source`.
///
/// An occurrence counts only when it is not glued to an identifier character
/// on either side, so `"class Foo"` matches neither `subclass Foo` nor
/// `class FooBar`. Identifier characters are Unicode letters/digits, `_` and
/// `$`.
///
/// Returns `None` when there is no such occurrence or when `pattern` is empty.
fn find_declaration_position(source: &str, pattern: &str) -> Option<usize> {
    fn is_identifier_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_' || c == '$'
    }

    // An empty pattern matches everywhere and names no declaration.
    if pattern.is_empty() {
        return None;
    }

    source
        .match_indices(pattern)
        .map(|(start, _)| start)
        .find(|&start| {
            let end = start + pattern.len();
            let glued_before = source[..start]
                .chars()
                .next_back()
                .is_some_and(is_identifier_char);
            let glued_after = source[end..].chars().next().is_some_and(is_identifier_char);
            !glued_before && !glued_after
        })
}
327
328/// Extract the `/** ... */` doc comment that immediately precedes the given position.
329///
330/// Searches backwards from `pos` for the closest `*/` and then its matching `/**`.
331/// Ensures there is no non-whitespace/non-annotation content between the comment
332/// end and the declaration.
333fn extract_preceding_doc_comment(source: &str, pos: usize) -> Option<String> {
334    let before = &source[..pos];
335
336    // Find the last `*/` before this position.
337    let comment_end = before.rfind("*/")?;
338    let comment_end_full = comment_end + 2;
339
340    // Check that between the comment end and the declaration, there's only
341    // whitespace and annotations.
342    let between = before[comment_end_full..].trim();
343    if !between.is_empty() && !is_only_annotations_modifiers_and_types(between) {
344        return None;
345    }
346
347    // Find the matching `/**`.
348    let before_end = &source[..=comment_end];
349    let comment_start = before_end.rfind("/**")?;
350
351    // Extract and clean the doc comment.
352    let raw_comment = &source[comment_start..comment_end_full];
353    Some(clean_doc_comment(raw_comment))
354}
355
356/// Check if text between a doc comment and a declaration contains only
357/// annotations, access modifiers, and type signatures.
358///
359/// This is intentionally permissive: it allows any identifier-like tokens
360/// (type names, generics, array brackets) that can appear between a doc
361/// comment and the declared member name. The key exclusions are statement
362/// terminators and assignment operators that would indicate intervening code.
363fn is_only_annotations_modifiers_and_types(text: &str) -> bool {
364    // If the text contains statement-like constructs, it's not just declarations.
365    if text.contains(';') || text.contains("return ") || text.contains("throw ") {
366        return false;
367    }
368
369    for token in text.split_whitespace() {
370        // Allow annotations like @Override, @Deprecated, @SuppressWarnings("...")
371        if token.starts_with('@') {
372            continue;
373        }
374        // Allow annotation arguments in parentheses.
375        if token.starts_with('(') || token.ends_with(')') || token.ends_with(',') {
376            continue;
377        }
378        // Allow string literals within annotations.
379        if token.starts_with('"') || token.ends_with('"') {
380            continue;
381        }
382        // Allow identifier-like tokens (modifiers, types, generics).
383        // These match: public, static, String, List<String>, int, void,
384        // Map<K,V>, Optional<T>, int[], byte[], etc.
385        if is_declaration_token(token) {
386            continue;
387        }
388        return false;
389    }
390    true
391}
392
/// Report whether `token` could be part of a declaration signature.
///
/// A token qualifies when every character is alphanumeric or one of
/// `_ < > [ ] . , ?`, which covers identifiers and keywords, generic types
/// (`List<String>`, `Map<K,V>`), array types (`int[]`), qualified names and
/// wildcards. An empty token qualifies vacuously.
fn is_declaration_token(token: &str) -> bool {
    const SIGNATURE_PUNCTUATION: [char; 8] = ['_', '<', '>', '[', ']', '.', ',', '?'];

    token
        .chars()
        .all(|ch| ch.is_alphanumeric() || SIGNATURE_PUNCTUATION.contains(&ch))
}
410
411/// Clean a raw `/** ... */` doc comment into plain text.
412///
413/// Strips the comment delimiters, leading `*` characters, and converts
414/// HTML/Javadoc markup to plain text.
415fn clean_doc_comment(raw: &str) -> String {
416    // Remove the opening `/**` and closing `*/`.
417    let content = raw
418        .strip_prefix("/**")
419        .unwrap_or(raw)
420        .strip_suffix("*/")
421        .unwrap_or(raw);
422
423    let mut lines: Vec<String> = Vec::new();
424
425    for line in content.lines() {
426        let trimmed = line.trim();
427        // Strip leading `*` (with optional space after).
428        let cleaned = if let Some(rest) = trimmed.strip_prefix("* ") {
429            rest
430        } else if let Some(rest) = trimmed.strip_prefix('*') {
431            rest
432        } else {
433            trimmed
434        };
435        lines.push(cleaned.to_string());
436    }
437
438    let joined = lines.join("\n");
439    let result = convert_html_to_plain_text(&joined);
440
441    // Trim leading/trailing blank lines and normalize whitespace.
442    let trimmed = result.trim();
443    normalize_blank_lines(trimmed)
444}
445
446/// Convert HTML and Javadoc inline tags to plain text.
447fn convert_html_to_plain_text(text: &str) -> String {
448    let mut result = text.to_string();
449
450    // Convert {@code text} -> `text`
451    result = replace_inline_tag(&result, "code");
452
453    // Convert {@link Type#method} -> Type.method
454    result = replace_link_tags(&result);
455
456    // Convert {@literal text} -> text
457    result = replace_literal_tags(&result);
458
459    // Convert {@value ...} -> the value reference.
460    result = replace_value_tags(&result);
461
462    // Convert HTML tags.
463    result = convert_html_tags(&result);
464
465    // Convert block tags (@param, @return, @throws, etc.).
466    result = convert_block_tags(&result);
467
468    result
469}
470
471/// Replace `{@code text}` with `` `text` ``.
472fn replace_inline_tag(text: &str, tag_name: &str) -> String {
473    let open_pattern = format!("{{@{tag_name} ");
474    let mut result = String::with_capacity(text.len());
475    let mut remaining = text;
476
477    while let Some(start) = remaining.find(&open_pattern) {
478        result.push_str(&remaining[..start]);
479        let after_tag = &remaining[start + open_pattern.len()..];
480
481        if let Some(close) = find_matching_brace(after_tag) {
482            let content = &after_tag[..close];
483            result.push('`');
484            result.push_str(content.trim());
485            result.push('`');
486            remaining = &after_tag[close + 1..];
487        } else {
488            // No matching brace; keep as-is.
489            result.push_str(&remaining[start..start + open_pattern.len()]);
490            remaining = after_tag;
491        }
492    }
493    result.push_str(remaining);
494    result
495}
496
/// Locate the `}` that closes a `{@tag ...}` construct.
///
/// `text` starts just after the tag opener, so the search begins at nesting
/// depth zero. Each `{` opens a nested level and each `}` closes one; the
/// byte offset of the first `}` seen at depth zero is returned, or `None`
/// when the construct is never closed.
fn find_matching_brace(text: &str) -> Option<usize> {
    let mut nested = 0u32;

    text.char_indices().find_map(|(offset, ch)| match ch {
        '{' => {
            nested += 1;
            None
        }
        '}' if nested == 0 => Some(offset),
        '}' => {
            nested -= 1;
            None
        }
        _ => None,
    })
}
515
516/// Replace `{@link Type#method}` with `Type.method`.
517fn replace_link_tags(text: &str) -> String {
518    let open_patterns = ["{@link ", "{@linkplain "];
519    let mut result = text.to_string();
520
521    for open_pattern in &open_patterns {
522        let mut new_result = String::with_capacity(result.len());
523        let mut remaining = result.as_str();
524
525        while let Some(start) = remaining.find(open_pattern) {
526            new_result.push_str(&remaining[..start]);
527            let after_tag = &remaining[start + open_pattern.len()..];
528
529            if let Some(close) = find_matching_brace(after_tag) {
530                let content = after_tag[..close].trim();
531                // If there's a display label (space-separated), use that.
532                // Otherwise, convert # to .
533                let display = if let Some(label_start) = content.find(' ') {
534                    content[label_start + 1..].trim()
535                } else {
536                    content
537                };
538                new_result.push_str(&display.replace('#', "."));
539                remaining = &after_tag[close + 1..];
540            } else {
541                new_result.push_str(&remaining[start..start + open_pattern.len()]);
542                remaining = after_tag;
543            }
544        }
545        new_result.push_str(remaining);
546        result = new_result;
547    }
548
549    result
550}
551
552/// Replace `{@literal text}` with backtick-wrapped text.
553///
554/// `{@literal}` in Javadoc means "display as-is without HTML interpretation".
555/// We wrap the content in backticks to protect it from HTML tag stripping
556/// and to signal that it is a literal/code-like reference.
557fn replace_literal_tags(text: &str) -> String {
558    let open_pattern = "{@literal ";
559    let mut result = String::with_capacity(text.len());
560    let mut remaining = text;
561
562    while let Some(start) = remaining.find(open_pattern) {
563        result.push_str(&remaining[..start]);
564        let after_tag = &remaining[start + open_pattern.len()..];
565
566        if let Some(close) = find_matching_brace(after_tag) {
567            result.push('`');
568            result.push_str(after_tag[..close].trim());
569            result.push('`');
570            remaining = &after_tag[close + 1..];
571        } else {
572            result.push_str(&remaining[start..start + open_pattern.len()]);
573            remaining = after_tag;
574        }
575    }
576    result.push_str(remaining);
577    result
578}
579
580/// Replace `{@value ...}` with the reference.
581fn replace_value_tags(text: &str) -> String {
582    let open_pattern = "{@value ";
583    let mut result = String::with_capacity(text.len());
584    let mut remaining = text;
585
586    while let Some(start) = remaining.find(open_pattern) {
587        result.push_str(&remaining[..start]);
588        let after_tag = &remaining[start + open_pattern.len()..];
589
590        if let Some(close) = find_matching_brace(after_tag) {
591            result.push_str(after_tag[..close].trim());
592            remaining = &after_tag[close + 1..];
593        } else {
594            result.push_str(&remaining[start..start + open_pattern.len()]);
595            remaining = after_tag;
596        }
597    }
598    result.push_str(remaining);
599    result
600}
601
602/// Convert HTML tags to plain text equivalents.
603fn convert_html_tags(text: &str) -> String {
604    let mut result = text.to_string();
605
606    // <p> and </p> -> newline
607    result = result.replace("<p>", "\n");
608    result = result.replace("</p>", "");
609    result = result.replace("<P>", "\n");
610
611    // <br> and <br/> -> newline
612    result = result.replace("<br>", "\n");
613    result = result.replace("<br/>", "\n");
614    result = result.replace("<br />", "\n");
615    result = result.replace("<BR>", "\n");
616
617    // <code>...</code> -> `...`
618    result = result.replace("<code>", "`");
619    result = result.replace("</code>", "`");
620    result = result.replace("<CODE>", "`");
621    result = result.replace("</CODE>", "`");
622
623    // <pre>...</pre> -> preserve content (already plain text in code blocks).
624    result = result.replace("<pre>", "");
625    result = result.replace("</pre>", "");
626    result = result.replace("<PRE>", "");
627    result = result.replace("</PRE>", "");
628
629    // <b>/<strong> -> keep content.
630    result = result.replace("<b>", "");
631    result = result.replace("</b>", "");
632    result = result.replace("<strong>", "");
633    result = result.replace("</strong>", "");
634    result = result.replace("<B>", "");
635    result = result.replace("</B>", "");
636
637    // <i>/<em> -> keep content.
638    result = result.replace("<i>", "");
639    result = result.replace("</i>", "");
640    result = result.replace("<em>", "");
641    result = result.replace("</em>", "");
642    result = result.replace("<I>", "");
643    result = result.replace("</I>", "");
644
645    // <ul>/<ol>/<li> -> basic list formatting.
646    result = result.replace("<ul>", "");
647    result = result.replace("</ul>", "");
648    result = result.replace("<ol>", "");
649    result = result.replace("</ol>", "");
650    result = result.replace("<li>", "\n- ");
651    result = result.replace("</li>", "");
652
653    // <tt> (teletype) -> backticks.
654    result = result.replace("<tt>", "`");
655    result = result.replace("</tt>", "`");
656
657    // Strip remaining HTML tags.
658    strip_remaining_html_tags(&result)
659}
660
/// Strip any remaining HTML tags from the text.
///
/// Content inside backticks is preserved verbatim (e.g. `` `Map<K, V>` ``
/// must not lose `<K, V>`).
///
/// Outside backticks a `<` only starts a tag when it really looks like one
/// (see `html_tag_len`). A bare `<` in prose such as `x < 5`, or one that is
/// never closed, is kept as ordinary text instead of swallowing whatever
/// follows it.
fn strip_remaining_html_tags(text: &str) -> String {
    let mut result = String::with_capacity(text.len());
    let mut in_backtick = false;
    let mut rest = text;

    while let Some(ch) = rest.chars().next() {
        if ch == '<' && !in_backtick {
            if let Some(tag_len) = html_tag_len(rest) {
                rest = &rest[tag_len..];
                continue;
            }
        } else if ch == '`' {
            in_backtick = !in_backtick;
        }
        result.push(ch);
        rest = &rest[ch.len_utf8()..];
    }

    result
}

/// Return the byte length of the HTML tag at the start of `candidate`, or
/// `None` if `candidate` does not start with something shaped like a tag.
///
/// Recognised shapes: `<name>`, `</name>`, `<name/>`, `<name attrs...>` and
/// `<!...>` (comments, doctype). `name` must start with an ASCII letter and
/// be followed by `>`, `/` or whitespace, which keeps generic-type text such
/// as `<K, V>` from being mistaken for markup.
fn html_tag_len(candidate: &str) -> Option<usize> {
    let inner = candidate.strip_prefix('<')?;
    let close = inner.find('>')?;
    let body = &inner[..close];
    // '<' + body + '>'
    let tag_len = close + 2;

    if body.starts_with('!') {
        return Some(tag_len);
    }

    let name_and_attrs = body.strip_prefix('/').unwrap_or(body);
    let name_len = name_and_attrs
        .find(|c: char| !(c.is_ascii_alphanumeric() || c == '-' || c == ':'))
        .unwrap_or(name_and_attrs.len());

    let starts_with_letter = name_and_attrs[..name_len]
        .chars()
        .next()
        .is_some_and(|c| c.is_ascii_alphabetic());
    let properly_delimited = name_and_attrs[name_len..]
        .chars()
        .next()
        .map_or(true, |c| c.is_whitespace() || c == '/');

    (starts_with_letter && properly_delimited).then_some(tag_len)
}
689
/// Convert Javadoc block tags (`@param`, `@return`, `@throws`, ...) to plain
/// labelled lines.
///
/// Each line is inspected after trimming:
/// - `@param name desc` -> `param name: desc`; `@throws` / `@exception`
///   likewise with the `throws` label (without a description: `label name`);
/// - `@return` / `@returns`, `@see`, `@since`, `@version`, `@author` ->
///   `label: rest`;
/// - a line starting with `@deprecated` -> `DEPRECATED: rest`, or
///   `DEPRECATED: (deprecated)` when no space-separated text follows;
/// - any other line is kept exactly as it was (untrimmed).
///
/// Lines are re-joined with `\n`.
fn convert_block_tags(text: &str) -> String {
    /// Tags of the form `@tag name description`: (prefix, label).
    const NAMED_TAGS: [(&str, &str); 3] = [
        ("@param ", "param"),
        ("@throws ", "throws"),
        ("@exception ", "throws"),
    ];
    /// Tags of the form `@tag description`: (prefix, label).
    const LABELLED_TAGS: [(&str, &str); 6] = [
        ("@return ", "returns"),
        ("@returns ", "returns"),
        ("@see ", "see"),
        ("@since ", "since"),
        ("@version ", "version"),
        ("@author ", "author"),
    ];

    let convert_line = |line: &str| -> String {
        let trimmed = line.trim();

        for &(prefix, label) in &NAMED_TAGS {
            if let Some(rest) = trimmed.strip_prefix(prefix) {
                return match rest.split_once(' ') {
                    Some((name, description)) => format!("{label} {name}: {description}"),
                    None => format!("{label} {rest}"),
                };
            }
        }

        for &(prefix, label) in &LABELLED_TAGS {
            if let Some(rest) = trimmed.strip_prefix(prefix) {
                return format!("{label}: {rest}");
            }
        }

        if trimmed.starts_with("@deprecated") {
            let rest = trimmed
                .strip_prefix("@deprecated ")
                .unwrap_or("(deprecated)");
            return format!("DEPRECATED: {rest}");
        }

        line.to_string()
    };

    text.lines().map(convert_line).collect::<Vec<_>>().join("\n")
}
742
/// Collapse every run of blank (empty or whitespace-only) lines into a single
/// empty line.
///
/// Non-blank lines are copied through unchanged and separated by `\n`. A
/// trailing blank run leaves one trailing `\n`; the input's own final line
/// terminator is otherwise not reproduced.
fn normalize_blank_lines(text: &str) -> String {
    let mut output = String::with_capacity(text.len());
    let mut last_was_blank = false;

    for line in text.lines() {
        let blank = line.trim().is_empty();
        match (blank, last_was_blank) {
            // Second or later blank line of a run: already represented.
            (true, true) => {}
            // First blank line of a run.
            (true, false) => output.push('\n'),
            // Content line: start a new line unless this is the very first output.
            (false, _) => {
                if !output.is_empty() {
                    output.push('\n');
                }
                output.push_str(line);
            }
        }
        last_was_blank = blank;
    }

    output
}
769
770#[cfg(test)]
771mod tests {
772    use std::io::Write;
773
774    use super::*;
775
776    /// Helper: create a ZIP file in memory containing the given entries.
777    fn create_source_jar(entries: &[(&str, &str)]) -> Vec<u8> {
778        let mut buf = Vec::new();
779        {
780            let mut writer = zip::ZipWriter::new(std::io::Cursor::new(&mut buf));
781            let options = zip::write::SimpleFileOptions::default()
782                .compression_method(zip::CompressionMethod::Stored);
783            for (path, content) in entries {
784                writer.start_file((*path).to_string(), options).unwrap();
785                writer.write_all(content.as_bytes()).unwrap();
786            }
787            writer.finish().unwrap();
788        }
789        buf
790    }
791
792    /// Helper: write a source JAR to a temp directory and create classpath entries.
793    fn setup_provider(
794        dir: &tempfile::TempDir,
795        entries: &[(&str, &str)],
796    ) -> (SourceJarProvider, PathBuf) {
797        let source_jar_path = dir.path().join("lib-sources.jar");
798        let jar_data = create_source_jar(entries);
799        std::fs::write(&source_jar_path, jar_data).unwrap();
800
801        let binary_jar_path = dir.path().join("lib.jar");
802        std::fs::write(&binary_jar_path, b"fake binary jar").unwrap();
803
804        let classpath_entries = vec![ClasspathEntry {
805            jar_path: binary_jar_path.clone(),
806            coordinates: Some("com.example:lib:1.0".to_string()),
807            is_direct: true,
808            source_jar: Some(source_jar_path),
809        }];
810
811        let provider = SourceJarProvider::new(&classpath_entries, 100);
812        (provider, binary_jar_path)
813    }
814
815    #[test]
816    fn test_extract_javadoc_from_simple_class() {
817        let dir = tempfile::tempdir().unwrap();
818        let source = r#"package com.example;
819
820/**
821 * A simple utility class for string operations.
822 *
823 * This class provides common string manipulation methods.
824 */
825public class StringUtils {
826    public static String trim(String s) {
827        return s.trim();
828    }
829}
830"#;
831        let (provider, jar_path) =
832            setup_provider(&dir, &[("com/example/StringUtils.java", source)]);
833
834        let docs = provider
835            .get_docs("com.example.StringUtils", &jar_path)
836            .unwrap();
837        assert!(docs.contains("A simple utility class for string operations"));
838        assert!(docs.contains("common string manipulation methods"));
839    }
840
841    #[test]
842    fn test_extract_method_level_javadoc() {
843        let dir = tempfile::tempdir().unwrap();
844        let source = r#"package com.example;
845
846/**
847 * String utilities.
848 */
849public class StringUtils {
850    /**
851     * Trims whitespace from both ends of a string.
852     *
853     * @param s the input string
854     * @return the trimmed string
855     */
856    public static String trim(String s) {
857        return s.trim();
858    }
859}
860"#;
861        let (provider, jar_path) =
862            setup_provider(&dir, &[("com/example/StringUtils.java", source)]);
863
864        let docs = provider
865            .get_member_docs("com.example.StringUtils", "trim", &jar_path)
866            .unwrap();
867        assert!(docs.contains("Trims whitespace from both ends"));
868        assert!(docs.contains("param s:"));
869        assert!(docs.contains("returns:"));
870    }
871
872    #[test]
873    fn test_missing_source_jar_returns_none() {
874        let entries = vec![ClasspathEntry {
875            jar_path: PathBuf::from("/nonexistent/lib.jar"),
876            coordinates: None,
877            is_direct: true,
878            source_jar: None,
879        }];
880        let provider = SourceJarProvider::new(&entries, 100);
881        let result = provider.get_docs("com.example.Foo", Path::new("/nonexistent/lib.jar"));
882        assert!(result.is_none());
883    }
884
885    #[test]
886    fn test_class_not_found_in_source_jar_returns_none() {
887        let dir = tempfile::tempdir().unwrap();
888        let source = "package com.example;\npublic class Other {}\n";
889        let (provider, jar_path) = setup_provider(&dir, &[("com/example/Other.java", source)]);
890
891        let result = provider.get_docs("com.example.Missing", &jar_path);
892        assert!(result.is_none());
893    }
894
895    #[test]
896    fn test_html_to_plain_text_conversion() {
897        let input = "First paragraph.<p>Second paragraph.\n<code>some code</code> and text.";
898        let result = convert_html_to_plain_text(input);
899        assert!(result.contains("First paragraph."));
900        assert!(result.contains("Second paragraph."));
901        assert!(result.contains("`some code`"));
902    }
903
904    #[test]
905    fn test_code_and_link_tag_conversion() {
906        let input = "Use {@code Map<K, V>} for mappings. See {@link HashMap#get the get method}.";
907        let result = convert_html_to_plain_text(input);
908        assert!(result.contains("`Map<K, V>`"));
909        assert!(result.contains("the get method"));
910    }
911
912    #[test]
913    fn test_param_return_throws_conversion() {
914        let input = "@param name the user name\n@return the greeting\n@throws IllegalArgumentException if name is null";
915        let result = convert_block_tags(input);
916        assert!(result.contains("param name: the user name"));
917        assert!(result.contains("returns: the greeting"));
918        assert!(result.contains("throws IllegalArgumentException: if name is null"));
919    }
920
921    #[test]
922    fn test_cache_hit_returns_same_result() {
923        let dir = tempfile::tempdir().unwrap();
924        let source = r#"package com.example;
925/**
926 * Cached class docs.
927 */
928public class Cached {
929}
930"#;
931        let (provider, jar_path) = setup_provider(&dir, &[("com/example/Cached.java", source)]);
932
933        let first = provider.get_docs("com.example.Cached", &jar_path);
934        let second = provider.get_docs("com.example.Cached", &jar_path);
935        assert_eq!(first, second);
936        assert!(first.is_some());
937
938        // Verify it's actually in the cache.
939        let cache = provider.cache.lock().unwrap();
940        assert!(!cache.is_empty());
941    }
942
943    #[test]
944    fn test_lru_eviction() {
945        let dir = tempfile::tempdir().unwrap();
946        let mut jar_entries = Vec::new();
947
948        // Create 5 classes.
949        for i in 0..5 {
950            let name = format!("Class{i}");
951            let path = format!("com/example/{name}.java");
952            let content =
953                format!("package com.example;\n/** Doc for {name}. */\npublic class {name} {{}}\n");
954            jar_entries.push((path, content));
955        }
956
957        let jar_entry_refs: Vec<(&str, &str)> = jar_entries
958            .iter()
959            .map(|(p, c)| (p.as_str(), c.as_str()))
960            .collect();
961
962        let source_jar_path = dir.path().join("lib-sources.jar");
963        let jar_data = create_source_jar(&jar_entry_refs);
964        std::fs::write(&source_jar_path, jar_data).unwrap();
965
966        let binary_jar_path = dir.path().join("lib.jar");
967        std::fs::write(&binary_jar_path, b"fake").unwrap();
968
969        let entries = vec![ClasspathEntry {
970            jar_path: binary_jar_path.clone(),
971            coordinates: None,
972            is_direct: true,
973            source_jar: Some(source_jar_path),
974        }];
975
976        // Cache size of 3 means older entries get evicted.
977        let provider = SourceJarProvider::new(&entries, 3);
978
979        // Access 5 classes with cache size 3.
980        for i in 0..5 {
981            let fqn = format!("com.example.Class{i}");
982            let result = provider.get_docs(&fqn, &binary_jar_path);
983            assert!(result.is_some(), "Class{i} should have docs");
984        }
985
986        // Cache should have at most 3 entries.
987        let cache = provider.cache.lock().unwrap();
988        assert!(cache.len() <= 3, "LRU cache should evict to capacity");
989    }
990
991    #[test]
992    fn test_kotlin_extension_fallback() {
993        let dir = tempfile::tempdir().unwrap();
994        let source = r#"package com.example
995
996/**
997 * A Kotlin data class.
998 */
999data class UserProfile(val name: String, val age: Int)
1000"#;
1001        let (provider, jar_path) = setup_provider(&dir, &[("com/example/UserProfile.kt", source)]);
1002
1003        let docs = provider
1004            .get_docs("com.example.UserProfile", &jar_path)
1005            .unwrap();
1006        assert!(docs.contains("Kotlin data class"));
1007    }
1008
1009    #[test]
1010    fn test_scala_extension_fallback() {
1011        let dir = tempfile::tempdir().unwrap();
1012        let source = r#"package com.example
1013
1014/**
1015 * A Scala case class for configuration.
1016 */
1017case class AppConfig(host: String, port: Int)
1018"#;
1019        // Scala uses the same /** */ doc comments.
1020        let (provider, jar_path) = setup_provider(&dir, &[("com/example/AppConfig.scala", source)]);
1021
1022        let docs = provider
1023            .get_docs("com.example.AppConfig", &jar_path)
1024            .unwrap();
1025        assert!(docs.contains("Scala case class for configuration"));
1026    }
1027
1028    #[test]
1029    fn test_multiline_doc_comment() {
1030        let dir = tempfile::tempdir().unwrap();
1031        let source = r#"package com.example;
1032
1033/**
1034 * An immutable, ordered collection of elements.
1035 *
1036 * <p>This is the second paragraph with more details.
1037 *
1038 * <p>Usage example:
1039 * <pre>
1040 *   ImmutableList<String> list = ImmutableList.of("a", "b", "c");
1041 * </pre>
1042 *
1043 * @param <E> the element type
1044 * @since 2.0
1045 * @see java.util.List
1046 * @author Google
1047 */
1048public class ImmutableList<E> {
1049}
1050"#;
1051        let (provider, jar_path) =
1052            setup_provider(&dir, &[("com/example/ImmutableList.java", source)]);
1053
1054        let docs = provider
1055            .get_docs("com.example.ImmutableList", &jar_path)
1056            .unwrap();
1057        assert!(docs.contains("immutable, ordered collection"));
1058        assert!(docs.contains("second paragraph"));
1059        assert!(docs.contains("since: 2.0"));
1060        assert!(docs.contains("see: java.util.List"));
1061        assert!(docs.contains("author: Google"));
1062    }
1063
1064    // --- Unit tests for internal conversion functions ---
1065
1066    #[test]
1067    fn test_clean_doc_comment_strips_delimiters() {
1068        let raw = "/** Simple doc. */";
1069        let result = clean_doc_comment(raw);
1070        assert_eq!(result.trim(), "Simple doc.");
1071    }
1072
1073    #[test]
1074    fn test_clean_doc_comment_strips_leading_stars() {
1075        let raw = "/**\n * Line one.\n * Line two.\n */";
1076        let result = clean_doc_comment(raw);
1077        assert!(result.contains("Line one."));
1078        assert!(result.contains("Line two."));
1079        assert!(!result.contains("* "));
1080    }
1081
1082    #[test]
1083    fn test_replace_inline_code_tag() {
1084        assert_eq!(replace_inline_tag("Use {@code foo}.", "code"), "Use `foo`.");
1085    }
1086
1087    #[test]
1088    fn test_replace_inline_code_tag_with_generics() {
1089        let result = replace_inline_tag("A {@code Map<K, V>} instance.", "code");
1090        assert_eq!(result, "A `Map<K, V>` instance.");
1091    }
1092
1093    #[test]
1094    fn test_replace_link_tag_simple() {
1095        let result = replace_link_tags("See {@link String}.");
1096        assert_eq!(result, "See String.");
1097    }
1098
1099    #[test]
1100    fn test_replace_link_tag_with_member() {
1101        let result = replace_link_tags("See {@link String#length}.");
1102        assert_eq!(result, "See String.length.");
1103    }
1104
1105    #[test]
1106    fn test_replace_link_tag_with_label() {
1107        let result = replace_link_tags("See {@link String#length the length method}.");
1108        assert_eq!(result, "See the length method.");
1109    }
1110
1111    #[test]
1112    fn test_strip_remaining_html_tags() {
1113        let input = "Hello <b>world</b> and <unknown-tag>foo</unknown-tag>.";
1114        let result = strip_remaining_html_tags(input);
1115        assert_eq!(result, "Hello world and foo.");
1116    }
1117
1118    #[test]
1119    fn test_normalize_blank_lines() {
1120        let input = "Line 1\n\n\n\nLine 2\n\nLine 3";
1121        let result = normalize_blank_lines(input);
1122        // Should have at most one blank line between content lines.
1123        assert!(!result.contains("\n\n\n"));
1124    }
1125
1126    #[test]
1127    fn test_find_matching_brace_simple() {
1128        assert_eq!(find_matching_brace("text}rest"), Some(4));
1129    }
1130
1131    #[test]
1132    fn test_find_matching_brace_nested() {
1133        assert_eq!(find_matching_brace("a{b}c}rest"), Some(5));
1134    }
1135
1136    #[test]
1137    fn test_is_only_annotations_modifiers_and_types() {
1138        assert!(is_only_annotations_modifiers_and_types("@Override public"));
1139        assert!(is_only_annotations_modifiers_and_types("@Deprecated"));
1140        assert!(is_only_annotations_modifiers_and_types(
1141            "public static final"
1142        ));
1143        assert!(!is_only_annotations_modifiers_and_types("int x = 5;"));
1144    }
1145
1146    #[test]
1147    fn test_deprecated_tag_conversion() {
1148        let input = "@deprecated Use newMethod() instead.";
1149        let result = convert_block_tags(input);
1150        assert!(result.contains("DEPRECATED: Use newMethod() instead."));
1151    }
1152
1153    #[test]
1154    fn test_exception_tag_conversion() {
1155        let input = "@exception IOException if I/O fails";
1156        let result = convert_block_tags(input);
1157        assert!(result.contains("throws IOException: if I/O fails"));
1158    }
1159
1160    #[test]
1161    fn test_literal_tag_conversion() {
1162        let input = "Use {@literal <T>} for generics.";
1163        let result = convert_html_to_plain_text(input);
1164        assert!(result.contains("<T>"));
1165    }
1166
1167    #[test]
1168    fn test_class_with_annotations() {
1169        let dir = tempfile::tempdir().unwrap();
1170        let source = r#"package com.example;
1171
1172/**
1173 * Deprecated utility class.
1174 */
1175@Deprecated
1176@SuppressWarnings("unused")
1177public final class OldUtils {
1178}
1179"#;
1180        let (provider, jar_path) = setup_provider(&dir, &[("com/example/OldUtils.java", source)]);
1181
1182        let docs = provider
1183            .get_docs("com.example.OldUtils", &jar_path)
1184            .unwrap();
1185        assert!(docs.contains("Deprecated utility class"));
1186    }
1187
1188    #[test]
1189    fn test_interface_doc_extraction() {
1190        let dir = tempfile::tempdir().unwrap();
1191        let source = r#"package com.example;
1192
1193/**
1194 * A service interface for user management.
1195 */
1196public interface UserService {
1197    void createUser(String name);
1198}
1199"#;
1200        let (provider, jar_path) =
1201            setup_provider(&dir, &[("com/example/UserService.java", source)]);
1202
1203        let docs = provider
1204            .get_docs("com.example.UserService", &jar_path)
1205            .unwrap();
1206        assert!(docs.contains("service interface for user management"));
1207    }
1208
1209    #[test]
1210    fn test_enum_doc_extraction() {
1211        let dir = tempfile::tempdir().unwrap();
1212        let source = r#"package com.example;
1213
1214/**
1215 * Represents the status of an order.
1216 */
1217public enum OrderStatus {
1218    PENDING, SHIPPED, DELIVERED
1219}
1220"#;
1221        let (provider, jar_path) =
1222            setup_provider(&dir, &[("com/example/OrderStatus.java", source)]);
1223
1224        let docs = provider
1225            .get_docs("com.example.OrderStatus", &jar_path)
1226            .unwrap();
1227        assert!(docs.contains("status of an order"));
1228    }
1229
1230    #[test]
1231    fn test_no_doc_comment_returns_none() {
1232        let dir = tempfile::tempdir().unwrap();
1233        let source = r#"package com.example;
1234
1235public class NoDoc {
1236}
1237"#;
1238        let (provider, jar_path) = setup_provider(&dir, &[("com/example/NoDoc.java", source)]);
1239
1240        let result = provider.get_docs("com.example.NoDoc", &jar_path);
1241        assert!(result.is_none());
1242    }
1243
1244    #[test]
1245    fn test_member_not_found_returns_none() {
1246        let dir = tempfile::tempdir().unwrap();
1247        let source = r#"package com.example;
1248
1249/**
1250 * A class.
1251 */
1252public class MyClass {
1253    public void existingMethod() {}
1254}
1255"#;
1256        let (provider, jar_path) = setup_provider(&dir, &[("com/example/MyClass.java", source)]);
1257
1258        let result = provider.get_member_docs("com.example.MyClass", "nonExistent", &jar_path);
1259        assert!(result.is_none());
1260    }
1261
1262    #[test]
1263    fn test_cache_none_for_missing_docs() {
1264        let dir = tempfile::tempdir().unwrap();
1265        let source = "package com.example;\npublic class NoDoc {}\n";
1266        let (provider, jar_path) = setup_provider(&dir, &[("com/example/NoDoc.java", source)]);
1267
1268        // First call caches None.
1269        let first = provider.get_docs("com.example.NoDoc", &jar_path);
1270        assert!(first.is_none());
1271
1272        // Second call should hit cache (also None).
1273        let second = provider.get_docs("com.example.NoDoc", &jar_path);
1274        assert!(second.is_none());
1275
1276        // Verify it's cached.
1277        let cache = provider.cache.lock().unwrap();
1278        let key = format!("{}::com.example.NoDoc", jar_path.display());
1279        assert!(cache.peek(&key).is_some());
1280    }
1281
1282    #[test]
1283    fn test_with_defaults_constructor() {
1284        let entries = vec![];
1285        let provider = SourceJarProvider::with_defaults(&entries);
1286        // Should not panic and should have empty map.
1287        assert!(provider.source_jar_map.is_empty());
1288    }
1289
1290    #[test]
1291    fn test_multiple_jars_mapping() {
1292        let dir = tempfile::tempdir().unwrap();
1293
1294        // Create two source JARs with different classes.
1295        let source_a = "package com.a;\n/** Class A. */\npublic class A {}\n";
1296        let source_b = "package com.b;\n/** Class B. */\npublic class B {}\n";
1297
1298        let jar_a_src = dir.path().join("a-sources.jar");
1299        let jar_b_src = dir.path().join("b-sources.jar");
1300        let jar_a_bin = dir.path().join("a.jar");
1301        let jar_b_bin = dir.path().join("b.jar");
1302
1303        std::fs::write(&jar_a_src, create_source_jar(&[("com/a/A.java", source_a)])).unwrap();
1304        std::fs::write(&jar_b_src, create_source_jar(&[("com/b/B.java", source_b)])).unwrap();
1305        std::fs::write(&jar_a_bin, b"fake").unwrap();
1306        std::fs::write(&jar_b_bin, b"fake").unwrap();
1307
1308        let entries = vec![
1309            ClasspathEntry {
1310                jar_path: jar_a_bin.clone(),
1311                coordinates: None,
1312                is_direct: true,
1313                source_jar: Some(jar_a_src),
1314            },
1315            ClasspathEntry {
1316                jar_path: jar_b_bin.clone(),
1317                coordinates: None,
1318                is_direct: true,
1319                source_jar: Some(jar_b_src),
1320            },
1321        ];
1322
1323        let provider = SourceJarProvider::new(&entries, 100);
1324
1325        let docs_a = provider.get_docs("com.a.A", &jar_a_bin).unwrap();
1326        assert!(docs_a.contains("Class A"));
1327
1328        let docs_b = provider.get_docs("com.b.B", &jar_b_bin).unwrap();
1329        assert!(docs_b.contains("Class B"));
1330
1331        // Cross-lookup should fail.
1332        assert!(provider.get_docs("com.a.A", &jar_b_bin).is_none());
1333    }
1334
1335    #[test]
1336    fn test_field_member_docs() {
1337        let dir = tempfile::tempdir().unwrap();
1338        let source = r#"package com.example;
1339
1340public class Config {
1341    /**
1342     * The maximum number of retries.
1343     */
1344    public static final int MAX_RETRIES = 3;
1345
1346    /**
1347     * The default timeout in milliseconds.
1348     */
1349    private long timeout = 5000;
1350}
1351"#;
1352        let (provider, jar_path) = setup_provider(&dir, &[("com/example/Config.java", source)]);
1353
1354        let docs = provider
1355            .get_member_docs("com.example.Config", "MAX_RETRIES", &jar_path)
1356            .unwrap();
1357        assert!(docs.contains("maximum number of retries"));
1358
1359        let docs = provider
1360            .get_member_docs("com.example.Config", "timeout", &jar_path)
1361            .unwrap();
1362        assert!(docs.contains("default timeout in milliseconds"));
1363    }
1364}