// sqry_classpath/resolve/source_jars.rs

//! On-demand source JAR documentation extractor.
//!
//! Extracts Javadoc/KDoc/Scaladoc comments from source JARs when requested
//! for hover information. Results are cached in an LRU cache to avoid
//! re-reading JARs.
//!
//! # Usage
//!
//! ```rust,ignore
//! use sqry_classpath::resolve::source_jars::SourceJarProvider;
//! use sqry_classpath::resolve::ClasspathEntry;
//!
//! let provider = SourceJarProvider::new(&entries, 1000);
//! let docs = provider.get_docs("com.google.common.collect.ImmutableList", jar_path);
//! ```
//!
//! # Source file location
//!
//! Given FQN `com.example.MyClass`, the source file is located at
//! `com/example/MyClass.java` within the source JAR. For Kotlin and Scala,
//! `.kt` and `.scala` extensions are tried as fallbacks.
//!
//! # Doc comment extraction
//!
//! Extracts `/** ... */` Javadoc-style blocks and converts them to plain text by:
//! - Stripping the leading `*` from each line
//! - Converting HTML tags (`<p>`, `<code>`, `<pre>`) to plain text equivalents
//! - Converting inline tags (`{@code ...}`, `{@link ...}`) to readable form
//! - Formatting block tags (`@param`, `@return`, `@throws`) for display
30
31use std::collections::HashMap;
32use std::io::Read;
33use std::path::{Path, PathBuf};
34use std::sync::Mutex;
35
36use log::warn;
37
38use super::ClasspathEntry;
39
/// Default LRU cache capacity, counted in cached documentation lookups.
///
/// Used by `SourceJarProvider::with_defaults` and substituted by
/// `SourceJarProvider::new` when the caller passes a capacity of `0`.
const DEFAULT_CACHE_SIZE: usize = 1000;

/// File extensions to search in source JARs, in priority order.
///
/// The extensions are tried in this order and the first archive entry that
/// exists is used, so `.java` wins over `.kt`, which wins over `.scala`.
const SOURCE_EXTENSIONS: &[&str] = &["java", "kt", "scala"];
45
46/// On-demand source JAR documentation extractor.
47///
48/// Extracts Javadoc/KDoc/Scaladoc comments from source JARs when requested.
49/// Results are cached in an LRU cache to avoid re-reading JARs.
50pub struct SourceJarProvider {
51    /// Map from binary JAR path to source JAR path.
52    source_jar_map: HashMap<PathBuf, PathBuf>,
53    /// LRU cache: cache key -> extracted documentation string.
54    /// Cache key format: `"{jar_path}::{fqn}"` or `"{jar_path}::{fqn}::{member}"`.
55    /// `None` values are cached to avoid re-reading JARs for missing docs.
56    cache: Mutex<lru::LruCache<String, Option<String>>>,
57}
58
59impl SourceJarProvider {
60    /// Create a new provider from classpath entries.
61    ///
62    /// Scans entries for `source_jar` paths and builds the mapping from
63    /// binary JAR path to source JAR path.
64    #[must_use]
65    #[allow(clippy::missing_errors_doc)] // Internal helper function
66    #[allow(clippy::missing_panics_doc)] // Panic condition documented in body
67    pub fn new(entries: &[ClasspathEntry], cache_size: usize) -> Self {
68        let size = if cache_size == 0 {
69            DEFAULT_CACHE_SIZE
70        } else {
71            cache_size
72        };
73        let mut source_jar_map = HashMap::new();
74        for entry in entries {
75            if let Some(ref source_jar) = entry.source_jar {
76                source_jar_map.insert(entry.jar_path.clone(), source_jar.clone());
77            }
78        }
79        Self {
80            source_jar_map,
81            cache: Mutex::new(lru::LruCache::new(
82                std::num::NonZeroUsize::new(size).expect("cache size must be non-zero"),
83            )),
84        }
85    }
86
87    /// Create a new provider with the default cache size.
88    #[must_use]
89    pub fn with_defaults(entries: &[ClasspathEntry]) -> Self {
90        Self::new(entries, DEFAULT_CACHE_SIZE)
91    }
92
93    /// Extract documentation for a class by fully qualified name.
94    ///
95    /// Returns the doc comment as plain text, or `None` if:
96    /// - No source JAR is available for the given binary JAR
97    /// - The class source file is not found in the source JAR
98    /// - The source file has no doc comment before the class declaration
99    #[allow(clippy::missing_panics_doc)] // Panic condition documented in body
100    pub fn get_docs(&self, fqn: &str, jar_path: &Path) -> Option<String> {
101        let cache_key = format!("{}::{fqn}", jar_path.display());
102
103        // Check cache first.
104        {
105            let mut cache = self.cache.lock().expect("cache lock poisoned");
106            if let Some(cached) = cache.get(&cache_key) {
107                return cached.clone();
108            }
109        }
110
111        let result = self.extract_class_docs(fqn, jar_path);
112
113        // Cache the result (including None for missing docs).
114        {
115            let mut cache = self.cache.lock().expect("cache lock poisoned");
116            cache.put(cache_key, result.clone());
117        }
118
119        result
120    }
121
122    /// Extract documentation for a specific member (method/field) within a class.
123    ///
124    /// Searches the source file for a doc comment immediately preceding a
125    /// declaration that matches `member_name`.
126    #[allow(clippy::missing_panics_doc)] // Panic documented in body
127    pub fn get_member_docs(&self, fqn: &str, member_name: &str, jar_path: &Path) -> Option<String> {
128        let cache_key = format!("{}::{fqn}::{member_name}", jar_path.display());
129
130        // Check cache first.
131        {
132            let mut cache = self.cache.lock().expect("cache lock poisoned");
133            if let Some(cached) = cache.get(&cache_key) {
134                return cached.clone();
135            }
136        }
137
138        let result = self.extract_member_docs(fqn, member_name, jar_path);
139
140        // Cache the result (including None for missing docs).
141        {
142            let mut cache = self.cache.lock().expect("cache lock poisoned");
143            cache.put(cache_key, result.clone());
144        }
145
146        result
147    }
148
149    /// Read the source file content for a given FQN from the source JAR.
150    ///
151    /// Tries `.java`, `.kt`, and `.scala` extensions in order.
152    fn read_source_from_jar(&self, fqn: &str, jar_path: &Path) -> Option<String> {
153        let source_jar_path = self.source_jar_map.get(jar_path)?;
154
155        let file = match std::fs::File::open(source_jar_path) {
156            Ok(f) => f,
157            Err(e) => {
158                warn!(
159                    "Failed to open source JAR {}: {e}",
160                    source_jar_path.display()
161                );
162                return None;
163            }
164        };
165
166        let mut archive = match zip::ZipArchive::new(file) {
167            Ok(a) => a,
168            Err(e) => {
169                warn!(
170                    "Failed to read source JAR {} as ZIP: {e}",
171                    source_jar_path.display()
172                );
173                return None;
174            }
175        };
176
177        // Convert FQN to path: com.example.MyClass -> com/example/MyClass
178        let base_path = fqn.replace('.', "/");
179
180        for ext in SOURCE_EXTENSIONS {
181            let entry_path = format!("{base_path}.{ext}");
182            if let Ok(mut entry) = archive.by_name(&entry_path) {
183                let mut content = String::new();
184                if let Err(e) = entry.read_to_string(&mut content) {
185                    warn!("Failed to read {entry_path} from source JAR: {e}");
186                    return None;
187                }
188                return Some(content);
189            }
190        }
191
192        None
193    }
194
195    /// Extract the class-level doc comment from a source file.
196    fn extract_class_docs(&self, fqn: &str, jar_path: &Path) -> Option<String> {
197        let source = self.read_source_from_jar(fqn, jar_path)?;
198        let class_name = fqn.rsplit('.').next().unwrap_or(fqn);
199        extract_class_doc_comment(&source, class_name)
200    }
201
202    /// Extract a member-level doc comment from a source file.
203    fn extract_member_docs(&self, fqn: &str, member_name: &str, jar_path: &Path) -> Option<String> {
204        let source = self.read_source_from_jar(fqn, jar_path)?;
205        extract_member_doc_comment(&source, member_name)
206    }
207}
208
209/// Extract the doc comment for a class declaration from source text.
210///
211/// Finds the last `/** ... */` block that appears before the class/interface/object
212/// declaration matching `class_name`.
213fn extract_class_doc_comment(source: &str, class_name: &str) -> Option<String> {
214    // Find the class declaration line.
215    let class_patterns = [
216        format!("class {class_name}"),
217        format!("interface {class_name}"),
218        format!("enum {class_name}"),
219        format!("object {class_name}"),
220        format!("trait {class_name}"),
221        format!("record {class_name}"),
222    ];
223
224    let class_pos = class_patterns
225        .iter()
226        .filter_map(|pat| find_declaration_position(source, pat))
227        .min()?;
228
229    // Find the doc comment preceding this position.
230    extract_preceding_doc_comment(source, class_pos)
231}
232
233/// Extract the doc comment for a member (method/field) from source text.
234///
235/// Finds the `/** ... */` block immediately preceding the first occurrence
236/// of `member_name` in a declaration context (i.e., followed by `(` for methods
237/// or preceded by a type for fields).
238fn extract_member_doc_comment(source: &str, member_name: &str) -> Option<String> {
239    // Search for the member name in a declaration context.
240    // We look for patterns like: `memberName(`, `memberName =`, `memberName;`,
241    // or just the member name preceded by whitespace and a type.
242    let mut search_start = 0;
243    while search_start < source.len() {
244        let remaining = &source[search_start..];
245        let offset = remaining.find(member_name)?;
246        let abs_pos = search_start + offset;
247
248        // Verify this is a declaration context (not a reference/call inside a method body).
249        // The member name must be preceded by whitespace or declaration keywords.
250        if is_declaration_context(source, abs_pos, member_name)
251            && let Some(doc) = extract_preceding_doc_comment(source, abs_pos)
252        {
253            return Some(doc);
254        }
255
256        search_start = abs_pos + member_name.len();
257    }
258
259    None
260}
261
/// Check if the occurrence of `member_name` at byte offset `pos` is likely a
/// member declaration rather than a usage.
///
/// Heuristic, all of which must hold:
/// - the occurrence starts on an identifier boundary (so `get` is not found
///   inside `forget`);
/// - skipping whitespace backwards, the previous character ends a type
///   (alphanumeric, `>` or `]`);
/// - skipping whitespace forwards, the next character is one of `(`, `;`,
///   `=`, `{` or `:`.
///
/// Returns `false` when `pos` is `0`, out of range, or not a character boundary.
fn is_declaration_context(source: &str, pos: usize, member_name: &str) -> bool {
    let Some(before) = source.get(..pos) else {
        return false;
    };
    let after = source.get(pos + member_name.len()..).unwrap_or("");

    // Whole-word check: an identifier character glued to the front means the
    // match is only the tail of a longer identifier.
    let glued_to_identifier = before
        .chars()
        .next_back()
        .is_some_and(|c| c.is_alphanumeric() || c == '_' || c == '$');
    if glued_to_identifier {
        return false;
    }

    // Preceded by a type: a type name, a closing generic bracket, or an array suffix.
    let preceded_by_type = before
        .trim_end()
        .chars()
        .next_back()
        .is_some_and(|c| c.is_alphanumeric() || c == '>' || c == ']');

    // Followed by a parameter list, terminator, initializer, body, or type annotation.
    let followed_by_declaration_syntax = after
        .trim_start()
        .chars()
        .next()
        .is_some_and(|c| matches!(c, '(' | ';' | '=' | '{' | ':'));

    preceded_by_type && followed_by_declaration_syntax
}
296
/// Find the byte offset of the first whole-word occurrence of `pattern`.
///
/// An occurrence counts only if it is not glued to an identifier character
/// (ASCII letter or digit, `_`, or `$`) on either side, so `class Foo` matches
/// neither `subclass Foo` nor `class FooBar`. Occurrences are examined left to
/// right without overlap.
///
/// Returns `None` for an empty `pattern` or when no occurrence qualifies.
fn find_declaration_position(source: &str, pattern: &str) -> Option<usize> {
    // An empty pattern matches everywhere and declares nothing.
    if pattern.is_empty() {
        return None;
    }

    let bytes = source.as_bytes();
    let is_identifier_byte =
        |index: usize| {
            bytes
                .get(index)
                .is_some_and(|b| b.is_ascii_alphanumeric() || *b == b'_' || *b == b'$')
        };

    source
        .match_indices(pattern)
        .map(|(start, _)| start)
        .find(|&start| {
            let glued_before = start.checked_sub(1).is_some_and(&is_identifier_byte);
            let glued_after = is_identifier_byte(start + pattern.len());
            !glued_before && !glued_after
        })
}
331
332/// Extract the `/** ... */` doc comment that immediately precedes the given position.
333///
334/// Searches backwards from `pos` for the closest `*/` and then its matching `/**`.
335/// Ensures there is no non-whitespace/non-annotation content between the comment
336/// end and the declaration.
337fn extract_preceding_doc_comment(source: &str, pos: usize) -> Option<String> {
338    let before = &source[..pos];
339
340    // Find the last `*/` before this position.
341    let comment_end = before.rfind("*/")?;
342    let comment_end_full = comment_end + 2;
343
344    // Check that between the comment end and the declaration, there's only
345    // whitespace and annotations.
346    let between = before[comment_end_full..].trim();
347    if !between.is_empty() && !is_only_annotations_modifiers_and_types(between) {
348        return None;
349    }
350
351    // Find the matching `/**`.
352    let before_end = &source[..=comment_end];
353    let comment_start = before_end.rfind("/**")?;
354
355    // Extract and clean the doc comment.
356    let raw_comment = &source[comment_start..comment_end_full];
357    Some(clean_doc_comment(raw_comment))
358}
359
360/// Check if text between a doc comment and a declaration contains only
361/// annotations, access modifiers, and type signatures.
362///
363/// This is intentionally permissive: it allows any identifier-like tokens
364/// (type names, generics, array brackets) that can appear between a doc
365/// comment and the declared member name. The key exclusions are statement
366/// terminators and assignment operators that would indicate intervening code.
367fn is_only_annotations_modifiers_and_types(text: &str) -> bool {
368    // If the text contains statement-like constructs, it's not just declarations.
369    if text.contains(';') || text.contains("return ") || text.contains("throw ") {
370        return false;
371    }
372
373    for token in text.split_whitespace() {
374        // Allow annotations like @Override, @Deprecated, @SuppressWarnings("...")
375        if token.starts_with('@') {
376            continue;
377        }
378        // Allow annotation arguments in parentheses.
379        if token.starts_with('(') || token.ends_with(')') || token.ends_with(',') {
380            continue;
381        }
382        // Allow string literals within annotations.
383        if token.starts_with('"') || token.ends_with('"') {
384            continue;
385        }
386        // Allow identifier-like tokens (modifiers, types, generics).
387        // These match: public, static, String, List<String>, int, void,
388        // Map<K,V>, Optional<T>, int[], byte[], etc.
389        if is_declaration_token(token) {
390            continue;
391        }
392        return false;
393    }
394    true
395}
396
/// Report whether `token` could be part of a declaration signature.
///
/// A token qualifies when every character is alphanumeric or one of
/// `_ < > [ ] . , ?`. That covers identifiers and keywords, qualified names,
/// generic types (`List<String>`, `Map<K,V>`), array types (`int[]`) and
/// wildcards (`?`). An empty token qualifies vacuously.
fn is_declaration_token(token: &str) -> bool {
    token
        .chars()
        .all(|c| c.is_alphanumeric() || matches!(c, '_' | '<' | '>' | '[' | ']' | '.' | ',' | '?'))
}
414
415/// Clean a raw `/** ... */` doc comment into plain text.
416///
417/// Strips the comment delimiters, leading `*` characters, and converts
418/// HTML/Javadoc markup to plain text.
419fn clean_doc_comment(raw: &str) -> String {
420    // Remove the opening `/**` and closing `*/`.
421    let content = raw
422        .strip_prefix("/**")
423        .unwrap_or(raw)
424        .strip_suffix("*/")
425        .unwrap_or(raw);
426
427    let mut lines: Vec<String> = Vec::new();
428
429    for line in content.lines() {
430        let trimmed = line.trim();
431        // Strip leading `*` (with optional space after).
432        let cleaned = if let Some(rest) = trimmed.strip_prefix("* ") {
433            rest
434        } else if let Some(rest) = trimmed.strip_prefix('*') {
435            rest
436        } else {
437            trimmed
438        };
439        lines.push(cleaned.to_string());
440    }
441
442    let joined = lines.join("\n");
443    let result = convert_html_to_plain_text(&joined);
444
445    // Trim leading/trailing blank lines and normalize whitespace.
446    let trimmed = result.trim();
447    normalize_blank_lines(trimmed)
448}
449
450/// Convert HTML and Javadoc inline tags to plain text.
451fn convert_html_to_plain_text(text: &str) -> String {
452    let mut result = text.to_string();
453
454    // Convert {@code text} -> `text`
455    result = replace_inline_tag(&result, "code");
456
457    // Convert {@link Type#method} -> Type.method
458    result = replace_link_tags(&result);
459
460    // Convert {@literal text} -> text
461    result = replace_literal_tags(&result);
462
463    // Convert {@value ...} -> the value reference.
464    result = replace_value_tags(&result);
465
466    // Convert HTML tags.
467    result = convert_html_tags(&result);
468
469    // Convert block tags (@param, @return, @throws, etc.).
470    result = convert_block_tags(&result);
471
472    result
473}
474
475/// Replace `{@code text}` with `` `text` ``.
476fn replace_inline_tag(text: &str, tag_name: &str) -> String {
477    let open_pattern = format!("{{@{tag_name} ");
478    let mut result = String::with_capacity(text.len());
479    let mut remaining = text;
480
481    while let Some(start) = remaining.find(&open_pattern) {
482        result.push_str(&remaining[..start]);
483        let after_tag = &remaining[start + open_pattern.len()..];
484
485        if let Some(close) = find_matching_brace(after_tag) {
486            let content = &after_tag[..close];
487            result.push('`');
488            result.push_str(content.trim());
489            result.push('`');
490            remaining = &after_tag[close + 1..];
491        } else {
492            // No matching brace; keep as-is.
493            result.push_str(&remaining[start..start + open_pattern.len()]);
494            remaining = after_tag;
495        }
496    }
497    result.push_str(remaining);
498    result
499}
500
/// Locate the `}` that closes a `{@tag ...}` construct.
///
/// `text` starts just after the tag opener, so the search begins at nesting
/// level zero. Every `{` raises the level and every `}` at a non-zero level
/// lowers it. The byte offset of the first `}` seen at level zero is returned,
/// or `None` if the text ends first.
fn find_matching_brace(text: &str) -> Option<usize> {
    let mut nesting = 0u32;

    text.char_indices()
        .find(|&(_, ch)| match ch {
            '{' => {
                nesting += 1;
                false
            }
            '}' if nesting == 0 => true,
            '}' => {
                nesting -= 1;
                false
            }
            _ => false,
        })
        .map(|(offset, _)| offset)
}
519
520/// Replace `{@link Type#method}` with `Type.method`.
521fn replace_link_tags(text: &str) -> String {
522    let open_patterns = ["{@link ", "{@linkplain "];
523    let mut result = text.to_string();
524
525    for open_pattern in &open_patterns {
526        let mut new_result = String::with_capacity(result.len());
527        let mut remaining = result.as_str();
528
529        while let Some(start) = remaining.find(open_pattern) {
530            new_result.push_str(&remaining[..start]);
531            let after_tag = &remaining[start + open_pattern.len()..];
532
533            if let Some(close) = find_matching_brace(after_tag) {
534                let content = after_tag[..close].trim();
535                // If there's a display label (space-separated), use that.
536                // Otherwise, convert # to .
537                let display = if let Some(label_start) = content.find(' ') {
538                    content[label_start + 1..].trim()
539                } else {
540                    content
541                };
542                new_result.push_str(&display.replace('#', "."));
543                remaining = &after_tag[close + 1..];
544            } else {
545                new_result.push_str(&remaining[start..start + open_pattern.len()]);
546                remaining = after_tag;
547            }
548        }
549        new_result.push_str(remaining);
550        result = new_result;
551    }
552
553    result
554}
555
556/// Replace `{@literal text}` with backtick-wrapped text.
557///
558/// `{@literal}` in Javadoc means "display as-is without HTML interpretation".
559/// We wrap the content in backticks to protect it from HTML tag stripping
560/// and to signal that it is a literal/code-like reference.
561fn replace_literal_tags(text: &str) -> String {
562    let open_pattern = "{@literal ";
563    let mut result = String::with_capacity(text.len());
564    let mut remaining = text;
565
566    while let Some(start) = remaining.find(open_pattern) {
567        result.push_str(&remaining[..start]);
568        let after_tag = &remaining[start + open_pattern.len()..];
569
570        if let Some(close) = find_matching_brace(after_tag) {
571            result.push('`');
572            result.push_str(after_tag[..close].trim());
573            result.push('`');
574            remaining = &after_tag[close + 1..];
575        } else {
576            result.push_str(&remaining[start..start + open_pattern.len()]);
577            remaining = after_tag;
578        }
579    }
580    result.push_str(remaining);
581    result
582}
583
584/// Replace `{@value ...}` with the reference.
585fn replace_value_tags(text: &str) -> String {
586    let open_pattern = "{@value ";
587    let mut result = String::with_capacity(text.len());
588    let mut remaining = text;
589
590    while let Some(start) = remaining.find(open_pattern) {
591        result.push_str(&remaining[..start]);
592        let after_tag = &remaining[start + open_pattern.len()..];
593
594        if let Some(close) = find_matching_brace(after_tag) {
595            result.push_str(after_tag[..close].trim());
596            remaining = &after_tag[close + 1..];
597        } else {
598            result.push_str(&remaining[start..start + open_pattern.len()]);
599            remaining = after_tag;
600        }
601    }
602    result.push_str(remaining);
603    result
604}
605
606/// Convert HTML tags to plain text equivalents.
607fn convert_html_tags(text: &str) -> String {
608    let mut result = text.to_string();
609
610    // <p> and </p> -> newline
611    result = result.replace("<p>", "\n");
612    result = result.replace("</p>", "");
613    result = result.replace("<P>", "\n");
614
615    // <br> and <br/> -> newline
616    result = result.replace("<br>", "\n");
617    result = result.replace("<br/>", "\n");
618    result = result.replace("<br />", "\n");
619    result = result.replace("<BR>", "\n");
620
621    // <code>...</code> -> `...`
622    result = result.replace("<code>", "`");
623    result = result.replace("</code>", "`");
624    result = result.replace("<CODE>", "`");
625    result = result.replace("</CODE>", "`");
626
627    // <pre>...</pre> -> preserve content (already plain text in code blocks).
628    result = result.replace("<pre>", "");
629    result = result.replace("</pre>", "");
630    result = result.replace("<PRE>", "");
631    result = result.replace("</PRE>", "");
632
633    // <b>/<strong> -> keep content.
634    result = result.replace("<b>", "");
635    result = result.replace("</b>", "");
636    result = result.replace("<strong>", "");
637    result = result.replace("</strong>", "");
638    result = result.replace("<B>", "");
639    result = result.replace("</B>", "");
640
641    // <i>/<em> -> keep content.
642    result = result.replace("<i>", "");
643    result = result.replace("</i>", "");
644    result = result.replace("<em>", "");
645    result = result.replace("</em>", "");
646    result = result.replace("<I>", "");
647    result = result.replace("</I>", "");
648
649    // <ul>/<ol>/<li> -> basic list formatting.
650    result = result.replace("<ul>", "");
651    result = result.replace("</ul>", "");
652    result = result.replace("<ol>", "");
653    result = result.replace("</ol>", "");
654    result = result.replace("<li>", "\n- ");
655    result = result.replace("</li>", "");
656
657    // <tt> (teletype) -> backticks.
658    result = result.replace("<tt>", "`");
659    result = result.replace("</tt>", "`");
660
661    // Strip remaining HTML tags.
662    strip_remaining_html_tags(&result)
663}
664
/// Strip any remaining HTML tags from the text.
///
/// Content inside backticks is preserved verbatim (e.g., `` `Map<K, V>` ``
/// should not have `<K, V>` stripped as an HTML tag).
///
/// Outside backticks a `<` starts a tag only when it is followed by an ASCII
/// letter, `/` or `!` and a `>` closes it before any further `<` or backtick.
/// Every other `<` is ordinary text and is kept, so a comparison such as
/// `x < 5` or an unterminated `<b` no longer swallows the rest of the comment.
fn strip_remaining_html_tags(text: &str) -> String {
    /// If `after_lt` (the text right after a `<`) continues a tag, return the
    /// number of bytes up to and including its closing `>`.
    fn tag_len(after_lt: &str) -> Option<usize> {
        let first = after_lt.chars().next()?;
        if !(first.is_ascii_alphabetic() || first == '/' || first == '!') {
            return None;
        }
        let stop = after_lt.find(|c: char| matches!(c, '<' | '>' | '`'))?;
        after_lt[stop..].starts_with('>').then_some(stop + 1)
    }

    let mut result = String::with_capacity(text.len());
    let mut in_backtick = false;
    let mut rest = text;

    while let Some(ch) = rest.chars().next() {
        let tail = &rest[ch.len_utf8()..];
        rest = match ch {
            '`' => {
                in_backtick = !in_backtick;
                result.push(ch);
                tail
            }
            '<' if !in_backtick => match tag_len(tail) {
                // A real tag: drop it entirely.
                Some(len) => &tail[len..],
                // Not a tag: keep the character as text.
                None => {
                    result.push(ch);
                    tail
                }
            },
            _ => {
                result.push(ch);
                tail
            }
        };
    }

    result
}
693
/// Convert Javadoc block tags (@param, @return, @throws, etc.) to plain text.
///
/// Each line is inspected after trimming. If its first word is a known block
/// tag followed by text, the line is rewritten:
/// - `@param name text` -> `param name: text` (`param name` without text)
/// - `@throws`/`@exception Type text` -> `throws Type: text` (`throws Type` without text)
/// - `@return`/`@returns text` -> `returns: text`
/// - `@see`, `@since`, `@version`, `@author` -> `see: ...`, `since: ...`, etc.
/// - `@deprecated text` -> `DEPRECATED: text`; a bare `@deprecated` ->
///   `DEPRECATED: (deprecated)`
///
/// The tag, name and description may be separated by any amount of
/// whitespace, so column-aligned tags are handled. All other lines, including
/// other bare tags and unknown tags, are passed through unchanged.
fn convert_block_tags(text: &str) -> String {
    /// Format a tag whose first argument is a name (`@param`, `@throws`).
    fn named(label: &str, rest: &str) -> String {
        match rest.split_once(char::is_whitespace) {
            Some((name, description)) => {
                format!("{label} {name}: {}", description.trim_start())
            }
            None => format!("{label} {rest}"),
        }
    }

    let converted: Vec<String> = text
        .lines()
        .map(|line| {
            let trimmed = line.trim();
            // `rest` is empty exactly when the line is a single word.
            let (tag, rest) = match trimmed.split_once(char::is_whitespace) {
                Some((tag, rest)) => (tag, rest.trim_start()),
                None => (trimmed, ""),
            };

            match tag {
                "@deprecated" if rest.is_empty() => "DEPRECATED: (deprecated)".to_string(),
                "@deprecated" => format!("DEPRECATED: {rest}"),
                // Any other tag without text has nothing to format.
                _ if rest.is_empty() => line.to_string(),
                "@param" => named("param", rest),
                "@throws" | "@exception" => named("throws", rest),
                "@return" | "@returns" => format!("returns: {rest}"),
                "@see" => format!("see: {rest}"),
                "@since" => format!("since: {rest}"),
                "@version" => format!("version: {rest}"),
                "@author" => format!("author: {rest}"),
                _ => line.to_string(),
            }
        })
        .collect();

    converted.join("\n")
}
746
/// Collapse every run of blank lines into a single blank line.
///
/// A line counts as blank when it is empty after trimming. Non-blank lines are
/// copied unchanged (indentation included) and separated by `\n`. The first
/// line of each blank run contributes one extra `\n`; the rest of the run is
/// dropped.
fn normalize_blank_lines(text: &str) -> String {
    let mut output = String::with_capacity(text.len());
    let mut in_blank_run = false;

    for line in text.lines() {
        if line.trim().is_empty() {
            // Only the first blank line of a run leaves a trace.
            if !in_blank_run {
                output.push('\n');
            }
            in_blank_run = true;
            continue;
        }

        // Separate from whatever has been written so far.
        if !output.is_empty() {
            output.push('\n');
        }
        output.push_str(line);
        in_blank_run = false;
    }

    output
}
773
774#[cfg(test)]
775mod tests {
776    use std::io::Write;
777
778    use super::*;
779
780    /// Helper: create a ZIP file in memory containing the given entries.
781    fn create_source_jar(entries: &[(&str, &str)]) -> Vec<u8> {
782        let mut buf = Vec::new();
783        {
784            let mut writer = zip::ZipWriter::new(std::io::Cursor::new(&mut buf));
785            let options = zip::write::SimpleFileOptions::default()
786                .compression_method(zip::CompressionMethod::Stored);
787            for (path, content) in entries {
788                writer.start_file((*path).to_string(), options).unwrap();
789                writer.write_all(content.as_bytes()).unwrap();
790            }
791            writer.finish().unwrap();
792        }
793        buf
794    }
795
796    /// Helper: write a source JAR to a temp directory and create classpath entries.
797    fn setup_provider(
798        dir: &tempfile::TempDir,
799        entries: &[(&str, &str)],
800    ) -> (SourceJarProvider, PathBuf) {
801        let source_jar_path = dir.path().join("lib-sources.jar");
802        let jar_data = create_source_jar(entries);
803        std::fs::write(&source_jar_path, jar_data).unwrap();
804
805        let binary_jar_path = dir.path().join("lib.jar");
806        std::fs::write(&binary_jar_path, b"fake binary jar").unwrap();
807
808        let classpath_entries = vec![ClasspathEntry {
809            jar_path: binary_jar_path.clone(),
810            coordinates: Some("com.example:lib:1.0".to_string()),
811            is_direct: true,
812            source_jar: Some(source_jar_path),
813        }];
814
815        let provider = SourceJarProvider::new(&classpath_entries, 100);
816        (provider, binary_jar_path)
817    }
818
819    #[test]
820    fn test_extract_javadoc_from_simple_class() {
821        let dir = tempfile::tempdir().unwrap();
822        let source = r"package com.example;
823
824/**
825 * A simple utility class for string operations.
826 *
827 * This class provides common string manipulation methods.
828 */
829public class StringUtils {
830    public static String trim(String s) {
831        return s.trim();
832    }
833}
834";
835        let (provider, jar_path) =
836            setup_provider(&dir, &[("com/example/StringUtils.java", source)]);
837
838        let docs = provider
839            .get_docs("com.example.StringUtils", &jar_path)
840            .unwrap();
841        assert!(docs.contains("A simple utility class for string operations"));
842        assert!(docs.contains("common string manipulation methods"));
843    }
844
845    #[test]
846    fn test_extract_method_level_javadoc() {
847        let dir = tempfile::tempdir().unwrap();
848        let source = r"package com.example;
849
850/**
851 * String utilities.
852 */
853public class StringUtils {
854    /**
855     * Trims whitespace from both ends of a string.
856     *
857     * @param s the input string
858     * @return the trimmed string
859     */
860    public static String trim(String s) {
861        return s.trim();
862    }
863}
864";
865        let (provider, jar_path) =
866            setup_provider(&dir, &[("com/example/StringUtils.java", source)]);
867
868        let docs = provider
869            .get_member_docs("com.example.StringUtils", "trim", &jar_path)
870            .unwrap();
871        assert!(docs.contains("Trims whitespace from both ends"));
872        assert!(docs.contains("param s:"));
873        assert!(docs.contains("returns:"));
874    }
875
876    #[test]
877    fn test_missing_source_jar_returns_none() {
878        let entries = vec![ClasspathEntry {
879            jar_path: PathBuf::from("/nonexistent/lib.jar"),
880            coordinates: None,
881            is_direct: true,
882            source_jar: None,
883        }];
884        let provider = SourceJarProvider::new(&entries, 100);
885        let result = provider.get_docs("com.example.Foo", Path::new("/nonexistent/lib.jar"));
886        assert!(result.is_none());
887    }
888
889    #[test]
890    fn test_class_not_found_in_source_jar_returns_none() {
891        let dir = tempfile::tempdir().unwrap();
892        let source = "package com.example;\npublic class Other {}\n";
893        let (provider, jar_path) = setup_provider(&dir, &[("com/example/Other.java", source)]);
894
895        let result = provider.get_docs("com.example.Missing", &jar_path);
896        assert!(result.is_none());
897    }
898
899    #[test]
900    fn test_html_to_plain_text_conversion() {
901        let input = "First paragraph.<p>Second paragraph.\n<code>some code</code> and text.";
902        let result = convert_html_to_plain_text(input);
903        assert!(result.contains("First paragraph."));
904        assert!(result.contains("Second paragraph."));
905        assert!(result.contains("`some code`"));
906    }
907
908    #[test]
909    fn test_code_and_link_tag_conversion() {
910        let input = "Use {@code Map<K, V>} for mappings. See {@link HashMap#get the get method}.";
911        let result = convert_html_to_plain_text(input);
912        assert!(result.contains("`Map<K, V>`"));
913        assert!(result.contains("the get method"));
914    }
915
916    #[test]
917    fn test_param_return_throws_conversion() {
918        let input = "@param name the user name\n@return the greeting\n@throws IllegalArgumentException if name is null";
919        let result = convert_block_tags(input);
920        assert!(result.contains("param name: the user name"));
921        assert!(result.contains("returns: the greeting"));
922        assert!(result.contains("throws IllegalArgumentException: if name is null"));
923    }
924
925    #[test]
926    fn test_cache_hit_returns_same_result() {
927        let dir = tempfile::tempdir().unwrap();
928        let source = r"package com.example;
929/**
930 * Cached class docs.
931 */
932public class Cached {
933}
934";
935        let (provider, jar_path) = setup_provider(&dir, &[("com/example/Cached.java", source)]);
936
937        let first = provider.get_docs("com.example.Cached", &jar_path);
938        let second = provider.get_docs("com.example.Cached", &jar_path);
939        assert_eq!(first, second);
940        assert!(first.is_some());
941
942        // Verify it's actually in the cache.
943        let cache = provider.cache.lock().unwrap();
944        assert!(!cache.is_empty());
945    }
946
947    #[test]
948    fn test_lru_eviction() {
949        let dir = tempfile::tempdir().unwrap();
950        let mut jar_entries = Vec::new();
951
952        // Create 5 classes.
953        for i in 0..5 {
954            let name = format!("Class{i}");
955            let path = format!("com/example/{name}.java");
956            let content =
957                format!("package com.example;\n/** Doc for {name}. */\npublic class {name} {{}}\n");
958            jar_entries.push((path, content));
959        }
960
961        let jar_entry_refs: Vec<(&str, &str)> = jar_entries
962            .iter()
963            .map(|(p, c)| (p.as_str(), c.as_str()))
964            .collect();
965
966        let source_jar_path = dir.path().join("lib-sources.jar");
967        let jar_data = create_source_jar(&jar_entry_refs);
968        std::fs::write(&source_jar_path, jar_data).unwrap();
969
970        let binary_jar_path = dir.path().join("lib.jar");
971        std::fs::write(&binary_jar_path, b"fake").unwrap();
972
973        let entries = vec![ClasspathEntry {
974            jar_path: binary_jar_path.clone(),
975            coordinates: None,
976            is_direct: true,
977            source_jar: Some(source_jar_path),
978        }];
979
980        // Cache size of 3 means older entries get evicted.
981        let provider = SourceJarProvider::new(&entries, 3);
982
983        // Access 5 classes with cache size 3.
984        for i in 0..5 {
985            let fqn = format!("com.example.Class{i}");
986            let result = provider.get_docs(&fqn, &binary_jar_path);
987            assert!(result.is_some(), "Class{i} should have docs");
988        }
989
990        // Cache should have at most 3 entries.
991        let cache = provider.cache.lock().unwrap();
992        assert!(cache.len() <= 3, "LRU cache should evict to capacity");
993    }
994
995    #[test]
996    fn test_kotlin_extension_fallback() {
997        let dir = tempfile::tempdir().unwrap();
998        let source = r"package com.example
999
1000/**
1001 * A Kotlin data class.
1002 */
1003data class UserProfile(val name: String, val age: Int)
1004";
1005        let (provider, jar_path) = setup_provider(&dir, &[("com/example/UserProfile.kt", source)]);
1006
1007        let docs = provider
1008            .get_docs("com.example.UserProfile", &jar_path)
1009            .unwrap();
1010        assert!(docs.contains("Kotlin data class"));
1011    }
1012
1013    #[test]
1014    fn test_scala_extension_fallback() {
1015        let dir = tempfile::tempdir().unwrap();
1016        let source = r"package com.example
1017
1018/**
1019 * A Scala case class for configuration.
1020 */
1021case class AppConfig(host: String, port: Int)
1022";
1023        // Scala uses the same /** */ doc comments.
1024        let (provider, jar_path) = setup_provider(&dir, &[("com/example/AppConfig.scala", source)]);
1025
1026        let docs = provider
1027            .get_docs("com.example.AppConfig", &jar_path)
1028            .unwrap();
1029        assert!(docs.contains("Scala case class for configuration"));
1030    }
1031
1032    #[test]
1033    fn test_multiline_doc_comment() {
1034        let dir = tempfile::tempdir().unwrap();
1035        let source = r#"package com.example;
1036
1037/**
1038 * An immutable, ordered collection of elements.
1039 *
1040 * <p>This is the second paragraph with more details.
1041 *
1042 * <p>Usage example:
1043 * <pre>
1044 *   ImmutableList<String> list = ImmutableList.of("a", "b", "c");
1045 * </pre>
1046 *
1047 * @param <E> the element type
1048 * @since 2.0
1049 * @see java.util.List
1050 * @author Google
1051 */
1052public class ImmutableList<E> {
1053}
1054"#;
1055        let (provider, jar_path) =
1056            setup_provider(&dir, &[("com/example/ImmutableList.java", source)]);
1057
1058        let docs = provider
1059            .get_docs("com.example.ImmutableList", &jar_path)
1060            .unwrap();
1061        assert!(docs.contains("immutable, ordered collection"));
1062        assert!(docs.contains("second paragraph"));
1063        assert!(docs.contains("since: 2.0"));
1064        assert!(docs.contains("see: java.util.List"));
1065        assert!(docs.contains("author: Google"));
1066    }
1067
1068    // --- Unit tests for internal conversion functions ---
1069
1070    #[test]
1071    fn test_clean_doc_comment_strips_delimiters() {
1072        let raw = "/** Simple doc. */";
1073        let result = clean_doc_comment(raw);
1074        assert_eq!(result.trim(), "Simple doc.");
1075    }
1076
1077    #[test]
1078    fn test_clean_doc_comment_strips_leading_stars() {
1079        let raw = "/**\n * Line one.\n * Line two.\n */";
1080        let result = clean_doc_comment(raw);
1081        assert!(result.contains("Line one."));
1082        assert!(result.contains("Line two."));
1083        assert!(!result.contains("* "));
1084    }
1085
1086    #[test]
1087    fn test_replace_inline_code_tag() {
1088        assert_eq!(replace_inline_tag("Use {@code foo}.", "code"), "Use `foo`.");
1089    }
1090
1091    #[test]
1092    fn test_replace_inline_code_tag_with_generics() {
1093        let result = replace_inline_tag("A {@code Map<K, V>} instance.", "code");
1094        assert_eq!(result, "A `Map<K, V>` instance.");
1095    }
1096
1097    #[test]
1098    fn test_replace_link_tag_simple() {
1099        let result = replace_link_tags("See {@link String}.");
1100        assert_eq!(result, "See String.");
1101    }
1102
1103    #[test]
1104    fn test_replace_link_tag_with_member() {
1105        let result = replace_link_tags("See {@link String#length}.");
1106        assert_eq!(result, "See String.length.");
1107    }
1108
1109    #[test]
1110    fn test_replace_link_tag_with_label() {
1111        let result = replace_link_tags("See {@link String#length the length method}.");
1112        assert_eq!(result, "See the length method.");
1113    }
1114
1115    #[test]
1116    fn test_strip_remaining_html_tags() {
1117        let input = "Hello <b>world</b> and <unknown-tag>foo</unknown-tag>.";
1118        let result = strip_remaining_html_tags(input);
1119        assert_eq!(result, "Hello world and foo.");
1120    }
1121
1122    #[test]
1123    fn test_normalize_blank_lines() {
1124        let input = "Line 1\n\n\n\nLine 2\n\nLine 3";
1125        let result = normalize_blank_lines(input);
1126        // Should have at most one blank line between content lines.
1127        assert!(!result.contains("\n\n\n"));
1128    }
1129
1130    #[test]
1131    fn test_find_matching_brace_simple() {
1132        assert_eq!(find_matching_brace("text}rest"), Some(4));
1133    }
1134
1135    #[test]
1136    fn test_find_matching_brace_nested() {
1137        assert_eq!(find_matching_brace("a{b}c}rest"), Some(5));
1138    }
1139
1140    #[test]
1141    fn test_is_only_annotations_modifiers_and_types() {
1142        assert!(is_only_annotations_modifiers_and_types("@Override public"));
1143        assert!(is_only_annotations_modifiers_and_types("@Deprecated"));
1144        assert!(is_only_annotations_modifiers_and_types(
1145            "public static final"
1146        ));
1147        assert!(!is_only_annotations_modifiers_and_types("int x = 5;"));
1148    }
1149
1150    #[test]
1151    fn test_deprecated_tag_conversion() {
1152        let input = "@deprecated Use newMethod() instead.";
1153        let result = convert_block_tags(input);
1154        assert!(result.contains("DEPRECATED: Use newMethod() instead."));
1155    }
1156
1157    #[test]
1158    fn test_exception_tag_conversion() {
1159        let input = "@exception IOException if I/O fails";
1160        let result = convert_block_tags(input);
1161        assert!(result.contains("throws IOException: if I/O fails"));
1162    }
1163
1164    #[test]
1165    fn test_literal_tag_conversion() {
1166        let input = "Use {@literal <T>} for generics.";
1167        let result = convert_html_to_plain_text(input);
1168        assert!(result.contains("<T>"));
1169    }
1170
1171    #[test]
1172    fn test_class_with_annotations() {
1173        let dir = tempfile::tempdir().unwrap();
1174        let source = r#"package com.example;
1175
1176/**
1177 * Deprecated utility class.
1178 */
1179@Deprecated
1180@SuppressWarnings("unused")
1181public final class OldUtils {
1182}
1183"#;
1184        let (provider, jar_path) = setup_provider(&dir, &[("com/example/OldUtils.java", source)]);
1185
1186        let docs = provider
1187            .get_docs("com.example.OldUtils", &jar_path)
1188            .unwrap();
1189        assert!(docs.contains("Deprecated utility class"));
1190    }
1191
1192    #[test]
1193    fn test_interface_doc_extraction() {
1194        let dir = tempfile::tempdir().unwrap();
1195        let source = r"package com.example;
1196
1197/**
1198 * A service interface for user management.
1199 */
1200public interface UserService {
1201    void createUser(String name);
1202}
1203";
1204        let (provider, jar_path) =
1205            setup_provider(&dir, &[("com/example/UserService.java", source)]);
1206
1207        let docs = provider
1208            .get_docs("com.example.UserService", &jar_path)
1209            .unwrap();
1210        assert!(docs.contains("service interface for user management"));
1211    }
1212
1213    #[test]
1214    fn test_enum_doc_extraction() {
1215        let dir = tempfile::tempdir().unwrap();
1216        let source = r"package com.example;
1217
1218/**
1219 * Represents the status of an order.
1220 */
1221public enum OrderStatus {
1222    PENDING, SHIPPED, DELIVERED
1223}
1224";
1225        let (provider, jar_path) =
1226            setup_provider(&dir, &[("com/example/OrderStatus.java", source)]);
1227
1228        let docs = provider
1229            .get_docs("com.example.OrderStatus", &jar_path)
1230            .unwrap();
1231        assert!(docs.contains("status of an order"));
1232    }
1233
1234    #[test]
1235    fn test_no_doc_comment_returns_none() {
1236        let dir = tempfile::tempdir().unwrap();
1237        let source = r"package com.example;
1238
1239public class NoDoc {
1240}
1241";
1242        let (provider, jar_path) = setup_provider(&dir, &[("com/example/NoDoc.java", source)]);
1243
1244        let result = provider.get_docs("com.example.NoDoc", &jar_path);
1245        assert!(result.is_none());
1246    }
1247
1248    #[test]
1249    fn test_member_not_found_returns_none() {
1250        let dir = tempfile::tempdir().unwrap();
1251        let source = r"package com.example;
1252
1253/**
1254 * A class.
1255 */
1256public class MyClass {
1257    public void existingMethod() {}
1258}
1259";
1260        let (provider, jar_path) = setup_provider(&dir, &[("com/example/MyClass.java", source)]);
1261
1262        let result = provider.get_member_docs("com.example.MyClass", "nonExistent", &jar_path);
1263        assert!(result.is_none());
1264    }
1265
1266    #[test]
1267    fn test_cache_none_for_missing_docs() {
1268        let dir = tempfile::tempdir().unwrap();
1269        let source = "package com.example;\npublic class NoDoc {}\n";
1270        let (provider, jar_path) = setup_provider(&dir, &[("com/example/NoDoc.java", source)]);
1271
1272        // First call caches None.
1273        let first = provider.get_docs("com.example.NoDoc", &jar_path);
1274        assert!(first.is_none());
1275
1276        // Second call should hit cache (also None).
1277        let second = provider.get_docs("com.example.NoDoc", &jar_path);
1278        assert!(second.is_none());
1279
1280        // Verify it's cached.
1281        let cache = provider.cache.lock().unwrap();
1282        let key = format!("{}::com.example.NoDoc", jar_path.display());
1283        assert!(cache.peek(&key).is_some());
1284    }
1285
1286    #[test]
1287    fn test_with_defaults_constructor() {
1288        let entries = vec![];
1289        let provider = SourceJarProvider::with_defaults(&entries);
1290        // Should not panic and should have empty map.
1291        assert!(provider.source_jar_map.is_empty());
1292    }
1293
1294    #[test]
1295    #[allow(clippy::similar_names)] // Domain naming is intentional
1296    fn test_multiple_jars_mapping() {
1297        let dir = tempfile::tempdir().unwrap();
1298
1299        // Create two source JARs with different classes.
1300        let source_a = "package com.a;\n/** Class A. */\npublic class A {}\n";
1301        let source_b = "package com.b;\n/** Class B. */\npublic class B {}\n";
1302
1303        let jar_a_src = dir.path().join("a-sources.jar");
1304        let jar_b_src = dir.path().join("b-sources.jar");
1305        let jar_a_bin = dir.path().join("a.jar");
1306        let jar_b_bin = dir.path().join("b.jar");
1307
1308        std::fs::write(&jar_a_src, create_source_jar(&[("com/a/A.java", source_a)])).unwrap();
1309        std::fs::write(&jar_b_src, create_source_jar(&[("com/b/B.java", source_b)])).unwrap();
1310        std::fs::write(&jar_a_bin, b"fake").unwrap();
1311        std::fs::write(&jar_b_bin, b"fake").unwrap();
1312
1313        let entries = vec![
1314            ClasspathEntry {
1315                jar_path: jar_a_bin.clone(),
1316                coordinates: None,
1317                is_direct: true,
1318                source_jar: Some(jar_a_src),
1319            },
1320            ClasspathEntry {
1321                jar_path: jar_b_bin.clone(),
1322                coordinates: None,
1323                is_direct: true,
1324                source_jar: Some(jar_b_src),
1325            },
1326        ];
1327
1328        let provider = SourceJarProvider::new(&entries, 100);
1329
1330        let docs_a = provider.get_docs("com.a.A", &jar_a_bin).unwrap();
1331        assert!(docs_a.contains("Class A"));
1332
1333        let docs_b = provider.get_docs("com.b.B", &jar_b_bin).unwrap();
1334        assert!(docs_b.contains("Class B"));
1335
1336        // Cross-lookup should fail.
1337        assert!(provider.get_docs("com.a.A", &jar_b_bin).is_none());
1338    }
1339
1340    #[test]
1341    fn test_field_member_docs() {
1342        let dir = tempfile::tempdir().unwrap();
1343        let source = r"package com.example;
1344
1345public class Config {
1346    /**
1347     * The maximum number of retries.
1348     */
1349    public static final int MAX_RETRIES = 3;
1350
1351    /**
1352     * The default timeout in milliseconds.
1353     */
1354    private long timeout = 5000;
1355}
1356";
1357        let (provider, jar_path) = setup_provider(&dir, &[("com/example/Config.java", source)]);
1358
1359        let docs = provider
1360            .get_member_docs("com.example.Config", "MAX_RETRIES", &jar_path)
1361            .unwrap();
1362        assert!(docs.contains("maximum number of retries"));
1363
1364        let docs = provider
1365            .get_member_docs("com.example.Config", "timeout", &jar_path)
1366            .unwrap();
1367        assert!(docs.contains("default timeout in milliseconds"));
1368    }
1369}