1use crate::client::{Context7Client, SearchResult};
2use crate::rag::embeddings::EmbeddingModel;
3use anyhow::Result;
4use fuzzy_matcher::skim::SkimMatcherV2;
5use fuzzy_matcher::FuzzyMatcher;
6use std::sync::Arc;
7
/// A search query decomposed into exact-match phrases and loose terms.
#[derive(Debug, Clone)]
struct ParsedQuery {
    /// Phrases that appeared inside double quotes; matched verbatim with higher weight.
    quoted_phrases: Vec<String>,
    /// Unquoted space-separated terms; matched individually (fuzzy matching allowed).
    individual_terms: Vec<String>,
    /// The raw query string exactly as the caller supplied it.
    original_query: String,
}
14
/// Multi-pass documentation search over a Context7 backend, combining
/// keyword/fuzzy scoring with optional embedding-based semantic scoring.
pub struct SearchEngine {
    /// Backend client used to resolve libraries and fetch documentation.
    client: Context7Client,
    /// Fuzzy matcher used for keyword scoring and near-duplicate title detection.
    matcher: SkimMatcherV2,
    /// Optional shared embedding model; when present, relevance scoring is a
    /// hybrid of embedding similarity and keyword matching.
    embedding_model: Option<Arc<EmbeddingModel>>,
}
20
21impl SearchEngine {
22 pub fn new(client: Context7Client) -> Self {
24 Self {
25 client,
26 matcher: SkimMatcherV2::default(),
27 embedding_model: None,
28 }
29 }
30
31 pub fn with_shared_embeddings(
34 client: Context7Client,
35 embedding_model: Arc<EmbeddingModel>,
36 ) -> Self {
37 log::info!("🔄 Reusing shared embedding model for Context7 search");
38 Self {
39 client,
40 matcher: SkimMatcherV2::default(),
41 embedding_model: Some(embedding_model),
42 }
43 }
44
45 pub fn has_embeddings(&self) -> bool {
47 self.embedding_model.is_some()
48 }
49
    /// Resolves `library` against Context7, runs a multi-pass search for
    /// `query`, and returns `(results, library_title, library_id)`.
    ///
    /// `library` may carry a version suffix (`name@version`); the version is
    /// currently ignored during resolution. When `limit` is `Some(n)` with
    /// `n > 0`, at most `n` results are returned. As a side effect each
    /// result's excerpt is written to the "snippets" cache, best-effort:
    /// cache construction and write errors are deliberately ignored.
    pub async fn search(
        &self,
        library: &str,
        query: &str,
        limit: Option<usize>,
    ) -> Result<(Vec<SearchResult>, String, String)> {
        let (lib_name, _version) = parse_library_spec(library);

        let (library_id, library_title) = self.client.resolve_library(lib_name).await?;

        let parsed_query = self.parse_search_query(query);

        let mut results = self
            .multi_pass_search(&library_id, library, &parsed_query)
            .await?;

        // Apply the caller's cap after ranking so the best results survive.
        if let Some(limit) = limit {
            if limit > 0 && results.len() > limit {
                results.truncate(limit);
            }
        }

        // Best-effort snippet caching keyed by the original library spec plus
        // the result id; failures are intentionally swallowed.
        if let Ok(cache_manager) = crate::cache::CacheManager::new() {
            for result in &results {
                let snippet_cache_key = format!("{}_{}", library, &result.id);
                let _ = cache_manager
                    .set("snippets", &snippet_cache_key, &result.excerpt)
                    .await;
            }
        }

        Ok((results, library_title, library_id))
    }
90
    /// Runs up to two backend search passes and merges the ranked results.
    ///
    /// Pass 1 (phrases) runs only when the query has quoted phrases, using a
    /// phrase-prioritized query string. Pass 2 (terms) runs when there were
    /// no quoted phrases, or when pass 1 produced fewer than 5 results. Note
    /// that `library_id` is the resolved backend identifier while `library`
    /// is the caller's original spec, used only to label results.
    async fn multi_pass_search(
        &self,
        library_id: &str,
        library: &str,
        parsed_query: &ParsedQuery,
    ) -> Result<Vec<SearchResult>> {
        let mut all_results = Vec::new();

        // Pass 1: exact-phrase-priority search.
        if !parsed_query.quoted_phrases.is_empty() {
            let phrase_query = self.build_phrase_priority_query(parsed_query);
            let docs = self
                .client
                .get_documentation(library_id, Some(&phrase_query))
                .await?;

            let phrase_results = self
                .parse_documentation_into_results(
                    library,
                    &parsed_query.original_query,
                    &docs,
                    parsed_query,
                    true, // phrase pass: exact phrase hits weighted higher
                )
                .await?;

            all_results.extend(phrase_results);
        }

        // Pass 2: individual-term search, skipped only when the phrase pass
        // already produced enough (>= 5) results.
        let should_do_term_search = parsed_query.quoted_phrases.is_empty() || all_results.len() < 5;

        if should_do_term_search && !parsed_query.individual_terms.is_empty() {
            let term_query = parsed_query.individual_terms.join(" ");
            let docs = self
                .client
                .get_documentation(library_id, Some(&term_query))
                .await?;

            let term_results = self
                .parse_documentation_into_results(
                    library,
                    &parsed_query.original_query,
                    &docs,
                    parsed_query,
                    false, // term pass: standard weighting
                )
                .await?;

            all_results.extend(term_results);
        }

        // Dedupe near-identical titles, boost phrase hits, and rank.
        let merged_results = self.merge_and_rank_results(all_results, parsed_query);

        Ok(merged_results)
    }
148
149 fn build_phrase_priority_query(&self, parsed_query: &ParsedQuery) -> String {
150 let mut query_parts = Vec::new();
151
152 for phrase in &parsed_query.quoted_phrases {
154 query_parts.push(format!("\"{}\"", phrase));
155 }
156
157 query_parts.extend(parsed_query.individual_terms.clone());
159
160 query_parts.join(" ")
161 }
162
163 fn parse_search_query(&self, query: &str) -> ParsedQuery {
164 let mut quoted_phrases = Vec::new();
165 let mut individual_terms = Vec::new();
166 let mut current_term = String::new();
167 let mut in_quotes = false;
168
169 for ch in query.chars() {
170 match ch {
171 '"' => {
172 in_quotes = !in_quotes;
173 if !in_quotes && !current_term.is_empty() {
174 quoted_phrases.push(current_term.clone());
176 current_term.clear();
177 }
178 }
179 ' ' if !in_quotes => {
180 if !current_term.is_empty() {
181 individual_terms.push(current_term.clone());
183 current_term.clear();
184 }
185 }
186 _ => {
187 current_term.push(ch);
188 }
189 }
190 }
191
192 if !current_term.is_empty() {
194 if in_quotes {
195 quoted_phrases.push(current_term);
197 } else {
198 individual_terms.push(current_term);
199 }
200 }
201
202 if quoted_phrases.is_empty() && individual_terms.is_empty() {
204 individual_terms.push(query.to_string());
205 }
206
207 ParsedQuery {
208 quoted_phrases,
209 individual_terms,
210 original_query: query.to_string(),
211 }
212 }
213
214 async fn parse_documentation_into_results(
215 &self,
216 library: &str,
217 original_query: &str,
218 docs: &str,
219 parsed_query: &ParsedQuery,
220 is_phrase_search: bool,
221 ) -> Result<Vec<SearchResult>> {
222 let mut results = Vec::new();
223
224 let sections = self.split_into_sections(docs);
226
227 let relevance_scores = if self.embedding_model.is_some() {
229 self.calculate_embedding_relevance_batch(
230 §ions,
231 &parsed_query.original_query,
232 parsed_query,
233 is_phrase_search,
234 )
235 .await
236 .unwrap_or_else(|e| {
237 log::warn!(
238 "Batch embedding failed, falling back to keyword matching: {}",
239 e
240 );
241 sections
243 .iter()
244 .map(|section| {
245 self.calculate_enhanced_section_relevance(
246 section,
247 parsed_query,
248 is_phrase_search,
249 )
250 })
251 .collect()
252 })
253 } else {
254 sections
256 .iter()
257 .map(|section| {
258 self.calculate_enhanced_section_relevance(
259 section,
260 parsed_query,
261 is_phrase_search,
262 )
263 })
264 .collect()
265 };
266
267 for (idx, (section, &relevance)) in sections.iter().zip(relevance_scores.iter()).enumerate()
268 {
269 let relevance_threshold = if sections.len() > 1 { 0.05 } else { 0.1 };
271
272 if relevance > relevance_threshold {
273 let title = self.extract_section_title(section).unwrap_or_else(|| {
275 let first_line = section.lines().next().unwrap_or("");
277 let title_candidate = if first_line.len() > 60 {
278 format!("{}...", &first_line[..57])
279 } else if first_line.is_empty() {
280 format!("{} - Result {}", original_query, idx + 1)
281 } else {
282 first_line.to_string()
283 };
284 format!("{} ({})", title_candidate, library)
285 });
286
287 let excerpt = self.extract_section_excerpt(section);
288
289 results.push(SearchResult {
290 id: format!("{}-doc-{}", library, idx + 1),
291 library: library.to_string(),
292 title,
293 excerpt,
294 url: None,
295 relevance_score: relevance,
296 });
297 }
298 }
299
300 results.sort_by(|a, b| b.relevance_score.partial_cmp(&a.relevance_score).unwrap());
302
303 if results.is_empty() && !sections.is_empty() {
305 for (idx, section) in sections.iter().enumerate().take(10) {
306 let title = self.extract_section_title(section).unwrap_or_else(|| {
308 let lines: Vec<&str> = section.lines().take(3).collect();
310 let mut title_candidate = String::new();
311
312 for line in &lines {
314 let trimmed = line.trim();
315 if !trimmed.is_empty() && trimmed.len() > 10 {
316 title_candidate = if trimmed.len() > 60 {
317 format!("{}...", &trimmed[..57])
318 } else {
319 trimmed.to_string()
320 };
321 break;
322 }
323 }
324
325 if title_candidate.is_empty() {
326 format!("{} - Section {}", original_query, idx + 1)
327 } else {
328 title_candidate
329 }
330 });
331
332 let excerpt = self.create_unique_excerpt(section, idx);
334
335 results.push(SearchResult {
336 id: format!("doc-{}", idx + 1),
337 library: library.to_string(),
338 title,
339 excerpt,
340 url: None,
341 relevance_score: 0.5, });
343 }
344 }
345
346 Ok(results)
347 }
348
349 fn merge_and_rank_results(
350 &self,
351 mut all_results: Vec<SearchResult>,
352 parsed_query: &ParsedQuery,
353 ) -> Vec<SearchResult> {
354 all_results.sort_by(|a, b| b.relevance_score.partial_cmp(&a.relevance_score).unwrap());
356 all_results.dedup_by(|a, b| {
357 let similarity = self
359 .matcher
360 .fuzzy_match(&a.title.to_lowercase(), &b.title.to_lowercase());
361 similarity.unwrap_or(0) > 800 });
363
364 for result in all_results.iter_mut() {
366 if self.contains_quoted_phrases(&result.excerpt, &parsed_query.quoted_phrases) {
367 result.relevance_score *= 1.5; }
369 }
370
371 all_results.sort_by(|a, b| b.relevance_score.partial_cmp(&a.relevance_score).unwrap());
373 all_results
374 }
375
376 fn contains_quoted_phrases(&self, text: &str, phrases: &[String]) -> bool {
377 let text_lower = text.to_lowercase();
378 phrases
379 .iter()
380 .any(|phrase| text_lower.contains(&phrase.to_lowercase()))
381 }
382
    /// Keyword-based relevance score for one section, averaged over all
    /// query elements (phrases + terms).
    ///
    /// Per quoted phrase: 10.0 (phrase pass) or 5.0 (term pass) for an exact
    /// case-insensitive hit, plus 50% of that when the phrase also appears in
    /// the "TITLE: " line and 30% for the "DESCRIPTION: " line; when the
    /// exact phrase is absent, a word-proximity score is credited instead.
    /// Per individual term: 1.0 for a substring hit (+0.5 TITLE, +0.3
    /// DESCRIPTION), otherwise the fuzzy-match score divided by 1000.
    fn calculate_enhanced_section_relevance(
        &self,
        section: &str,
        parsed_query: &ParsedQuery,
        is_phrase_search: bool,
    ) -> f32 {
        let section_lower = section.to_lowercase();
        let mut total_score = 0.0;

        // Quoted phrases: exact containment with location bonuses.
        for phrase in &parsed_query.quoted_phrases {
            let phrase_lower = phrase.to_lowercase();

            if section_lower.contains(&phrase_lower) {
                let phrase_score = if is_phrase_search { 10.0 } else { 5.0 };
                total_score += phrase_score;

                if let Some(title_line) = section.lines().find(|line| line.starts_with("TITLE: ")) {
                    if title_line.to_lowercase().contains(&phrase_lower) {
                        total_score += phrase_score * 0.5;
                    }
                }

                if let Some(desc_line) = section
                    .lines()
                    .find(|line| line.starts_with("DESCRIPTION: "))
                {
                    if desc_line.to_lowercase().contains(&phrase_lower) {
                        total_score += phrase_score * 0.3;
                    }
                }
            } else {
                // No exact hit: credit partial word-by-word proximity.
                let proximity_score = self.calculate_phrase_proximity(section, phrase);
                total_score += proximity_score;
            }
        }

        // Individual terms: substring hit with location bonuses, else fuzzy.
        for term in &parsed_query.individual_terms {
            let term_lower = term.to_lowercase();

            if section_lower.contains(&term_lower) {
                total_score += 1.0;

                if let Some(title_line) = section.lines().find(|line| line.starts_with("TITLE: ")) {
                    if title_line.to_lowercase().contains(&term_lower) {
                        total_score += 0.5;
                    }
                }
                if let Some(desc_line) = section
                    .lines()
                    .find(|line| line.starts_with("DESCRIPTION: "))
                {
                    if desc_line.to_lowercase().contains(&term_lower) {
                        total_score += 0.3;
                    }
                }
            } else {
                // Fuzzy scores are typically in the hundreds; /1000 keeps the
                // contribution well below an exact hit.
                if let Some(score) = self.matcher.fuzzy_match(&section_lower, &term_lower) {
                    total_score += (score as f32) / 1000.0;
                }
            }
        }

        // Average so sections aren't favored merely because the query is long.
        let total_elements =
            parsed_query.quoted_phrases.len() + parsed_query.individual_terms.len();
        if total_elements > 0 {
            total_score / total_elements as f32
        } else {
            0.0
        }
    }
462
    /// Scores how closely the words of a multi-word phrase appear together in
    /// `section` when the exact phrase is absent.
    ///
    /// Slides a window of `words.len()` words over the section and fuzzy
    /// matches each window word against the corresponding phrase word (a word
    /// counts when its fuzzy score exceeds 700); the per-window score is
    /// scaled by the fraction of words found, times 2. Returns the best
    /// window's score, or 0.0 for single-word phrases.
    ///
    /// NOTE(review): the section is lowercased but the phrase words are not;
    /// SkimMatcherV2 is smart-case, so uppercase letters in the phrase may
    /// never match the lowercased text — confirm this asymmetry is intended.
    fn calculate_phrase_proximity(&self, section: &str, phrase: &str) -> f32 {
        let words: Vec<&str> = phrase.split_whitespace().collect();
        if words.len() < 2 {
            return 0.0;
        }

        let section_lower = section.to_lowercase();
        let mut max_proximity_score: f32 = 0.0;

        for window in section_lower
            .split_whitespace()
            .collect::<Vec<_>>()
            .windows(words.len())
        {
            let mut proximity_score = 0.0;
            let mut found_words = 0;

            for (i, &target_word) in words.iter().enumerate() {
                if let Some(fuzzy_score) = self.matcher.fuzzy_match(window[i], target_word) {
                    if fuzzy_score > 700 {
                        proximity_score += 1.0;
                        found_words += 1;
                    }
                }
            }

            if found_words > 0 {
                // Penalize windows that matched only part of the phrase.
                let proximity_multiplier = found_words as f32 / words.len() as f32;
                proximity_score = proximity_score * proximity_multiplier * 2.0;
                max_proximity_score = max_proximity_score.max(proximity_score);
            }
        }

        max_proximity_score
    }
500
    /// Scores all sections in one embedding batch, blended with keyword
    /// scoring: final = 0.7 * cosine(query, section) + 0.2 * keyword score
    /// (normalized by /5, capped at 1) + 0.1 * phrase bonus (normalized by
    /// /10, capped at 1). Sections missing an embedding fall back to pure
    /// keyword scoring.
    ///
    /// Errors when no embedding model is attached or an embedding call fails.
    async fn calculate_embedding_relevance_batch(
        &self,
        sections: &[String],
        query: &str,
        parsed_query: &ParsedQuery,
        is_phrase_search: bool,
    ) -> Result<Vec<f32>> {
        let embedding_model = self
            .embedding_model
            .as_ref()
            .ok_or_else(|| anyhow::anyhow!("Embedding model not available"))?;

        let query_embedding = embedding_model.embed_text(query).await?;

        // Compact each section (title/description/snippet) before embedding.
        let section_texts: Vec<String> = sections
            .iter()
            .map(|section| self.prepare_section_for_embedding(section))
            .collect();

        let section_text_refs: Vec<&str> = section_texts.iter().map(|s| s.as_str()).collect();
        let section_embeddings = embedding_model.embed_batch(&section_text_refs).await?;

        let mut scores = Vec::with_capacity(sections.len());

        for (i, section) in sections.iter().enumerate() {
            if let Some(section_embedding) = section_embeddings.get(i) {
                let embedding_score =
                    EmbeddingModel::cosine_similarity(&query_embedding, section_embedding);

                let keyword_score = self.calculate_enhanced_section_relevance(
                    section,
                    parsed_query,
                    is_phrase_search,
                );

                let phrase_bonus =
                    self.calculate_phrase_bonus(section, parsed_query, is_phrase_search);

                // Normalize the unbounded keyword/phrase scores into [0, 1]
                // before blending with cosine similarity.
                let normalized_keyword_score = (keyword_score / 5.0).min(1.0);
                let normalized_phrase_bonus = (phrase_bonus / 10.0).min(1.0);

                let final_score = (embedding_score * 0.7)
                    + (normalized_keyword_score * 0.2)
                    + (normalized_phrase_bonus * 0.1);

                scores.push(final_score);
            } else {
                // Batch returned fewer embeddings than sections; degrade to
                // keyword-only scoring for this one.
                log::warn!("Missing embedding for section {}, using keyword scoring", i);
                scores.push(self.calculate_enhanced_section_relevance(
                    section,
                    parsed_query,
                    is_phrase_search,
                ));
            }
        }

        // NOTE: with zero sections this logs NaN (0/0) — harmless, log-only.
        log::debug!(
            "Batch embedded {} sections with average score: {:.3}",
            sections.len(),
            scores.iter().sum::<f32>() / scores.len() as f32
        );

        Ok(scores)
    }
575
    /// Single-section variant of the batch scorer, using the same
    /// 0.7 / 0.2 / 0.1 embedding/keyword/phrase blend. Currently unused
    /// (the batch path embeds all sections in one call); kept for reference.
    #[allow(dead_code)]
    async fn calculate_embedding_section_relevance(
        &self,
        section: &str,
        query: &str,
        parsed_query: &ParsedQuery,
        is_phrase_search: bool,
    ) -> Result<f32> {
        let embedding_model = self
            .embedding_model
            .as_ref()
            .ok_or_else(|| anyhow::anyhow!("Embedding model not available"))?;

        let query_embedding = embedding_model.embed_text(query).await?;

        let section_text = self.prepare_section_for_embedding(section);
        let section_embedding = embedding_model.embed_text(&section_text).await?;

        let embedding_score =
            EmbeddingModel::cosine_similarity(&query_embedding, &section_embedding);

        let keyword_score =
            self.calculate_enhanced_section_relevance(section, parsed_query, is_phrase_search);

        let phrase_bonus = self.calculate_phrase_bonus(section, parsed_query, is_phrase_search);

        // Normalize unbounded scores into [0, 1] before blending.
        let normalized_keyword_score = (keyword_score / 5.0).min(1.0);
        let normalized_phrase_bonus = (phrase_bonus / 10.0).min(1.0);

        let final_score = (embedding_score * 0.7)
            + (normalized_keyword_score * 0.2)
            + (normalized_phrase_bonus * 0.1);

        log::debug!("Embedding hybrid scoring for section: Embedding={:.3}, Keywords={:.3}, Phrase={:.3}, Final={:.3}",
            embedding_score, normalized_keyword_score, normalized_phrase_bonus, final_score);

        Ok(final_score)
    }
622
623 fn prepare_section_for_embedding(&self, section: &str) -> String {
625 let lines: Vec<&str> = section.lines().collect();
627 let mut embedding_text = String::new();
628
629 if let Some(title_line) = lines.iter().find(|line| line.starts_with("TITLE: ")) {
631 embedding_text.push_str(title_line[7..].trim());
632 embedding_text.push(' ');
633 }
634
635 if let Some(desc_line) = lines.iter().find(|line| line.starts_with("DESCRIPTION: ")) {
637 embedding_text.push_str(desc_line[13..].trim());
638 embedding_text.push(' ');
639 }
640
641 let content_lines: Vec<&str> = lines
643 .iter()
644 .filter(|line| !line.starts_with("TITLE: ") && !line.starts_with("DESCRIPTION: "))
645 .take(5)
646 .copied()
647 .collect();
648
649 let content = content_lines.join(" ");
650 let content_preview = if content.len() > 200 {
651 format!("{}...", &content[..200])
652 } else {
653 content
654 };
655
656 embedding_text.push_str(&content_preview);
657 embedding_text.trim().to_string()
658 }
659
660 fn calculate_phrase_bonus(
662 &self,
663 section: &str,
664 parsed_query: &ParsedQuery,
665 is_phrase_search: bool,
666 ) -> f32 {
667 let section_lower = section.to_lowercase();
668 let mut phrase_score = 0.0;
669
670 for phrase in &parsed_query.quoted_phrases {
671 let phrase_lower = phrase.to_lowercase();
672 if section_lower.contains(&phrase_lower) {
673 phrase_score += if is_phrase_search { 10.0 } else { 5.0 };
674
675 if let Some(title_line) = section.lines().find(|line| line.starts_with("TITLE: ")) {
677 if title_line.to_lowercase().contains(&phrase_lower) {
678 phrase_score += 2.0;
679 }
680 }
681 }
682 }
683
684 phrase_score
685 }
686
687 fn split_into_sections(&self, docs: &str) -> Vec<String> {
688 let mut sections = Vec::new();
690 let lines: Vec<&str> = docs.lines().collect();
691 let mut current_section = Vec::new();
692 let mut in_section = false;
693
694 for line in lines {
695 if line.starts_with("TITLE: ") {
696 if in_section && !current_section.is_empty() {
698 let section_text = current_section.join("\n");
699 if section_text.len() > 20 {
700 sections.push(section_text);
701 }
702 }
703 current_section.clear();
705 current_section.push(line);
706 in_section = true;
707 } else if in_section {
708 current_section.push(line);
709 }
710 }
711
712 if in_section && !current_section.is_empty() {
714 let section_text = current_section.join("\n");
715 if section_text.len() > 20 {
716 sections.push(section_text);
717 }
718 }
719
720 if sections.is_empty() {
722 let paragraphs: Vec<&str> = docs.split("\n\n").collect();
724 if paragraphs.len() > 1 {
725 for paragraph in paragraphs {
726 let trimmed = paragraph.trim();
727 if trimmed.len() > 50 {
728 sections.push(trimmed.to_string());
730 }
731 }
732 }
733
734 if sections.len() < 3 {
736 sections.clear(); let chunk_size = 800; let mut start = 0;
739 let mut chunk_count = 0;
740
741 while start < docs.len() && chunk_count < 20 {
742 let end = (start + chunk_size).min(docs.len());
744 let mut actual_end = end;
746 if end < docs.len() {
747 if let Some(pos) = docs[start..end].rfind("\n\n") {
749 actual_end = start + pos;
750 } else if let Some(pos) = docs[start..end].rfind(".\n") {
751 actual_end = start + pos + 1;
752 } else if let Some(pos) = docs[start..end].rfind(". ") {
753 actual_end = start + pos + 1;
754 } else if let Some(pos) = docs[start..end].rfind('\n') {
755 actual_end = start + pos;
756 }
757 }
758
759 if actual_end <= start {
761 actual_end = end;
762 }
763
764 let chunk = docs[start..actual_end].trim();
765 if !chunk.is_empty() && chunk.len() > 50 {
766 sections.push(chunk.to_string());
767 chunk_count += 1;
768 }
769
770 start = actual_end;
771 while start < docs.len()
773 && docs.chars().nth(start).is_some_and(|c| c.is_whitespace())
774 {
775 start += 1;
776 }
777 }
778 }
779 }
780
781 if sections.is_empty() {
783 vec![docs.to_string()]
784 } else {
785 sections
786 }
787 }
788
789 fn extract_section_title(&self, section: &str) -> Option<String> {
790 section
791 .lines()
792 .find(|line| line.starts_with("TITLE: "))
793 .map(|line| line[7..].to_string())
794 }
795
796 fn extract_section_excerpt(&self, section: &str) -> String {
797 if let Some(desc_line) = section
799 .lines()
800 .find(|line| line.starts_with("DESCRIPTION: "))
801 {
802 let desc = &desc_line[13..];
803 if desc.len() > 300 {
804 format!("{}...", &desc[..300])
805 } else {
806 desc.to_string()
807 }
808 } else {
809 if section.len() > 300 {
811 format!("{}...", §ion[..300])
812 } else {
813 section.to_string()
814 }
815 }
816 }
817
818 fn create_unique_excerpt(&self, section: &str, offset: usize) -> String {
819 let lines: Vec<&str> = section.lines().collect();
820 let mut excerpt_lines = Vec::new();
821 let mut char_count = 0;
822
823 let skip_lines = offset.saturating_mul(2);
825
826 for line in lines.iter().skip(skip_lines) {
827 let trimmed = line.trim();
828 if !trimmed.is_empty() {
829 excerpt_lines.push(trimmed);
830 char_count += trimmed.len();
831
832 if char_count > 200 || excerpt_lines.len() >= 3 {
834 break;
835 }
836 }
837 }
838
839 if excerpt_lines.is_empty() {
841 for line in lines.iter().take(5) {
842 let trimmed = line.trim();
843 if !trimmed.is_empty() {
844 excerpt_lines.push(trimmed);
845 char_count += trimmed.len();
846 if char_count > 200 {
847 break;
848 }
849 }
850 }
851 }
852
853 let result = excerpt_lines.join(" ");
854 if result.len() > 300 {
855 format!("{}...", &result[..297])
856 } else if result.is_empty() {
857 if section.len() > 300 {
859 format!("{}...", §ion[..297])
860 } else {
861 section.to_string()
862 }
863 } else {
864 result
865 }
866 }
867
    /// Resolves `library` (any `@version` suffix is parsed but ignored) and
    /// fetches its raw documentation from the backend client, optionally
    /// filtered by `query`.
    pub async fn get_documentation(&self, library: &str, query: Option<&str>) -> Result<String> {
        let (lib_name, _version) = parse_library_spec(library);

        let (library_id, _library_title) = self.client.resolve_library(lib_name).await?;

        self.client.get_documentation(&library_id, query).await
    }
877}
878
/// Splits a `name@version` spec into `(name, Some(version))`; a spec without
/// an '@' yields `(spec, None)`. Splits at the *first* '@', so a leading '@'
/// (e.g. npm-style scoped names) produces an empty library name.
fn parse_library_spec(spec: &str) -> (&str, Option<&str>) {
    match spec.split_once('@') {
        Some((lib, ver)) => (lib, Some(ver)),
        None => (spec, None),
    }
}
887
888pub fn fuzzy_find_libraries(query: &str, libraries: &[String]) -> Vec<(String, i64)> {
889 let matcher = SkimMatcherV2::default();
890 let mut matches: Vec<(String, i64)> = libraries
891 .iter()
892 .filter_map(|lib| {
893 matcher
894 .fuzzy_match(lib, query)
895 .map(|score| (lib.clone(), score))
896 })
897 .collect();
898
899 matches.sort_by_key(|(_, score)| -score);
900 matches.truncate(5);
901 matches
902}