// st/smart/smart_read.rs

//! 📖 SmartRead - Context-Aware File Reading
//!
//! This module implements intelligent file reading that focuses on relevant
//! sections based on task context, achieving 70-90% token reduction while
//! maintaining all necessary information for the user's current task.

use super::context::ContextAnalyzer;
use super::{RelevanceScore, SmartResponse, TaskContext, TokenSavings};
use anyhow::{anyhow, Result};
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::Path;

14/// 📖 Smart file reader with context awareness
15pub struct SmartReader {
16    #[allow(dead_code)]
17    context_analyzer: ContextAnalyzer,
18}
19
20/// 📄 A relevant section of a file
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct FileSection {
23    /// Section content
24    pub content: String,
25    /// Line range (start, end)
26    pub line_range: (usize, usize),
27    /// Section type (function, class, comment, etc.)
28    pub section_type: SectionType,
29    /// Relevance score for this section
30    pub relevance: RelevanceScore,
31}
32
33/// 🏷️ Types of file sections we can identify
34#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
35pub enum SectionType {
36    Function,
37    Class,
38    Struct,
39    Enum,
40    Import,
41    Comment,
42    Documentation,
43    Configuration,
44    Test,
45    Error,
46    Unknown,
47}
48
49/// 📊 Smart read response with context-aware results
50pub type SmartReadResponse = SmartResponse<FileSection>;
51
52impl SmartReader {
53    /// Create a new smart reader
54    pub fn new() -> Self {
55        Self {
56            context_analyzer: ContextAnalyzer::new(),
57        }
58    }
59
60    /// 📖 Read file with context awareness
61    pub fn read_contextual(&self, path: &Path, context: &TaskContext) -> Result<SmartReadResponse> {
62        // Read the full file
63        let content = fs::read_to_string(path)
64            .map_err(|e| anyhow!("Failed to read file {}: {}", path.display(), e))?;
65
66        // Split into sections
67        let sections = self.identify_sections(&content, path)?;
68
69        // Score sections by relevance
70        let scored_sections = self.score_sections(&sections, context)?;
71
72        // Filter and categorize by relevance
73        let (primary, secondary) = self.categorize_by_relevance(&scored_sections, context);
74
75        // Calculate token savings
76        let original_tokens = self.estimate_tokens(&content);
77        let compressed_tokens = self.estimate_tokens_for_sections(&primary)
78            + self.estimate_tokens_for_sections(&secondary);
79        let token_savings = TokenSavings::new(original_tokens, compressed_tokens, "smart-read");
80
81        // Generate context summary
82        let context_summary = self.generate_context_summary(&primary, &secondary, context);
83
84        // Generate suggestions
85        let suggestions = self.generate_suggestions(&primary, &secondary, context);
86
87        Ok(SmartReadResponse {
88            primary,
89            secondary,
90            context_summary,
91            token_savings,
92            suggestions,
93        })
94    }
95
96    /// 🔍 Identify sections within file content
97    fn identify_sections(&self, content: &str, path: &Path) -> Result<Vec<FileSection>> {
98        let lines: Vec<&str> = content.lines().collect();
99        let mut sections = Vec::new();
100
101        // Determine file type from extension
102        let extension = path
103            .extension()
104            .and_then(|ext| ext.to_str())
105            .unwrap_or("")
106            .to_lowercase();
107
108        match extension.as_str() {
109            "rs" => self.identify_rust_sections(&lines, &mut sections)?,
110            "py" => self.identify_python_sections(&lines, &mut sections)?,
111            "js" | "ts" => self.identify_javascript_sections(&lines, &mut sections)?,
112            "json" => self.identify_json_sections(&lines, &mut sections)?,
113            "yaml" | "yml" => self.identify_yaml_sections(&lines, &mut sections)?,
114            "md" => self.identify_markdown_sections(&lines, &mut sections)?,
115            _ => self.identify_generic_sections(&lines, &mut sections)?,
116        }
117
118        Ok(sections)
119    }
120
121    /// 🦀 Identify Rust code sections
122    fn identify_rust_sections(
123        &self,
124        lines: &[&str],
125        sections: &mut Vec<FileSection>,
126    ) -> Result<()> {
127        let mut current_section: Option<(usize, SectionType, Vec<String>)> = None;
128
129        for (i, line) in lines.iter().enumerate() {
130            let trimmed = line.trim();
131
132            // Function definitions
133            if trimmed.starts_with("fn ")
134                || trimmed.starts_with("pub fn ")
135                || trimmed.starts_with("async fn ")
136                || trimmed.starts_with("pub async fn ")
137            {
138                self.finish_current_section(&mut current_section, sections);
139                current_section = Some((i, SectionType::Function, vec![line.to_string()]));
140            }
141            // Struct definitions
142            else if trimmed.starts_with("struct ") || trimmed.starts_with("pub struct ") {
143                self.finish_current_section(&mut current_section, sections);
144                current_section = Some((i, SectionType::Struct, vec![line.to_string()]));
145            }
146            // Enum definitions
147            else if trimmed.starts_with("enum ") || trimmed.starts_with("pub enum ") {
148                self.finish_current_section(&mut current_section, sections);
149                current_section = Some((i, SectionType::Enum, vec![line.to_string()]));
150            }
151            // Impl blocks
152            else if trimmed.starts_with("impl ") {
153                self.finish_current_section(&mut current_section, sections);
154                current_section = Some((i, SectionType::Class, vec![line.to_string()]));
155            }
156            // Use statements
157            else if trimmed.starts_with("use ") {
158                if current_section.is_none()
159                    || current_section.as_ref().unwrap().1 != SectionType::Import
160                {
161                    self.finish_current_section(&mut current_section, sections);
162                    current_section = Some((i, SectionType::Import, vec![line.to_string()]));
163                } else if let Some((_, _, ref mut content)) = current_section {
164                    content.push(line.to_string());
165                }
166            }
167            // Documentation comments
168            else if trimmed.starts_with("///") || trimmed.starts_with("//!") {
169                if current_section.is_none()
170                    || current_section.as_ref().unwrap().1 != SectionType::Documentation
171                {
172                    self.finish_current_section(&mut current_section, sections);
173                    current_section = Some((i, SectionType::Documentation, vec![line.to_string()]));
174                } else if let Some((_, _, ref mut content)) = current_section {
175                    content.push(line.to_string());
176                }
177            }
178            // Test functions
179            else if trimmed.contains("#[test]") || trimmed.contains("#[tokio::test]") {
180                self.finish_current_section(&mut current_section, sections);
181                current_section = Some((i, SectionType::Test, vec![line.to_string()]));
182            }
183            // Continue current section
184            else if let Some((_, _, ref mut content)) = current_section {
185                content.push(line.to_string());
186
187                // End section on closing brace at start of line
188                if trimmed == "}" {
189                    self.finish_current_section(&mut current_section, sections);
190                }
191            }
192        }
193
194        // Finish any remaining section
195        self.finish_current_section(&mut current_section, sections);
196        Ok(())
197    }
198
199    /// 🐍 Identify Python code sections
200    fn identify_python_sections(
201        &self,
202        lines: &[&str],
203        sections: &mut Vec<FileSection>,
204    ) -> Result<()> {
205        let mut current_section: Option<(usize, SectionType, Vec<String>)> = None;
206
207        for (i, line) in lines.iter().enumerate() {
208            let trimmed = line.trim();
209
210            // Function definitions
211            if trimmed.starts_with("def ") || trimmed.starts_with("async def ") {
212                self.finish_current_section(&mut current_section, sections);
213                current_section = Some((i, SectionType::Function, vec![line.to_string()]));
214            }
215            // Class definitions
216            else if trimmed.starts_with("class ") {
217                self.finish_current_section(&mut current_section, sections);
218                current_section = Some((i, SectionType::Class, vec![line.to_string()]));
219            }
220            // Import statements
221            else if trimmed.starts_with("import ") || trimmed.starts_with("from ") {
222                if current_section.is_none()
223                    || current_section.as_ref().unwrap().1 != SectionType::Import
224                {
225                    self.finish_current_section(&mut current_section, sections);
226                    current_section = Some((i, SectionType::Import, vec![line.to_string()]));
227                } else if let Some((_, _, ref mut content)) = current_section {
228                    content.push(line.to_string());
229                }
230            }
231            // Continue current section
232            else if let Some((_, _, ref mut content)) = current_section {
233                content.push(line.to_string());
234            }
235        }
236
237        self.finish_current_section(&mut current_section, sections);
238        Ok(())
239    }
240
241    /// 🟨 Identify JavaScript/TypeScript sections
242    fn identify_javascript_sections(
243        &self,
244        lines: &[&str],
245        sections: &mut Vec<FileSection>,
246    ) -> Result<()> {
247        // Similar pattern to Rust but with JS/TS syntax
248        self.identify_generic_sections(lines, sections)
249    }
250
251    /// 📄 Identify JSON sections
252    fn identify_json_sections(
253        &self,
254        lines: &[&str],
255        sections: &mut Vec<FileSection>,
256    ) -> Result<()> {
257        // For JSON, treat the whole file as a configuration section
258        let content = lines.join("\n");
259        sections.push(FileSection {
260            content,
261            line_range: (0, lines.len()),
262            section_type: SectionType::Configuration,
263            relevance: RelevanceScore {
264                score: 0.7,
265                reasons: vec!["JSON configuration file".to_string()],
266                focus_matches: vec![],
267            },
268        });
269        Ok(())
270    }
271
272    /// 📄 Identify YAML sections
273    fn identify_yaml_sections(
274        &self,
275        lines: &[&str],
276        sections: &mut Vec<FileSection>,
277    ) -> Result<()> {
278        // For YAML, treat as configuration
279        let content = lines.join("\n");
280        sections.push(FileSection {
281            content,
282            line_range: (0, lines.len()),
283            section_type: SectionType::Configuration,
284            relevance: RelevanceScore {
285                score: 0.7,
286                reasons: vec!["YAML configuration file".to_string()],
287                focus_matches: vec![],
288            },
289        });
290        Ok(())
291    }
292
293    /// 📝 Identify Markdown sections
294    fn identify_markdown_sections(
295        &self,
296        lines: &[&str],
297        sections: &mut Vec<FileSection>,
298    ) -> Result<()> {
299        let mut current_section: Option<(usize, SectionType, Vec<String>)> = None;
300
301        for (i, line) in lines.iter().enumerate() {
302            let trimmed = line.trim();
303
304            // Headers start new sections
305            if trimmed.starts_with('#') {
306                self.finish_current_section(&mut current_section, sections);
307                current_section = Some((i, SectionType::Documentation, vec![line.to_string()]));
308            }
309            // Continue current section
310            else if let Some((_, _, ref mut content)) = current_section {
311                content.push(line.to_string());
312            }
313            // Start new section if no current section
314            else {
315                current_section = Some((i, SectionType::Documentation, vec![line.to_string()]));
316            }
317        }
318
319        self.finish_current_section(&mut current_section, sections);
320        Ok(())
321    }
322
323    /// 📄 Identify generic file sections
324    fn identify_generic_sections(
325        &self,
326        lines: &[&str],
327        sections: &mut Vec<FileSection>,
328    ) -> Result<()> {
329        // For unknown file types, create one section with the entire content
330        let content = lines.join("\n");
331        sections.push(FileSection {
332            content,
333            line_range: (0, lines.len()),
334            section_type: SectionType::Unknown,
335            relevance: RelevanceScore {
336                score: 0.5,
337                reasons: vec!["Generic file content".to_string()],
338                focus_matches: vec![],
339            },
340        });
341        Ok(())
342    }
343
344    /// ✅ Finish current section and add to sections list
345    fn finish_current_section(
346        &self,
347        current_section: &mut Option<(usize, SectionType, Vec<String>)>,
348        sections: &mut Vec<FileSection>,
349    ) {
350        if let Some((start_line, section_type, content)) = current_section.take() {
351            let end_line = start_line + content.len();
352            sections.push(FileSection {
353                content: content.join("\n"),
354                line_range: (start_line, end_line),
355                section_type,
356                relevance: RelevanceScore {
357                    score: 0.5, // Will be updated by scoring
358                    reasons: vec![],
359                    focus_matches: vec![],
360                },
361            });
362        }
363    }
364
365    /// 📊 Score sections by relevance to context
366    fn score_sections(
367        &self,
368        sections: &[FileSection],
369        context: &TaskContext,
370    ) -> Result<Vec<FileSection>> {
371        let mut scored_sections = Vec::new();
372
373        for section in sections {
374            let mut relevance_score: f32 = 0.0;
375            let mut reasons = Vec::new();
376            let mut focus_matches = Vec::new();
377
378            // Score based on section type
379            relevance_score += match section.section_type {
380                SectionType::Function => 0.8,
381                SectionType::Class | SectionType::Struct => 0.7,
382                SectionType::Import => 0.4,
383                SectionType::Configuration => 0.6,
384                SectionType::Test => 0.5,
385                SectionType::Documentation => 0.3,
386                _ => 0.5,
387            };
388
389            // Score based on content matching focus areas
390            let content_lower = section.content.to_lowercase();
391            for focus_area in &context.focus_areas {
392                for keyword in focus_area.keywords() {
393                    if content_lower.contains(keyword) {
394                        relevance_score += 0.2;
395                        reasons.push(format!("Contains '{}' keyword", keyword));
396                        if !focus_matches.contains(focus_area) {
397                            focus_matches.push(focus_area.clone());
398                        }
399                    }
400                }
401            }
402
403            // Normalize score
404            relevance_score = relevance_score.min(1.0);
405
406            let mut scored_section = section.clone();
407            scored_section.relevance = RelevanceScore {
408                score: relevance_score,
409                reasons,
410                focus_matches,
411            };
412
413            scored_sections.push(scored_section);
414        }
415
416        Ok(scored_sections)
417    }
418
419    /// 🏷️ Categorize sections by relevance threshold
420    fn categorize_by_relevance(
421        &self,
422        sections: &[FileSection],
423        context: &TaskContext,
424    ) -> (Vec<FileSection>, Vec<FileSection>) {
425        let mut primary = Vec::new();
426        let mut secondary = Vec::new();
427
428        for section in sections {
429            if section.relevance.score >= context.relevance_threshold {
430                primary.push(section.clone());
431            } else if section.relevance.score >= context.relevance_threshold * 0.7 {
432                secondary.push(section.clone());
433            }
434            // Sections below 70% of threshold are filtered out
435        }
436
437        // Sort by relevance score (highest first)
438        primary.sort_by(|a, b| b.relevance.score.partial_cmp(&a.relevance.score).unwrap());
439        secondary.sort_by(|a, b| b.relevance.score.partial_cmp(&a.relevance.score).unwrap());
440
441        (primary, secondary)
442    }
443
444    /// 🧮 Estimate token count for content
445    fn estimate_tokens(&self, content: &str) -> usize {
446        // Rough estimation: ~4 characters per token
447        content.len() / 4
448    }
449
450    /// 🧮 Estimate token count for sections
451    fn estimate_tokens_for_sections(&self, sections: &[FileSection]) -> usize {
452        sections
453            .iter()
454            .map(|s| self.estimate_tokens(&s.content))
455            .sum()
456    }
457
458    /// 📝 Generate context summary
459    fn generate_context_summary(
460        &self,
461        primary: &[FileSection],
462        secondary: &[FileSection],
463        context: &TaskContext,
464    ) -> String {
465        format!(
466            "SmartRead analyzed file for task: '{}'. Found {} high-relevance sections and {} medium-relevance sections. Focus areas: {:?}",
467            context.task,
468            primary.len(),
469            secondary.len(),
470            context.focus_areas
471        )
472    }
473
474    /// 💡 Generate proactive suggestions
475    fn generate_suggestions(
476        &self,
477        primary: &[FileSection],
478        secondary: &[FileSection],
479        _context: &TaskContext,
480    ) -> Vec<String> {
481        let mut suggestions = Vec::new();
482
483        if primary.is_empty() {
484            suggestions.push("No highly relevant sections found. Consider adjusting the task context or relevance threshold.".to_string());
485        }
486
487        if secondary.len() > 10 {
488            suggestions.push("Many medium-relevance sections found. Consider using a more specific task context.".to_string());
489        }
490
491        // Suggest related tools based on section types
492        let has_functions = primary
493            .iter()
494            .any(|s| s.section_type == SectionType::Function);
495        let has_tests = primary.iter().any(|s| s.section_type == SectionType::Test);
496
497        if has_functions && !has_tests {
498            suggestions.push("Consider using find_tests to locate related test files.".to_string());
499        }
500
501        suggestions
502    }
503}
504
505impl Default for SmartReader {
506    fn default() -> Self {
507        Self::new()
508    }
509}
510
#[cfg(test)]
mod tests {
    use super::*;

    /// A `use` line, a doc comment and a function must yield three sections
    /// with contiguous, end-exclusive line ranges.
    #[test]
    fn test_rust_section_identification() {
        let reader = SmartReader::new();
        let lines = [
            "use std::collections::HashMap;",
            "",
            "/// This is a test function",
            "pub fn test_function() {",
            "    println!(\"Hello\");",
            "}",
        ];

        let mut sections = Vec::new();
        reader
            .identify_rust_sections(&lines, &mut sections)
            .unwrap();

        assert_eq!(sections.len(), 3); // Import, documentation, and function sections
        assert_eq!(sections[0].section_type, SectionType::Import);
        assert_eq!(sections[1].section_type, SectionType::Documentation);
        assert_eq!(sections[2].section_type, SectionType::Function);

        // The blank line after the import is absorbed into the import section.
        assert_eq!(sections[0].line_range, (0, 2));
        assert_eq!(sections[1].line_range, (2, 3));
        assert_eq!(sections[2].line_range, (3, 6));
    }
}