cadi_core/atomizer/
extractor.rs

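//! Atom Extractor
//!
//! Extracts bounded code atoms (functions, types, ...) from source code,
//! using Tree-sitter ASTs when the `ast-parsing` feature is enabled and
//! regex-based heuristics otherwise.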
4
5use serde::{Deserialize, Serialize};
6
7use super::config::AtomizerConfig;
8use crate::error::{CadiError, CadiResult};
9
/// Kind of code atom.
///
/// Serialized by serde with `snake_case` variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AtomKind {
    /// A function or method
    Function,
    /// An async function
    AsyncFunction,
    /// A method within a class/impl
    Method,
    /// A struct (Rust) or interface (TS)
    // NOTE(review): overlaps with `Interface` below — confirm which variant
    // TypeScript interfaces are meant to map to.
    Struct,
    /// A class
    Class,
    /// A trait (Rust) or abstract class
    Trait,
    /// An interface (TS/Java)
    Interface,
    /// An enum
    Enum,
    /// A constant or static value
    Constant,
    /// A type alias
    TypeAlias,
    /// A module or namespace
    Module,
    /// A macro
    Macro,
    /// An impl block (Rust)
    ImplBlock,
    /// A decorator/attribute (Python/Rust)
    Decorator,
}
43
44impl AtomKind {
45    /// Is this a type definition?
46    pub fn is_type(&self) -> bool {
47        matches!(
48            self,
49            AtomKind::Struct
50                | AtomKind::Class
51                | AtomKind::Trait
52                | AtomKind::Interface
53                | AtomKind::Enum
54                | AtomKind::TypeAlias
55        )
56    }
57
58    /// Is this executable code?
59    pub fn is_executable(&self) -> bool {
60        matches!(
61            self,
62            AtomKind::Function | AtomKind::AsyncFunction | AtomKind::Method
63        )
64    }
65}
66
/// An extracted code atom: one named definition together with its location
/// in the original source and some lightweight metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedAtom {
    /// Name of the atom (function name, type name, etc.)
    pub name: String,

    /// Kind of atom
    pub kind: AtomKind,

    /// The source code of this atom
    pub source: String,

    /// Byte offset in original source where the atom starts
    pub start_byte: usize,
    /// Byte offset in original source where the atom ends. The regex
    /// extractors in this file treat it as exclusive
    /// (`source[start_byte..end_byte]`).
    pub end_byte: usize,

    /// Line number of the first line (1-indexed)
    pub start_line: usize,
    /// Line number of the last line (1-indexed)
    pub end_line: usize,

    /// Symbols this atom defines/exports
    pub defines: Vec<String>,

    /// Symbols this atom references/imports
    pub references: Vec<String>,

    /// Doc comment if present
    pub doc_comment: Option<String>,

    /// Visibility (public, private, etc.)
    pub visibility: Visibility,

    /// Parent atom (for nested items like methods in a class).
    // NOTE(review): the regex extractors in this file always leave this as
    // `None`; presumably the Tree-sitter extractors fill it in — confirm.
    pub parent: Option<String>,

    /// Decorators/attributes applied to this atom.
    // NOTE(review): the regex extractors in this file always leave this empty.
    pub decorators: Vec<String>,
}
105
/// Visibility level of an atom.
///
/// Serialized by serde with `snake_case` variant names. `Private` is the
/// `Default` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Visibility {
    /// Public (exported)
    Public,
    /// Private (not exported)
    #[default]
    Private,
    /// Package/crate-level visibility
    Internal,
    /// Protected (visible to subclasses)
    Protected,
}
120
/// Atom extractor for a specific language.
///
/// Build one with [`AtomExtractor::new`] and call [`AtomExtractor::extract`]
/// for each source text.
pub struct AtomExtractor {
    /// Atomizer settings; cloned into the Tree-sitter atomizers when the
    /// `ast-parsing` feature is enabled.
    config: AtomizerConfig,
    /// Language identifier used to pick an extraction strategy
    /// (e.g. `"rust"`, `"typescript"`, `"python"`).
    language: String,
}
126
127impl AtomExtractor {
128    /// Create a new extractor for a language
129    pub fn new(language: impl Into<String>, config: AtomizerConfig) -> Self {
130        Self {
131            config,
132            language: language.into(),
133        }
134    }
135
136    /// Extract atoms from source code
137    /// 
138    /// When the `ast-parsing` feature is enabled, this uses Tree-sitter.
139    /// Otherwise, falls back to regex-based extraction.
140    pub fn extract(&self, source: &str) -> CadiResult<Vec<ExtractedAtom>> {
141        #[cfg(feature = "ast-parsing")]
142        {
143            use crate::atomizer::languages::*;
144            match self.language.as_str() {
145                "rust" => return RustAtomizer::new(self.config.clone()).extract(source),
146                "c" | "cpp" => return CAtomizer::new(self.config.clone()).extract(source),
147                "csharp" => return CSharpAtomizer::new(self.config.clone()).extract(source),
148                "css" => return CssAtomizer::new(self.config.clone()).extract(source),
149                "glsl" => return GlslAtomizer::new(self.config.clone()).extract(source),
150                _ => {}
151            }
152        }
153
154        match self.language.as_str() {
155            "rust" => self.extract_rust(source),
156            "typescript" | "javascript" => self.extract_typescript(source),
157            "python" => self.extract_python(source),
158            "c" | "cpp" => self.extract_c(source),
159            "csharp" => self.extract_csharp(source),
160            "css" => self.extract_css(source),
161            "glsl" => self.extract_glsl(source),
162            _ => self.extract_fallback(source),
163        }
164    }
165
166    /// Extract atoms from Rust source
167    fn extract_rust(&self, source: &str) -> CadiResult<Vec<ExtractedAtom>> {
168        let mut atoms = Vec::new();
169        let _lines: Vec<&str> = source.lines().collect();
170
171        // Regex-based extraction (fallback when tree-sitter not enabled)
172        // This is a simplified version - the real implementation uses Tree-sitter
173        
174        let fn_regex = regex::Regex::new(
175            r"(?m)^(\s*)(///.*\n)*(\s*)(?:pub(?:\([^)]*\))?\s+)?(async\s+)?fn\s+(\w+)"
176        ).map_err(|e| CadiError::AtomizerError(e.to_string()))?;
177
178        let struct_regex = regex::Regex::new(
179            r"(?m)^(\s*)(///.*\n)*(\s*)(?:pub(?:\([^)]*\))?\s+)?struct\s+(\w+)"
180        ).map_err(|e| CadiError::AtomizerError(e.to_string()))?;
181
182        let _enum_regex = regex::Regex::new(
183            r"(?m)^(\s*)(///.*\n)*(\s*)(?:pub(?:\([^)]*\))?\s+)?enum\s+(\w+)"
184        ).map_err(|e| CadiError::AtomizerError(e.to_string()))?;
185
186        let _trait_regex = regex::Regex::new(
187            r"(?m)^(\s*)(///.*\n)*(\s*)(?:pub(?:\([^)]*\))?\s+)?trait\s+(\w+)"
188        ).map_err(|e| CadiError::AtomizerError(e.to_string()))?;
189
190        let _impl_regex = regex::Regex::new(
191            r"(?m)^impl(?:<[^>]*>)?\s+(?:(\w+)\s+for\s+)?(\w+)"
192        ).map_err(|e| CadiError::AtomizerError(e.to_string()))?;
193
194        // Extract functions
195        for cap in fn_regex.captures_iter(source) {
196            let name = cap.get(5).map(|m| m.as_str()).unwrap_or("unknown");
197            let is_async = cap.get(4).is_some();
198            let is_pub = source[..cap.get(0).unwrap().start()]
199                .lines()
200                .last()
201                .map(|l| l.contains("pub"))
202                .unwrap_or(false);
203
204            let start_byte = cap.get(0).unwrap().start();
205            let end_byte = self.find_block_end(source, start_byte);
206            
207            let start_line = source[..start_byte].matches('\n').count() + 1;
208            let end_line = source[..end_byte].matches('\n').count() + 1;
209
210            atoms.push(ExtractedAtom {
211                name: name.to_string(),
212                kind: if is_async { AtomKind::AsyncFunction } else { AtomKind::Function },
213                source: source[start_byte..end_byte].to_string(),
214                start_byte,
215                end_byte,
216                start_line,
217                end_line,
218                defines: vec![name.to_string()],
219                references: self.extract_references(&source[start_byte..end_byte]),
220                doc_comment: self.extract_doc_comment(source, start_byte),
221                visibility: if is_pub { Visibility::Public } else { Visibility::Private },
222                parent: None,
223                decorators: Vec::new(),
224            });
225        }
226
227        // Extract structs
228        for cap in struct_regex.captures_iter(source) {
229            let name = cap.get(4).map(|m| m.as_str()).unwrap_or("unknown");
230            let start_byte = cap.get(0).unwrap().start();
231            let end_byte = self.find_block_end(source, start_byte);
232            
233            atoms.push(ExtractedAtom {
234                name: name.to_string(),
235                kind: AtomKind::Struct,
236                source: source[start_byte..end_byte].to_string(),
237                start_byte,
238                end_byte,
239                start_line: source[..start_byte].matches('\n').count() + 1,
240                end_line: source[..end_byte].matches('\n').count() + 1,
241                defines: vec![name.to_string()],
242                references: self.extract_references(&source[start_byte..end_byte]),
243                doc_comment: self.extract_doc_comment(source, start_byte),
244                visibility: Visibility::Public, // simplified
245                parent: None,
246                decorators: Vec::new(),
247            });
248        }
249
250        // Similar for enums, traits, impl blocks...
251        // (abbreviated for clarity)
252
253        Ok(atoms)
254    }
255
256    /// Extract atoms from TypeScript/JavaScript source
257    fn extract_typescript(&self, source: &str) -> CadiResult<Vec<ExtractedAtom>> {
258        let mut atoms = Vec::new();
259
260        let fn_regex = regex::Regex::new(
261            r"(?m)^(\s*)(export\s+)?(async\s+)?function\s+(\w+)"
262        ).map_err(|e| CadiError::AtomizerError(e.to_string()))?;
263
264        let class_regex = regex::Regex::new(
265            r"(?m)^(\s*)(export\s+)?class\s+(\w+)"
266        ).map_err(|e| CadiError::AtomizerError(e.to_string()))?;
267
268        let _interface_regex = regex::Regex::new(
269            r"(?m)^(\s*)(export\s+)?interface\s+(\w+)"
270        ).map_err(|e| CadiError::AtomizerError(e.to_string()))?;
271
272        let _const_regex = regex::Regex::new(
273            r"(?m)^(\s*)(export\s+)?const\s+(\w+)\s*="
274        ).map_err(|e| CadiError::AtomizerError(e.to_string()))?;
275
276        // Extract functions
277        for cap in fn_regex.captures_iter(source) {
278            let name = cap.get(4).map(|m| m.as_str()).unwrap_or("unknown");
279            let is_async = cap.get(3).is_some();
280            let is_export = cap.get(2).is_some();
281
282            let start_byte = cap.get(0).unwrap().start();
283            let end_byte = self.find_block_end(source, start_byte);
284
285            atoms.push(ExtractedAtom {
286                name: name.to_string(),
287                kind: if is_async { AtomKind::AsyncFunction } else { AtomKind::Function },
288                source: source[start_byte..end_byte].to_string(),
289                start_byte,
290                end_byte,
291                start_line: source[..start_byte].matches('\n').count() + 1,
292                end_line: source[..end_byte].matches('\n').count() + 1,
293                defines: vec![name.to_string()],
294                references: self.extract_ts_imports(source) // simplified
295                    .into_iter()
296                    .flat_map(|(_, syms)| syms)
297                    .collect(),
298                doc_comment: self.extract_jsdoc(source, start_byte),
299                visibility: if is_export { Visibility::Public } else { Visibility::Private },
300                parent: None,
301                decorators: Vec::new(),
302            });
303        }
304
305        // Classes, interfaces, etc.
306        for cap in class_regex.captures_iter(source) {
307            let name = cap.get(3).map(|m| m.as_str()).unwrap_or("unknown");
308            let start_byte = cap.get(0).unwrap().start();
309            let end_byte = self.find_block_end(source, start_byte);
310
311            atoms.push(ExtractedAtom {
312                name: name.to_string(),
313                kind: AtomKind::Class,
314                source: source[start_byte..end_byte].to_string(),
315                start_byte,
316                end_byte,
317                start_line: source[..start_byte].matches('\n').count() + 1,
318                end_line: source[..end_byte].matches('\n').count() + 1,
319                defines: vec![name.to_string()],
320                references: Vec::new(),
321                doc_comment: None,
322                visibility: Visibility::Public,
323                parent: None,
324                decorators: Vec::new(),
325            });
326        }
327
328        Ok(atoms)
329    }
330
331    /// Extract atoms from Python source
332    fn extract_python(&self, source: &str) -> CadiResult<Vec<ExtractedAtom>> {
333        let mut atoms = Vec::new();
334
335        let fn_regex = regex::Regex::new(
336            r"(?m)^(\s*)(async\s+)?def\s+(\w+)\s*\("
337        ).map_err(|e| CadiError::AtomizerError(e.to_string()))?;
338
339        let class_regex = regex::Regex::new(
340            r"(?m)^(\s*)class\s+(\w+)"
341        ).map_err(|e| CadiError::AtomizerError(e.to_string()))?;
342
343        for cap in fn_regex.captures_iter(source) {
344            let indent = cap.get(1).map(|m| m.as_str().len()).unwrap_or(0);
345            let name = cap.get(3).map(|m| m.as_str()).unwrap_or("unknown");
346            let is_async = cap.get(2).is_some();
347
348            let start_byte = cap.get(0).unwrap().start();
349            let end_byte = self.find_python_block_end(source, start_byte, indent);
350
351            atoms.push(ExtractedAtom {
352                name: name.to_string(),
353                kind: if is_async { AtomKind::AsyncFunction } else { AtomKind::Function },
354                source: source[start_byte..end_byte].to_string(),
355                start_byte,
356                end_byte,
357                start_line: source[..start_byte].matches('\n').count() + 1,
358                end_line: source[..end_byte].matches('\n').count() + 1,
359                defines: vec![name.to_string()],
360                references: Vec::new(),
361                doc_comment: self.extract_python_docstring(source, start_byte),
362                visibility: if name.starts_with('_') { Visibility::Private } else { Visibility::Public },
363                parent: None,
364                decorators: Vec::new(),
365            });
366        }
367
368        for cap in class_regex.captures_iter(source) {
369            let indent = cap.get(1).map(|m| m.as_str().len()).unwrap_or(0);
370            let name = cap.get(2).map(|m| m.as_str()).unwrap_or("unknown");
371
372            let start_byte = cap.get(0).unwrap().start();
373            let end_byte = self.find_python_block_end(source, start_byte, indent);
374
375            atoms.push(ExtractedAtom {
376                name: name.to_string(),
377                kind: AtomKind::Class,
378                source: source[start_byte..end_byte].to_string(),
379                start_byte,
380                end_byte,
381                start_line: source[..start_byte].matches('\n').count() + 1,
382                end_line: source[..end_byte].matches('\n').count() + 1,
383                defines: vec![name.to_string()],
384                references: Vec::new(),
385                doc_comment: None,
386                visibility: Visibility::Public,
387                parent: None,
388                decorators: Vec::new(),
389            });
390        }
391
392        Ok(atoms)
393    }
394
395    /// Extract atoms from C source
396    fn extract_c(&self, source: &str) -> CadiResult<Vec<ExtractedAtom>> {
397        // Simplified fallback: extract functions
398        let mut atoms = Vec::new();
399        let fn_regex = regex::Regex::new(r"(?m)^(\w+)\s+(\w+)\s*\([^)]*\)\s*\{").unwrap();
400
401        for cap in fn_regex.captures_iter(source) {
402            let name = cap.get(2).map(|m| m.as_str()).unwrap_or("unknown");
403            let start_byte = cap.get(0).unwrap().start();
404            let end_byte = self.find_block_end(source, start_byte);
405
406            atoms.push(ExtractedAtom {
407                name: name.to_string(),
408                kind: AtomKind::Function,
409                source: source[start_byte..end_byte].to_string(),
410                start_byte,
411                end_byte,
412                start_line: source[..start_byte].matches('\n').count() + 1,
413                end_line: source[..end_byte].matches('\n').count() + 1,
414                defines: vec![name.to_string()],
415                references: Vec::new(),
416                doc_comment: None,
417                visibility: Visibility::Public,
418                parent: None,
419                decorators: Vec::new(),
420            });
421        }
422        Ok(atoms)
423    }
424
425    /// Extract atoms from C# source
426    fn extract_csharp(&self, source: &str) -> CadiResult<Vec<ExtractedAtom>> {
427        let mut atoms = Vec::new();
428        let class_regex = regex::Regex::new(r"(?m)^(\s*)(?:public|private|internal|protected)?\s+class\s+(\w+)").unwrap();
429
430        for cap in class_regex.captures_iter(source) {
431            let name = cap.get(2).map(|m| m.as_str()).unwrap_or("unknown");
432            let start_byte = cap.get(0).unwrap().start();
433            let end_byte = self.find_block_end(source, start_byte);
434
435            atoms.push(ExtractedAtom {
436                name: name.to_string(),
437                kind: AtomKind::Class,
438                source: source[start_byte..end_byte].to_string(),
439                start_byte,
440                end_byte,
441                start_line: source[..start_byte].matches('\n').count() + 1,
442                end_line: source[..end_byte].matches('\n').count() + 1,
443                defines: vec![name.to_string()],
444                references: Vec::new(),
445                doc_comment: None,
446                visibility: Visibility::Public,
447                parent: None,
448                decorators: Vec::new(),
449            });
450        }
451        Ok(atoms)
452    }
453
454    /// Extract atoms from CSS source
455    fn extract_css(&self, source: &str) -> CadiResult<Vec<ExtractedAtom>> {
456        let mut atoms = Vec::new();
457        let rule_regex = regex::Regex::new(r"(?m)^([^{]+)\{").unwrap();
458
459        for cap in rule_regex.captures_iter(source) {
460            let name = cap.get(1).map(|m| m.as_str().trim()).unwrap_or("rule");
461            let start_byte = cap.get(0).unwrap().start();
462            let end_byte = self.find_block_end(source, start_byte);
463
464            atoms.push(ExtractedAtom {
465                name: name.to_string(),
466                kind: AtomKind::Constant, // CSS rules are roughly constants/styles
467                source: source[start_byte..end_byte].to_string(),
468                start_byte,
469                end_byte,
470                start_line: source[..start_byte].matches('\n').count() + 1,
471                end_line: source[..end_byte].matches('\n').count() + 1,
472                defines: Vec::new(),
473                references: Vec::new(),
474                doc_comment: None,
475                visibility: Visibility::Public,
476                parent: None,
477                decorators: Vec::new(),
478            });
479        }
480        Ok(atoms)
481    }
482
/// Extract atoms from GLSL source.
///
/// Delegates to `extract_c`, so GLSL gets exactly the same function-only
/// extraction as C.
fn extract_glsl(&self, source: &str) -> CadiResult<Vec<ExtractedAtom>> {
    self.extract_c(source)
}
487
488    /// Fallback extraction for unsupported languages
489    fn extract_fallback(&self, source: &str) -> CadiResult<Vec<ExtractedAtom>> {
490        // Return the whole file as a single atom
491        Ok(vec![ExtractedAtom {
492            name: "module".to_string(),
493            kind: AtomKind::Module,
494            source: source.to_string(),
495            start_byte: 0,
496            end_byte: source.len(),
497            start_line: 1,
498            end_line: source.lines().count(),
499            defines: Vec::new(),
500            references: Vec::new(),
501            doc_comment: None,
502            visibility: Visibility::Public,
503            parent: None,
504            decorators: Vec::new(),
505        }])
506    }
507
508    // ========================================================================
509    // Helper methods
510    // ========================================================================
511
512    /// Find the end of a brace-delimited block
513    fn find_block_end(&self, source: &str, start: usize) -> usize {
514        let mut depth = 0;
515        let mut in_string = false;
516        let mut string_char = ' ';
517        let mut prev_char = ' ';
518
519        for (i, c) in source[start..].char_indices() {
520            if in_string {
521                if c == string_char && prev_char != '\\' {
522                    in_string = false;
523                }
524            } else {
525                match c {
526                    '"' | '\'' | '`' => {
527                        in_string = true;
528                        string_char = c;
529                    }
530                    '{' => depth += 1,
531                    '}' => {
532                        depth -= 1;
533                        if depth == 0 {
534                            return start + i + 1;
535                        }
536                    }
537                    _ => {}
538                }
539            }
540            prev_char = c;
541        }
542
543        source.len()
544    }
545
546    /// Find the end of a Python block (indentation-based)
547    fn find_python_block_end(&self, source: &str, start: usize, base_indent: usize) -> usize {
548        let lines: Vec<&str> = source[start..].lines().collect();
549        let mut end = start;
550        let mut started = false;
551
552        for line in lines {
553            if line.trim().is_empty() {
554                end += line.len() + 1;
555                continue;
556            }
557
558            let indent = line.len() - line.trim_start().len();
559            
560            if !started {
561                started = true;
562                end += line.len() + 1;
563            } else if indent > base_indent {
564                end += line.len() + 1;
565            } else {
566                break;
567            }
568        }
569
570        end.min(source.len())
571    }
572
573    /// Extract references from Rust code
574    fn extract_references(&self, source: &str) -> Vec<String> {
575        let mut refs = Vec::new();
576        
577        // Extract use statements
578        let use_regex = regex::Regex::new(r"use\s+([\w:]+)").ok();
579        if let Some(re) = use_regex {
580            for cap in re.captures_iter(source) {
581                if let Some(m) = cap.get(1) {
582                    refs.push(m.as_str().to_string());
583                }
584            }
585        }
586        
587        refs
588    }
589
590    /// Extract imports from TypeScript
591    fn extract_ts_imports(&self, source: &str) -> Vec<(String, Vec<String>)> {
592        let mut imports = Vec::new();
593        
594        let import_regex = regex::Regex::new(
595            r#"import\s*\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#
596        ).ok();
597        
598        if let Some(re) = import_regex {
599            for cap in re.captures_iter(source) {
600                let symbols: Vec<String> = cap.get(1)
601                    .map(|m| m.as_str())
602                    .unwrap_or("")
603                    .split(',')
604                    .map(|s| s.trim().to_string())
605                    .filter(|s| !s.is_empty())
606                    .collect();
607                
608                let path = cap.get(2).map(|m| m.as_str().to_string()).unwrap_or_default();
609                
610                imports.push((path, symbols));
611            }
612        }
613        
614        imports
615    }
616
617    /// Extract doc comment before a position
618    fn extract_doc_comment(&self, source: &str, pos: usize) -> Option<String> {
619        let before = &source[..pos];
620        let lines: Vec<&str> = before.lines().rev().collect();
621        
622        let mut doc_lines = Vec::new();
623        for line in lines {
624            let trimmed = line.trim();
625            if trimmed.starts_with("///") {
626                doc_lines.push(trimmed.trim_start_matches("///").trim());
627            } else if trimmed.is_empty() {
628                continue;
629            } else {
630                break;
631            }
632        }
633        
634        if doc_lines.is_empty() {
635            None
636        } else {
637            doc_lines.reverse();
638            Some(doc_lines.join("\n"))
639        }
640    }
641
642    /// Extract JSDoc comment
643    fn extract_jsdoc(&self, source: &str, pos: usize) -> Option<String> {
644        let before = &source[..pos];
645        
646        if let Some(start) = before.rfind("/**") {
647            if let Some(end) = before[start..].find("*/") {
648                let comment = &before[start..start + end + 2];
649                return Some(comment.to_string());
650            }
651        }
652        
653        None
654    }
655
656    /// Extract Python docstring
657    fn extract_python_docstring(&self, source: &str, start: usize) -> Option<String> {
658        let after = &source[start..];
659        
660        // Find the colon ending the function definition
661        if let Some(colon_pos) = after.find(':') {
662            let rest = &after[colon_pos + 1..];
663            let trimmed = rest.trim_start();
664            
665            // Check for docstring
666            if trimmed.starts_with("\"\"\"") || trimmed.starts_with("'''") {
667                let quote = &trimmed[..3];
668                if let Some(end) = trimmed[3..].find(quote) {
669                    return Some(trimmed[3..3 + end].to_string());
670                }
671            }
672        }
673        
674        None
675    }
676}
677
678impl ExtractedAtom {
679    /// Get the number of lines in this atom
680    pub fn line_count(&self) -> usize {
681        self.end_line - self.start_line + 1
682    }
683
684    /// Estimate token count
685    pub fn token_estimate(&self) -> usize {
686        self.source.len() / 4
687    }
688
689    /// Is this a public/exported symbol?
690    pub fn is_public(&self) -> bool {
691        self.visibility == Visibility::Public
692    }
693}
694
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the extractor for `language` over `source` with the default
    /// configuration.
    fn extract_atoms(language: &str, source: &str) -> Vec<ExtractedAtom> {
        AtomExtractor::new(language, AtomizerConfig::default())
            .extract(source)
            .unwrap()
    }

    /// Returns the first atom with the given name and kind, if any.
    fn find_atom<'a>(
        atoms: &'a [ExtractedAtom],
        name: &str,
        kind: AtomKind,
    ) -> Option<&'a ExtractedAtom> {
        atoms.iter().find(|a| a.name == name && a.kind == kind)
    }

    #[test]
    fn test_rust_extraction() {
        let source = r#"
/// A simple greeting function
pub fn hello(name: &str) -> String {
    format!("Hello, {}!", name)
}

fn private_helper() {
    // do something
}
"#;

        let atoms = extract_atoms("rust", source);

        assert!(!atoms.is_empty());
        assert!(atoms.iter().any(|a| a.name == "hello"));
    }

    #[test]
    fn test_typescript_extraction() {
        let source = r#"
export function greet(name: string): string {
    return `Hello, ${name}!`;
}

export class Greeter {
    greet(name: string) {
        return `Hello, ${name}`;
    }
}
"#;

        let atoms = extract_atoms("typescript", source);

        let greet = find_atom(&atoms, "greet", AtomKind::Function)
            .expect("exported function should be extracted");
        assert!(greet.is_public());
        assert!(find_atom(&atoms, "Greeter", AtomKind::Class).is_some());
    }

    #[test]
    fn test_python_extraction() {
        let source = r#"
def hello(name):
    """Say hello to someone."""
    print(f"Hello, {name}!")

class Greeter:
    def greet(self, name):
        return f"Hello, {name}"
"#;

        let atoms = extract_atoms("python", source);

        let hello = find_atom(&atoms, "hello", AtomKind::Function)
            .expect("top-level function should be extracted");
        assert!(hello.is_public());
        assert_eq!(hello.doc_comment.as_deref(), Some("Say hello to someone."));

        assert!(find_atom(&atoms, "Greeter", AtomKind::Class).is_some());
        // Methods are reported as plain functions.
        assert!(find_atom(&atoms, "greet", AtomKind::Function).is_some());
    }
}
756}