1use std::collections::HashMap;
13use std::path::{Path, PathBuf};
14use std::fs;
15use std::io::{BufRead, BufReader};
16use once_cell::sync::Lazy;
17use scribe_core::Result;
18use tree_sitter::{Parser, Language as TsLanguage, Node, Tree};
19
/// Template engines and template-like component formats the detector can report.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TemplateEngine {
    // --- JavaScript-hosted engines ---
    /// Handlebars templates.
    Handlebars,
    /// Mustache templates.
    Mustache,
    /// EJS templates.
    Ejs,
    /// Pug templates.
    Pug,
    /// Jade templates.
    Jade,

    // --- Python-hosted engines ---
    /// Django templates.
    Django,
    /// Jinja2 templates.
    Jinja2,
    /// Mako templates.
    Mako,

    // --- PHP-hosted engines ---
    /// Twig templates.
    Twig,
    /// Smarty templates.
    Smarty,

    // --- Ruby-hosted engines ---
    /// ERB templates.
    Erb,
    /// Haml templates.
    Haml,

    // --- Other engines ---
    /// Liquid templates.
    Liquid,
    /// Dust templates.
    Dust,
    /// Eta templates.
    Eta,

    // --- Front-end component formats ---
    /// Vue single-file components.
    Vue,
    /// Svelte components.
    Svelte,
    /// React JSX/TSX components.
    React,
    /// Angular templates.
    Angular,

    /// Template-like content whose concrete engine could not be determined.
    Generic,
}
57
58impl TemplateEngine {
59 pub fn extensions(&self) -> &'static [&'static str] {
61 match self {
62 TemplateEngine::Handlebars => &[".hbs", ".handlebars"],
63 TemplateEngine::Mustache => &[".mustache"],
64 TemplateEngine::Ejs => &[".ejs"],
65 TemplateEngine::Pug => &[".pug"],
66 TemplateEngine::Jade => &[".jade"],
67 TemplateEngine::Django => &[".html", ".htm"], TemplateEngine::Jinja2 => &[".j2", ".jinja", ".jinja2"],
69 TemplateEngine::Mako => &[".mako"],
70 TemplateEngine::Twig => &[".twig"],
71 TemplateEngine::Smarty => &[".tpl"],
72 TemplateEngine::Erb => &[".erb", ".rhtml"],
73 TemplateEngine::Haml => &[".haml"],
74 TemplateEngine::Liquid => &[".liquid"],
75 TemplateEngine::Dust => &[".dust"],
76 TemplateEngine::Eta => &[".eta"],
77 TemplateEngine::Vue => &[".vue"],
78 TemplateEngine::Svelte => &[".svelte"],
79 TemplateEngine::React => &[".jsx", ".tsx"],
80 TemplateEngine::Angular => &[".html"], TemplateEngine::Generic => &[],
82 }
83 }
84
85 pub fn score_boost(&self) -> f64 {
87 match self {
88 TemplateEngine::Handlebars | TemplateEngine::Mustache |
90 TemplateEngine::Jinja2 | TemplateEngine::Twig |
91 TemplateEngine::Liquid => 1.5,
92
93 TemplateEngine::Vue | TemplateEngine::Svelte |
95 TemplateEngine::React => 1.3,
96
97 TemplateEngine::Ejs | TemplateEngine::Pug | TemplateEngine::Erb |
99 TemplateEngine::Haml => 1.2,
100
101 TemplateEngine::Django | TemplateEngine::Angular => 1.0,
103
104 TemplateEngine::Generic => 0.8,
106
107 _ => 1.0,
108 }
109 }
110}
111
/// Identifies which detection step produced a [`TemplateDetectionResult`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TemplateDetectionMethod {
    /// The file extension was recognised.
    Extension,
    /// The file content contained template markers.
    ContentPattern,
    /// The file lives under a template-style directory.
    DirectoryContext,
    /// A language-specific heuristic matched.
    LanguageHeuristic,
}
124
125#[derive(Debug, Clone)]
127pub struct TemplateDetectionResult {
128 pub is_template: bool,
130 pub engine: Option<TemplateEngine>,
132 pub detection_method: TemplateDetectionMethod,
134 pub confidence: f64,
136 pub score_boost: f64,
138}
139
140impl TemplateDetectionResult {
141 pub fn not_template() -> Self {
142 Self {
143 is_template: false,
144 engine: None,
145 detection_method: TemplateDetectionMethod::Extension,
146 confidence: 0.0,
147 score_boost: 0.0,
148 }
149 }
150
151 pub fn template(engine: TemplateEngine, method: TemplateDetectionMethod, confidence: f64) -> Self {
152 let score_boost = engine.score_boost();
153 Self {
154 is_template: true,
155 engine: Some(engine),
156 detection_method: method,
157 confidence,
158 score_boost,
159 }
160 }
161}
162
163#[derive(Debug, Clone)]
165pub struct TemplatePattern {
166 pub open_tag: String,
167 pub close_tag: String,
168 pub engine: TemplateEngine,
169 pub min_occurrences: usize,
170}
171
172impl TemplatePattern {
173 pub fn new(open: &str, close: &str, engine: TemplateEngine, min_occurrences: usize) -> Self {
174 Self {
175 open_tag: open.to_string(),
176 close_tag: close.to_string(),
177 engine,
178 min_occurrences,
179 }
180 }
181}
182
183static TEMPLATE_PATTERNS: Lazy<Vec<TemplatePattern>> = Lazy::new(|| {
185 vec![
186 TemplatePattern::new("{{", "}}", TemplateEngine::Handlebars, 2),
188 TemplatePattern::new("{{{", "}}}", TemplateEngine::Handlebars, 1),
189
190 TemplatePattern::new("{%", "%}", TemplateEngine::Jinja2, 2),
192 TemplatePattern::new("{{", "}}", TemplateEngine::Jinja2, 1), TemplatePattern::new("<%", "%>", TemplateEngine::Ejs, 2),
196 TemplatePattern::new("<%=", "%>", TemplateEngine::Ejs, 1),
197 TemplatePattern::new("<%#", "%>", TemplateEngine::Ejs, 1),
198
199 TemplatePattern::new("<#", "#>", TemplateEngine::Generic, 2),
201
202 TemplatePattern::new("${", "}", TemplateEngine::Generic, 3),
204 TemplatePattern::new("@{", "}", TemplateEngine::Generic, 2),
205 TemplatePattern::new("[[", "]]", TemplateEngine::Generic, 2),
206 ]
207});
208
209static EXTENSION_MAP: Lazy<HashMap<&'static str, TemplateEngine>> = Lazy::new(|| {
211 let mut map = HashMap::new();
212
213 map.insert(".njk", TemplateEngine::Jinja2);
215 map.insert(".nunjucks", TemplateEngine::Jinja2);
216 map.insert(".hbs", TemplateEngine::Handlebars);
217 map.insert(".handlebars", TemplateEngine::Handlebars);
218 map.insert(".j2", TemplateEngine::Jinja2);
219 map.insert(".jinja", TemplateEngine::Jinja2);
220 map.insert(".jinja2", TemplateEngine::Jinja2);
221 map.insert(".twig", TemplateEngine::Twig);
222 map.insert(".liquid", TemplateEngine::Liquid);
223 map.insert(".mustache", TemplateEngine::Mustache);
224 map.insert(".ejs", TemplateEngine::Ejs);
225 map.insert(".erb", TemplateEngine::Erb);
226 map.insert(".rhtml", TemplateEngine::Erb);
227 map.insert(".haml", TemplateEngine::Haml);
228 map.insert(".pug", TemplateEngine::Pug);
229 map.insert(".jade", TemplateEngine::Jade);
230 map.insert(".dust", TemplateEngine::Dust);
231 map.insert(".eta", TemplateEngine::Eta);
232 map.insert(".svelte", TemplateEngine::Svelte);
233 map.insert(".vue", TemplateEngine::Vue);
234 map.insert(".jsx", TemplateEngine::React);
235 map.insert(".tsx", TemplateEngine::React);
236
237 map
238});
239
/// Lower-case substrings whose mere presence in file content hints at template usage.
///
/// A bare `":"` is deliberately not listed: it occurs in practically every HTML, XML
/// and JavaScript file (URLs, inline CSS, object literals), so it would classify
/// almost any analysed file as a template.
static SINGLE_PATTERNS: &[&str] = &[
    "ng-",
    "v-",
    "data-bind",
    "handlebars",
    "jinja",
    "mustache",
    "twig",
    "liquid",
];
252
/// Lower-case directory names that conventionally hold templates, views or partials.
static TEMPLATE_DIRECTORIES: &[&str] = &[
    "template", "templates",
    "_includes", "_layouts",
    "layout", "layouts",
    "view", "views",
    "component", "components",
    "partial", "partials",
];
268
269pub struct TemplateDetector {
271 parsers: HashMap<String, Parser>,
273 content_cache: HashMap<PathBuf, String>,
275 max_cache_size: usize,
277}
278
279impl std::fmt::Debug for TemplateDetector {
280 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
281 f.debug_struct("TemplateDetector")
282 .field("parsers", &format!("[{} parsers]", self.parsers.len()))
283 .field("content_cache", &format!("[{} cached items]", self.content_cache.len()))
284 .field("max_cache_size", &self.max_cache_size)
285 .finish()
286 }
287}
288
289impl TemplateDetector {
290 pub fn new() -> Result<Self> {
292 let mut parsers = HashMap::new();
293
294 let mut html_parser = Parser::new();
296 html_parser.set_language(tree_sitter_html::language())
297 .map_err(|e| scribe_core::ScribeError::parse(format!("Failed to set HTML language: {}", e)))?;
298 parsers.insert("html".to_string(), html_parser);
299
300 Ok(Self {
301 parsers,
302 content_cache: HashMap::new(),
303 max_cache_size: 100, })
305 }
306
307 pub fn detect_template(&mut self, file_path: &str) -> Result<TemplateDetectionResult> {
309 let path = Path::new(file_path);
310
311 if let Some(result) = self.detect_by_extension(path) {
313 return Ok(result);
314 }
315
316 if let Some(result) = self.detect_by_directory_context(path) {
318 return Ok(result);
319 }
320
321 if self.should_analyze_content(path) {
323 if let Some(result) = self.detect_by_content_patterns(path)? {
324 return Ok(result);
325 }
326 }
327
328 if let Some(result) = self.detect_by_language_heuristics(path) {
330 return Ok(result);
331 }
332
333 Ok(TemplateDetectionResult::not_template())
334 }
335
336 pub fn get_score_boost(&self, file_path: &str) -> Result<f64> {
338 let path = Path::new(file_path);
340
341 if let Some(engine) = self.detect_engine_by_extension(path) {
343 return Ok(engine.score_boost());
344 }
345
346 if self.is_in_template_directory(path) {
348 return Ok(1.2); }
350
351 Ok(0.0)
352 }
353
354 fn detect_by_extension(&self, path: &Path) -> Option<TemplateDetectionResult> {
356 if let Some(engine) = self.detect_engine_by_extension(path) {
357 return Some(TemplateDetectionResult::template(
358 engine,
359 TemplateDetectionMethod::Extension,
360 0.95 ));
362 }
363 None
364 }
365
366 fn detect_engine_by_extension(&self, path: &Path) -> Option<TemplateEngine> {
367 let extension = path.extension()?.to_str()?.to_lowercase();
368 let ext_with_dot = format!(".{}", extension);
369
370 EXTENSION_MAP.get(ext_with_dot.as_str()).cloned()
371 }
372
373 fn detect_by_directory_context(&self, path: &Path) -> Option<TemplateDetectionResult> {
375 if self.is_in_template_directory(path) {
376 if let Some(ext) = path.extension() {
378 let ext_str = ext.to_str()?.to_lowercase();
379 if matches!(ext_str.as_str(), "html" | "htm" | "xml") {
380 return Some(TemplateDetectionResult::template(
381 TemplateEngine::Generic,
382 TemplateDetectionMethod::DirectoryContext,
383 0.7 ));
385 }
386 }
387 }
388 None
389 }
390
391 fn is_in_template_directory(&self, path: &Path) -> bool {
392 let path_str = path.to_string_lossy().to_lowercase();
393 TEMPLATE_DIRECTORIES.iter().any(|dir| path_str.contains(dir))
394 }
395
396 fn should_analyze_content(&self, path: &Path) -> bool {
398 if let Some(ext) = path.extension() {
400 let ext_str = ext.to_str().unwrap_or("").to_lowercase();
401 return matches!(ext_str.as_str(), "html" | "htm" | "xml" | "js" | "ts");
402 }
403 false
404 }
405
406 fn detect_by_content_patterns(&mut self, path: &Path) -> Result<Option<TemplateDetectionResult>> {
408 let content = self.read_file_content(path)?;
409
410 for pattern in TEMPLATE_PATTERNS.iter() {
412 let occurrences = self.count_pattern_occurrences(&content, &pattern.open_tag, &pattern.close_tag);
413
414 if occurrences >= pattern.min_occurrences {
415 return Ok(Some(TemplateDetectionResult::template(
416 pattern.engine.clone(),
417 TemplateDetectionMethod::ContentPattern,
418 0.8 )));
420 }
421 }
422
423 if self.should_use_ast_analysis(path) {
425 if let Some(result) = self.analyze_with_ast(path, &content)? {
426 return Ok(Some(result));
427 }
428 }
429
430 let content_lower = content.to_lowercase();
432 for &pattern in SINGLE_PATTERNS {
433 if content_lower.contains(pattern) {
434 return Ok(Some(TemplateDetectionResult::template(
435 TemplateEngine::Generic,
436 TemplateDetectionMethod::ContentPattern,
437 0.6 )));
439 }
440 }
441
442 Ok(None)
443 }
444
445 fn detect_by_language_heuristics(&self, path: &Path) -> Option<TemplateDetectionResult> {
447 if let Some(ext) = path.extension() {
452 let ext_str = ext.to_str()?.to_lowercase();
453
454 if matches!(ext_str.as_str(), "jsx" | "tsx") {
456 return Some(TemplateDetectionResult::template(
457 TemplateEngine::React,
458 TemplateDetectionMethod::LanguageHeuristic,
459 0.9
460 ));
461 }
462 }
463
464 None
465 }
466
467 fn read_file_content(&mut self, path: &Path) -> Result<String> {
469 if let Some(content) = self.content_cache.get(path) {
471 return Ok(content.clone());
472 }
473
474 let file = fs::File::open(path)?;
476 let reader = BufReader::new(file);
477 let mut content = String::new();
478 let mut bytes_read = 0;
479 const MAX_READ_SIZE: usize = 2048;
480
481 for line in reader.lines() {
482 let line = line?;
483 if bytes_read + line.len() > MAX_READ_SIZE {
484 break;
485 }
486 content.push_str(&line);
487 content.push('\n');
488 bytes_read += line.len() + 1;
489 }
490
491 if self.content_cache.len() < self.max_cache_size {
493 self.content_cache.insert(path.to_path_buf(), content.clone());
494 }
495
496 Ok(content)
497 }
498
499 fn count_pattern_occurrences(&self, content: &str, open_tag: &str, close_tag: &str) -> usize {
501 let open_count = content.matches(open_tag).count();
502 let close_count = content.matches(close_tag).count();
503
504 open_count.min(close_count)
506 }
507
508 fn should_use_ast_analysis(&self, path: &Path) -> bool {
510 if let Some(ext) = path.extension() {
511 let ext_str = ext.to_str().unwrap_or("").to_lowercase();
512 return matches!(ext_str.as_str(), "html" | "htm" | "xml" | "vue" | "svelte");
513 }
514 false
515 }
516
517 fn analyze_with_ast(&mut self, path: &Path, content: &str) -> Result<Option<TemplateDetectionResult>> {
519 if let Some(parser) = self.parsers.get_mut("html") {
520 if let Some(tree) = parser.parse(content, None) {
521 let root_node = tree.root_node();
522
523 if self.has_template_attributes(&root_node) {
525 let engine = self.detect_template_engine_from_ast(&root_node, path);
526 return Ok(Some(TemplateDetectionResult::template(
527 engine,
528 TemplateDetectionMethod::ContentPattern,
529 0.85 )));
531 }
532 }
533 }
534 Ok(None)
535 }
536
537 fn has_template_attributes(&self, node: &Node) -> bool {
539 let template_indicators = [
540 "v-", "ng-", "*ng", ":bind", "@click", "{{{", "{{", "<%", "{%", ];
550
551 self.node_contains_patterns(node, &template_indicators)
552 }
553
554 fn node_contains_patterns(&self, node: &Node, patterns: &[&str]) -> bool {
556 if patterns.iter().any(|&pattern| node.kind().contains(pattern)) {
558 return true;
559 }
560
561 if node.kind() == "text" || node.kind() == "attribute_value" {
563 return true; }
567
568 for i in 0..node.child_count() {
570 if let Some(child) = node.child(i) {
571 if self.node_contains_patterns(&child, patterns) {
572 return true;
573 }
574 }
575 }
576
577 false
578 }
579
580 fn detect_template_engine_from_ast(&self, node: &Node, path: &Path) -> TemplateEngine {
582 if let Some(ext) = path.extension() {
584 let ext_str = ext.to_str().unwrap_or("").to_lowercase();
585 match ext_str.as_str() {
586 "vue" => return TemplateEngine::Vue,
587 "svelte" => return TemplateEngine::Svelte,
588 _ => {}
589 }
590 }
591
592 if self.has_vue_patterns(node) {
594 TemplateEngine::Vue
595 } else if self.has_angular_patterns(node) {
596 TemplateEngine::Angular
597 } else if self.has_react_patterns(node) {
598 TemplateEngine::React
599 } else {
600 TemplateEngine::Generic
601 }
602 }
603
604 fn has_vue_patterns(&self, node: &Node) -> bool {
606 let vue_patterns = ["v-if", "v-for", "v-model", "v-bind", "@click", ":class"];
607 self.node_contains_patterns(node, &vue_patterns)
608 }
609
610 fn has_angular_patterns(&self, node: &Node) -> bool {
612 let angular_patterns = ["*ngFor", "*ngIf", "(click)", "[class]", "[(ngModel)]"];
613 self.node_contains_patterns(node, &angular_patterns)
614 }
615
616 fn has_react_patterns(&self, node: &Node) -> bool {
618 let react_patterns = ["className", "onClick", "onChange"];
619 self.node_contains_patterns(node, &react_patterns)
620 }
621
622 pub fn clear_cache(&mut self) {
624 self.content_cache.clear();
625 }
626}
627
628impl Default for TemplateDetector {
629 fn default() -> Self {
630 Self::new().expect("Failed to create TemplateDetector")
631 }
632}
633
634pub fn is_template_file(file_path: &str) -> Result<bool> {
636 let mut detector = TemplateDetector::new()?;
637 let result = detector.detect_template(file_path)?;
638 Ok(result.is_template)
639}
640
641pub fn get_template_score_boost(file_path: &str) -> Result<f64> {
643 let detector = TemplateDetector::new()?;
644 detector.get_score_boost(file_path)
645}
646
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::{Builder, NamedTempFile};

    /// Writes `content` to a fresh temporary file whose name ends in `.{extension}`.
    ///
    /// The file is created with the right suffix up front. Renaming it after creation
    /// would leave the returned handle pointing at a path that no longer exists and
    /// would leak the renamed file.
    fn create_test_file(content: &str, extension: &str) -> NamedTempFile {
        let suffix = format!(".{}", extension);
        let mut file = Builder::new()
            .prefix("scribe_probe_")
            .suffix(&suffix)
            .tempfile()
            .unwrap();
        file.write_all(content.as_bytes()).unwrap();
        file.flush().unwrap();
        file
    }

    #[test]
    fn test_extension_based_detection() {
        let detector = TemplateDetector::new().unwrap();

        assert_eq!(
            detector.detect_engine_by_extension(Path::new("template.hbs")),
            Some(TemplateEngine::Handlebars)
        );

        assert_eq!(
            detector.detect_engine_by_extension(Path::new("view.j2")),
            Some(TemplateEngine::Jinja2)
        );

        assert_eq!(
            detector.detect_engine_by_extension(Path::new("component.jsx")),
            Some(TemplateEngine::React)
        );

        // A plain script extension is not a template extension.
        assert_eq!(
            detector.detect_engine_by_extension(Path::new("script.js")),
            None
        );
    }

    #[test]
    fn test_directory_context_detection() {
        let detector = TemplateDetector::new().unwrap();

        assert!(detector.is_in_template_directory(Path::new("templates/layout.html")));
        assert!(detector.is_in_template_directory(Path::new("src/components/header.html")));
        assert!(!detector.is_in_template_directory(Path::new("src/utils/helper.js")));
    }

    #[test]
    fn test_pattern_counting() {
        let detector = TemplateDetector::new().unwrap();
        let content = "Hello {{ name }}! Welcome to {{ site }}.";

        assert_eq!(detector.count_pattern_occurrences(content, "{{", "}}"), 2);
        assert_eq!(detector.count_pattern_occurrences(content, "{%", "%}"), 0);
    }

    #[test]
    fn test_content_pattern_detection() {
        let mut detector = TemplateDetector::new().unwrap();

        // A `.js` file is content-analysed but never matched by extension or
        // directory context, so the outcome depends on the file content alone.
        let templated = create_test_file("{% if user %}hello{% endif %}\n", "js");
        let result = detector
            .detect_template(templated.path().to_str().unwrap())
            .unwrap();
        assert!(result.is_template);
        assert_eq!(result.engine, Some(TemplateEngine::Jinja2));
        assert_eq!(result.detection_method, TemplateDetectionMethod::ContentPattern);

        let plain = create_test_file("let total = 1 + 2;\n", "js");
        let result = detector
            .detect_template(plain.path().to_str().unwrap())
            .unwrap();
        assert!(!result.is_template);
    }

    #[test]
    fn test_template_score_boost() {
        let detector = TemplateDetector::new().unwrap();

        // Recognised template extension: boosted.
        assert!(detector.get_score_boost("template.hbs").unwrap() > 1.0);

        // Ordinary source file: no boost.
        assert_eq!(detector.get_score_boost("script.js").unwrap(), 0.0);
    }

    #[test]
    fn test_engine_score_boost() {
        assert!(TemplateEngine::Handlebars.score_boost() > 1.0);
        assert!(TemplateEngine::React.score_boost() > 1.0);
        assert!(TemplateEngine::Generic.score_boost() < 1.0);
    }
}