1use once_cell::sync::Lazy;
13use scribe_core::Result;
14use std::collections::HashMap;
15use std::fs;
16use std::io::{BufRead, BufReader};
17use std::path::{Path, PathBuf};
18use tree_sitter::{Language as TsLanguage, Node, Parser, Tree};
19
/// Template engines that the detector is able to label.
///
/// The variants are grouped by ecosystem purely for readability; `Generic`
/// is used when a file looks like a template but no specific engine applies.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TemplateEngine {
    // JavaScript engines
    Handlebars,
    Mustache,
    Ejs,
    Pug,
    Jade,

    // Python engines
    Django,
    Jinja2,
    Mako,

    // PHP engines
    Twig,
    Smarty,

    // Ruby engines
    Erb,
    Haml,

    // Other standalone engines
    Liquid,
    Dust,
    Eta,

    // Front-end frameworks
    Vue,
    Svelte,
    React,
    Angular,

    // Fallback when no specific engine is identified
    Generic,
}

impl TemplateEngine {
    /// Returns the file extensions (including the leading dot) that are
    /// conventionally associated with this engine.
    ///
    /// `Generic` has no extensions of its own and yields an empty slice.
    pub fn extensions(&self) -> &'static [&'static str] {
        let known: &'static [&'static str] = match self {
            Self::Handlebars => &[".hbs", ".handlebars"],
            Self::Mustache => &[".mustache"],
            Self::Ejs => &[".ejs"],
            Self::Pug => &[".pug"],
            Self::Jade => &[".jade"],
            Self::Django => &[".html", ".htm"],
            Self::Jinja2 => &[".j2", ".jinja", ".jinja2"],
            Self::Mako => &[".mako"],
            Self::Twig => &[".twig"],
            Self::Smarty => &[".tpl"],
            Self::Erb => &[".erb", ".rhtml"],
            Self::Haml => &[".haml"],
            Self::Liquid => &[".liquid"],
            Self::Dust => &[".dust"],
            Self::Eta => &[".eta"],
            Self::Vue => &[".vue"],
            Self::Svelte => &[".svelte"],
            Self::React => &[".jsx", ".tsx"],
            Self::Angular => &[".html"],
            Self::Generic => &[],
        };
        known
    }

    /// Returns the score boost assigned to files written for this engine.
    ///
    /// Every variant is listed explicitly so that adding a new engine forces
    /// a decision about its tier instead of silently falling into a default.
    pub fn score_boost(&self) -> f64 {
        match self {
            // Highest tier.
            Self::Handlebars | Self::Mustache | Self::Jinja2 | Self::Twig | Self::Liquid => 1.5,

            // Component frameworks.
            Self::Vue | Self::Svelte | Self::React => 1.3,

            // Middle tier.
            Self::Ejs | Self::Pug | Self::Erb | Self::Haml => 1.2,

            // Neutral tier, including the engines that have no dedicated tier.
            Self::Django
            | Self::Angular
            | Self::Jade
            | Self::Mako
            | Self::Smarty
            | Self::Dust
            | Self::Eta => 1.0,

            // Unidentified templates score below neutral.
            Self::Generic => 0.8,
        }
    }
}
114
/// Records which detection strategy produced a [`TemplateDetectionResult`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TemplateDetectionMethod {
    /// The file extension is registered for a known engine.
    Extension,
    /// Template syntax was found in the file's content.
    ContentPattern,
    /// The file lives under a directory that conventionally holds templates.
    DirectoryContext,
    /// The file's language implies templating (used for JSX/TSX files).
    LanguageHeuristic,
}
127
128#[derive(Debug, Clone)]
130pub struct TemplateDetectionResult {
131 pub is_template: bool,
133 pub engine: Option<TemplateEngine>,
135 pub detection_method: TemplateDetectionMethod,
137 pub confidence: f64,
139 pub score_boost: f64,
141}
142
143impl TemplateDetectionResult {
144 pub fn not_template() -> Self {
145 Self {
146 is_template: false,
147 engine: None,
148 detection_method: TemplateDetectionMethod::Extension,
149 confidence: 0.0,
150 score_boost: 0.0,
151 }
152 }
153
154 pub fn template(
155 engine: TemplateEngine,
156 method: TemplateDetectionMethod,
157 confidence: f64,
158 ) -> Self {
159 let score_boost = engine.score_boost();
160 Self {
161 is_template: true,
162 engine: Some(engine),
163 detection_method: method,
164 confidence,
165 score_boost,
166 }
167 }
168}
169
170#[derive(Debug, Clone)]
172pub struct TemplatePattern {
173 pub open_tag: String,
174 pub close_tag: String,
175 pub engine: TemplateEngine,
176 pub min_occurrences: usize,
177}
178
179impl TemplatePattern {
180 pub fn new(open: &str, close: &str, engine: TemplateEngine, min_occurrences: usize) -> Self {
181 Self {
182 open_tag: open.to_string(),
183 close_tag: close.to_string(),
184 engine,
185 min_occurrences,
186 }
187 }
188}
189
190static TEMPLATE_PATTERNS: Lazy<Vec<TemplatePattern>> = Lazy::new(|| {
192 vec![
193 TemplatePattern::new("{{", "}}", TemplateEngine::Handlebars, 2),
195 TemplatePattern::new("{{{", "}}}", TemplateEngine::Handlebars, 1),
196 TemplatePattern::new("{%", "%}", TemplateEngine::Jinja2, 2),
198 TemplatePattern::new("{{", "}}", TemplateEngine::Jinja2, 1), TemplatePattern::new("<%", "%>", TemplateEngine::Ejs, 2),
201 TemplatePattern::new("<%=", "%>", TemplateEngine::Ejs, 1),
202 TemplatePattern::new("<%#", "%>", TemplateEngine::Ejs, 1),
203 TemplatePattern::new("<#", "#>", TemplateEngine::Generic, 2),
205 TemplatePattern::new("${", "}", TemplateEngine::Generic, 3),
207 TemplatePattern::new("@{", "}", TemplateEngine::Generic, 2),
208 TemplatePattern::new("[[", "]]", TemplateEngine::Generic, 2),
209 ]
210});
211
212static EXTENSION_MAP: Lazy<HashMap<&'static str, TemplateEngine>> = Lazy::new(|| {
214 let mut map = HashMap::new();
215
216 map.insert(".njk", TemplateEngine::Jinja2);
218 map.insert(".nunjucks", TemplateEngine::Jinja2);
219 map.insert(".hbs", TemplateEngine::Handlebars);
220 map.insert(".handlebars", TemplateEngine::Handlebars);
221 map.insert(".j2", TemplateEngine::Jinja2);
222 map.insert(".jinja", TemplateEngine::Jinja2);
223 map.insert(".jinja2", TemplateEngine::Jinja2);
224 map.insert(".twig", TemplateEngine::Twig);
225 map.insert(".liquid", TemplateEngine::Liquid);
226 map.insert(".mustache", TemplateEngine::Mustache);
227 map.insert(".ejs", TemplateEngine::Ejs);
228 map.insert(".erb", TemplateEngine::Erb);
229 map.insert(".rhtml", TemplateEngine::Erb);
230 map.insert(".haml", TemplateEngine::Haml);
231 map.insert(".pug", TemplateEngine::Pug);
232 map.insert(".jade", TemplateEngine::Jade);
233 map.insert(".dust", TemplateEngine::Dust);
234 map.insert(".eta", TemplateEngine::Eta);
235 map.insert(".svelte", TemplateEngine::Svelte);
236 map.insert(".vue", TemplateEngine::Vue);
237 map.insert(".jsx", TemplateEngine::React);
238 map.insert(".tsx", TemplateEngine::React);
239
240 map
241});
242
/// Lower-case keywords whose mere presence in (lower-cased) file content is
/// taken as a weak hint that the file is a template.
///
/// A bare `":"` used to be part of this list. It matches virtually every
/// HTML, JavaScript or TypeScript file (URLs, object literals, CSS), which
/// turned the hint into "always a template", so it is intentionally absent.
static SINGLE_PATTERNS: &[&str] = &[
    "ng-",
    "v-",
    "data-bind",
    "handlebars",
    "jinja",
    "mustache",
    "twig",
    "liquid",
];
255
/// Lower-case directory names that conventionally hold template files.
///
/// Singular and plural spellings are both listed.
static TEMPLATE_DIRECTORIES: &[&str] = &[
    "template", "templates",
    "_includes", "_layouts",
    "layout", "layouts",
    "view", "views",
    "component", "components",
    "partial", "partials",
];
271
272pub struct TemplateDetector {
274 parsers: HashMap<String, Parser>,
276 content_cache: HashMap<PathBuf, String>,
278 max_cache_size: usize,
280}
281
282impl std::fmt::Debug for TemplateDetector {
283 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
284 f.debug_struct("TemplateDetector")
285 .field("parsers", &format!("[{} parsers]", self.parsers.len()))
286 .field(
287 "content_cache",
288 &format!("[{} cached items]", self.content_cache.len()),
289 )
290 .field("max_cache_size", &self.max_cache_size)
291 .finish()
292 }
293}
294
295impl TemplateDetector {
296 pub fn new() -> Result<Self> {
298 let mut parsers = HashMap::new();
299
300 let mut html_parser = Parser::new();
302 html_parser
303 .set_language(tree_sitter_html::language())
304 .map_err(|e| {
305 scribe_core::ScribeError::parse(format!("Failed to set HTML language: {}", e))
306 })?;
307 parsers.insert("html".to_string(), html_parser);
308
309 Ok(Self {
310 parsers,
311 content_cache: HashMap::new(),
312 max_cache_size: 100, })
314 }
315
316 pub fn detect_template(&mut self, file_path: &str) -> Result<TemplateDetectionResult> {
318 let path = Path::new(file_path);
319
320 if let Some(result) = self.detect_by_extension(path) {
322 return Ok(result);
323 }
324
325 if let Some(result) = self.detect_by_directory_context(path) {
327 return Ok(result);
328 }
329
330 if self.should_analyze_content(path) {
332 if let Some(result) = self.detect_by_content_patterns(path)? {
333 return Ok(result);
334 }
335 }
336
337 if let Some(result) = self.detect_by_language_heuristics(path) {
339 return Ok(result);
340 }
341
342 Ok(TemplateDetectionResult::not_template())
343 }
344
345 pub fn get_score_boost(&self, file_path: &str) -> Result<f64> {
347 let path = Path::new(file_path);
349
350 if let Some(engine) = self.detect_engine_by_extension(path) {
352 return Ok(engine.score_boost());
353 }
354
355 if self.is_in_template_directory(path) {
357 return Ok(1.2); }
359
360 Ok(0.0)
361 }
362
363 fn detect_by_extension(&self, path: &Path) -> Option<TemplateDetectionResult> {
365 if let Some(engine) = self.detect_engine_by_extension(path) {
366 return Some(TemplateDetectionResult::template(
367 engine,
368 TemplateDetectionMethod::Extension,
369 0.95, ));
371 }
372 None
373 }
374
375 fn detect_engine_by_extension(&self, path: &Path) -> Option<TemplateEngine> {
376 let extension = path.extension()?.to_str()?.to_lowercase();
377 let ext_with_dot = format!(".{}", extension);
378
379 EXTENSION_MAP.get(ext_with_dot.as_str()).cloned()
380 }
381
382 fn detect_by_directory_context(&self, path: &Path) -> Option<TemplateDetectionResult> {
384 if self.is_in_template_directory(path) {
385 if let Some(ext) = path.extension() {
387 let ext_str = ext.to_str()?.to_lowercase();
388 if matches!(ext_str.as_str(), "html" | "htm" | "xml") {
389 return Some(TemplateDetectionResult::template(
390 TemplateEngine::Generic,
391 TemplateDetectionMethod::DirectoryContext,
392 0.7, ));
394 }
395 }
396 }
397 None
398 }
399
400 fn is_in_template_directory(&self, path: &Path) -> bool {
401 let path_str = path.to_string_lossy().to_lowercase();
402 TEMPLATE_DIRECTORIES
403 .iter()
404 .any(|dir| path_str.contains(dir))
405 }
406
407 fn should_analyze_content(&self, path: &Path) -> bool {
409 if let Some(ext) = path.extension() {
411 let ext_str = ext.to_str().unwrap_or("").to_lowercase();
412 return matches!(ext_str.as_str(), "html" | "htm" | "xml" | "js" | "ts");
413 }
414 false
415 }
416
417 fn detect_by_content_patterns(
419 &mut self,
420 path: &Path,
421 ) -> Result<Option<TemplateDetectionResult>> {
422 let content = self.read_file_content(path)?;
423
424 for pattern in TEMPLATE_PATTERNS.iter() {
426 let occurrences =
427 self.count_pattern_occurrences(&content, &pattern.open_tag, &pattern.close_tag);
428
429 if occurrences >= pattern.min_occurrences {
430 return Ok(Some(TemplateDetectionResult::template(
431 pattern.engine.clone(),
432 TemplateDetectionMethod::ContentPattern,
433 0.8, )));
435 }
436 }
437
438 if self.should_use_ast_analysis(path) {
440 if let Some(result) = self.analyze_with_ast(path, &content)? {
441 return Ok(Some(result));
442 }
443 }
444
445 let content_lower = content.to_lowercase();
447 for &pattern in SINGLE_PATTERNS {
448 if content_lower.contains(pattern) {
449 return Ok(Some(TemplateDetectionResult::template(
450 TemplateEngine::Generic,
451 TemplateDetectionMethod::ContentPattern,
452 0.6, )));
454 }
455 }
456
457 Ok(None)
458 }
459
460 fn detect_by_language_heuristics(&self, path: &Path) -> Option<TemplateDetectionResult> {
462 if let Some(ext) = path.extension() {
467 let ext_str = ext.to_str()?.to_lowercase();
468
469 if matches!(ext_str.as_str(), "jsx" | "tsx") {
471 return Some(TemplateDetectionResult::template(
472 TemplateEngine::React,
473 TemplateDetectionMethod::LanguageHeuristic,
474 0.9,
475 ));
476 }
477 }
478
479 None
480 }
481
482 fn read_file_content(&mut self, path: &Path) -> Result<String> {
484 if let Some(content) = self.content_cache.get(path) {
486 return Ok(content.clone());
487 }
488
489 let file = fs::File::open(path)?;
491 let reader = BufReader::new(file);
492 let mut content = String::new();
493 let mut bytes_read = 0;
494 const MAX_READ_SIZE: usize = 2048;
495
496 for line in reader.lines() {
497 let line = line?;
498 if bytes_read + line.len() > MAX_READ_SIZE {
499 break;
500 }
501 content.push_str(&line);
502 content.push('\n');
503 bytes_read += line.len() + 1;
504 }
505
506 if self.content_cache.len() < self.max_cache_size {
508 self.content_cache
509 .insert(path.to_path_buf(), content.clone());
510 }
511
512 Ok(content)
513 }
514
515 fn count_pattern_occurrences(&self, content: &str, open_tag: &str, close_tag: &str) -> usize {
517 let open_count = content.matches(open_tag).count();
518 let close_count = content.matches(close_tag).count();
519
520 open_count.min(close_count)
522 }
523
524 fn should_use_ast_analysis(&self, path: &Path) -> bool {
526 if let Some(ext) = path.extension() {
527 let ext_str = ext.to_str().unwrap_or("").to_lowercase();
528 return matches!(ext_str.as_str(), "html" | "htm" | "xml" | "vue" | "svelte");
529 }
530 false
531 }
532
533 fn analyze_with_ast(
535 &mut self,
536 path: &Path,
537 content: &str,
538 ) -> Result<Option<TemplateDetectionResult>> {
539 if let Some(parser) = self.parsers.get_mut("html") {
540 if let Some(tree) = parser.parse(content, None) {
541 let root_node = tree.root_node();
542
543 if self.has_template_attributes(&root_node) {
545 let engine = self.detect_template_engine_from_ast(&root_node, path);
546 return Ok(Some(TemplateDetectionResult::template(
547 engine,
548 TemplateDetectionMethod::ContentPattern,
549 0.85, )));
551 }
552 }
553 }
554 Ok(None)
555 }
556
557 fn has_template_attributes(&self, node: &Node) -> bool {
559 let template_indicators = [
560 "v-", "ng-", "*ng", ":bind", "@click", "{{{", "{{", "<%", "{%", ];
570
571 self.node_contains_patterns(node, &template_indicators)
572 }
573
574 fn node_contains_patterns(&self, node: &Node, patterns: &[&str]) -> bool {
576 if patterns
578 .iter()
579 .any(|&pattern| node.kind().contains(pattern))
580 {
581 return true;
582 }
583
584 if node.kind() == "text" || node.kind() == "attribute_value" {
586 return true; }
590
591 for i in 0..node.child_count() {
593 if let Some(child) = node.child(i) {
594 if self.node_contains_patterns(&child, patterns) {
595 return true;
596 }
597 }
598 }
599
600 false
601 }
602
603 fn detect_template_engine_from_ast(&self, node: &Node, path: &Path) -> TemplateEngine {
605 if let Some(ext) = path.extension() {
607 let ext_str = ext.to_str().unwrap_or("").to_lowercase();
608 match ext_str.as_str() {
609 "vue" => return TemplateEngine::Vue,
610 "svelte" => return TemplateEngine::Svelte,
611 _ => {}
612 }
613 }
614
615 if self.has_vue_patterns(node) {
617 TemplateEngine::Vue
618 } else if self.has_angular_patterns(node) {
619 TemplateEngine::Angular
620 } else if self.has_react_patterns(node) {
621 TemplateEngine::React
622 } else {
623 TemplateEngine::Generic
624 }
625 }
626
627 fn has_vue_patterns(&self, node: &Node) -> bool {
629 let vue_patterns = ["v-if", "v-for", "v-model", "v-bind", "@click", ":class"];
630 self.node_contains_patterns(node, &vue_patterns)
631 }
632
633 fn has_angular_patterns(&self, node: &Node) -> bool {
635 let angular_patterns = ["*ngFor", "*ngIf", "(click)", "[class]", "[(ngModel)]"];
636 self.node_contains_patterns(node, &angular_patterns)
637 }
638
639 fn has_react_patterns(&self, node: &Node) -> bool {
641 let react_patterns = ["className", "onClick", "onChange"];
642 self.node_contains_patterns(node, &react_patterns)
643 }
644
645 pub fn clear_cache(&mut self) {
647 self.content_cache.clear();
648 }
649}
650
651impl Default for TemplateDetector {
652 fn default() -> Self {
653 Self::new().expect("Failed to create TemplateDetector")
654 }
655}
656
657pub fn is_template_file(file_path: &str) -> Result<bool> {
659 let mut detector = TemplateDetector::new()?;
660 let result = detector.detect_template(file_path)?;
661 Ok(result.is_template)
662}
663
664pub fn get_template_score_boost(file_path: &str) -> Result<f64> {
666 let detector = TemplateDetector::new()?;
667 detector.get_score_boost(file_path)
668}
669
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::{Builder, NamedTempFile};

    /// Creates a temporary file whose name ends in `.{extension}` and which
    /// holds `content`.
    ///
    /// The file is created with the right suffix up front. Renaming it after
    /// creation would leave the returned handle pointing at a path that no
    /// longer exists and would leak the renamed file. The file is removed
    /// when the returned handle is dropped.
    fn create_test_file(content: &str, extension: &str) -> NamedTempFile {
        let suffix = format!(".{}", extension);
        let mut file = Builder::new().suffix(&suffix).tempfile().unwrap();
        file.write_all(content.as_bytes()).unwrap();
        file
    }

    #[test]
    fn test_extension_based_detection() {
        let detector = TemplateDetector::new().unwrap();

        // Known template extensions map to their engine.
        assert_eq!(
            detector.detect_engine_by_extension(Path::new("template.hbs")),
            Some(TemplateEngine::Handlebars)
        );

        assert_eq!(
            detector.detect_engine_by_extension(Path::new("view.j2")),
            Some(TemplateEngine::Jinja2)
        );

        assert_eq!(
            detector.detect_engine_by_extension(Path::new("component.jsx")),
            Some(TemplateEngine::React)
        );

        // Plain source files are not templates by extension.
        assert_eq!(
            detector.detect_engine_by_extension(Path::new("script.js")),
            None
        );
    }

    #[test]
    fn test_directory_context_detection() {
        let detector = TemplateDetector::new().unwrap();

        assert!(detector.is_in_template_directory(Path::new("templates/layout.html")));
        assert!(detector.is_in_template_directory(Path::new("src/components/header.html")));
        assert!(!detector.is_in_template_directory(Path::new("src/utils/helper.js")));
    }

    #[test]
    fn test_pattern_counting() {
        let detector = TemplateDetector::new().unwrap();
        let content = "Hello {{ name }}! Welcome to {{ site }}.";

        assert_eq!(detector.count_pattern_occurrences(content, "{{", "}}"), 2);
        assert_eq!(detector.count_pattern_occurrences(content, "{%", "%}"), 0);
    }

    #[test]
    fn test_template_score_boost() {
        let detector = TemplateDetector::new().unwrap();

        // Template extensions are boosted.
        assert!(detector.get_score_boost("template.hbs").unwrap() > 1.0);

        // Ordinary files get no boost.
        assert_eq!(detector.get_score_boost("script.js").unwrap(), 0.0);
    }

    #[test]
    fn test_engine_score_boost() {
        assert!(TemplateEngine::Handlebars.score_boost() > 1.0);
        assert!(TemplateEngine::React.score_boost() > 1.0);
        assert!(TemplateEngine::Generic.score_boost() < 1.0);
    }

    #[test]
    fn test_content_detection_on_real_file() {
        // Two interpolation pairs in an `.html` file must be recognised as a
        // template once the file is actually read from disk.
        let file = create_test_file("<p>{{ name }} and {{ site }}</p>", "html");
        let mut detector = TemplateDetector::new().unwrap();

        let result = detector
            .detect_template(file.path().to_str().unwrap())
            .unwrap();

        assert!(result.is_template);
    }
}