skill_veil_core/analyzer/
document.rs1use crate::analyzer::assessment::assess_artifact;
2use crate::analyzer::binary_magic::detect_binary_disguise_kind;
3use crate::analyzer::references::extract_references;
4use crate::analyzer::types::{AnalyzerError, CodeBlock, Section, SkillDocument};
5use crate::ports::{FileSystemError, FileSystemProvider, MarkdownParser};
6use std::path::{Path, PathBuf};
7
8impl SkillDocument {
9 pub fn from_file_with_provider<P: MarkdownParser, F: FileSystemProvider>(
10 path: impl AsRef<Path>,
11 parser: &P,
12 fs_provider: &F,
13 ) -> Result<Self, AnalyzerError> {
14 let path = path.as_ref();
15 let bytes = fs_provider
16 .read_file_bytes(path)
17 .map_err(file_system_error_to_io_error)?
18 .as_bytes()
19 .to_vec();
20 let binary_disguise_kind = detect_binary_disguise_kind(path, &bytes).map(str::to_owned);
25 let decode_warning = std::str::from_utf8(&bytes).is_err();
26 let content = String::from_utf8_lossy(&bytes).into_owned();
27 Self::parse_with_parser(path.to_path_buf(), content, parser).map(|mut doc| {
28 doc.decode_warning = decode_warning;
29 doc.binary_disguise_kind = binary_disguise_kind;
30 doc
31 })
32 }
33
34 pub fn parse_with_parser<P: MarkdownParser + ?Sized>(
35 path: PathBuf,
36 content: String,
37 parser: &P,
38 ) -> Result<Self, AnalyzerError> {
39 let name = path
40 .file_stem()
41 .and_then(|s| s.to_str())
42 .unwrap_or("unknown")
43 .to_string();
44
45 let (sections, parse_warning) = match parser.parse_sections(&content) {
46 Ok(sections) => (sections, false),
47 Err(error) => {
48 tracing::warn!(
49 "Failed to parse markdown sections in {}: {error}; continuing with empty sections",
50 path.display()
51 );
52 (Vec::new(), true)
53 }
54 };
55 let referenced_files = extract_references(&content, &path);
56 let assessment = assess_artifact(path.as_path(), &content, §ions, &referenced_files);
57
58 Ok(Self {
59 path,
60 name,
61 extension_kind: assessment.extension_kind,
62 identity_source: assessment.identity_source,
63 structural_validity: assessment.structural_validity,
64 classification: assessment.classification,
65 structural_signals: assessment.structural_signals,
66 decode_warning: false,
67 parse_warning,
68 binary_disguise_kind: None,
69 sections,
70 raw_content: content,
71 referenced_files,
72 })
73 }
74
75 pub fn get_section(&self, name: &str) -> Option<&Section> {
76 let name_lower = name.to_lowercase();
77 self.sections.iter().find(|s| s.name == name_lower)
78 }
79
80 pub fn all_code_blocks(&self) -> Vec<&CodeBlock> {
81 self.sections
82 .iter()
83 .flat_map(|s| s.code_blocks.iter())
84 .collect()
85 }
86
87 pub fn has_code_language(&self, lang: &str) -> bool {
97 self.all_code_blocks()
98 .iter()
99 .any(|cb| cb.language.as_deref() == Some(lang))
100 }
101}
102
103fn file_system_error_to_io_error(error: FileSystemError) -> std::io::Error {
104 match error {
105 FileSystemError::IoError(error) => error,
106 FileSystemError::PathNotFound(path) => {
107 std::io::Error::new(std::io::ErrorKind::NotFound, path.display().to_string())
108 }
109 }
110}
111
#[cfg(test)]
mod tests {
    use crate::adapters::PulldownMarkdownParser;
    use crate::analyzer::types::SkillDocument;
    use std::path::PathBuf;

    /// Parses `content` as though it had been read from `/tmp/skill.md`, using
    /// `PulldownMarkdownParser`, and panics if parsing reports an error.
    fn parse_doc(content: &str) -> SkillDocument {
        let markdown = PulldownMarkdownParser::new();
        let fixture_path = PathBuf::from("/tmp/skill.md");
        let parsed = SkillDocument::parse_with_parser(fixture_path, content.to_string(), &markdown);
        parsed.expect("parse_with_parser must succeed for the inline fixture")
    }

    /// A fence tagged `Python` or `PYTHON` must be found by a lowercase `python` query.
    #[test]
    fn has_code_language_matches_uppercase_fence_when_caller_uses_lowercase() {
        let mixed_case = parse_doc("## Setup\n```Python\nprint('hi')\n```\n");
        let all_caps = parse_doc("## Setup\n```PYTHON\nprint('hi')\n```\n");
        for doc in [&mixed_case, &all_caps] {
            assert!(doc.has_code_language("python"));
        }
    }

    /// A language that appears in no fence must not be reported as present.
    #[test]
    fn has_code_language_returns_false_for_unknown_lang() {
        let python_only = parse_doc("## Setup\n```python\nprint('hi')\n```\n");
        let found_ruby = python_only.has_code_language("ruby");
        assert!(!found_ruby);
    }
}