use crate::analyzer::assessment::assess_artifact;
use crate::analyzer::binary_magic::detect_binary_disguise_kind;
use crate::analyzer::references::extract_references;
use crate::analyzer::types::{AnalyzerError, CodeBlock, Section, SkillDocument};
use crate::ports::{FileSystemError, FileSystemProvider, MarkdownParser};
use std::path::{Path, PathBuf};
impl SkillDocument {
    /// Reads the file at `path` through `fs_provider` and parses it into a [`SkillDocument`].
    ///
    /// The raw bytes are first inspected with `detect_binary_disguise_kind`, then decoded as
    /// UTF-8. Invalid sequences are replaced lossily and flagged via `decode_warning` instead of
    /// failing the load. Parsing itself is delegated to [`SkillDocument::parse_with_parser`].
    ///
    /// # Errors
    ///
    /// Returns an [`AnalyzerError`] when the provider cannot read the file (the provider error is
    /// first converted to a `std::io::Error`), or when `parse_with_parser` fails.
    pub fn from_file_with_provider<P: MarkdownParser, F: FileSystemProvider>(
        path: impl AsRef<Path>,
        parser: &P,
        fs_provider: &F,
    ) -> Result<Self, AnalyzerError> {
        let path = path.as_ref();
        let bytes = fs_provider
            .read_file_bytes(path)
            .map_err(file_system_error_to_io_error)?
            .as_bytes()
            .to_vec();
        let binary_disguise_kind = detect_binary_disguise_kind(path, &bytes).map(str::to_owned);

        // Validate once: valid UTF-8 reuses the buffer as-is; only invalid input pays for the
        // lossy re-decode. The resulting text and flag match a separate validate + lossy pass.
        let (content, decode_warning) = match String::from_utf8(bytes) {
            Ok(text) => (text, false),
            Err(invalid) => (
                String::from_utf8_lossy(invalid.as_bytes()).into_owned(),
                true,
            ),
        };

        Self::parse_with_parser(path.to_path_buf(), content, parser).map(|mut doc| {
            doc.decode_warning = decode_warning;
            doc.binary_disguise_kind = binary_disguise_kind;
            doc
        })
    }

    /// Builds a [`SkillDocument`] from already-decoded `content` associated with `path`.
    ///
    /// The document name is the file stem of `path`, or `"unknown"` when the path has no stem or
    /// the stem is not valid UTF-8. If `parser` fails to split the content into sections, the
    /// failure is logged as a warning, the document is built with no sections, and
    /// `parse_warning` is set. `decode_warning` and `binary_disguise_kind` are initialised to
    /// their "nothing detected" values (`false` / `None`).
    ///
    /// # Errors
    ///
    /// The body shown here always returns `Ok`; the `Result` is part of the signature only.
    pub fn parse_with_parser<P: MarkdownParser + ?Sized>(
        path: PathBuf,
        content: String,
        parser: &P,
    ) -> Result<Self, AnalyzerError> {
        let name = path
            .file_stem()
            .and_then(|s| s.to_str())
            .unwrap_or("unknown")
            .to_string();
        let (sections, parse_warning) = match parser.parse_sections(&content) {
            Ok(sections) => (sections, false),
            Err(error) => {
                // Deliberately best-effort: a section parse failure degrades the document
                // instead of aborting the analysis.
                tracing::warn!(
                    "Failed to parse markdown sections in {}: {error}; continuing with empty sections",
                    path.display()
                );
                (Vec::new(), true)
            }
        };
        let referenced_files = extract_references(&content, &path);
        let assessment = assess_artifact(path.as_path(), &content, &sections, &referenced_files);
        Ok(Self {
            path,
            name,
            extension_kind: assessment.extension_kind,
            identity_source: assessment.identity_source,
            structural_validity: assessment.structural_validity,
            classification: assessment.classification,
            structural_signals: assessment.structural_signals,
            decode_warning: false,
            parse_warning,
            binary_disguise_kind: None,
            sections,
            raw_content: content,
            referenced_files,
        })
    }

    /// Returns the first section whose name equals the lowercased `name`, if any.
    ///
    /// Only the requested name is lowercased before comparison.
    /// NOTE(review): this presumes the parser stores section names in lowercase — confirm.
    pub fn get_section(&self, name: &str) -> Option<&Section> {
        let name_lower = name.to_lowercase();
        self.sections.iter().find(|s| s.name == name_lower)
    }

    /// Collects references to every code block of every section, in section order.
    pub fn all_code_blocks(&self) -> Vec<&CodeBlock> {
        self.sections
            .iter()
            .flat_map(|s| s.code_blocks.iter())
            .collect()
    }

    /// Reports whether any code block declares `lang` as its language.
    ///
    /// The comparison ignores ASCII case, so a fence tagged `Python` or `PYTHON` matches
    /// `"python"`. Code blocks without a declared language never match.
    pub fn has_code_language(&self, lang: &str) -> bool {
        // Iterate lazily instead of materialising `all_code_blocks()` just to scan it once.
        self.sections
            .iter()
            .flat_map(|s| s.code_blocks.iter())
            .any(|cb| {
                matches!(
                    cb.language.as_deref(),
                    Some(declared) if declared.eq_ignore_ascii_case(lang)
                )
            })
    }
}
/// Converts a [`FileSystemError`] into a [`std::io::Error`].
///
/// An `IoError` variant yields its wrapped error unchanged. A `PathNotFound` variant becomes an
/// error of kind `NotFound` whose message is the displayed form of the missing path.
fn file_system_error_to_io_error(error: FileSystemError) -> std::io::Error {
    use std::io::{Error, ErrorKind};

    match error {
        FileSystemError::PathNotFound(missing) => {
            let message = missing.display().to_string();
            Error::new(ErrorKind::NotFound, message)
        }
        FileSystemError::IoError(inner) => inner,
    }
}
#[cfg(test)]
mod tests {
    use crate::adapters::PulldownMarkdownParser;
    use crate::analyzer::types::SkillDocument;
    use std::path::PathBuf;

    /// Parses inline markdown `content` as if it lived at `/tmp/skill.md`.
    fn parse_doc(content: &str) -> SkillDocument {
        let markdown = PulldownMarkdownParser::new();
        let fixture_path = PathBuf::from("/tmp/skill.md");
        let parsed =
            SkillDocument::parse_with_parser(fixture_path, content.to_string(), &markdown);
        parsed.expect("parse_with_parser must succeed for the inline fixture")
    }

    /// A fence tagged `Python` or `PYTHON` must be found by a lowercase query.
    #[test]
    fn has_code_language_matches_uppercase_fence_when_caller_uses_lowercase() {
        let title_case = parse_doc("## Setup\n```Python\nprint('hi')\n```\n");
        assert!(title_case.has_code_language("python"));

        let all_caps = parse_doc("## Setup\n```PYTHON\nprint('hi')\n```\n");
        assert!(all_caps.has_code_language("python"));
    }

    /// A language that no fence declares must not be reported.
    #[test]
    fn has_code_language_returns_false_for_unknown_lang() {
        let python_only = parse_doc("## Setup\n```python\nprint('hi')\n```\n");
        let found_ruby = python_only.has_code_language("ruby");
        assert!(!found_ruby);
    }
}