use std::collections::HashMap;
use std::collections::HashSet;
use std::path::Path;
use std::path::PathBuf;
use globset::Glob;
use globset::GlobSet;
use globset::GlobSetBuilder;
use ignore::gitignore::Gitignore;
use ignore::gitignore::GitignoreBuilder;
use crate::Block;
use crate::BlockType;
use crate::MdtError;
use crate::MdtResult;
use crate::config::CodeBlockFilter;
use crate::config::DEFAULT_MAX_FILE_SIZE;
use crate::config::MdtConfig;
use crate::config::PaddingConfig;
use crate::engine::validate_transformers;
use crate::parser::parse_with_diagnostics;
use crate::source_scanner::parse_source_with_diagnostics;
/// File-system scanning configuration, resolved from an `MdtConfig` (or
/// defaults when no config file is present).
#[derive(Debug, Clone)]
pub struct ScanOptions {
    /// Gitignore-style patterns for paths to skip (fed to a `GitignoreBuilder`).
    pub exclude_patterns: Vec<String>,
    /// Globs for files to force-include even when the normal walk would not
    /// pick them up (e.g. extensions outside the scannable list).
    pub include_set: GlobSet,
    /// Extra template directories (relative to the project root) to scan in
    /// addition to the normal walk.
    pub template_paths: Vec<PathBuf>,
    /// Hard cap on individual file size in bytes; a larger file aborts the
    /// scan with `MdtError::FileTooLarge`.
    pub max_file_size: u64,
    /// When true, `.gitignore` rules are not consulted during the walk.
    pub disable_gitignore: bool,
    /// Filter applied when parsing non-markdown source files (passed to
    /// `parse_source_with_diagnostics`).
    pub markdown_codeblocks: CodeBlockFilter,
    /// Block names that are skipped entirely during scanning.
    pub excluded_blocks: Vec<String>,
}
impl Default for ScanOptions {
    /// Scan everything: no excludes, no forced includes, no extra template
    /// directories, default size cap, gitignore honored.
    fn default() -> Self {
        Self {
            include_set: GlobSet::empty(),
            exclude_patterns: Vec::new(),
            template_paths: Vec::new(),
            excluded_blocks: Vec::new(),
            max_file_size: DEFAULT_MAX_FILE_SIZE,
            disable_gitignore: false,
            markdown_codeblocks: CodeBlockFilter::default(),
        }
    }
}
impl ScanOptions {
    /// Build scan options from an optional configuration; every setting
    /// falls back to its default when `config` is `None`.
    pub fn from_config(config: Option<&MdtConfig>) -> Self {
        // Compile the include globs up front; invalid patterns are dropped
        // by `build_glob_set`.
        let include_patterns = config.map(|c| &c.include.patterns[..]).unwrap_or_default();
        Self {
            exclude_patterns: config
                .map(|c| c.exclude.patterns.clone())
                .unwrap_or_default(),
            include_set: build_glob_set(include_patterns),
            template_paths: config
                .map(|c| c.templates.paths.clone())
                .unwrap_or_default(),
            max_file_size: config.map_or(DEFAULT_MAX_FILE_SIZE, |c| c.max_file_size),
            disable_gitignore: config.is_some_and(|c| c.disable_gitignore),
            markdown_codeblocks: config
                .map(|c| c.exclude.markdown_codeblocks.clone())
                .unwrap_or_default(),
            excluded_blocks: config.map(|c| c.exclude.blocks.clone()).unwrap_or_default(),
        }
    }
}
/// Flags that downgrade specific diagnostic kinds from errors to ignorable
/// (consulted by [`ProjectDiagnostic::is_error`]).
#[derive(Debug, Clone, Default)]
#[allow(clippy::struct_excessive_bools)]
pub struct ValidationOptions {
    /// Ignore `UnclosedBlock` diagnostics.
    pub ignore_unclosed_blocks: bool,
    /// Ignore `UnusedProvider` diagnostics.
    pub ignore_unused_blocks: bool,
    /// NOTE(review): not consulted by `is_error` in this file — presumably
    /// used elsewhere; confirm before removing.
    pub ignore_invalid_names: bool,
    /// Ignore transformer diagnostics (unknown name / wrong argument count).
    pub ignore_invalid_transformers: bool,
}
/// The specific problem a [`ProjectDiagnostic`] reports.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum DiagnosticKind {
    /// A block opening tag with no matching closing tag.
    UnclosedBlock { name: String },
    /// A transformer name the parser does not recognize.
    UnknownTransformer { name: String },
    /// A known transformer invoked with the wrong number of arguments.
    InvalidTransformerArgs {
        /// Transformer name.
        name: String,
        /// Human-readable description of the expected argument count.
        expected: String,
        /// Number of arguments actually supplied.
        got: usize,
    },
    /// A provider block that no consumer references.
    UnusedProvider { name: String },
}
/// A problem found during scanning, anchored to a position in a file.
#[derive(Debug, Clone)]
pub struct ProjectDiagnostic {
    /// File in which the problem was found.
    pub file: PathBuf,
    /// What kind of problem it is.
    pub kind: DiagnosticKind,
    /// Line of the offending tag (follows the parser's numbering convention).
    pub line: usize,
    /// Column of the offending tag (same convention as `line`).
    pub column: usize,
}
impl ProjectDiagnostic {
    /// Whether this diagnostic counts as a hard error under `options`
    /// (each diagnostic kind can be individually suppressed).
    pub fn is_error(&self, options: &ValidationOptions) -> bool {
        let ignored = match &self.kind {
            DiagnosticKind::UnclosedBlock { .. } => options.ignore_unclosed_blocks,
            DiagnosticKind::UnknownTransformer { .. }
            | DiagnosticKind::InvalidTransformerArgs { .. } => {
                options.ignore_invalid_transformers
            }
            DiagnosticKind::UnusedProvider { .. } => options.ignore_unused_blocks,
        };
        !ignored
    }

    /// Human-readable, single-line description of the diagnostic.
    pub fn message(&self) -> String {
        match &self.kind {
            DiagnosticKind::UnclosedBlock { name } => {
                format!("missing closing tag for block `{name}`")
            }
            DiagnosticKind::UnknownTransformer { name } => {
                format!("unknown transformer `{name}`")
            }
            DiagnosticKind::InvalidTransformerArgs { name, expected, got } => {
                format!("transformer `{name}` expects {expected} argument(s), got {got}")
            }
            DiagnosticKind::UnusedProvider { name } => {
                format!("provider block `{name}` has no consumers")
            }
        }
    }
}
/// The scanned index of a project: providers, consumers, and the
/// diagnostics produced while scanning.
#[derive(Debug)]
pub struct Project {
    /// Provider blocks keyed by block name (names are unique across templates).
    pub providers: HashMap<String, ProviderEntry>,
    /// All consumer blocks, in scan order.
    pub consumers: Vec<ConsumerEntry>,
    /// Non-fatal problems found during scanning.
    pub diagnostics: Vec<ProjectDiagnostic>,
}
/// A [`Project`] bundled with the configuration-derived context that goes
/// with it (data values and padding settings).
#[derive(Debug)]
pub struct ProjectContext {
    /// The scanned project index.
    pub project: Project,
    /// Data values loaded via the config (`MdtConfig::load_data`).
    pub data: HashMap<String, serde_json::Value>,
    /// Optional padding settings from the config, if any.
    pub padding: Option<PaddingConfig>,
}
impl ProjectContext {
    /// Names referenced by consumers that have no matching provider.
    /// Delegates to the free function [`find_missing_providers`].
    pub fn find_missing_providers(&self) -> Vec<String> {
        find_missing_providers(&self.project)
    }
}
/// A provider block definition found in a template (`*.t.md`) file.
#[derive(Debug, Clone)]
pub struct ProviderEntry {
    /// The parsed block (name, transformers, tag positions).
    pub block: Block,
    /// File the block was found in.
    pub file: PathBuf,
    /// Raw text between the block's opening and closing tags.
    pub content: String,
}
/// A consumer block found in any scanned file.
#[derive(Debug, Clone)]
pub struct ConsumerEntry {
    /// The parsed block (name, transformers, tag positions).
    pub block: Block,
    /// File the block was found in.
    pub file: PathBuf,
    /// Raw text currently between the block's opening and closing tags.
    pub content: String,
}
/// Scan the project at `root` with default [`ScanOptions`] (no config).
///
/// # Errors
/// Propagates any error from [`scan_project_with_options`].
pub fn scan_project(root: &Path) -> MdtResult<Project> {
    scan_project_with_options(root, &ScanOptions::default())
}
/// Scan the project at `root`, loading `mdt.toml` (if present) to derive
/// scan options, data values, and padding settings.
///
/// # Errors
/// Propagates config-loading, scanning, and data-loading failures.
pub fn scan_project_with_config(root: &Path) -> MdtResult<ProjectContext> {
    let config = MdtConfig::load(root)?;
    let options = ScanOptions::from_config(config.as_ref());
    let project = scan_project_with_options(root, &options)?;
    let padding = config.as_ref().and_then(|c| c.padding.clone());
    // Without a config there is nothing to load; use an empty data map.
    let data = if let Some(cfg) = config {
        cfg.load_data(root)?
    } else {
        HashMap::new()
    };
    Ok(ProjectContext {
        project,
        data,
        padding,
    })
}
/// Compile glob patterns into a `GlobSet`. Patterns that fail to parse are
/// silently skipped, and an unbuildable set degrades to the empty set.
fn build_glob_set(patterns: &[String]) -> GlobSet {
    let mut builder = GlobSetBuilder::new();
    patterns
        .iter()
        .filter_map(|pattern| Glob::new(pattern).ok())
        .for_each(|glob| {
            builder.add(glob);
        });
    builder.build().unwrap_or_else(|_| GlobSet::empty())
}
/// Convert CRLF (`\r\n`) and bare CR (`\r`) line endings to LF (`\n`).
///
/// The common all-LF case takes the early return and performs a single
/// plain copy with no replacement passes.
pub fn normalize_line_endings(content: &str) -> String {
    if !content.contains('\r') {
        return content.to_string();
    }
    // Collapse CRLF first so the second pass only sees lone CRs.
    content.replace("\r\n", "\n").replace('\r', "\n")
}
/// Walk the project at `root` and index every mdt block found.
///
/// Collects provider blocks (honored only in `*.t.md` template files) keyed
/// by name, consumer blocks from all scannable files, and diagnostics for
/// parse problems, bad transformer usage, and unused providers.
///
/// # Errors
/// - [`MdtError::FileTooLarge`] when a file exceeds `options.max_file_size`.
/// - [`MdtError::DuplicateProvider`] when two templates define the same
///   provider name.
/// - I/O and parse errors while reading or parsing files.
pub fn scan_project_with_options(root: &Path, options: &ScanOptions) -> MdtResult<Project> {
    let mut providers: HashMap<String, ProviderEntry> = HashMap::new();
    let mut consumers = Vec::new();
    // Base file set from a gitignore/exclude-aware recursive walk.
    let mut files = collect_files(root, &options.exclude_patterns, options.disable_gitignore)?;
    // Configured template directories are scanned as well (they may sit
    // outside the normal walk); results are deduplicated into `files`.
    for template_dir in &options.template_paths {
        let abs_dir = root.join(template_dir);
        if abs_dir.is_dir() {
            let extra_files = collect_files(
                &abs_dir,
                &options.exclude_patterns,
                options.disable_gitignore,
            )?;
            for f in extra_files {
                if !files.contains(&f) {
                    files.push(f);
                }
            }
        }
    }
    let custom_exclude = build_exclude_matcher(root, &options.exclude_patterns)?;
    // Explicit include globs can pull in files the normal walk skipped
    // (e.g. extensions outside the scannable list).
    if !options.include_set.is_empty() {
        collect_included_files(
            root,
            root,
            &options.include_set,
            &custom_exclude,
            &mut files,
        )?;
    }
    let mut diagnostics: Vec<ProjectDiagnostic> = Vec::new();
    for file in &files {
        // Enforce the size cap before reading the whole file into memory.
        let metadata = std::fs::metadata(file)?;
        if metadata.len() > options.max_file_size {
            return Err(MdtError::FileTooLarge {
                path: file.display().to_string(),
                size: metadata.len(),
                limit: options.max_file_size,
            });
        }
        let raw_content = std::fs::read_to_string(file)?;
        // Normalize CRLF/CR so downstream offsets and content are consistent.
        let content = normalize_line_endings(&raw_content);
        // Markdown files go through the markdown parser; everything else
        // goes through the source scanner.
        let (blocks, parse_diagnostics) = if is_markdown_file(file) {
            parse_with_diagnostics(&content)?
        } else {
            parse_source_with_diagnostics(&content, &options.markdown_codeblocks)?
        };
        // Re-wrap parser diagnostics with the file path attached.
        for diag in parse_diagnostics {
            let project_diag = match diag {
                crate::parser::ParseDiagnostic::UnclosedBlock { name, line, column } => {
                    ProjectDiagnostic {
                        file: file.clone(),
                        kind: DiagnosticKind::UnclosedBlock { name },
                        line,
                        column,
                    }
                }
                crate::parser::ParseDiagnostic::UnknownTransformer { name, line, column } => {
                    ProjectDiagnostic {
                        file: file.clone(),
                        kind: DiagnosticKind::UnknownTransformer { name },
                        line,
                        column,
                    }
                }
                crate::parser::ParseDiagnostic::InvalidTransformerArgs {
                    name,
                    expected,
                    got,
                    line,
                    column,
                } => {
                    ProjectDiagnostic {
                        file: file.clone(),
                        kind: DiagnosticKind::InvalidTransformerArgs {
                            name,
                            expected,
                            got,
                        },
                        line,
                        column,
                    }
                }
            };
            diagnostics.push(project_diag);
        }
        // Only `*.t.md` files may contribute providers (checked below).
        let is_template = file
            .file_name()
            .and_then(|name| name.to_str())
            .is_some_and(|name| name.ends_with(".t.md"));
        // Transformer argument validation is a diagnostic, not a hard error.
        for block in &blocks {
            if let Err(MdtError::InvalidTransformerArgs {
                name,
                expected,
                got,
            }) = validate_transformers(&block.transformers)
            {
                diagnostics.push(ProjectDiagnostic {
                    file: file.clone(),
                    kind: DiagnosticKind::InvalidTransformerArgs {
                        name,
                        expected,
                        got,
                    },
                    line: block.opening.start.line,
                    column: block.opening.start.column,
                });
            }
        }
        for block in blocks {
            // Blocks excluded by name in the config are ignored entirely.
            if options
                .excluded_blocks
                .iter()
                .any(|name| name == &block.name)
            {
                continue;
            }
            let block_content = extract_content_between_tags(&content, &block);
            match block.r#type {
                BlockType::Provider => {
                    // Provider tags outside templates are silently skipped.
                    if !is_template {
                        continue;
                    }
                    // Two templates defining the same provider is fatal.
                    if let Some(existing) = providers.get(&block.name) {
                        return Err(MdtError::DuplicateProvider {
                            name: block.name.clone(),
                            first_file: existing.file.display().to_string(),
                            second_file: file.display().to_string(),
                        });
                    }
                    providers.insert(
                        block.name.clone(),
                        ProviderEntry {
                            block,
                            file: file.clone(),
                            content: block_content,
                        },
                    );
                }
                BlockType::Consumer => {
                    consumers.push(ConsumerEntry {
                        block,
                        file: file.clone(),
                        content: block_content,
                    });
                }
            }
        }
    }
    // Flag providers that no consumer references.
    let referenced_names: HashSet<&str> = consumers.iter().map(|c| c.block.name.as_str()).collect();
    for (name, entry) in &providers {
        if !referenced_names.contains(name.as_str()) {
            diagnostics.push(ProjectDiagnostic {
                file: entry.file.clone(),
                kind: DiagnosticKind::UnusedProvider { name: name.clone() },
                line: entry.block.opening.start.line,
                column: entry.block.opening.start.column,
            });
        }
    }
    Ok(Project {
        providers,
        consumers,
        diagnostics,
    })
}
/// Extract the raw text between a block's opening and closing tags.
///
/// Returns an empty string when the offsets are inverted or out of range,
/// or when they do not fall on UTF-8 character boundaries. The original
/// checked only `end > len`, so a parser offset landing mid-character would
/// panic in the slice expression; `str::get` makes all invalid ranges
/// degrade to the same empty-string result instead.
pub fn extract_content_between_tags(source: &str, block: &Block) -> String {
    let start = block.opening.end.offset;
    let end = block.closing.start.offset;
    if start >= end {
        return String::new();
    }
    // `get` returns None (instead of panicking like `source[start..end]`)
    // for out-of-range or non-boundary indices.
    source.get(start..end).map_or_else(String::new, str::to_string)
}
/// Compile the configured exclude patterns into a gitignore-style matcher
/// rooted at `root`.
///
/// # Errors
/// Returns [`MdtError::ConfigParse`] if a pattern is invalid or the rule
/// set cannot be built.
fn build_exclude_matcher(root: &Path, patterns: &[String]) -> MdtResult<Gitignore> {
    let mut builder = GitignoreBuilder::new(root);
    for pattern in patterns {
        if let Err(e) = builder.add_line(None, pattern) {
            return Err(MdtError::ConfigParse(format!(
                "invalid exclude pattern `{pattern}`: {e}"
            )));
        }
    }
    match builder.build() {
        Ok(matcher) => Ok(matcher),
        Err(e) => Err(MdtError::ConfigParse(format!(
            "failed to build exclude rules: {e}"
        ))),
    }
}
/// Load `.gitignore` rules from `root`, if the file exists.
///
/// Every failure mode degrades to a matcher that ignores nothing rather
/// than aborting the scan: an unreadable/unparsable `.gitignore` is
/// tolerated by `add` (the builder keeps whatever it could read), and a
/// build failure falls back to `Gitignore::empty()`. The original fallback
/// constructed and built a second empty `GitignoreBuilder` before reaching
/// `Gitignore::empty()`; that intermediate step is redundant — the direct
/// empty matcher is the same end state.
fn build_gitignore(root: &Path) -> Gitignore {
    let mut builder = GitignoreBuilder::new(root);
    let gitignore_path = root.join(".gitignore");
    if gitignore_path.exists() {
        // Best-effort: an error from `add` is deliberately discarded.
        let _ = builder.add(gitignore_path);
    }
    builder.build().unwrap_or_else(|_| Gitignore::empty())
}
/// Recursively gather all scannable files under `root`, honoring
/// `.gitignore` (unless disabled) and the custom exclude patterns.
/// The result is sorted for deterministic processing order.
fn collect_files(
    root: &Path,
    exclude_patterns: &[String],
    disable_gitignore: bool,
) -> MdtResult<Vec<PathBuf>> {
    let gitignore = if disable_gitignore {
        Gitignore::empty()
    } else {
        build_gitignore(root)
    };
    let custom_exclude = build_exclude_matcher(root, exclude_patterns)?;
    let mut files = Vec::new();
    let mut visited_dirs = HashSet::new();
    walk_dir(
        root,
        root,
        &mut files,
        true,
        &gitignore,
        &custom_exclude,
        &mut visited_dirs,
    )?;
    files.sort();
    Ok(files)
}
/// Recursively walk `dir`, appending scannable files to `files`.
///
/// Skips dotfiles/dot-directories, `node_modules`, `target`, gitignored
/// paths, custom excludes, and nested sub-projects (non-root directories
/// containing their own `mdt.toml`).
///
/// # Errors
/// Returns [`MdtError::SymlinkCycle`] when a directory's canonical path was
/// already visited. NOTE(review): entries are never removed from
/// `visited_dirs`, so two distinct symlinks resolving to the same directory
/// would also trigger this error without a true cycle — confirm intended.
#[allow(clippy::only_used_in_recursion)]
fn walk_dir(
    root: &Path,
    dir: &Path,
    files: &mut Vec<PathBuf>,
    is_root: bool,
    gitignore: &Gitignore,
    custom_exclude: &Gitignore,
    visited_dirs: &mut HashSet<PathBuf>,
) -> MdtResult<()> {
    if !dir.is_dir() {
        return Ok(());
    }
    // Canonicalize to detect cycles introduced by symlinks; fall back to
    // the raw path if canonicalization fails (e.g. permissions).
    let canonical = dir.canonicalize().unwrap_or_else(|_| dir.to_path_buf());
    // `insert` returning false means we have been here before.
    if !visited_dirs.insert(canonical.clone()) {
        return Err(MdtError::SymlinkCycle {
            path: dir.display().to_string(),
        });
    }
    let entries = std::fs::read_dir(dir)?;
    for entry in entries {
        let entry = entry?;
        let path = entry.path();
        // Hard-coded skips: hidden entries and common build/vendor dirs.
        if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
            if name.starts_with('.') || name == "node_modules" || name == "target" {
                continue;
            }
        }
        let is_dir = path.is_dir();
        if gitignore.matched(&path, is_dir).is_ignore() {
            continue;
        }
        if custom_exclude.matched(&path, is_dir).is_ignore() {
            continue;
        }
        if is_dir {
            // A nested mdt.toml marks an independent sub-project; do not
            // descend into it (the scan root itself is exempt).
            if !is_root && path.join("mdt.toml").exists() {
                continue;
            }
            walk_dir(
                root,
                &path,
                files,
                false,
                gitignore,
                custom_exclude,
                visited_dirs,
            )?;
        } else if is_scannable_file(&path) {
            files.push(path);
        }
    }
    Ok(())
}
fn collect_included_files(
root: &Path,
dir: &Path,
include_set: &GlobSet,
exclude_matcher: &Gitignore,
files: &mut Vec<PathBuf>,
) -> MdtResult<()> {
if !dir.is_dir() {
return Ok(());
}
let entries = std::fs::read_dir(dir)?;
for entry in entries {
let entry = entry?;
let path = entry.path();
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
if name.starts_with('.') || name == "node_modules" || name == "target" {
continue;
}
}
let is_dir = path.is_dir();
if exclude_matcher.matched(&path, is_dir).is_ignore() {
continue;
}
if let Ok(rel_path) = path.strip_prefix(root) {
if path.is_file() && include_set.is_match(rel_path) && !files.contains(&path) {
files.push(path.clone());
}
}
if is_dir {
collect_included_files(root, &path, include_set, exclude_matcher, files)?;
}
}
Ok(())
}
/// Whether `path` has an extension the scanner handles (markdown plus a
/// fixed list of source-code languages). Extensionless paths are skipped.
fn is_scannable_file(path: &Path) -> bool {
    const SCANNABLE_EXTENSIONS: [&str; 17] = [
        "md", "mdx", "markdown", "rs", "ts", "tsx", "js", "jsx", "py", "go", "java", "kt",
        "swift", "c", "cpp", "h", "cs",
    ];
    path.extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| SCANNABLE_EXTENSIONS.contains(&ext))
}
/// Whether `path` has a markdown extension (`md`, `mdx`, or `markdown`).
fn is_markdown_file(path: &Path) -> bool {
    path.extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| matches!(ext, "md" | "mdx" | "markdown"))
}
/// Whether `path` is an mdt template file, i.e. its file name ends with
/// the `.t.md` suffix.
pub fn is_template_file(path: &Path) -> bool {
    match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name.ends_with(".t.md"),
        None => false,
    }
}
pub fn find_missing_providers(project: &Project) -> Vec<String> {
let mut missing = Vec::new();
for consumer in &project.consumers {
if !project.providers.contains_key(&consumer.block.name)
&& !missing.contains(&consumer.block.name)
{
missing.push(consumer.block.name.clone());
}
}
missing
}
/// Ensure every consumer block has a matching provider.
///
/// # Errors
/// Returns [`MdtError::MissingProvider`] carrying the first missing name.
pub fn validate_project(project: &Project) -> MdtResult<()> {
    match find_missing_providers(project).into_iter().next() {
        None => Ok(()),
        Some(name) => Err(MdtError::MissingProvider(name)),
    }
}