Skip to main content

orbok_fs/
policy.rs

1//! Compiled source policy (RFC-003 §6, RFC-004 scanner inputs).
2//!
3//! Pattern semantics are deliberately simple and documented:
4//! - an exclude pattern matches when it equals any path component
5//!   (".git", "node_modules", "target") or, in `*.ext` form, the file
6//!   name extension;
7//! - include patterns apply to file names only, in `*.ext` form or as an
8//!   exact name; an empty include list means "all supported types".
9
10use orbok_core::{HiddenFilePolicy, SymlinkPolicy};
11use orbok_db::repo::SourceRecord;
12use std::path::Path;
13
/// Component names that are excluded for every source (RFC-003 §6.3).
///
/// `CompiledPolicy::from_source` seeds its excluded-component list with
/// these entries before appending any user-supplied exclude patterns.
pub const DEFAULT_EXCLUDES: &[&str] = &[
    // version control, dependency trees and build output
    ".git", "node_modules", "target", "dist", "build",
    // caches and language environments
    ".cache", ".venv", "__pycache__",
];
25
/// Outcome of classifying a file by type for scanner cataloging
/// (RFC-005 §5).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileTypeClass {
    /// The file's extension is one of the supported formats.
    Supported,
    /// The file has no usable extension, or its extension is not one of
    /// the supported formats.
    Unsupported,
}
32
/// Lowercase extensions (no leading dot) of the initially supported
/// formats (RFC-005 §5).
///
/// `classify_file_type` lowercases a file's extension and looks it up
/// here; anything not listed is cataloged as unsupported rather than
/// treated as an error. The source-code group covers common cases only.
const SUPPORTED_EXTENSIONS: &[&str] = &[
    // text-oriented documents
    "txt", "log", "md", "markdown", "html", "htm",
    "pdf", "docx", "csv",
    // source code (line-aware text)
    "rs", "py", "js", "ts", "jsx", "tsx", "java",
    "c", "h", "cpp", "hpp", "go", "rb", "php",
    "sh", "bash", "sql",
    "toml", "yaml", "yml", "json", "xml", "css",
];
43
44/// A source policy compiled for fast per-entry checks.
45#[derive(Debug, Clone)]
46pub struct CompiledPolicy {
47    pub hidden_file_policy: HiddenFilePolicy,
48    pub symlink_policy: SymlinkPolicy,
49    pub max_file_size_bytes: Option<u64>,
50    include_extensions: Vec<String>,
51    include_names: Vec<String>,
52    exclude_components: Vec<String>,
53    exclude_extensions: Vec<String>,
54}
55
56impl CompiledPolicy {
57    /// Compile from a catalog source record. The default excludes are
58    /// always active in addition to user excludes.
59    pub fn from_source(source: &SourceRecord) -> Self {
60        let mut include_extensions = Vec::new();
61        let mut include_names = Vec::new();
62        for pattern in &source.include_patterns {
63            match pattern.strip_prefix("*.") {
64                Some(ext) => include_extensions.push(ext.to_ascii_lowercase()),
65                None => include_names.push(pattern.clone()),
66            }
67        }
68        let mut exclude_components: Vec<String> =
69            DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect();
70        let mut exclude_extensions = Vec::new();
71        for pattern in &source.exclude_patterns {
72            match pattern.strip_prefix("*.") {
73                Some(ext) => exclude_extensions.push(ext.to_ascii_lowercase()),
74                None => exclude_components.push(pattern.clone()),
75            }
76        }
77        Self {
78            hidden_file_policy: source.hidden_file_policy,
79            symlink_policy: source.symlink_policy,
80            max_file_size_bytes: source.max_file_size_bytes,
81            include_extensions,
82            include_names,
83            exclude_components,
84            exclude_extensions,
85        }
86    }
87
88    /// Whether a directory or file component is excluded by name.
89    pub fn component_excluded(&self, name: &str) -> bool {
90        self.exclude_components.iter().any(|p| p == name)
91    }
92
93    /// Whether a component is hidden (dotfile convention).
94    pub fn component_hidden(name: &str) -> bool {
95        name.starts_with('.')
96    }
97
98    /// Whether a file name passes the include/exclude pattern rules.
99    pub fn file_included(&self, file_name: &str) -> bool {
100        let ext = extension_of(file_name);
101        if let Some(ext) = &ext {
102            if self.exclude_extensions.iter().any(|e| e == ext) {
103                return false;
104            }
105        }
106        if self.component_excluded(file_name) {
107            return false;
108        }
109        if self.include_extensions.is_empty() && self.include_names.is_empty() {
110            return true;
111        }
112        if self.include_names.iter().any(|n| n == file_name) {
113            return true;
114        }
115        match ext {
116            Some(ext) => self.include_extensions.iter().any(|e| e == &ext),
117            None => false,
118        }
119    }
120
121    /// Whether a file size is within the policy limit.
122    pub fn size_allowed(&self, size: u64) -> bool {
123        match self.max_file_size_bytes {
124            Some(max) => size <= max,
125            None => true,
126        }
127    }
128}
129
130/// Supported/unsupported classification by extension (RFC-004 §10,
131/// RFC-005 §5).
132pub fn classify_file_type(path: &Path) -> FileTypeClass {
133    match path
134        .extension()
135        .and_then(|e| e.to_str())
136        .map(|e| e.to_ascii_lowercase())
137    {
138        Some(ext) if SUPPORTED_EXTENSIONS.contains(&ext.as_str()) => FileTypeClass::Supported,
139        _ => FileTypeClass::Unsupported,
140    }
141}
142
/// ASCII-lowercased extension of `file_name`, or `None` when
/// `Path::extension` reports none (for example a name without a dot, or
/// a dotfile such as `.gitignore`).
fn extension_of(file_name: &str) -> Option<String> {
    let raw = Path::new(file_name).extension()?;
    let text = raw.to_str()?;
    Some(text.to_ascii_lowercase())
}