Skip to main content

cordance_scan/
lib.rs

1//! Repository scanners. Deterministic surface classification by path and filename.
2//!
3//! Walks a target directory, hashes every file with SHA-256, and assigns
4//! each one a `SourceClass` taxonomy bucket (e.g. `ProjectAdr`,
5//! `EngineeringDoctrinePrinciple`, `ProjectAgentFile`, `BlockedSurface`).
6//! Classification is **content-agnostic by design** — it never reads prose
7//! to decide what something is, only path and filename.
8//!
9//! Block rules cover runtime exhaust (`.cordance/`, `.git/`, `.claude/cache/`,
10//! `node_modules/`, `target/`, …), secret/credential filenames (`id_rsa`,
11//! `.env`, `secrets.json`, `Credentials.*`), and OS junk (`.DS_Store`,
12//! `Thumbs.db`). Case folding keeps `SECRET-FOO.TXT` blocked alongside
13//! `secret-foo.txt` on default-case-insensitive NTFS / APFS.
14//!
15//! # Golden path
16//!
17//! ```no_run
18//! use camino::Utf8PathBuf;
19//!
20//! let target = Utf8PathBuf::from(".");
21//! let sources = cordance_scan::scan_repo(&target).expect("scan succeeds");
22//!
23//! for record in &sources {
24//!     if record.blocked {
25//!         eprintln!(
26//!             "blocked: {} ({})",
27//!             record.path,
28//!             record.blocked_reason.as_deref().unwrap_or("?"),
29//!         );
30//!     } else {
31//!         println!(
32//!             "{:?}: {} ({} bytes)",
33//!             record.class, record.path, record.size_bytes,
34//!         );
35//!     }
36//! }
37//! ```
38
39#![forbid(unsafe_code)]
40#![deny(clippy::unwrap_used, clippy::expect_used)]
41#![cfg_attr(test, allow(clippy::expect_used, clippy::unwrap_used))]
42
43use camino::Utf8PathBuf;
44use cordance_core::source::{SourceClass, SourceRecord};
45
46pub mod blocked;
47pub mod classifier;
48pub mod hasher;
49pub mod walker;
50
51#[derive(Debug, thiserror::Error)]
52pub enum ScanError {
53    #[error("io error reading {path}: {source}")]
54    Io {
55        path: Utf8PathBuf,
56        #[source]
57        source: std::io::Error,
58    },
59    #[error("path is not valid utf-8: {0}")]
60    NonUtf8Path(String),
61}
62
63/// Top-level scan entrypoint. Delegates to `walker::walk`.
64#[allow(clippy::missing_errors_doc)]
65pub fn scan_repo(root: &Utf8PathBuf) -> Result<Vec<SourceRecord>, ScanError> {
66    walker::walk(root)
67}
68
69/// Classify a path using only its repo-relative location. No content reads.
70#[must_use]
71pub fn classify_by_path(rel_path: &str) -> SourceClass {
72    classifier::classify(rel_path)
73}