Skip to main content

cordance_core/
source.rs

1//! Source records: every byte cordance consumes is one of these.
2
3use camino::Utf8PathBuf;
4use chrono::{DateTime, Utc};
5use serde::{Deserialize, Serialize};
6
7/// Classification of a single source file by the role it plays.
8///
9/// This taxonomy is **content-agnostic by design**: classification comes from
10/// the directory and filename a file lives at, never from prose content. If a
11/// rule depends on file content, that's an `AdviseFinding`, not a class.
12#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)]
13#[serde(rename_all = "snake_case")]
14pub enum SourceClass {
15    EngineeringDoctrinePrinciple,
16    EngineeringDoctrinePattern,
17    EngineeringDoctrineChecklist,
18    EngineeringDoctrineTooling,
19    EngineeringDoctrineGlossary,
20    EngineeringDoctrineEvolution,
21    ProjectDoctrine,
22    ProjectAdr,
23    ProjectSchema,
24    ProjectContract,
25    ProjectTest,
26    ProjectCi,
27    ProjectReleaseGate,
28    ProjectSourceCode,
29    ProjectReadme,
30    ProjectAgentFile,
31    AxiomAlgorithm,
32    AxiomPolicy,
33    AxiomSchema,
34    AxiomTool,
35    AxiomTemplate,
36    AxiomWorkflow,
37    AxiomSkill,
38    CortexReceipt,
39    CortexFixture,
40    GeneratedManaged,
41    BlockedSurface,
42    Unclassified,
43}
44
45/// Where a source class fits in the six-bucket authority-surfaces taxonomy
46/// used by `pai-axiom-project-harness-target.v1`. Multiple `SourceClass`
47/// values map to the same `SurfaceCategory`.
48#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)]
49#[serde(rename_all = "snake_case")]
50pub enum SurfaceCategory {
51    ProductSpec,
52    Adrs,
53    Doctrine,
54    TestsOrEvals,
55    RuntimeRoots,
56    ReleaseGates,
57}
58
59#[allow(clippy::use_self)]
60impl SourceClass {
61    /// Map a source class to its harness-target bucket.
62    #[must_use]
63    pub const fn surface_category(self) -> Option<SurfaceCategory> {
64        use SourceClass as S;
65        use SurfaceCategory as C;
66        match self {
67            S::ProjectReadme | S::ProjectDoctrine => Some(C::ProductSpec),
68            S::ProjectAdr => Some(C::Adrs),
69            S::EngineeringDoctrinePrinciple
70            | S::EngineeringDoctrinePattern
71            | S::EngineeringDoctrineChecklist
72            | S::EngineeringDoctrineTooling
73            | S::EngineeringDoctrineGlossary
74            | S::EngineeringDoctrineEvolution => Some(C::Doctrine),
75            S::ProjectTest | S::ProjectSchema | S::ProjectContract => Some(C::TestsOrEvals),
76            S::ProjectCi | S::ProjectReleaseGate => Some(C::ReleaseGates),
77            _ => None,
78        }
79    }
80}
81
82/// One scanned source file.
83#[derive(Clone, Debug, Serialize, Deserialize)]
84pub struct SourceRecord {
85    /// Stable identifier (`{class}:{path}` with `/` separators).
86    pub id: String,
87    /// Repo-relative path, forward-slashed.
88    pub path: Utf8PathBuf,
89    pub class: SourceClass,
90    /// sha256 of the file bytes, hex-encoded lowercase.
91    pub sha256: String,
92    /// Size in bytes.
93    pub size_bytes: u64,
94    /// Last-modified time if the filesystem reports one.
95    ///
96    /// Round-4 bughunt #10: the OS-reported mtime is per-machine and
97    /// per-checkout. Embedding it in `pack.json` makes the on-disk shape
98    /// non-deterministic across operators with the same commit. `#[serde(skip)]`
99    /// keeps the field in memory (the scanner still populates it for in-process
100    /// tools that want to inspect freshness) but excludes it from
101    /// serialisation. Deserialisation defaults to `None`. The deterministic
102    /// truth lives in `sha256` + the git commit log.
103    #[serde(skip)]
104    pub modified: Option<DateTime<Utc>>,
105    /// Was this file ignored (matched a blocked-surface rule)?
106    #[serde(default)]
107    pub blocked: bool,
108    /// Reason for blocking, if `blocked`.
109    #[serde(skip_serializing_if = "Option::is_none")]
110    pub blocked_reason: Option<String>,
111}
112
113impl SourceRecord {
114    #[must_use]
115    pub fn stable_id(class: SourceClass, path: &Utf8PathBuf) -> String {
116        let class_str =
117            serde_json::to_string(&class).unwrap_or_else(|_| "\"unclassified\"".to_string());
118        format!("{}:{}", class_str.trim_matches('"'), path.as_str())
119    }
120}
121
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal unblocked ADR record shared by the serde tests.
    fn sample_record() -> SourceRecord {
        SourceRecord {
            id: "project_adr:docs/adr/0001.md".into(),
            path: "docs/adr/0001.md".into(),
            class: SourceClass::ProjectAdr,
            sha256: "deadbeef".repeat(8),
            size_bytes: 1234,
            modified: None,
            blocked: false,
            blocked_reason: None,
        }
    }

    #[test]
    fn doctrine_classes_map_to_doctrine_bucket() {
        let doctrine = [
            SourceClass::EngineeringDoctrinePrinciple,
            SourceClass::EngineeringDoctrinePattern,
            SourceClass::EngineeringDoctrineChecklist,
            SourceClass::EngineeringDoctrineTooling,
            SourceClass::EngineeringDoctrineGlossary,
            SourceClass::EngineeringDoctrineEvolution,
        ];
        for class in doctrine.iter().copied() {
            assert_eq!(class.surface_category(), Some(SurfaceCategory::Doctrine));
        }
    }

    #[test]
    fn adr_maps_to_adrs() {
        assert_eq!(
            SourceClass::ProjectAdr.surface_category(),
            Some(SurfaceCategory::Adrs)
        );
    }

    #[test]
    fn unbucketed_classes_have_no_surface_category() {
        assert_eq!(SourceClass::Unclassified.surface_category(), None);
        assert_eq!(SourceClass::ProjectSourceCode.surface_category(), None);
        assert_eq!(SourceClass::BlockedSurface.surface_category(), None);
    }

    #[test]
    fn stable_id_joins_snake_case_class_and_path() {
        let path: Utf8PathBuf = "docs/adr/0001.md".into();
        assert_eq!(
            SourceRecord::stable_id(SourceClass::ProjectAdr, &path),
            "project_adr:docs/adr/0001.md"
        );
    }

    #[test]
    fn source_record_roundtrips() {
        let r = sample_record();
        let s = serde_json::to_string(&r).expect("serialize");
        // `blocked_reason` is `None`, so its key must be omitted entirely.
        assert!(!s.contains("blocked_reason"));

        let back: SourceRecord = serde_json::from_str(&s).expect("deserialize");
        assert_eq!(back.id, r.id);
        assert_eq!(back.path, r.path);
        assert_eq!(back.class, r.class);
        assert_eq!(back.sha256, r.sha256);
        assert_eq!(back.size_bytes, r.size_bytes);
        assert_eq!(back.blocked, r.blocked);
        assert_eq!(back.blocked_reason, r.blocked_reason);
        assert!(back.modified.is_none());
    }

    #[test]
    fn modified_is_never_serialised() {
        // Even a populated mtime must stay out of the on-disk shape.
        let r = SourceRecord {
            modified: Some(DateTime::<Utc>::default()),
            ..sample_record()
        };
        let s = serde_json::to_string(&r).expect("serialize");
        assert!(!s.contains("modified"));

        let back: SourceRecord = serde_json::from_str(&s).expect("deserialize");
        assert!(back.modified.is_none());
    }
}