keyhog_core/source.rs
1//! Source trait and chunk types: the abstraction for pluggable input backends.
2
3// Debt bucket: 9 items predating the crate floor raising `missing_docs` to
4// `warn`. Remove this allow once every Source-trait item is documented.
5#![allow(missing_docs)]
6
7use crate::SensitiveString;
8use serde::Serialize;
9use thiserror::Error;
10
11/// A scannable chunk of text with metadata about where it came from.
12///
13/// # Examples
14///
15/// ```rust
16/// use keyhog_core::{Chunk, ChunkMetadata};
17///
18/// let chunk = Chunk {
19/// data: "API_KEY=sk_live_example".into(),
20/// metadata: ChunkMetadata {
21/// source_type: "filesystem".into(),
22/// path: Some("app.env".into()),
23/// ..Default::default()
24/// },
25/// };
26///
27/// assert_eq!(chunk.metadata.path.as_deref(), Some("app.env"));
28/// ```
29#[derive(Debug, Clone, Serialize)]
30pub struct Chunk {
31 /// UTF-8 text content to scan.
32 pub data: SensitiveString,
33 /// Provenance details used in findings and reporters.
34 pub metadata: ChunkMetadata,
35}
36
37impl From<String> for Chunk {
38 fn from(data: String) -> Self {
39 Self {
40 data: data.into(),
41 metadata: ChunkMetadata::default(),
42 }
43 }
44}
45
46impl From<&str> for Chunk {
47 fn from(data: &str) -> Self {
48 Self::from(data.to_string())
49 }
50}
51
52/// Metadata that tracks the source location for a scanned chunk.
53///
54/// # Examples
55///
56/// ```rust
57/// use keyhog_core::ChunkMetadata;
58///
59/// let metadata = ChunkMetadata {
60/// source_type: "git-diff".into(),
61/// path: Some("src/lib.rs".into()),
62/// commit: Some("abc123".into()),
63/// author: Some("Dev".into()),
64/// date: Some("2026-03-26T00:00:00Z".into()),
65/// ..Default::default()
66/// };
67///
68/// assert_eq!(metadata.source_type, "git-diff");
69/// ```
70#[derive(Debug, Clone, Serialize, Default)]
71pub struct ChunkMetadata {
72 pub source_type: String,
73 pub path: Option<String>,
74 pub commit: Option<String>,
75 pub author: Option<String>,
76 pub date: Option<String>,
77 pub base_offset: usize,
78 /// File mtime in nanoseconds since UNIX epoch, when the source can
79 /// surface it cheaply (filesystem walks). Optional because non-fs
80 /// sources (stdin, http, git diffs) don't have a meaningful mtime.
81 /// Populated to drive the merkle-index metadata fast-path.
82 #[serde(default, skip_serializing_if = "Option::is_none")]
83 pub mtime_ns: Option<u64>,
84 /// File size in bytes, when known cheaply at chunk-production time.
85 /// Same shape and rationale as `mtime_ns`.
86 #[serde(default, skip_serializing_if = "Option::is_none")]
87 pub size_bytes: Option<u64>,
88}
89
90/// Produces chunks of text for the scanner to process.
91/// Each implementation handles a different input source.
92///
93/// # Examples
94///
95/// ```rust
96/// use keyhog_core::{Chunk, ChunkMetadata, Source, SourceError};
97///
98/// struct StaticSource;
99///
100/// impl Source for StaticSource {
101/// fn name(&self) -> &str {
102/// "static"
103/// }
104///
105/// fn chunks(&self) -> Box<dyn Iterator<Item = Result<Chunk, SourceError>> + '_> {
106/// Box::new(std::iter::once(Ok(Chunk {
107/// data: "TOKEN=value".into(),
108/// metadata: ChunkMetadata {
109/// source_type: "static".into(),
110/// ..Default::default()
111/// },
112/// })))
113/// }
114///
115/// fn as_any(&self) -> &dyn std::any::Any {
116/// self
117/// }
118/// }
119///
120/// let source = StaticSource;
121/// assert_eq!(source.name(), "static");
122/// ```
123pub trait Source: Send + Sync {
124 /// Human-readable source name used in warnings and telemetry.
125 fn name(&self) -> &str;
126 /// Yield all readable chunks from this source.
127 fn chunks(&self) -> Box<dyn Iterator<Item = Result<Chunk, SourceError>> + '_>;
128 /// Support downcasting to concrete types.
129 fn as_any(&self) -> &dyn std::any::Any;
130}
131
132/// Errors returned by input sources while enumerating or reading content.
133///
134/// # Examples
135///
136/// ```rust
137/// use keyhog_core::SourceError;
138///
139/// let error = SourceError::Other("pass a readable file or directory".into());
140/// assert!(error.to_string().contains("Fix"));
141/// ```
142#[derive(Debug, Error)]
143pub enum SourceError {
144 #[error(
145 "failed to read source: {0}. Fix: check the path exists, is readable, and is not a broken symlink"
146 )]
147 Io(#[from] std::io::Error),
148 #[error(
149 "failed to access git source: {0}. Fix: run inside a valid git repository and verify the requested refs exist"
150 )]
151 Git(String),
152 #[error(
153 "failed to read source: {0}. Fix: adjust the source settings or input so KeyHog can read plain text safely"
154 )]
155 Other(String),
156}