keyhog_core/source.rs
1//! Source trait and chunk types: the abstraction for pluggable input backends.
2
3use serde::Serialize;
4use thiserror::Error;
5
6/// A scannable chunk of text with metadata about where it came from.
7///
8/// # Examples
9///
10/// ```rust
11/// use keyhog_core::{Chunk, ChunkMetadata};
12///
13/// let chunk = Chunk {
14/// data: "API_KEY=sk_live_example".into(),
15/// metadata: ChunkMetadata {
16/// source_type: "filesystem".into(),
17/// path: Some("app.env".into()),
18/// commit: None,
19/// author: None,
20/// date: None,
21/// },
22/// };
23///
24/// assert_eq!(chunk.metadata.path.as_deref(), Some("app.env"));
25/// ```
26#[derive(Debug, Clone, Serialize)]
27pub struct Chunk {
28 /// UTF-8 text content to scan.
29 pub data: String,
30 /// Provenance details used in findings and reporters.
31 pub metadata: ChunkMetadata,
32}
33
34/// Metadata that tracks the source location for a scanned chunk.
35///
36/// # Examples
37///
38/// ```rust
39/// use keyhog_core::ChunkMetadata;
40///
41/// let metadata = ChunkMetadata {
42/// source_type: "git-diff".into(),
43/// path: Some("src/lib.rs".into()),
44/// commit: Some("abc123".into()),
45/// author: Some("Dev".into()),
46/// date: Some("2026-03-26T00:00:00Z".into()),
47/// };
48///
49/// assert_eq!(metadata.source_type, "git-diff");
50/// ```
51#[derive(Debug, Clone, Serialize, Default)]
52pub struct ChunkMetadata {
53 /// Logical source backend, such as `filesystem` or `git`.
54 pub source_type: String,
55 /// Best-effort file path or object key.
56 ///
57 /// Paths stored here must be valid UTF-8. Callers that originate from
58 /// non-UTF-8 paths should preserve them using an escaped representation
59 /// before constructing [`ChunkMetadata`].
60 pub path: Option<String>,
61 /// Commit identifier for git-derived chunks.
62 pub commit: Option<String>,
63 /// Author name when available from history sources.
64 pub author: Option<String>,
65 /// Source timestamp when available from history sources.
66 pub date: Option<String>,
67}
68
69/// Produces chunks of text for the scanner to process.
70/// Each implementation handles a different input source.
71///
72/// # Examples
73///
74/// ```rust
75/// use keyhog_core::{Chunk, ChunkMetadata, Source, SourceError};
76///
77/// struct StaticSource;
78///
79/// impl Source for StaticSource {
80/// fn name(&self) -> &str {
81/// "static"
82/// }
83///
84/// fn chunks(&self) -> Box<dyn Iterator<Item = Result<Chunk, SourceError>> + '_> {
85/// Box::new(std::iter::once(Ok(Chunk {
86/// data: "TOKEN=value".into(),
87/// metadata: ChunkMetadata {
88/// source_type: "static".into(),
89/// path: None,
90/// commit: None,
91/// author: None,
92/// date: None,
93/// },
94/// })))
95/// }
96///
97/// fn as_any(&self) -> &dyn std::any::Any {
98/// self
99/// }
100/// }
101///
102/// let source = StaticSource;
103/// assert_eq!(source.name(), "static");
104/// ```
105pub trait Source: Send + Sync {
106 /// Human-readable source name used in warnings and telemetry.
107 fn name(&self) -> &str;
108 /// Yield all readable chunks from this source.
109 fn chunks(&self) -> Box<dyn Iterator<Item = Result<Chunk, SourceError>> + '_>;
110 /// Support downcasting to concrete types.
111 fn as_any(&self) -> &dyn std::any::Any;
112}
113
114/// Errors returned by input sources while enumerating or reading content.
115///
116/// # Examples
117///
118/// ```rust
119/// use keyhog_core::SourceError;
120///
121/// let error = SourceError::Other("pass a readable file or directory".into());
122/// assert!(error.to_string().contains("Fix"));
123/// ```
124#[derive(Debug, Error)]
125pub enum SourceError {
126 #[error(
127 "failed to read source: {0}. Fix: check the path exists, is readable, and is not a broken symlink"
128 )]
129 Io(#[from] std::io::Error),
130 #[error(
131 "failed to access git source: {0}. Fix: run inside a valid git repository and verify the requested refs exist"
132 )]
133 Git(String),
134 #[error(
135 "failed to read source: {0}. Fix: adjust the source settings or input so KeyHog can read plain text safely"
136 )]
137 Other(String),
138}