keyhog_core/source.rs
1//! Source trait and chunk types: the abstraction for pluggable input backends.
2
3use crate::SensitiveString;
4use serde::Serialize;
5use thiserror::Error;
6
7/// A scannable chunk of text with metadata about where it came from.
8///
9/// # Examples
10///
11/// ```rust
12/// use keyhog_core::{Chunk, ChunkMetadata};
13///
14/// let chunk = Chunk {
15/// data: "API_KEY=sk_live_example".into(),
16/// metadata: ChunkMetadata {
17/// source_type: "filesystem".into(),
18/// path: Some("app.env".into()),
19/// ..Default::default()
20/// },
21/// };
22///
23/// assert_eq!(chunk.metadata.path.as_deref(), Some("app.env"));
24/// ```
25#[derive(Debug, Clone, Serialize)]
26pub struct Chunk {
27 /// UTF-8 text content to scan.
28 pub data: SensitiveString,
29 /// Provenance details used in findings and reporters.
30 pub metadata: ChunkMetadata,
31}
32
33/// Metadata that tracks the source location for a scanned chunk.
34///
35/// # Examples
36///
37/// ```rust
38/// use keyhog_core::ChunkMetadata;
39///
40/// let metadata = ChunkMetadata {
41/// source_type: "git-diff".into(),
42/// path: Some("src/lib.rs".into()),
43/// commit: Some("abc123".into()),
44/// author: Some("Dev".into()),
45/// date: Some("2026-03-26T00:00:00Z".into()),
46/// ..Default::default()
47/// };
48///
49/// assert_eq!(metadata.source_type, "git-diff");
50/// ```
51#[derive(Debug, Clone, Serialize, Default)]
52pub struct ChunkMetadata {
53 pub source_type: String,
54 pub path: Option<String>,
55 pub commit: Option<String>,
56 pub author: Option<String>,
57 pub date: Option<String>,
58 pub base_offset: usize,
59 /// File mtime in nanoseconds since UNIX epoch, when the source can
60 /// surface it cheaply (filesystem walks). Optional because non-fs
61 /// sources (stdin, http, git diffs) don't have a meaningful mtime.
62 /// Populated to drive the merkle-index metadata fast-path.
63 #[serde(default, skip_serializing_if = "Option::is_none")]
64 pub mtime_ns: Option<u64>,
65 /// File size in bytes, when known cheaply at chunk-production time.
66 /// Same shape and rationale as `mtime_ns`.
67 #[serde(default, skip_serializing_if = "Option::is_none")]
68 pub size_bytes: Option<u64>,
69}
70
71/// Produces chunks of text for the scanner to process.
72/// Each implementation handles a different input source.
73///
74/// # Examples
75///
76/// ```rust
77/// use keyhog_core::{Chunk, ChunkMetadata, Source, SourceError};
78///
79/// struct StaticSource;
80///
81/// impl Source for StaticSource {
82/// fn name(&self) -> &str {
83/// "static"
84/// }
85///
86/// fn chunks(&self) -> Box<dyn Iterator<Item = Result<Chunk, SourceError>> + '_> {
87/// Box::new(std::iter::once(Ok(Chunk {
88/// data: "TOKEN=value".into(),
89/// metadata: ChunkMetadata {
90/// source_type: "static".into(),
91/// ..Default::default()
92/// },
93/// })))
94/// }
95///
96/// fn as_any(&self) -> &dyn std::any::Any {
97/// self
98/// }
99/// }
100///
101/// let source = StaticSource;
102/// assert_eq!(source.name(), "static");
103/// ```
104pub trait Source: Send + Sync {
105 /// Human-readable source name used in warnings and telemetry.
106 fn name(&self) -> &str;
107 /// Yield all readable chunks from this source.
108 fn chunks(&self) -> Box<dyn Iterator<Item = Result<Chunk, SourceError>> + '_>;
109 /// Support downcasting to concrete types.
110 fn as_any(&self) -> &dyn std::any::Any;
111}
112
113/// Errors returned by input sources while enumerating or reading content.
114///
115/// # Examples
116///
117/// ```rust
118/// use keyhog_core::SourceError;
119///
120/// let error = SourceError::Other("pass a readable file or directory".into());
121/// assert!(error.to_string().contains("Fix"));
122/// ```
123#[derive(Debug, Error)]
124pub enum SourceError {
125 #[error(
126 "failed to read source: {0}. Fix: check the path exists, is readable, and is not a broken symlink"
127 )]
128 Io(#[from] std::io::Error),
129 #[error(
130 "failed to access git source: {0}. Fix: run inside a valid git repository and verify the requested refs exist"
131 )]
132 Git(String),
133 #[error(
134 "failed to read source: {0}. Fix: adjust the source settings or input so KeyHog can read plain text safely"
135 )]
136 Other(String),
137}