Skip to main content

keyhog_core/
source.rs

1//! Source trait and chunk types: the abstraction for pluggable input backends.
2
3use crate::SensitiveString;
4use serde::Serialize;
5use thiserror::Error;
6
7/// A scannable chunk of text with metadata about where it came from.
8///
9/// # Examples
10///
11/// ```rust
12/// use keyhog_core::{Chunk, ChunkMetadata};
13///
14/// let chunk = Chunk {
15///     data: "API_KEY=sk_live_example".into(),
16///     metadata: ChunkMetadata {
17///         source_type: "filesystem".into(),
18///         path: Some("app.env".into()),
19///         ..Default::default()
20///     },
21/// };
22///
23/// assert_eq!(chunk.metadata.path.as_deref(), Some("app.env"));
24/// ```
25#[derive(Debug, Clone, Serialize)]
26pub struct Chunk {
27    /// UTF-8 text content to scan.
28    pub data: SensitiveString,
29    /// Provenance details used in findings and reporters.
30    pub metadata: ChunkMetadata,
31}
32
33/// Metadata that tracks the source location for a scanned chunk.
34///
35/// # Examples
36///
37/// ```rust
38/// use keyhog_core::ChunkMetadata;
39///
40/// let metadata = ChunkMetadata {
41///     source_type: "git-diff".into(),
42///     path: Some("src/lib.rs".into()),
43///     commit: Some("abc123".into()),
44///     author: Some("Dev".into()),
45///     date: Some("2026-03-26T00:00:00Z".into()),
46///     ..Default::default()
47/// };
48///
49/// assert_eq!(metadata.source_type, "git-diff");
50/// ```
51#[derive(Debug, Clone, Serialize, Default)]
52pub struct ChunkMetadata {
53    pub source_type: String,
54    pub path: Option<String>,
55    pub commit: Option<String>,
56    pub author: Option<String>,
57    pub date: Option<String>,
58    pub base_offset: usize,
59    /// File mtime in nanoseconds since UNIX epoch, when the source can
60    /// surface it cheaply (filesystem walks). Optional because non-fs
61    /// sources (stdin, http, git diffs) don't have a meaningful mtime.
62    /// Populated to drive the merkle-index metadata fast-path.
63    #[serde(default, skip_serializing_if = "Option::is_none")]
64    pub mtime_ns: Option<u64>,
65    /// File size in bytes, when known cheaply at chunk-production time.
66    /// Same shape and rationale as `mtime_ns`.
67    #[serde(default, skip_serializing_if = "Option::is_none")]
68    pub size_bytes: Option<u64>,
69}
70
71/// Produces chunks of text for the scanner to process.
72/// Each implementation handles a different input source.
73///
74/// # Examples
75///
76/// ```rust
77/// use keyhog_core::{Chunk, ChunkMetadata, Source, SourceError};
78///
79/// struct StaticSource;
80///
81/// impl Source for StaticSource {
82///     fn name(&self) -> &str {
83///         "static"
84///     }
85///
86///     fn chunks(&self) -> Box<dyn Iterator<Item = Result<Chunk, SourceError>> + '_> {
87///         Box::new(std::iter::once(Ok(Chunk {
88///             data: "TOKEN=value".into(),
89///             metadata: ChunkMetadata {
90///                 source_type: "static".into(),
91///                 ..Default::default()
92///             },
93///         })))
94///     }
95///
96///     fn as_any(&self) -> &dyn std::any::Any {
97///         self
98///     }
99/// }
100///
101/// let source = StaticSource;
102/// assert_eq!(source.name(), "static");
103/// ```
104pub trait Source: Send + Sync {
105    /// Human-readable source name used in warnings and telemetry.
106    fn name(&self) -> &str;
107    /// Yield all readable chunks from this source.
108    fn chunks(&self) -> Box<dyn Iterator<Item = Result<Chunk, SourceError>> + '_>;
109    /// Support downcasting to concrete types.
110    fn as_any(&self) -> &dyn std::any::Any;
111}
112
113/// Errors returned by input sources while enumerating or reading content.
114///
115/// # Examples
116///
117/// ```rust
118/// use keyhog_core::SourceError;
119///
120/// let error = SourceError::Other("pass a readable file or directory".into());
121/// assert!(error.to_string().contains("Fix"));
122/// ```
123#[derive(Debug, Error)]
124pub enum SourceError {
125    #[error(
126        "failed to read source: {0}. Fix: check the path exists, is readable, and is not a broken symlink"
127    )]
128    Io(#[from] std::io::Error),
129    #[error(
130        "failed to access git source: {0}. Fix: run inside a valid git repository and verify the requested refs exist"
131    )]
132    Git(String),
133    #[error(
134        "failed to read source: {0}. Fix: adjust the source settings or input so KeyHog can read plain text safely"
135    )]
136    Other(String),
137}