Skip to main content

keyhog_core/
source.rs

//! Source trait and chunk types: the abstraction for pluggable input backends.
2
3use serde::Serialize;
4use thiserror::Error;
5
6/// A scannable chunk of text with metadata about where it came from.
7///
8/// # Examples
9///
10/// ```rust
11/// use keyhog_core::{Chunk, ChunkMetadata};
12///
13/// let chunk = Chunk {
14///     data: "API_KEY=sk_live_example".into(),
15///     metadata: ChunkMetadata {
16///         source_type: "filesystem".into(),
17///         path: Some("app.env".into()),
18///         commit: None,
19///         author: None,
20///         date: None,
21///     },
22/// };
23///
24/// assert_eq!(chunk.metadata.path.as_deref(), Some("app.env"));
25/// ```
26#[derive(Debug, Clone, Serialize)]
27pub struct Chunk {
28    /// UTF-8 text content to scan.
29    pub data: String,
30    /// Provenance details used in findings and reporters.
31    pub metadata: ChunkMetadata,
32}
33
34/// Metadata that tracks the source location for a scanned chunk.
35///
36/// # Examples
37///
38/// ```rust
39/// use keyhog_core::ChunkMetadata;
40///
41/// let metadata = ChunkMetadata {
42///     source_type: "git-diff".into(),
43///     path: Some("src/lib.rs".into()),
44///     commit: Some("abc123".into()),
45///     author: Some("Dev".into()),
46///     date: Some("2026-03-26T00:00:00Z".into()),
47/// };
48///
49/// assert_eq!(metadata.source_type, "git-diff");
50/// ```
51#[derive(Debug, Clone, Serialize)]
52pub struct ChunkMetadata {
53    /// Logical source backend, such as `filesystem` or `git`.
54    pub source_type: String,
55    /// Best-effort file path or object key.
56    pub path: Option<String>,
57    /// Commit identifier for git-derived chunks.
58    pub commit: Option<String>,
59    /// Author name when available from history sources.
60    pub author: Option<String>,
61    /// Source timestamp when available from history sources.
62    pub date: Option<String>,
63}
64
65/// Produces chunks of text for the scanner to process.
66/// Each implementation handles a different input source.
67///
68/// # Examples
69///
70/// ```rust
71/// use keyhog_core::{Chunk, ChunkMetadata, Source, SourceError};
72///
73/// struct StaticSource;
74///
75/// impl Source for StaticSource {
76///     fn name(&self) -> &str {
77///         "static"
78///     }
79///
80///     fn chunks(&self) -> Box<dyn Iterator<Item = Result<Chunk, SourceError>> + '_> {
81///         Box::new(std::iter::once(Ok(Chunk {
82///             data: "TOKEN=value".into(),
83///             metadata: ChunkMetadata {
84///                 source_type: "static".into(),
85///                 path: None,
86///                 commit: None,
87///                 author: None,
88///                 date: None,
89///             },
90///         })))
91///     }
92/// }
93///
94/// let source = StaticSource;
95/// assert_eq!(source.name(), "static");
96/// ```
97pub trait Source {
98    /// Human-readable source name used in warnings and telemetry.
99    fn name(&self) -> &str;
100    /// Yield all readable chunks from this source.
101    fn chunks(&self) -> Box<dyn Iterator<Item = Result<Chunk, SourceError>> + '_>;
102}
103
104/// Errors returned by input sources while enumerating or reading content.
105///
106/// # Examples
107///
108/// ```rust
109/// use keyhog_core::SourceError;
110///
111/// let error = SourceError::Other("pass a readable file or directory".into());
112/// assert!(error.to_string().contains("Fix"));
113/// ```
114#[derive(Debug, Error)]
115pub enum SourceError {
116    #[error(
117        "failed to read source: {0}. Fix: check the path exists, is readable, and is not a broken symlink"
118    )]
119    Io(#[from] std::io::Error),
120    #[error(
121        "failed to access git source: {0}. Fix: run inside a valid git repository and verify the requested refs exist"
122    )]
123    Git(String),
124    #[error(
125        "failed to read source: {0}. Fix: adjust the source settings or input so KeyHog can read plain text safely"
126    )]
127    Other(String),
128}