use std::{
env,
ffi::{OsStr, OsString},
fs,
fs::{File, OpenOptions},
io,
io::{BufRead, BufReader, Read, Write},
os::unix::ffi::{OsStrExt, OsStringExt},
path::{Path, PathBuf},
str,
};
use bstr::{BString, ByteSlice};
use chrono::prelude::{Local, TimeZone};
use clap::{Parser, Subcommand};
use regex::bytes::Regex;
use rusqlite::{Connection, Result, TransactionBehavior};
use tempfile::NamedTempFile;
/// Secret-detection patterns together with their compiled matcher.
///
/// The first element lists `(name, regex)` pairs; the second is the `RegexSet`
/// built from the regex halves in the same order, so a set match index can be
/// used to look up the pattern name in the list.
type SecretPatterns = (Vec<(&'static str, &'static str)>, regex::bytes::RegexSet);
/// Optional settings that can be serialized/deserialized with serde.
///
/// Every field is optional and is left out of the serialized form when it is
/// `None`. NOTE(review): the field names mirror the scrub/sync command-line
/// flags; the code that produces and consumes this struct is not visible in
/// this part of the file, so confirm the exact semantics there.
#[derive(Debug, Default, serde::Serialize, serde::Deserialize)]
struct SyncOptions {
/// Presumably the string to scrub — confirm against the consumer.
#[serde(skip_serializing_if = "Option::is_none")]
scrub: Option<String>,
/// Presumably mirrors `scrub --scan` — confirm against the consumer.
#[serde(skip_serializing_if = "Option::is_none")]
scrub_scan: Option<bool>,
/// Presumably mirrors `scrub --confidence` — confirm against the consumer.
#[serde(skip_serializing_if = "Option::is_none")]
scrub_confidence: Option<String>,
/// Presumably mirrors `scrub --dry-run` — confirm against the consumer.
#[serde(skip_serializing_if = "Option::is_none")]
scrub_dry_run: Option<bool>,
/// Presumably mirrors `sync --no-secret-filter` — confirm against the consumer.
#[serde(skip_serializing_if = "Option::is_none")]
no_secret_filter: Option<bool>,
}
/// Compiled-size limit (50 MiB) passed to the regex set builder when a single
/// confidence tier of secret patterns is compiled.
const REGEX_SIZE_LIMIT_DEFAULT: usize = 50 << 20;
/// Compiled-size limit (100 MiB) passed to the regex set builder when all
/// confidence tiers are compiled into one set.
const REGEX_SIZE_LIMIT_ALL: usize = 100 << 20;
// Top-level command-line arguments: an optional database path plus the
// subcommand to run. (Plain `//` comments are used on purpose: clap's derive
// turns `///` doc comments into help text.)
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
struct PxhArgs {
// Database file to operate on; may also be supplied through the PXH_DB_PATH
// environment variable.
#[clap(long, env = "PXH_DB_PATH")]
db: Option<PathBuf>,
// The subcommand selected on the command line.
#[clap(subcommand)]
command: Commands,
}
// All subcommands understood by the binary. Each variant wraps the argument
// struct for that subcommand; the `about` strings are the user-visible
// descriptions. Variants marked "(internal)" are meant to be invoked by the
// shell integration rather than typed by a user.
#[derive(Subcommand, Debug)]
enum Commands {
// `show`, also reachable as `s`.
#[clap(visible_alias = "s", about = "search for and display history entries")]
Show(ShowCommand),
// `recall`, also reachable as `r`; implemented in the library crate.
#[clap(visible_alias = "r", about = "interactive history search (Ctrl-R replacement)")]
Recall(pxh::recall::RecallCommand),
#[clap(about = "install pxh helpers by modifying your shell rc file")]
Install(InstallCommand),
#[clap(about = "import history entries from your existing shell history or from an export")]
Import(ImportCommand),
#[clap(about = "export full history as JSON")]
Export(ExportCommand),
#[clap(about = "synchronize to and from a directory of other pxh history databases")]
Sync(SyncCommand),
#[clap(about = "scrub (remove) history entries matching the prompted-for string")]
Scrub(ScrubCommand),
#[clap(about = "(internal) invoked by the shell to insert a history entry")]
Insert(InsertCommand),
#[clap(about = "(internal) seal the previous inserted command to mark status, timing, etc")]
Seal(SealCommand),
#[clap(about = "(internal) shell configuration suitable for `source`'ing to enable pxh")]
ShellConfig(ShellConfigCommand),
#[clap(
about = "perform ANALYZE and VACUUM on the specified database files to optimize performance and reclaim space"
)]
Maintenance(MaintenanceCommand),
#[clap(about = "scan history for potential secrets and sensitive data")]
Scan(ScanCommand),
}
// Arguments for `install`, which appends the pxh hook to the user's shell rc
// file.
#[derive(Parser, Debug)]
struct InstallCommand {
// Positional shell name; the install logic accepts "zsh" and "bash".
#[clap(help = "shell to install helpers into")]
shellname: String,
}
// Arguments for `show`: filters and display options for listing history
// entries. The handler that interprets these fields is not in this part of
// the file; the help strings below are the authoritative description.
#[derive(Parser, Debug)]
struct ShowCommand {
#[clap(short = 'i', long, help = "perform case-insensitive matching", default_value_t = false)]
ignore_case: bool,
// Maximum number of entries to print; defaults to 50 and 0 disables the cap.
#[clap(
short,
long,
default_value_t = 50,
help = "display at most this many entries; 0 for unlimited"
)]
limit: usize,
#[clap(short, long, help = "display extra fields in the output")]
verbose: bool,
#[clap(long, help = "suppress headers")]
suppress_headers: bool,
#[clap(long, help = "show entries that were populated while in the current working directory")]
here: bool,
// Directory used by --here in place of the current working directory.
#[clap(
long,
help = "alters --here; instead of the current working directory, use the specified directory"
)]
working_directory: Option<PathBuf>,
#[clap(
long,
help = "display only commands from the specified session (use $PXH_SESSION_ID for this session)"
)]
session: Option<String>,
#[clap(
long,
help = "if specified, list of patterns can be matched in any order against command lines"
)]
loosen: bool,
// Positional search patterns; may be empty.
#[clap(
help = "one or more regular expressions to search through history entries; multiple values joined by `.*\\s.*`"
)]
patterns: Vec<String>,
}
// Arguments for `import`, which loads entries from an existing history file
// (or a JSON export) into the database.
#[derive(Parser, Debug)]
struct ImportCommand {
    // File to read entries from.
    #[clap(long, help = "path to history file to import")]
    histfile: PathBuf,
    // Selects the parser; the import logic accepts "zsh", "bash" and "json".
    #[clap(long, help = "type of shell history specified by --histfile")]
    shellname: String,
    // Kept as OsString so that non-UTF-8 values survive untouched.
    #[clap(long, help = "hostname to tag imported entries with (defaults to current hostname)")]
    hostname: Option<OsString>,
    // Kept as OsString so that non-UTF-8 values survive untouched.
    #[clap(long, help = "username to tag imported entries with (defaults to current user)")]
    username: Option<OsString>,
}
// Arguments for `sync`. Sync can run against a directory of database files
// (positional `dirname`) or against a remote peer reached through SSH or
// stdin/stdout. The sync handler rejects --remote combined with a directory,
// and requires --remote or --stdin-stdout for --send-only / --receive-only.
#[derive(Parser, Debug)]
struct SyncCommand {
#[clap(help = "Directory for sync operations (required for directory-based sync)")]
dirname: Option<PathBuf>,
#[clap(
long,
help = "Only export the current database; do not read other databases",
default_value_t = false
)]
export_only: bool,
#[clap(long, help = "Remote host to sync with via SSH")]
remote: Option<String>,
// clap rejects --send-only together with --receive-only.
#[clap(
long,
help = "Only send database to remote (no receive)",
conflicts_with = "receive_only"
)]
send_only: bool,
#[clap(
long,
help = "Only receive database from remote (no send)",
conflicts_with = "send_only"
)]
receive_only: bool,
// When absent, the remote sync code falls back to "~/.pxh/pxh.db".
#[clap(long, help = "Remote database path")]
remote_db: Option<PathBuf>,
#[clap(
short = 'e',
long,
default_value = "ssh",
help = "SSH command to use for connection (like rsync's -e option)"
)]
ssh_cmd: String,
#[clap(long, default_value = "pxh", help = "Path to pxh binary on the remote host")]
remote_pxh: String,
// Set on the remote side: the client builds a `sync --server` command line.
#[clap(long, help = "Internal: run in server mode")]
server: bool,
#[clap(long, help = "Only sync commands from the last N days", value_name = "DAYS")]
since: Option<u32>,
#[clap(long, help = "Use stdin/stdout for sync instead of SSH (for testing)")]
stdin_stdout: bool,
#[clap(long, help = "Disable automatic filtering of potential secrets during sync import")]
no_secret_filter: bool,
}
// Arguments for `scrub`, which removes matching history entries. The target
// can be the database, a histfile, a directory of databases, or a remote
// host; clap enforces the exclusions and requirements declared below. The
// handler itself is not in this part of the file.
#[derive(Parser, Debug)]
struct ScrubCommand {
#[clap(long, help = "Scrub from this histfile instead of (or in addition to) the database")]
histfile: Option<PathBuf>,
// Mutually exclusive with --histfile.
#[clap(
long,
conflicts_with = "histfile",
help = "Scrub from all .db files in this directory (e.g., sync folder)"
)]
dir: Option<PathBuf>,
// Mutually exclusive with both --histfile and --dir.
#[clap(
long,
conflicts_with_all = ["histfile", "dir"],
help = "Scrub from remote host via SSH sync protocol"
)]
remote: Option<String>,
// The three options below are only accepted together with --remote.
#[clap(
short = 'e',
long,
default_value = "ssh",
requires = "remote",
help = "SSH command to use for connection (like rsync's -e option)"
)]
ssh_cmd: String,
#[clap(long, requires = "remote", help = "Path to pxh binary on the remote host")]
remote_pxh: Option<String>,
#[clap(long, requires = "remote", help = "Remote database path")]
remote_db: Option<PathBuf>,
#[clap(short = 'n', long, help = "Dry-run mode (only display the rows, don't actually scrub)")]
dry_run: bool,
#[clap(
long,
help = "Use secret detection to find entries to scrub (instead of interactive prompt)"
)]
scan: bool,
// Free-form string; defaults to "critical". The pattern builder accepts
// "critical", "high", "low" and "all".
#[clap(
short,
long,
default_value = "critical",
help = "Confidence level for --scan: critical, high, low, or all"
)]
confidence: String,
// Only accepted together with --histfile.
#[clap(
long,
requires = "histfile",
help = "Shell format for histfile (bash or zsh); auto-detected if not specified"
)]
shellname: Option<String>,
#[clap(short = 'y', long, help = "Skip confirmation prompt")]
yes: bool,
// Optional positional value.
#[clap(
help = "The string to scrub (for interactive mode). Prefer being prompted interactively."
)]
contraband: Option<String>,
}
// Arguments for the internal `insert` subcommand, which the shell hook uses
// to record a history entry. Comments here are deliberately `//` rather than
// `///`: these options carry no help text and clap would turn doc comments
// into help output.
#[derive(Parser, Debug)]
struct InsertCommand {
#[clap(long)]
shellname: String,
// Hostname and username are OsString so non-UTF-8 values are accepted.
#[clap(long)]
hostname: OsString,
#[clap(long)]
username: OsString,
// Optional: directory the command was run from, and its exit status.
#[clap(long)]
working_directory: Option<PathBuf>, #[clap(long)]
exit_status: Option<i64>,
// Required: identifies the shell session the entry belongs to.
#[clap(long)]
session_id: i64,
// Optional start/end times as Unix timestamps.
#[clap(long)]
start_unix_timestamp: Option<i64>, #[clap(long)]
end_unix_timestamp: Option<i64>,
// Positional: the words of the command line being recorded.
command: Vec<OsString>,
}
// Arguments for the internal `seal` subcommand, which fills in the exit
// status and end time of the most recent entry of a session. All three
// options are required.
#[derive(Parser, Debug)]
struct SealCommand {
// Session whose latest entry is updated.
#[clap(long)]
session_id: i64,
// Exit status to record.
#[clap(long)]
exit_status: i32,
// End time to record, as a Unix timestamp.
#[clap(long)]
end_unix_timestamp: i64,
}
// Arguments for the internal `shell-config` subcommand, which prints the
// shell integration script to stdout.
#[derive(Parser, Debug)]
struct ShellConfigCommand {
// Positional shell name; the handler accepts "zsh" and "bash".
shellname: String,
// When set, script lines containing "PXH_CTRL_R_BINDING" are omitted.
#[clap(long, help = "Don't bind Ctrl-R to pxh recall")]
no_ctrl_r: bool,
}
// Arguments for `export`; the subcommand takes no options.
#[derive(Parser, Debug)]
struct ExportCommand {}
// Arguments for `maintenance`, which cleans up and compacts database files.
#[derive(Parser, Debug)]
struct MaintenanceCommand {
// Positional list of database files; when empty, the database that is
// already open is maintained instead.
#[clap(
help = "Path(s) to SQLite database files to maintain (if not specified, maintains the current database)"
)]
files: Vec<PathBuf>,
}
// Arguments for `scan`, which reports history entries that look like secrets.
// The source can be the database, a histfile, or every .db file in a
// directory.
#[derive(Parser, Debug)]
struct ScanCommand {
// Free-form string; defaults to "critical". The pattern builder rejects
// anything other than "critical", "high", "low" or "all".
#[clap(
short,
long,
default_value = "critical",
help = "Confidence level: critical, high, low, or all"
)]
confidence: String,
#[clap(short, long, help = "Output as JSON")]
json: bool,
// In verbose mode the working directory of each match is printed as well.
#[clap(short, long, help = "Verbose output with pattern details")]
verbose: bool,
#[clap(long, help = "Scan this histfile instead of the database")]
histfile: Option<PathBuf>,
// Mutually exclusive with --histfile.
#[clap(
long,
conflicts_with = "histfile",
help = "Scan all .db files in this directory (e.g., sync folder)"
)]
dir: Option<PathBuf>,
// Only accepted together with --histfile.
#[clap(
long,
requires = "histfile",
help = "Shell format for histfile (bash or zsh); auto-detected if not specified"
)]
shellname: Option<String>,
}
impl ImportCommand {
    /// Imports the entries found in `--histfile` into the database behind `conn`.
    ///
    /// `--shellname` selects the parser (`zsh`, `bash` or `json`); any other
    /// value yields an "Unsupported shell" error. All parsed entries are
    /// inserted inside a single transaction, which is committed only after
    /// every insert has succeeded.
    fn go(&self, mut conn: Connection) -> Result<(), Box<dyn std::error::Error>> {
        let source = &self.histfile;
        let parsed = match self.shellname.as_str() {
            "zsh" => pxh::import_zsh_history(
                source,
                self.hostname.as_ref().map(|host| host.as_bytes().into()),
                self.username.as_ref().map(|user| user.as_bytes().into()),
            ),
            "bash" => pxh::import_bash_history(
                source,
                self.hostname.as_ref().map(|host| host.as_bytes().into()),
                self.username.as_ref().map(|user| user.as_bytes().into()),
            ),
            "json" => pxh::import_json_history(source),
            _ => Err(Box::from(format!("Unsupported shell: {} (PRs welcome!)", self.shellname))),
        };
        let entries = parsed?;

        // One transaction for the whole import: either everything lands or nothing does.
        let txn = conn.transaction()?;
        for entry in entries {
            entry.insert(&txn)?;
        }
        txn.commit()?;
        Ok(())
    }
}
impl ShellConfigCommand {
    /// Writes the shell integration script for the requested shell to stdout.
    ///
    /// For `bash` the bundled bash-preexec script is emitted first, followed
    /// by the pxh hook; for `zsh` only the pxh hook is emitted. When Ctrl-R
    /// binding is disabled (by `--no-ctrl-r` or by the loaded configuration),
    /// every line containing `PXH_CTRL_R_BINDING` is dropped from the output.
    ///
    /// # Errors
    /// Returns an error for any other shell name, or if writing to stdout fails.
    fn go(&self) -> Result<(), Box<dyn std::error::Error>> {
        let config = pxh::recall::Config::load();
        let strip_ctrl_r = self.no_ctrl_r || config.shell.disable_ctrl_r;

        let script: String = match self.shellname.as_str() {
            "zsh" => include_str!("shell_configs/pxh.zsh").to_owned(),
            "bash" => [
                include_str!("shell_configs/bash-preexec/bash-preexec.sh"),
                include_str!("shell_configs/pxh.bash"),
            ]
            .concat(),
            _ => {
                let message = format!("Unsupported shell: {} (PRs welcome!)", self.shellname);
                return Err(Box::from(message));
            }
        };

        let output = if strip_ctrl_r {
            let kept: Vec<&str> =
                script.lines().filter(|line| !line.contains("PXH_CTRL_R_BINDING")).collect();
            let mut filtered = kept.join("\n");
            filtered.push('\n');
            filtered
        } else {
            script
        };

        let mut stdout = io::stdout();
        stdout.write_all(output.as_bytes())?;
        stdout.flush()?;
        Ok(())
    }
}
impl InstallCommand {
    /// Appends the pxh hook to the rc file of the requested shell.
    ///
    /// The rc file (`~/.zshrc` or `~/.bashrc`) is scanned first; if any line
    /// already mentions `pxh shell-config`, nothing is changed. Otherwise a
    /// short snippet that evaluates `pxh shell-config <shell>` is appended.
    ///
    /// # Errors
    /// Returns an error for an unsupported shell, when the home directory
    /// cannot be determined, or when the rc file cannot be read or written.
    /// Read errors are propagated rather than causing a panic.
    fn go(&self) -> Result<(), Box<dyn std::error::Error>> {
        // Marker that identifies an existing installation.
        const MARKER: &[u8] = b"pxh shell-config";

        let shellname = self.shellname.as_str();
        let rc_file = match shellname {
            "zsh" => ".zshrc",
            "bash" => ".bashrc",
            _ => return Err(Box::from(format!("Unsupported shell: {shellname} (PRs welcome!)"))),
        };
        let mut pb = home::home_dir().ok_or("Unable to determine your homedir")?;
        pb.push(rc_file);

        // Scan the file as raw bytes so that rc files which are not valid
        // UTF-8 are still handled; I/O errors are returned to the caller.
        let reader = BufReader::new(File::open(&pb)?);
        for line in reader.split(b'\n') {
            let line = line?;
            if line.windows(MARKER.len()).any(|window| window == MARKER) {
                println!("Shell config already present in {}; taking no action.", pb.display());
                return Ok(());
            }
        }

        let mut file = OpenOptions::new().append(true).open(&pb)?;
        write!(file, "\n# Install the pxh shell helpers to add interactive history realtime.")?;
        writeln!(
            file,
            r#"
if command -v pxh &> /dev/null; then
eval "$(pxh shell-config {shellname})"
fi"#
        )?;
        println!("Shell config successfully added to {}.", pb.display());
        println!(
            "pxh will be active for all new shell sessions. To activate for this session, run:"
        );
        println!(" source <(pxh shell-config {shellname})");
        Ok(())
    }
}
impl SealCommand {
    /// Records the exit status and end time on the newest entry of the session.
    ///
    /// Only the row with the highest `id` for `session_id` is considered, and
    /// it is updated only if both its exit status and end timestamp are still
    /// NULL; otherwise the statement changes nothing.
    fn go(&self, conn: Connection) -> Result<(), Box<dyn std::error::Error>> {
        let seal_sql = r#"
UPDATE command_history SET exit_status = ?, end_unix_timestamp = ?
WHERE exit_status is NULL
AND end_unix_timestamp IS NULL
AND id = (SELECT MAX(id) FROM command_history hi WHERE hi.session_id = ?)"#;
        // Bound in the order the placeholders appear in the statement.
        let bindings = (self.exit_status, self.end_unix_timestamp, self.session_id);
        conn.execute(seal_sql, bindings)?;
        Ok(())
    }
}
impl ExportCommand {
    /// Reads every row of `command_history` in `id` order and hands the
    /// resulting invocations to `pxh::json_export`.
    ///
    /// # Errors
    /// Fails if the query cannot be prepared, a row cannot be converted, or
    /// the export itself reports an error.
    fn go(&self, conn: Connection) -> Result<(), Box<dyn std::error::Error>> {
        let export_sql = r#"
SELECT session_id, full_command, shellname, hostname, username, working_directory, exit_status, start_unix_timestamp, end_unix_timestamp
FROM command_history h
ORDER BY id"#;
        let mut stmt = conn.prepare(export_sql)?;
        // Collecting into a Result stops at the first row that fails to convert.
        let invocations = stmt
            .query_map([], pxh::Invocation::from_row)?
            .collect::<Result<Vec<pxh::Invocation>, _>>()?;
        pxh::json_export(&invocations)?;
        Ok(())
    }
}
impl MaintenanceCommand {
    /// Cleans up and compacts one or more databases.
    ///
    /// For each database this prints size statistics, drops tables and
    /// indexes that are not part of the standard schema (anything whose name
    /// starts with `KEEP_` is preserved), runs `ANALYZE` and `VACUUM`, and
    /// prints the statistics again.
    ///
    /// With no files given, the already-open `default_conn` is maintained.
    /// Otherwise each listed file is opened and maintained in turn; failures
    /// are reported per file and the remaining files are still processed.
    ///
    /// # Errors
    /// Returns an error if maintenance of the default database fails, or if
    /// at least one of the listed files could not be opened or maintained.
    fn go(&self, default_conn: Connection) -> Result<(), Box<dyn std::error::Error>> {
        /// Returns `(page_count, page_size, freelist_count)` from the PRAGMAs of the same names.
        fn get_db_info(conn: &Connection) -> Result<(i64, i64, i64), Box<dyn std::error::Error>> {
            let page_count: i64 = conn.query_row("PRAGMA page_count", [], |r| r.get(0))?;
            let page_size: i64 = conn.query_row("PRAGMA page_size", [], |r| r.get(0))?;
            let freelist_count: i64 = conn.query_row("PRAGMA freelist_count", [], |r| r.get(0))?;
            Ok((page_count, page_size, freelist_count))
        }

        /// Prints the size statistics derived from a `get_db_info` result.
        fn print_db_info((page_count, page_size, freelist_count): (i64, i64, i64)) {
            let total_size = page_count * page_size;
            let freelist_size = freelist_count * page_size;
            println!(" Total size: {:.2} MB", total_size as f64 / 1024.0 / 1024.0);
            println!(" Free space: {:.2} MB", freelist_size as f64 / 1024.0 / 1024.0);
            println!(" Page count: {page_count}");
            println!(" Page size: {page_size} bytes");
            println!(" Freelist count: {freelist_count}");
        }

        /// Wraps `name` in double quotes (doubling embedded quotes) so it can be
        /// spliced into a statement as an identifier. Object names come from
        /// `sqlite_master` and may contain spaces, dashes or quotes.
        fn quote_identifier(name: &str) -> String {
            format!("\"{}\"", name.replace('"', "\"\""))
        }

        /// Runs the full maintenance sequence on one open database.
        fn maintain_database(
            conn: &Connection,
            db_name: &str,
        ) -> Result<(), Box<dyn std::error::Error>> {
            println!("Database '{db_name}' information before maintenance:");
            print_db_info(get_db_info(conn)?);
            // A missing or unreadable history table is reported as zero entries.
            let command_count: i64 = conn
                .query_row("SELECT COUNT(*) FROM command_history", [], |r| r.get(0))
                .unwrap_or_default();
            println!(" Command history entries: {command_count}");
            println!();

            println!("Looking for non-standard tables to clean up...");
            let mut cleanup_count = 0;
            let standard_tables = ["command_history", "settings", "sqlite_sequence"];
            let mut stmt = conn.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'
EXCEPT SELECT name FROM sqlite_master WHERE name IN (?1, ?2, ?3)")?;
            let non_standard_tables: Vec<String> = stmt
                .query_map(
                    [&standard_tables[0], &standard_tables[1], &standard_tables[2]],
                    |row| row.get(0),
                )?
                .collect::<Result<Vec<String>, _>>()?;
            for table_name in non_standard_tables {
                if table_name.starts_with("KEEP_") {
                    println!(" Keeping user table: {table_name}");
                    continue;
                }
                println!(" Dropping non-standard table: {table_name}");
                let drop_sql = format!("DROP TABLE IF EXISTS {}", quote_identifier(&table_name));
                conn.execute(&drop_sql, [])?;
                cleanup_count += 1;
            }
            if cleanup_count > 0 {
                println!("Cleaned up {cleanup_count} non-standard tables");
            } else {
                println!("No non-standard tables found to clean up");
            }

            println!("Looking for non-standard indexes to clean up...");
            let standard_indexes =
                ["idx_command_history_unique", "history_session_id", "history_start_time"];
            let mut stmt = conn.prepare(
                "SELECT name FROM sqlite_master WHERE type='index' AND
name NOT LIKE 'sqlite_autoindex_%' AND
tbl_name NOT LIKE 'sqlite_%' AND
name NOT IN (?1, ?2, ?3)",
            )?;
            let non_standard_indexes: Vec<String> = stmt
                .query_map(
                    [&standard_indexes[0], &standard_indexes[1], &standard_indexes[2]],
                    |row| row.get(0),
                )?
                .collect::<Result<Vec<String>, _>>()?;
            cleanup_count = 0;
            for index_name in non_standard_indexes {
                if index_name.starts_with("KEEP_") {
                    println!(" Keeping user index: {index_name}");
                    continue;
                }
                // Unlike tables, a failed index drop is reported and skipped
                // instead of aborting the whole run.
                let drop_sql = format!("DROP INDEX IF EXISTS {}", quote_identifier(&index_name));
                match conn.execute(&drop_sql, []) {
                    Ok(_) => {
                        println!(" Dropping non-standard index: {index_name}");
                        cleanup_count += 1;
                    }
                    Err(e) => {
                        println!(" Skipping index {index_name}: {e}");
                    }
                }
            }
            if cleanup_count > 0 {
                println!("Cleaned up {cleanup_count} non-standard indexes");
            } else {
                println!("No non-standard indexes found to clean up");
            }

            println!("Running ANALYZE...");
            conn.execute("ANALYZE", [])?;
            println!("ANALYZE completed successfully.");
            println!("Running VACUUM...");
            conn.execute("VACUUM", [])?;
            println!("VACUUM completed successfully.");

            println!("\nDatabase '{db_name}' information after maintenance:");
            print_db_info(get_db_info(conn)?);
            println!("\nDatabase '{db_name}' maintenance completed.");
            Ok(())
        }

        if self.files.is_empty() {
            return maintain_database(&default_conn, "default");
        }

        let mut success = true;
        for file_path in &self.files {
            let file_str = file_path.to_string_lossy();
            println!("\nPerforming maintenance on: {file_str}");
            match Connection::open(file_path) {
                Ok(conn) => {
                    if let Err(err) = maintain_database(&conn, &file_str) {
                        println!("Error maintaining database '{file_str}': {err}");
                        success = false;
                    }
                }
                Err(err) => {
                    println!("Error opening database '{file_str}': {err}");
                    success = false;
                }
            }
        }
        if success {
            println!("\nAll database maintenance operations completed successfully.");
            Ok(())
        } else {
            Err("One or more database maintenance operations failed".into())
        }
    }
}
/// One secret-pattern hit produced by a scan.
///
/// `command`, `pattern`, `timestamp` and (when present) `working_directory`
/// are serialized; `rowid` and `original_line` are skipped and only used
/// internally to locate the entry for scrubbing.
#[derive(Clone, serde::Serialize)]
struct ScanMatch {
/// The matched command, converted lossily to UTF-8.
command: String,
/// Name of the pattern that matched.
pattern: String,
/// Start time of the command as a Unix timestamp, when known.
timestamp: Option<i64>,
/// Working directory of the command; filled in by database scans only.
#[serde(skip_serializing_if = "Option::is_none")]
working_directory: Option<String>,
/// Database row id of the entry; histfile scans set this to 0.
#[serde(skip)]
rowid: i64,
/// Histfile text to remove for this entry (may span a timestamp line and
/// the command line); `None` for database matches.
#[serde(skip)]
original_line: Option<String>,
}
impl ScanCommand {
fn go(&self, conn: Connection) -> Result<(), Box<dyn std::error::Error>> {
if let Some(ref dir) = self.dir {
return self.go_dir_mode(dir);
}
let matches = self.run_scan(&conn)?;
if self.json {
println!("{}", serde_json::to_string_pretty(&matches)?);
return Ok(());
}
if matches.is_empty() {
println!("No potential secrets found.");
return Ok(());
}
println!("Found {} potential secret(s):\n", matches.len());
Self::display_matches(&matches, self.verbose);
Ok(())
}
fn go_dir_mode(&self, dir: &Path) -> Result<(), Box<dyn std::error::Error>> {
if !dir.exists() {
return Err(format!("Directory does not exist: {}", dir.display()).into());
}
let (patterns, regex_set) = build_secret_patterns(&self.confidence)?;
if patterns.is_empty() {
println!("No patterns available for confidence level '{}'.", self.confidence);
return Ok(());
}
let db_extension = OsStr::new("db");
let mut entries: Vec<_> = Vec::new();
let mut skipped_entries = 0usize;
for entry_result in fs::read_dir(dir)? {
match entry_result {
Ok(entry) => {
if entry.path().extension() == Some(db_extension) {
entries.push(entry);
}
}
Err(e) => {
eprintln!("Warning: Failed to read directory entry: {e}");
skipped_entries += 1;
}
}
}
if entries.is_empty() {
println!("No .db files found in {}", dir.display());
return Ok(());
}
println!("Scanning {} database files in {}\n", entries.len(), dir.display());
let mut all_matches: Vec<ScanMatch> = Vec::new();
let mut files_with_matches = 0usize;
let mut skipped_files = 0usize;
let num_files = entries.len();
for entry in &entries {
let path = entry.path();
let conn = match Connection::open(&path) {
Ok(c) => c,
Err(e) => {
eprintln!("Warning: Failed to open {}: {e}", path.display());
skipped_files += 1;
continue;
}
};
let schema = include_str!("base_schema.sql");
if let Err(e) = conn.execute_batch(schema) {
eprintln!("Warning: Failed to initialize schema for {}: {e}", path.display());
skipped_files += 1;
continue;
}
let mut matches: Vec<ScanMatch> = Vec::new();
scan_database(&conn, ®ex_set, &patterns, &mut matches, usize::MAX)?;
if !matches.is_empty() {
println!("{}:", path.display());
if self.json {
all_matches.extend(matches.iter().cloned());
} else {
Self::display_matches(&matches, self.verbose);
}
files_with_matches += 1;
}
}
if self.json {
println!("{}", serde_json::to_string_pretty(&all_matches)?);
} else if files_with_matches == 0 && skipped_files == 0 {
println!("No potential secrets found.");
} else {
println!(
"\nSummary: Found potential secrets in {} of {} database(s).",
files_with_matches, num_files
);
}
let total_skipped = skipped_entries + skipped_files;
if total_skipped > 0 {
eprintln!(
"WARNING: {} file(s) could not be processed. Results may be incomplete.",
total_skipped
);
return Err(format!(
"Scan incomplete: {} file(s) could not be processed",
total_skipped
)
.into());
}
Ok(())
}
fn run_scan(&self, conn: &Connection) -> Result<Vec<ScanMatch>, Box<dyn std::error::Error>> {
let (patterns, regex_set) = build_secret_patterns(&self.confidence)?;
if patterns.is_empty() {
return Ok(vec![]);
}
let mut matches: Vec<ScanMatch> = Vec::new();
if let Some(ref histfile) = self.histfile {
let content = fs::read(histfile)?;
let shellname = self.shellname.clone().unwrap_or_else(|| detect_shell_format(&content));
scan_histfile(&content, &shellname, ®ex_set, &patterns, &mut matches, usize::MAX)?;
} else {
scan_database(conn, ®ex_set, &patterns, &mut matches, usize::MAX)?;
}
Ok(matches)
}
fn display_matches(matches: &[ScanMatch], verbose: bool) {
for m in matches {
let time_str = m.timestamp.map_or_else(
|| "n/a".to_string(),
|ts| {
Local
.timestamp_opt(ts, 0)
.single()
.map(|t| t.format("%Y-%m-%d %H:%M:%S").to_string())
.unwrap_or_else(|| "n/a".to_string())
},
);
if verbose {
println!(" [{time_str}] {}", m.pattern);
if let Some(ref wd) = m.working_directory {
println!(" Directory: {wd}");
}
println!(" {}\n", m.command);
} else {
println!(" [{time_str}] {}", m.pattern);
println!(" {}\n", m.command);
}
}
}
}
/// Selects the secret patterns for `confidence` and compiles them into a set.
///
/// `confidence` must be `critical`, `high`, `low` or `all`; `all` concatenates
/// the three tiers in that order and uses the larger compiled-size limit.
/// The returned list and set share indices, so a set match index identifies
/// the pattern name in the list.
///
/// # Errors
/// Returns an error for any other confidence value, or if the regex set
/// cannot be built.
fn build_secret_patterns(confidence: &str) -> Result<SecretPatterns, Box<dyn std::error::Error>> {
    use pxh::secrets_patterns::{PATTERNS_CRITICAL, PATTERNS_HIGH, PATTERNS_LOW};
    use regex::bytes::RegexSetBuilder;

    let patterns: Vec<(&str, &str)> = match confidence {
        "critical" => PATTERNS_CRITICAL.to_vec(),
        "high" => PATTERNS_HIGH.to_vec(),
        "low" => PATTERNS_LOW.to_vec(),
        "all" => {
            let mut combined = PATTERNS_CRITICAL.to_vec();
            combined.extend(PATTERNS_HIGH.iter().copied());
            combined.extend(PATTERNS_LOW.iter().copied());
            combined
        }
        _ => {
            let message = format!(
                "Invalid confidence level: '{}'. Use 'critical', 'high', 'low', or 'all'",
                confidence
            );
            return Err(message.into());
        }
    };
    // Only the combined set gets the larger budget.
    let size_limit =
        if confidence == "all" { REGEX_SIZE_LIMIT_ALL } else { REGEX_SIZE_LIMIT_DEFAULT };

    let expressions = patterns.iter().map(|&(_, expression)| expression);
    let regex_set = RegexSetBuilder::new(expressions).size_limit(size_limit).build()?;
    Ok((patterns, regex_set))
}
/// Returns `true` when `line` is `#` followed by one or more ASCII digits and
/// nothing else, which is how bash writes timestamp lines in its history file.
fn is_bash_timestamp_line(line: &[u8]) -> bool {
    match line.split_first() {
        Some((b'#', digits)) => !digits.is_empty() && digits.iter().all(u8::is_ascii_digit),
        _ => false,
    }
}
/// Asks on stdout whether scrubbing should proceed and reads one line from stdin.
///
/// Returns `Ok(true)` only when the trimmed answer is `y` or `Y`; anything
/// else, including an empty line, counts as "no".
fn prompt_for_confirmation() -> Result<bool, Box<dyn std::error::Error>> {
    print!("Proceed with scrubbing? [y/N] ");
    // The prompt has no trailing newline, so flush to make it visible before blocking.
    io::stdout().flush()?;

    let mut answer = String::new();
    io::stdin().read_line(&mut answer)?;
    let confirmed = matches!(answer.trim(), "y" | "Y");
    Ok(confirmed)
}
/// Guesses the history file format from the first ten lines of `content`.
///
/// A non-empty line starting with `: ` and containing `;` means zsh; a bash
/// timestamp line means bash. The first line that matches either rule
/// decides. If none does, a warning is printed to stderr and `bash` is
/// returned.
fn detect_shell_format(content: &[u8]) -> String {
    let detected = content
        .split(|&byte| byte == b'\n')
        .take(10)
        .filter(|line| !line.is_empty())
        .find_map(|line| {
            if line.starts_with(b": ") && line.contains(&b';') {
                Some("zsh")
            } else if is_bash_timestamp_line(line) {
                Some("bash")
            } else {
                None
            }
        });

    match detected {
        Some(shell) => shell.to_string(),
        None => {
            eprintln!(
                "Warning: Could not detect shell format, defaulting to bash. Use --shellname to specify."
            );
            "bash".to_string()
        }
    }
}
fn scan_database(
conn: &Connection,
regex_set: ®ex::bytes::RegexSet,
patterns: &[(&str, &str)],
matches: &mut Vec<ScanMatch>,
limit: usize,
) -> Result<(), Box<dyn std::error::Error>> {
let mut stmt = conn.prepare(
"SELECT rowid, full_command, start_unix_timestamp, working_directory FROM command_history",
)?;
let mut rows = stmt.query([])?;
while let Some(row) = rows.next()? {
if matches.len() >= limit {
break;
}
let rowid: i64 = row.get(0)?;
let command: Vec<u8> = row.get(1)?;
let timestamp: Option<i64> = row.get(2)?;
let working_directory: Option<Vec<u8>> = row.get(3)?;
let matched_indices: Vec<usize> = regex_set.matches(&command).into_iter().collect();
for idx in matched_indices {
if matches.len() >= limit {
break;
}
matches.push(ScanMatch {
command: String::from_utf8_lossy(&command).to_string(),
pattern: patterns[idx].0.to_string(),
timestamp,
working_directory: working_directory
.as_ref()
.map(|wd| String::from_utf8_lossy(wd).to_string()),
rowid,
original_line: None,
});
}
}
Ok(())
}
fn scan_histfile(
content: &[u8],
shellname: &str,
regex_set: ®ex::bytes::RegexSet,
patterns: &[(&str, &str)],
matches: &mut Vec<ScanMatch>,
limit: usize,
) -> Result<(), Box<dyn std::error::Error>> {
let lines: Vec<&[u8]> = content.split(|&b| b == b'\n').collect();
let mut prev_timestamp_line: Option<&[u8]> = None;
for line in &lines {
if matches.len() >= limit {
break;
}
if line.is_empty() {
prev_timestamp_line = None;
continue;
}
let parsed = parse_histfile_line(line, shellname, prev_timestamp_line);
if shellname == "bash" && is_bash_timestamp_line(line) {
prev_timestamp_line = Some(line);
continue;
}
prev_timestamp_line = None;
let Some((command, timestamp, lines_to_remove)) = parsed else {
continue;
};
if command.is_empty() {
continue;
}
let matched_indices: Vec<usize> = regex_set.matches(&command).into_iter().collect();
for idx in matched_indices {
if matches.len() >= limit {
break;
}
matches.push(ScanMatch {
command: String::from_utf8_lossy(&command).to_string(),
pattern: patterns[idx].0.to_string(),
timestamp,
working_directory: None,
rowid: 0,
original_line: Some(lines_to_remove.clone()),
});
}
}
Ok(())
}
/// Parses one history file line into `(command, timestamp, original text)`.
///
/// * `zsh`: the text before the first `;` must split into three `:`-separated
///   fields; the second is parsed as the timestamp and everything after the
///   `;` is the command. Lines without `;` or with fewer fields give `None`.
/// * `bash`: timestamp lines give `None`. Otherwise the line itself is the
///   command; if `prev_timestamp_line` is supplied, its value becomes the
///   timestamp and it is prepended (with a newline) to the original text.
/// * any other shell name: the line is returned as the command, without a
///   timestamp.
///
/// The original text is a lossy UTF-8 rendering of the line(s) that make up
/// the entry in the file.
fn parse_histfile_line(
    line: &[u8],
    shellname: &str,
    prev_timestamp_line: Option<&[u8]>,
) -> Option<(Vec<u8>, Option<i64>, String)> {
    let line_text = || String::from_utf8_lossy(line).to_string();

    if shellname == "zsh" {
        let separator = line.iter().position(|&byte| byte == b';')?;
        let (header, rest) = line.split_at(separator);
        let mut fields = header.splitn(3, |&byte| byte == b':');
        let (Some(_), Some(timestamp_field), Some(_)) =
            (fields.next(), fields.next(), fields.next())
        else {
            return None;
        };
        let timestamp = String::from_utf8_lossy(timestamp_field).trim().parse::<i64>().ok();
        // `rest` still begins with the ';' separator; the command follows it.
        return Some((rest[1..].to_vec(), timestamp, line_text()));
    }

    if shellname == "bash" {
        if is_bash_timestamp_line(line) {
            return None;
        }
        let (timestamp, original) = match prev_timestamp_line {
            Some(ts_line) => {
                let mut text = String::from_utf8_lossy(ts_line).into_owned();
                text.push('\n');
                text.push_str(&String::from_utf8_lossy(line));
                // Skip the leading '#' before parsing the digits.
                let parsed = String::from_utf8_lossy(&ts_line[1..]).trim().parse::<i64>().ok();
                (parsed, text)
            }
            None => (None, line_text()),
        };
        return Some((line.to_vec(), timestamp, original));
    }

    Some((line.to_vec(), None, line_text()))
}
/// Deletes the `command_history` rows referenced by `matches`.
///
/// Several matches may point at the same row, so row ids are de-duplicated
/// first. Rows are deleted one by one in ascending id order. Returns the
/// number of distinct row ids that were targeted.
fn scrub_from_database(
    conn: &Connection,
    matches: &[ScanMatch],
) -> Result<usize, Box<dyn std::error::Error>> {
    let unique_rowids: std::collections::BTreeSet<i64> =
        matches.iter().map(|found| found.rowid).collect();
    for rowid in &unique_rowids {
        conn.execute("DELETE FROM command_history WHERE rowid = ?", [rowid])?;
    }
    Ok(unique_rowids.len())
}
/// Removes the file lines recorded on `matches` from `histfile`.
///
/// Each match may carry several lines (for example a timestamp line plus the
/// command line); all of them are collected, sorted and de-duplicated before
/// being passed to `pxh::atomically_remove_matching_lines_from_file`. Returns
/// the number of matches that carried original text, not the number of lines
/// removed.
fn scrub_from_histfile(
    histfile: &Path,
    matches: &[ScanMatch],
) -> Result<usize, Box<dyn std::error::Error>> {
    let recorded: Vec<&str> =
        matches.iter().filter_map(|found| found.original_line.as_deref()).collect();
    let entry_count = recorded.len();

    // A sorted set gives the same ordering and uniqueness as sort + dedup.
    let unique_lines: std::collections::BTreeSet<&str> =
        recorded.iter().flat_map(|text| text.lines()).collect();
    let lines_refs: Vec<&str> = unique_lines.into_iter().collect();

    pxh::atomically_remove_matching_lines_from_file(histfile, &lines_refs)?;
    Ok(entry_count)
}
impl SyncCommand {
/// Writes a copy of the database into a new temporary file and returns it.
///
/// The copy is produced with `VACUUM INTO`. When `--since DAYS` is set, rows
/// whose start timestamp is at or before `now - DAYS * 86400` are deleted
/// from the copy, which is then vacuumed again.
///
/// # Errors
/// Fails if the temporary file cannot be created, its path is not valid
/// UTF-8, the system clock reads earlier than the Unix epoch, or any SQL
/// statement fails.
fn create_filtered_db_copy(
    &self,
    conn: &mut Connection,
) -> Result<NamedTempFile, Box<dyn std::error::Error>> {
    let temp_file = NamedTempFile::new()?;
    // Fail with a clear message instead of binding NULL as the target path.
    let temp_path = temp_file
        .path()
        .to_str()
        .ok_or("Unable to represent temporary filename as a string")?;
    conn.execute("VACUUM INTO ?", (temp_path,))?;

    if let Some(days) = self.since {
        let temp_conn = Connection::open(temp_file.path())?;
        // A clock set before the epoch is reported as an error, not a panic.
        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)?
            .as_secs() as i64;
        let threshold = now - (i64::from(days) * 86400);
        temp_conn.execute(
            "DELETE FROM command_history WHERE start_unix_timestamp <= ?",
            [threshold],
        )?;
        temp_conn.execute("VACUUM", ())?;
        // Close the connection before handing the file to the caller.
        drop(temp_conn);
    }
    Ok(temp_file)
}
/// Entry point for `sync`.
///
/// After validating flag combinations, dispatches to server mode or to the
/// remote/stdin-stdout protocol when requested. Otherwise performs
/// directory-based sync: unless `--export-only` is set, every other `.db`
/// file in the directory is merged into the local database; then the local
/// database is written to `<directory>/<original hostname>.db` through a
/// temporary file in the same directory.
fn go(&self, mut conn: Connection) -> Result<(), Box<dyn std::error::Error>> {
    let direction_flag_set = self.send_only || self.receive_only;
    let has_stream_peer = self.remote.is_some() || self.stdin_stdout;
    if direction_flag_set && !has_stream_peer {
        return Err(Box::from(
            "--send-only and --receive-only flags require --remote or --stdin-stdout to be specified",
        ));
    }
    if self.remote.is_some() && self.dirname.is_some() {
        return Err(Box::from("Cannot specify both --remote and a directory path"));
    }
    if self.server {
        return self.handle_server_mode(&mut conn);
    }
    if has_stream_peer {
        return self.handle_remote_sync(&mut conn);
    }

    let Some(dirname) = self.dirname.as_ref() else {
        return Err(Box::from("Directory path is required for directory-based sync"));
    };
    if !dirname.exists() {
        fs::create_dir(dirname)?;
    }

    // The export is named after the host the database originated on.
    let original_hostname =
        pxh::get_setting(&conn, "original_hostname")?.unwrap_or_else(pxh::get_hostname);
    let mut output_path = dirname.join(original_hostname.to_path_lossy());
    output_path.set_extension("db");
    let output_path_str =
        output_path.to_str().ok_or("Unable to represent output filename as a string")?;

    if !self.export_only {
        let db_extension = OsStr::new("db");
        let filter_secrets = !self.no_secret_filter;
        for entry in fs::read_dir(dirname)? {
            let path = entry?.path();
            // Skip non-database files and our own previous export.
            if path.extension() != Some(db_extension) || output_path == path {
                continue;
            }
            print!("Syncing from {}...", path.to_string_lossy());
            let (other_count, added_count, filtered_count) =
                Self::merge_database_from_file_filtered(&mut conn, path, filter_secrets)?;
            if filtered_count > 0 {
                println!(
                    "done, considered {other_count} rows, added {added_count}, filtered {filtered_count}"
                );
            } else {
                println!("done, considered {other_count} rows and added {added_count}");
            }
        }
    }

    // Write into a temp file in the target directory, then move it into place.
    let staging = NamedTempFile::new_in(dirname.as_path())?;
    conn.execute("VACUUM INTO ?", (staging.path().to_str(),))?;
    staging.persist(output_path_str)?;

    let outcome = if self.export_only { "Backed-up database to" } else { "Saved merged database to" };
    println!("{outcome} {output_path_str}");
    Ok(())
}
/// Runs the client side of a stream-based sync.
///
/// The peer is either this process's own stdout/stdin (`--stdin-stdout`) or
/// a remote `pxh ... sync --server` process started through the configured
/// SSH command. The mode (`send`, `receive` or `bidirectional`) is written
/// as the first line, after which the database is sent and/or received. The
/// write side is dropped once sending is finished (or immediately in receive
/// mode). For SSH, the child's exit status is checked at the end.
fn handle_remote_sync(&self, conn: &mut Connection) -> Result<(), Box<dyn std::error::Error>> {
    let mode = match (self.send_only, self.receive_only) {
        (true, _) => "send",
        (false, true) => "receive",
        (false, false) => "bidirectional",
    };

    let (ssh_child, mut stdin_writer, mut stdout_reader): (
        Option<std::process::Child>,
        Box<dyn Write>,
        Box<dyn Read>,
    ) = if self.stdin_stdout {
        (None, Box::new(std::io::stdout()), Box::new(std::io::stdin()))
    } else {
        let host = self.remote.as_ref().ok_or("Remote host required for SSH sync")?;
        println!("Syncing with {host}...");
        let (ssh_cmd, ssh_args) = pxh::helpers::parse_ssh_command(&self.ssh_cmd);
        let remote_db_path = match self.remote_db.clone() {
            Some(path) => path,
            None => PathBuf::from("~/.pxh/pxh.db"),
        };
        let remote_pxh = pxh::helpers::determine_remote_pxh_path(&self.remote_pxh);

        // Command line executed on the remote host.
        let mut remote_command =
            format!("{} --db {} sync --server", remote_pxh, remote_db_path.display());
        if let Some(days) = self.since {
            remote_command.push_str(&format!(" --since {days}"));
        }

        let mut cmd = std::process::Command::new(&ssh_cmd);
        cmd.args(&ssh_args)
            .arg(host)
            .arg(&remote_command)
            .stdin(std::process::Stdio::piped())
            .stdout(std::process::Stdio::piped())
            .stderr(std::process::Stdio::inherit());
        let mut spawned =
            cmd.spawn().map_err(|e| format!("Failed to spawn SSH command: {e}"))?;
        let stdin = spawned.stdin.take().ok_or("Failed to get stdin from SSH process")?;
        let stdout = spawned.stdout.take().ok_or("Failed to get stdout from SSH process")?;
        (Some(spawned), Box::new(stdin), Box::new(stdout))
    };

    // Announce the mode as the first line of the stream.
    stdin_writer.write_all(mode.as_bytes())?;
    stdin_writer.write_all(b"\n")?;
    stdin_writer.flush()?;

    let sends = mode != "receive";
    let receives = mode != "send";
    if sends {
        self.send_database(&mut stdin_writer, conn)?;
    }
    // Release the write side before any receiving starts.
    drop(stdin_writer);
    if receives {
        self.receive_database(&mut stdout_reader, conn)?;
    }

    if let Some(mut child) = ssh_child {
        if !child.wait()?.success() {
            return Err(Box::from("Remote sync failed"));
        }
    }
    if !self.stdin_stdout {
        println!("Sync completed successfully");
    }
    Ok(())
}
fn merge_database_from_file_filtered(
conn: &mut Connection,
path: PathBuf,
filter_secrets: bool,
) -> Result<(i64, i64, i64), Box<dyn std::error::Error>> {
let tx = conn.transaction()?;
let before_count: i64 =
tx.prepare("SELECT COUNT(*) FROM main.command_history")?.query_row((), |r| r.get(0))?;
tx.execute("ATTACH DATABASE ? AS other", (path.as_os_str().as_bytes(),))?;
let mut filtered_count: i64 = 0;
if filter_secrets {
let (patterns, regex_set) = build_secret_patterns("critical")?;
if patterns.is_empty() {
tx.execute(
r#"
INSERT OR IGNORE INTO main.command_history (
session_id, full_command, shellname, hostname, username,
working_directory, exit_status, start_unix_timestamp, end_unix_timestamp
)
SELECT session_id, full_command, shellname, hostname, username,
working_directory, exit_status, start_unix_timestamp, end_unix_timestamp
FROM other.command_history
"#,
(),
)?;
} else {
let mut stmt = tx.prepare(
r#"
SELECT session_id, full_command, shellname, hostname, username,
working_directory, exit_status, start_unix_timestamp, end_unix_timestamp
FROM other.command_history
"#,
)?;
let mut rows = stmt.query([])?;
while let Some(row) = rows.next()? {
let full_command: Vec<u8> = row.get(1)?;
if regex_set.is_match(&full_command) {
filtered_count += 1;
continue;
}
tx.execute(
r#"
INSERT OR IGNORE INTO main.command_history (
session_id, full_command, shellname, hostname, username,
working_directory, exit_status, start_unix_timestamp, end_unix_timestamp
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
"#,
(
row.get::<_, i64>(0)?,
&full_command,
row.get::<_, String>(2)?,
row.get::<_, Option<Vec<u8>>>(3)?,
row.get::<_, Option<Vec<u8>>>(4)?,
row.get::<_, Option<Vec<u8>>>(5)?,
row.get::<_, Option<i64>>(6)?,
row.get::<_, Option<i64>>(7)?,
row.get::<_, Option<i64>>(8)?,
),
)?;
}
}
} else {
tx.execute(
r#"
INSERT OR IGNORE INTO main.command_history (
session_id, full_command, shellname, hostname, username,
working_directory, exit_status, start_unix_timestamp, end_unix_timestamp
)
SELECT session_id, full_command, shellname, hostname, username,
working_directory, exit_status, start_unix_timestamp, end_unix_timestamp
FROM other.command_history
"#,
(),
)?;
}
let after_count: i64 =
tx.prepare("SELECT COUNT(*) FROM main.command_history")?.query_row((), |r| r.get(0))?;
let other_count: i64 = tx
.prepare("SELECT COUNT(*) FROM other.command_history")?
.query_row((), |r| r.get(0))?;
tx.commit()?;
conn.execute("DETACH DATABASE other", ())?;
Ok((other_count, after_count - before_count, filtered_count))
}
/// Runs the server side of a sync: reads the requested mode from stdin and serves it.
///
/// The first line names the mode. A `-v2` suffix means a second line follows with
/// JSON-encoded `SyncOptions`; otherwise default options are used. The base mode is
/// one of `send` (client uploads), `receive` (client downloads), `bidirectional`
/// (upload, then download) or `scrub` (whose result text is printed to stdout).
///
/// # Errors
/// Fails when stdin yields no mode line, when the v2 options do not parse, when the
/// mode is unknown, or when the requested operation itself fails.
fn handle_server_mode(&self, conn: &mut Connection) -> Result<(), Box<dyn std::error::Error>> {
    let mut stdin = std::io::stdin();

    let mut mode_line = String::new();
    stdin.read_line(&mut mode_line)?;
    if mode_line.is_empty() {
        return Err(Box::from("No sync mode received"));
    }
    let mode = mode_line.trim();

    let (base_mode, options) = match mode.strip_suffix("-v2") {
        Some(base) => {
            let mut options_json = String::new();
            stdin.read_line(&mut options_json)?;
            let parsed: SyncOptions = serde_json::from_str(options_json.trim())
                .map_err(|e| format!("Failed to parse v2 protocol options: {e}. Client and server may have incompatible versions."))?;
            (base, parsed)
        }
        None => (mode, SyncOptions::default()),
    };

    match base_mode {
        "send" | "bidirectional" => {
            // The client's upload is always consumed before anything is sent back.
            self.receive_database_with_options(&mut stdin, conn, &options)?;
            if base_mode == "bidirectional" {
                self.send_database(&mut std::io::stdout(), conn)?;
            }
        }
        "receive" => self.send_database(&mut std::io::stdout(), conn)?,
        "scrub" => println!("{}", self.execute_remote_scrub(conn, &options)?),
        _ => return Err(Box::from(format!("Unknown sync mode: {mode}"))),
    }
    Ok(())
}
/// Executes a scrub requested by a remote client and returns a one-line summary.
///
/// With `options.scrub_scan` set, the database is scanned with the secret patterns
/// for `options.scrub_confidence` (default `"critical"`). Otherwise `options.scrub`
/// is used as a literal substring to look for in `full_command`. When
/// `options.scrub_dry_run` is set, matches are only counted, never removed.
///
/// # Errors
/// Fails when neither a scan nor a pattern was requested, when the pattern is
/// empty, or when a database or pattern-building step fails.
fn execute_remote_scrub(
    &self,
    conn: &Connection,
    options: &SyncOptions,
) -> Result<String, Box<dyn std::error::Error>> {
    let dry_run = options.scrub_dry_run.unwrap_or(false);
    let confidence = options.scrub_confidence.as_deref().unwrap_or("critical");
    let mut matches: Vec<ScanMatch> = Vec::new();
    if options.scrub_scan.unwrap_or(false) {
        let (patterns, regex_set) = build_secret_patterns(confidence)?;
        if !patterns.is_empty() {
            scan_database(conn, &regex_set, &patterns, &mut matches, usize::MAX)?;
        }
    } else if let Some(ref pattern) = options.scrub {
        // INSTR(x, '') is 1 for every row, so an empty pattern would select (and
        // then delete) the entire history. Refuse it, as interactive scrub does.
        if pattern.is_empty() {
            return Err("String to scrub must be non-empty; aborting.".into());
        }
        let mut stmt = conn.prepare(
            "SELECT rowid, full_command, start_unix_timestamp, working_directory FROM command_history WHERE INSTR(full_command, ?) > 0",
        )?;
        let mut rows = stmt.query([pattern.as_bytes()])?;
        while let Some(row) = rows.next()? {
            matches.push(ScanMatch {
                rowid: row.get(0)?,
                command: String::from_utf8_lossy(&row.get::<_, Vec<u8>>(1)?).to_string(),
                pattern: "manual".to_string(),
                timestamp: row.get(2)?,
                working_directory: row
                    .get::<_, Option<Vec<u8>>>(3)?
                    .map(|v| String::from_utf8_lossy(&v).to_string()),
                original_line: None,
            });
        }
    } else {
        return Err("Remote scrub requires --scan or a pattern".into());
    }
    if matches.is_empty() {
        return Ok("No entries matched for scrubbing.".to_string());
    }
    if dry_run {
        return Ok(format!("Dry-run: {} entries would be scrubbed.", matches.len()));
    }
    let count = scrub_from_database(conn, &matches)?;
    Ok(format!("Scrubbed {} entries from remote database.", count))
}
/// Streams a filtered copy of the local database to `writer`.
///
/// The wire format is the copy's size as an 8-byte little-endian integer followed
/// by the raw file contents. The copy itself comes from `create_filtered_db_copy`.
///
/// # Errors
/// Fails when the copy cannot be created, inspected or opened, or when writing to
/// `writer` fails.
fn send_database<W: Write>(
    &self,
    writer: &mut W,
    conn: &mut Connection,
) -> Result<(), Box<dyn std::error::Error>> {
    let snapshot = self.create_filtered_db_copy(conn)?;

    // Length prefix first, so the receiver knows how many bytes to expect.
    let byte_len = std::fs::metadata(snapshot.path())?.len();
    writer.write_all(&byte_len.to_le_bytes())?;

    let mut payload = File::open(snapshot.path())?;
    io::copy(&mut payload, writer)?;
    writer.flush()?;
    Ok(())
}
fn receive_database<R: Read>(
&self,
reader: &mut R,
conn: &mut Connection,
) -> Result<(), Box<dyn std::error::Error>> {
let options =
SyncOptions { no_secret_filter: Some(self.no_secret_filter), ..Default::default() };
self.receive_database_with_options(reader, conn, &options)
}
fn receive_database_with_options<R: Read>(
&self,
reader: &mut R,
conn: &mut Connection,
options: &SyncOptions,
) -> Result<(), Box<dyn std::error::Error>> {
let mut size_bytes = [0u8; 8];
reader.read_exact(&mut size_bytes)?;
let size = u64::from_le_bytes(size_bytes);
let mut data = vec![0u8; size as usize];
reader.read_exact(&mut data)?;
let temp_file = tempfile::NamedTempFile::new()?;
std::fs::write(temp_file.path(), &data)?;
let filter_secrets = !options.no_secret_filter.unwrap_or(false);
let (other_count, added_count, filtered_count) = Self::merge_database_from_file_filtered(
conn,
temp_file.path().to_path_buf(),
filter_secrets,
)?;
let current_hostname = pxh::get_hostname();
let current_db_path =
conn.path().map(|p| p.to_string()).unwrap_or_else(|| "in-memory".to_string());
let mut msg = format!(
"{current_hostname}: Merged into {current_db_path} considered {other_count} entries, added {added_count} entries"
);
if filtered_count > 0 {
msg.push_str(&format!(", filtered {filtered_count} potential secrets"));
}
eprintln!("{msg}");
Ok(())
}
}
/// Shared presentation logic for commands that list rows staged in
/// `memdb.show_results`.
trait PrintableCommand {
    /// Whether the wide, multi-column layout should be used.
    fn verbose(&self) -> bool;

    /// Value forwarded to the presenter as its header-suppression flag.
    fn suppress_headers(&self) -> bool;

    /// Maximum number of rows the command wants to display.
    fn display_limit(&self) -> usize;

    /// Command-specific post-processing applied to the fetched rows before display.
    fn extra_filter_step(
        &self,
        rows: Vec<pxh::Invocation>,
    ) -> Result<Vec<pxh::Invocation>, Box<dyn std::error::Error>>;

    /// Loads the staged rows joined against `command_history`, runs them through
    /// `extra_filter_step` and prints them via `pxh::present_results_human_readable`.
    fn present_results(&self, conn: &Connection) -> Result<(), Box<dyn std::error::Error>> {
        let mut stmt = conn.prepare(
            r#"
SELECT session_id, full_command, shellname, working_directory, hostname, username, exit_status, start_unix_timestamp, end_unix_timestamp
FROM memdb.show_results sr, command_history h
WHERE sr.ch_rowid = h.rowid
ORDER BY ch_start_unix_timestamp DESC, ch_id DESC
"#)?;
        let fetched = stmt
            .query_map([], pxh::Invocation::from_row)?
            .collect::<Result<Vec<pxh::Invocation>, _>>()?;
        let invocations = self.extra_filter_step(fetched)?;

        // The two layouts differ only in which columns are shown.
        let columns: &[&str] = if self.verbose() {
            &["start_time", "duration", "session", "context", "status", "command"]
        } else {
            &["start_time", "command"]
        };
        pxh::present_results_human_readable(columns, &invocations, self.suppress_headers())?;
        Ok(())
    }
}
/// Scrub previews always use the compact layout with headers and apply no extra
/// filtering to the staged rows.
impl PrintableCommand for ScrubCommand {
    /// Staged rows are shown exactly as selected.
    fn extra_filter_step(
        &self,
        rows: Vec<pxh::Invocation>,
    ) -> Result<Vec<pxh::Invocation>, Box<dyn std::error::Error>> {
        Ok(rows)
    }

    /// Scrub reports a limit of zero.
    fn display_limit(&self) -> usize {
        0
    }

    /// Headers are never suppressed.
    fn suppress_headers(&self) -> bool {
        false
    }

    /// The compact layout is always used.
    fn verbose(&self) -> bool {
        false
    }
}
impl ScrubCommand {
/// Dispatches to the scrub variant selected on the command line.
///
/// Precedence: directory mode, then remote mode, then pattern scan of the local
/// database or history file, and finally the interactive substring scrub.
fn go(&self, conn: Connection) -> Result<(), Box<dyn std::error::Error>> {
    match (&self.dir, &self.remote) {
        (Some(dir), _) => self.go_dir_mode(dir),
        (None, Some(remote)) => self.go_remote_mode(remote),
        (None, None) if self.scan => self.go_scan_mode(&conn),
        (None, None) => self.go_interactive_mode(conn),
    }
}
/// Scrubs every `*.db` file directly inside `dir`.
///
/// With `--scan`, entries are selected by the secret patterns for the configured
/// confidence level; otherwise by the literal contraband substring. All files are
/// scanned first and a summary is printed; unless `--dry-run` is set, the user is
/// asked for confirmation (skipped with `--yes`) before the matches are removed.
///
/// # Errors
/// Fails when `dir` does not exist, when neither `--scan` nor a non-empty
/// contraband string was given, on I/O or database errors, and (after doing all
/// the work that was possible) when any directory entry or database file had to
/// be skipped.
fn go_dir_mode(&self, dir: &Path) -> Result<(), Box<dyn std::error::Error>> {
    if !dir.exists() {
        return Err(format!("Directory does not exist: {}", dir.display()).into());
    }
    let (patterns, regex_set) = if self.scan {
        let result = build_secret_patterns(&self.confidence)?;
        if result.0.is_empty() {
            println!("No patterns available for confidence level '{}'.", self.confidence);
            return Ok(());
        }
        result
    } else {
        match self.contraband.as_deref() {
            None => return Err("Directory mode requires --scan or a contraband pattern".into()),
            // INSTR(x, '') is 1 for every row: an empty string would select the
            // whole history of every database in the directory.
            Some("") => return Err("String to scrub must be non-empty; aborting.".into()),
            Some(_) => (vec![], regex::bytes::RegexSet::empty()),
        }
    };

    // Collect the candidate database files, counting unreadable directory entries.
    let db_extension = OsStr::new("db");
    let mut entries: Vec<_> = Vec::new();
    let mut skipped_entries = 0usize;
    for entry_result in fs::read_dir(dir)? {
        match entry_result {
            Ok(entry) => {
                if entry.path().extension() == Some(db_extension) {
                    entries.push(entry);
                }
            }
            Err(e) => {
                eprintln!("Warning: Failed to read directory entry: {e}");
                skipped_entries += 1;
            }
        }
    }
    if entries.is_empty() {
        println!("No .db files found in {}", dir.display());
        return Ok(());
    }
    println!("Found {} database files in {}", entries.len(), dir.display());

    // Pass 1: find the matching entries in each file without modifying anything.
    let mut total_matches = 0;
    let mut skipped_files = 0usize;
    let mut file_match_counts: Vec<(PathBuf, Vec<ScanMatch>)> = Vec::new();
    for entry in &entries {
        let path = entry.path();
        print!("Scanning {}...", path.display());
        // Push the prefix out now so it stays ahead of any stderr diagnostics.
        io::stdout().flush()?;
        let conn = match Connection::open(&path) {
            Ok(c) => c,
            Err(e) => {
                eprintln!(" failed to open: {e}");
                skipped_files += 1;
                continue;
            }
        };
        let schema = include_str!("base_schema.sql");
        if let Err(e) = conn.execute_batch(schema) {
            eprintln!(" schema error: {e}");
            skipped_files += 1;
            continue;
        }
        let mut matches: Vec<ScanMatch> = Vec::new();
        if self.scan {
            scan_database(&conn, &regex_set, &patterns, &mut matches, usize::MAX)?;
        } else if let Some(ref contraband) = self.contraband {
            let mut stmt = conn.prepare(
                "SELECT rowid, full_command, start_unix_timestamp, working_directory FROM command_history WHERE INSTR(full_command, ?) > 0"
            )?;
            let mut rows = stmt.query([contraband.as_bytes()])?;
            while let Some(row) = rows.next()? {
                matches.push(ScanMatch {
                    rowid: row.get(0)?,
                    command: String::from_utf8_lossy(&row.get::<_, Vec<u8>>(1)?).to_string(),
                    pattern: "manual".to_string(),
                    timestamp: row.get(2)?,
                    working_directory: row
                        .get::<_, Option<Vec<u8>>>(3)?
                        .map(|v| String::from_utf8_lossy(&v).to_string()),
                    original_line: None,
                });
            }
        }
        println!(" {} entries", matches.len());
        total_matches += matches.len();
        if !matches.is_empty() {
            file_match_counts.push((path, matches));
        }
    }

    let total_skipped = skipped_entries + skipped_files;
    if total_skipped > 0 {
        eprintln!(
            "WARNING: {} file(s) could not be processed. Results may be incomplete.",
            total_skipped
        );
    }
    // Final status shared by every exit below: skipped files turn an otherwise
    // successful run into an error so callers notice the gap.
    let completion = || -> Result<(), Box<dyn std::error::Error>> {
        if total_skipped > 0 {
            Err(format!("Scrub incomplete: {} file(s) could not be processed", total_skipped)
                .into())
        } else {
            Ok(())
        }
    };

    if total_matches == 0 {
        println!("\nNo entries found to scrub.");
        return completion();
    }
    println!("\nFound {} entries across {} file(s).", total_matches, file_match_counts.len());
    if self.dry_run {
        println!("Dry-run mode: no changes made.");
        return completion();
    }
    if !self.yes {
        print!("Proceed with scrubbing? [y/N] ");
        io::stdout().flush()?;
        let mut input = String::new();
        io::stdin().read_line(&mut input)?;
        if !input.trim().eq_ignore_ascii_case("y") {
            println!("Aborted.");
            return Ok(());
        }
    }

    // Pass 2: reopen each affected file and remove the recorded matches.
    let mut total_scrubbed = 0;
    let mut files_modified = 0;
    for (path, matches) in file_match_counts {
        let conn = Connection::open(&path)?;
        let count = scrub_from_database(&conn, &matches)?;
        total_scrubbed += count;
        files_modified += 1;
    }
    println!("Scrubbed {} entries from {} file(s).", total_scrubbed, files_modified);
    completion()
}
/// Asks the pxh instance on `remote` (reached over SSH) to scrub its database.
///
/// Spawns `<ssh> <remote> "<pxh> --db <path> sync --server"`, sends the `scrub-v2`
/// mode line followed by the JSON-encoded options, then prints whatever the remote
/// wrote to stdout.
///
/// # Errors
/// Fails when neither `--scan` nor a non-empty contraband string was given, when
/// the SSH process cannot be spawned or talked to, or when it exits unsuccessfully
/// (the remote's output is included in the error text).
fn go_remote_mode(&self, remote: &str) -> Result<(), Box<dyn std::error::Error>> {
    let scrub_pattern = if self.scan {
        None
    } else {
        match self.contraband.as_deref() {
            None => return Err("Remote scrub requires --scan or a contraband pattern".into()),
            // An empty substring matches every row on the remote side, so refuse it
            // here, before any connection is made.
            Some("") => return Err("String to scrub must be non-empty; aborting.".into()),
            Some(pattern) => Some(pattern.to_owned()),
        }
    };
    let remote_pxh = self.remote_pxh.as_deref().unwrap_or("pxh");
    let remote_pxh = pxh::helpers::determine_remote_pxh_path(remote_pxh);
    let remote_db_path =
        self.remote_db.clone().unwrap_or_else(|| PathBuf::from("~/.pxh/pxh.db"));
    let (ssh_cmd, ssh_args) = pxh::helpers::parse_ssh_command(&self.ssh_cmd);
    let remote_command =
        format!("{} --db {} sync --server", remote_pxh, remote_db_path.display());
    let mut cmd = std::process::Command::new(&ssh_cmd);
    cmd.args(&ssh_args)
        .arg(remote)
        .arg(&remote_command)
        .stdin(std::process::Stdio::piped())
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::inherit());
    let mut child = cmd.spawn().map_err(|e| format!("Failed to spawn SSH command: {e}"))?;
    let mut stdin_writer = child.stdin.take().ok_or("Failed to get stdin from SSH process")?;
    let mut stdout_reader =
        child.stdout.take().ok_or("Failed to get stdout from SSH process")?;
    let options = SyncOptions {
        scrub: scrub_pattern,
        scrub_scan: if self.scan { Some(true) } else { None },
        scrub_confidence: if self.scan { Some(self.confidence.clone()) } else { None },
        scrub_dry_run: if self.dry_run { Some(true) } else { None },
        no_secret_filter: None,
    };
    // Mode line first, then the options as a single JSON line.
    stdin_writer.write_all(b"scrub-v2\n")?;
    let options_json = serde_json::to_string(&options)?;
    stdin_writer.write_all(options_json.as_bytes())?;
    stdin_writer.write_all(b"\n")?;
    stdin_writer.flush()?;
    // Close our end so the remote sees end-of-input.
    drop(stdin_writer);
    let mut response = String::new();
    stdout_reader.read_to_string(&mut response)?;
    let status = child.wait()?;
    if !status.success() {
        return Err(format!("Remote scrub failed: {}", response).into());
    }
    println!("{}", response.trim());
    Ok(())
}
/// Scrubs entries that match the secret patterns for the configured confidence
/// level, from either the history file given with `--histfile` or the database.
///
/// The matches are listed first. Nothing is removed in `--dry-run` mode, and
/// otherwise only after confirmation (skipped with `--yes`).
///
/// # Errors
/// Fails when the patterns cannot be built, when the history file cannot be read,
/// or when scanning, prompting or scrubbing fails.
fn go_scan_mode(&self, conn: &Connection) -> Result<(), Box<dyn std::error::Error>> {
    let (patterns, regex_set) = build_secret_patterns(&self.confidence)?;
    if patterns.is_empty() {
        println!("No patterns available.");
        return Ok(());
    }
    let mut matches: Vec<ScanMatch> = Vec::new();
    if let Some(ref histfile) = self.histfile {
        let content = fs::read(histfile)?;
        // An explicit --shellname wins; otherwise the format is detected from content.
        let shellname = self.shellname.clone().unwrap_or_else(|| detect_shell_format(&content));
        scan_histfile(&content, &shellname, &regex_set, &patterns, &mut matches, usize::MAX)?;
    } else {
        scan_database(conn, &regex_set, &patterns, &mut matches, usize::MAX)?;
    }
    if matches.is_empty() {
        println!("No potential secrets found. Nothing to scrub.");
        return Ok(());
    }
    println!("Found {} potential secret(s) to scrub:\n", matches.len());
    ScanCommand::display_matches(&matches, false);
    if self.dry_run {
        println!("Dry-run mode: no changes made.");
        return Ok(());
    }
    if !self.yes && !prompt_for_confirmation()? {
        println!("Aborted.");
        return Ok(());
    }
    if let Some(ref histfile) = self.histfile {
        let count = scrub_from_histfile(histfile, &matches)?;
        println!("Scrubbed {} entries from {}.", count, histfile.display());
    } else {
        let count = scrub_from_database(conn, &matches)?;
        println!("Scrubbed {} entries from database.", count);
    }
    Ok(())
}
fn go_interactive_mode(&self, mut conn: Connection) -> Result<(), Box<dyn std::error::Error>> {
if self.histfile.is_some() && self.contraband.is_none() {
return Err(
"Interactive mode with --histfile requires specifying the string to scrub".into()
);
}
let contraband = match &self.contraband {
Some(value) => {
println!(
"WARNING: specifying the contraband on the command line is inherently risky; prefer not specifying it\n"
);
value.clone()
}
None => {
let mut input = String::new();
print!("String to scrub: ");
std::io::stdout().flush()?;
io::stdin().read_line(&mut input)?;
input.trim_end().into()
}
};
if contraband.is_empty() {
println!(); return Err(String::from("String to scrub must be non-empty; aborting.").into());
}
conn.execute("DELETE FROM memdb.show_results", ())?;
conn.execute(
r#"
INSERT INTO memdb.show_results (ch_rowid, ch_start_unix_timestamp, ch_id)
SELECT rowid, start_unix_timestamp, id
FROM command_history h
WHERE INSTR(full_command, ?) > 0
ORDER BY start_unix_timestamp DESC, id DESC"#,
(&contraband,),
)?;
println!("Entries to scrub from pxh database...\n");
self.present_results(&conn)?;
if self.dry_run {
println!("\nDry-run, no entries scrubbed.");
return Ok(());
}
if !self.yes && !prompt_for_confirmation()? {
println!("Aborted.");
return Ok(());
}
let tx = conn.transaction()?;
tx.execute(
"DELETE FROM command_history WHERE rowid IN (SELECT ch_rowid FROM memdb.show_results)",
(),
)?;
tx.commit()?;
if let Some(ref histfile) = self.histfile {
pxh::atomically_remove_lines_from_file(histfile, &contraband)?;
println!("\nEntries scrubbed from database and {}.", histfile.display());
} else {
println!("\nEntries scrubbed from database.");
}
Ok(())
}
}
impl PrintableCommand for ShowCommand {
fn extra_filter_step(
&self,
rows: Vec<pxh::Invocation>,
) -> Result<Vec<pxh::Invocation>, Box<dyn std::error::Error>> {
let regexes: Result<Vec<Regex>, _> =
self.patterns.iter().skip(1).map(|s| Regex::new(s.as_str())).collect();
let regexes = regexes?;
Ok(rows
.into_iter()
.filter(|row| match_all_regexes(row, ®exes))
.rev()
.take(self.display_limit())
.collect())
}
fn verbose(&self) -> bool {
self.verbose
}
fn suppress_headers(&self) -> bool {
self.suppress_headers
}
fn display_limit(&self) -> usize {
self.limit
}
}
impl ShowCommand {
fn go(&self, conn: Connection) -> Result<(), Box<dyn std::error::Error>> {
let pattern = if self.loosen {
self.patterns.first().map_or_else(String::default, String::clone)
} else {
self.patterns.join(".*\\s.*")
};
let pattern =
if self.ignore_case { format!("(?i){}", pattern.to_lowercase()) } else { pattern };
conn.execute("DELETE FROM memdb.show_results", ())?;
let working_directory = self.working_directory.as_ref().map_or_else(
|| {
env::var_os("PWD")
.map(PathBuf::from)
.or_else(|| env::current_dir().ok())
.unwrap_or_default()
},
|v| v.clone(),
);
if let Some(ref maybe_session_hex) = self.session {
let session_id = i64::from_str_radix(maybe_session_hex, 16)?;
conn.execute(
r#"
INSERT INTO memdb.show_results (ch_rowid, ch_start_unix_timestamp, ch_id)
SELECT rowid, start_unix_timestamp, id
FROM command_history h
WHERE full_command REGEXP ? AND session_id = ?
ORDER BY start_unix_timestamp DESC, id DESC
LIMIT ?"#,
(pattern, session_id, self.display_limit() as i64),
)?;
} else if self.here {
conn.execute(
r#"
INSERT INTO memdb.show_results (ch_rowid, ch_start_unix_timestamp, ch_id)
SELECT rowid, start_unix_timestamp, id
FROM command_history h
WHERE working_directory = CAST(? as blob)
AND full_command REGEXP ?
ORDER BY start_unix_timestamp DESC, id DESC
LIMIT ?"#,
(working_directory.to_string_lossy(), pattern, self.display_limit() as i64),
)?;
} else {
conn.execute(
r#"
INSERT INTO memdb.show_results (ch_rowid, ch_start_unix_timestamp, ch_id)
SELECT rowid, start_unix_timestamp, id
FROM command_history h
WHERE full_command REGEXP ?
ORDER BY start_unix_timestamp DESC, id DESC
LIMIT ?"#,
(pattern, self.display_limit() as i64),
)?;
}
self.present_results(&conn)
}
}
/// Returns true when the row's command is matched by every regex in `regexes`
/// (trivially true for an empty slice).
fn match_all_regexes(row: &pxh::Invocation, regexes: &[Regex]) -> bool {
    let command = row.command.as_slice();
    !regexes.iter().any(|pattern| !pattern.is_match(command))
}
/// Program entry point: parses the command line and runs the selected subcommand.
///
/// When `pxh::helpers::determine_is_pxhs` accepts the argument vector, `show` is
/// inserted as the subcommand right after the program name. Commands that need the
/// database open it on demand at the path given by `--db`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    env_logger::init();

    let args_vec: Vec<String> = env::args().collect();
    let mut args = if pxh::helpers::determine_is_pxhs(&args_vec) {
        let mut rewritten = Vec::with_capacity(args_vec.len() + 1);
        rewritten.push(args_vec[0].clone());
        rewritten.push(String::from("show"));
        rewritten.extend(args_vec.iter().skip(1).cloned());
        PxhArgs::parse_from(rewritten)
    } else {
        PxhArgs::parse()
    };

    // Opened lazily so that commands which never touch the database do not open it.
    let make_conn = || pxh::sqlite_connection(&args.db);

    match &mut args.command {
        Commands::ShellConfig(cmd) => cmd.go()?,
        Commands::Install(cmd) => cmd.go()?,
        Commands::Import(cmd) => cmd.go(make_conn()?)?,
        Commands::Export(cmd) => cmd.go(make_conn()?)?,
        Commands::Show(cmd) => {
            // A limit of zero, or loosened matching, lifts the limit.
            if cmd.limit == 0 || cmd.loosen {
                cmd.limit = i32::MAX as usize;
            }
            cmd.go(make_conn()?)?;
        }
        Commands::Scrub(cmd) => cmd.go(make_conn()?)?,
        Commands::Seal(cmd) => cmd.go(make_conn()?)?,
        Commands::Sync(cmd) => cmd.go(make_conn()?)?,
        Commands::Maintenance(cmd) => cmd.go(make_conn()?)?,
        Commands::Scan(cmd) => cmd.go(make_conn()?)?,
        Commands::Recall(cmd) => cmd.go(make_conn()?)?,
        Commands::Insert(cmd) => {
            let mut conn = make_conn()?;
            let record = pxh::Invocation {
                command: cmd.command.join(OsStr::new(" ")).as_bytes().into(),
                shellname: cmd.shellname.clone(),
                working_directory: cmd
                    .working_directory
                    .as_ref()
                    .map(|dir| BString::from(dir.as_path().as_os_str().as_bytes())),
                hostname: Some(BString::from(cmd.hostname.clone().into_vec())),
                username: Some(BString::from(cmd.username.clone().into_vec())),
                exit_status: cmd.exit_status,
                start_unix_timestamp: cmd.start_unix_timestamp,
                end_unix_timestamp: cmd.end_unix_timestamp,
                session_id: cmd.session_id,
            };
            let tx = conn.transaction_with_behavior(TransactionBehavior::Deferred)?;
            record.insert(&tx)?;
            tx.commit()?;
        }
    }
    Ok(())
}