use serde::Deserialize;
use std::path::Path;
use std::process::Stdio;
use std::time;
use tokio::io::AsyncReadExt;
use tokio::process::Command;
use crate::Result;
/// A single JSON-lines event emitted by `rg --json`, externally tagged by
/// its `type` field with the payload under `data` (matches ripgrep's
/// documented JSON output format).
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)]
#[serde(tag = "type", content = "data")]
#[serde(rename_all = "snake_case")]
pub enum Message {
    /// Search of one file is starting.
    Begin(Begin),
    /// Search of one file has finished.
    End(End),
    /// A line containing at least one match.
    Match(Match),
    /// A context line near a match (produced here by the default `-C3`).
    Context(Context),
    /// Final statistics for the entire run.
    Summary(Summary),
}
/// Payload of a `begin` message: identifies the file whose search starts.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)]
pub struct Begin {
    /// File path; `None` when ripgrep reports no path — presumably when
    /// searching stdin (confirm against ripgrep's JSON docs).
    pub path: Option<Data>,
}
impl Begin {
    /// The path as UTF-8 text, or `None` when absent or not valid text
    /// (delegates to [`as_path`]).
    pub(crate) fn path(&self) -> Option<String> {
        as_path(&self.path)
    }
}
/// Payload of an `end` message: closes out the search of one file.
/// Fields are private and not read anywhere in this file; they are kept
/// so the message deserializes fully.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)]
pub struct End {
    path: Option<Data>,
    binary_offset: Option<u64>,
    stats: Stats,
}
/// Payload of the final `summary` message with totals for the whole run.
/// Fields are private and currently unread in this file.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)]
pub struct Summary {
    elapsed_total: Duration,
    stats: Stats,
}
/// Payload of a `match` message: one matching line plus its submatch spans.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)]
pub struct Match {
    /// File containing the match, when ripgrep reports one.
    pub path: Option<Data>,
    /// Contents of the matched line(s).
    pub lines: Data,
    // Presumably the 1-based line number, absent when line numbering is
    // unavailable — confirm against ripgrep's JSON docs.
    line_number: Option<u64>,
    // Byte offset of the line within the file — TODO confirm.
    absolute_offset: u64,
    /// Individual matched spans within `lines`.
    pub submatches: Vec<SubMatch>,
}
impl Match {
    /// The path as UTF-8 text, or `None` when absent or not valid text
    /// (delegates to [`as_path`]).
    pub(crate) fn path(&self) -> Option<String> {
        as_path(&self.path)
    }
}
/// Payload of a `context` message: a non-matching line shown around a
/// match. Mirrors [`Match`], but the span fields are private here.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)]
pub struct Context {
    /// File containing the context line, when ripgrep reports one.
    pub path: Option<Data>,
    /// Contents of the context line.
    pub lines: Data,
    // Presumably the 1-based line number — confirm against ripgrep docs.
    line_number: Option<u64>,
    absolute_offset: u64,
    submatches: Vec<SubMatch>,
}
impl Context {
    /// The path as UTF-8 text, or `None` when absent or not valid text
    /// (delegates to [`as_path`]).
    pub(crate) fn path(&self) -> Option<String> {
        as_path(&self.path)
    }
}
/// One matched span within a [`Match`] or [`Context`] line.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)]
pub struct SubMatch {
    /// The matched text. Named `m` because `match` is a Rust keyword;
    /// the serde rename keeps the JSON field name.
    #[serde(rename = "match")]
    m: Data,
    // Span of the match within the line; presumably byte offsets —
    // confirm against ripgrep's JSON docs.
    start: usize,
    end: usize,
}
/// A possibly-non-UTF-8 value from ripgrep's output: `text` when the data
/// is valid UTF-8, otherwise `bytes` (presumably base64-encoded — confirm
/// against ripgrep's JSON docs). `untagged` lets serde pick the variant
/// from the field name present in the JSON object.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)]
#[serde(untagged)]
pub enum Data {
    Text { text: String },
    Bytes { bytes: String },
}
/// Extracts an owned path string from optional message data.
///
/// Yields `Some` only when the data is present and was valid UTF-8 text;
/// a missing value or a `Bytes` payload yields `None`.
fn as_path(data: &Option<Data>) -> Option<String> {
    match data.as_ref()? {
        Data::Text { text } => Some(text.clone()),
        Data::Bytes { .. } => None,
    }
}
/// Per-search statistics attached to [`End`] and [`Summary`] messages.
/// Fields mirror ripgrep's JSON `stats` object; none are read in this file.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)]
struct Stats {
    elapsed: Duration,
    searches: u64,
    searches_with_match: u64,
    bytes_searched: u64,
    bytes_printed: u64,
    matched_lines: u64,
    matches: u64,
}
/// Wall-clock duration as reported by ripgrep: machine-readable parts plus
/// a preformatted human-readable string.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)]
struct Duration {
    // NOTE(review): relies on `std::time::Duration`'s Deserialize impl
    // (which expects `secs`/`nanos`) working under `#[serde(flatten)]` —
    // worth confirming with a round-trip test against real `rg --json`
    // output.
    #[serde(flatten)]
    duration: time::Duration,
    human: String,
}
/// Decodes a stream of newline-delimited JSON messages (as produced by
/// `rg --json`) into a vector, failing on the first malformed record.
pub fn json_decode(jsonlines: &str) -> Result<Vec<Message>> {
    let mut messages = Vec::new();
    for message in serde_json::Deserializer::from_str(jsonlines).into_iter::<Message>() {
        messages.push(message?);
    }
    Ok(messages)
}
/// Configuration for invoking ripgrep and decoding its JSON output.
pub struct RipgrepCommand {
    // Executable name or path to invoke ("rg" by default).
    command: String,
    // Arguments prepended to every invocation, before any extra args,
    // the needle, and the haystack path.
    default_args: Vec<String>,
}
impl Default for RipgrepCommand {
    /// Default configuration: `rg --json --trim -C3 --ignore-case -tmarkdown`
    /// (JSON output, trimmed lines, 3 context lines, case-insensitive,
    /// markdown files only).
    fn default() -> Self {
        let default_args = vec![
            "--json".to_string(),
            "--trim".to_string(),
            "-C3".to_string(),
            "--ignore-case".to_string(),
            "-tmarkdown".to_string(),
        ];
        Self {
            command: String::from("rg"),
            default_args,
        }
    }
}
impl RipgrepCommand {
    /// Rejects needles that are empty, flag-like (leading dash), or
    /// unreasonably long, before they ever reach the child command line.
    fn validate_needle(needle: &str) -> Result<()> {
        if needle.is_empty() {
            return Err(crate::Error::Validation(
                "Search needle cannot be empty".to_string(),
            ));
        }
        if needle.starts_with('-') {
            return Err(crate::Error::Validation(
                "Search needle cannot start with dash (potential flag injection)".to_string(),
            ));
        }
        if needle.len() > 1000 {
            return Err(crate::Error::Validation(
                "Search needle too long (max 1000 characters)".to_string(),
            ));
        }
        Ok(())
    }

    /// Searches `haystack` for `needle` using only the default arguments.
    ///
    /// # Errors
    /// Returns a validation error for an unacceptable needle, an I/O error
    /// if ripgrep cannot be spawned, an error if ripgrep itself fails, or a
    /// decode error if its output is not valid JSON-lines.
    pub async fn run(&self, needle: &str, haystack: &Path) -> Result<Vec<Message>> {
        Self::validate_needle(needle)?;
        self.run_with_extra_args(needle, haystack, &[]).await
    }

    /// Allow-list of ripgrep flags that callers may pass via `extra_args`.
    ///
    /// Only exact matches are accepted, so inline forms such as `-C3` or
    /// `--glob=*.md` are rejected; pass the value as a separate argument
    /// (as `parse_extra_parameters` does).
    fn is_safe_ripgrep_flag(&self, flag: &str) -> bool {
        matches!(
            flag,
            "--all-match"
                | "-e"
                | "--glob"
                | "-t"
                | "--max-count"
                | "-C"
                | "--case-sensitive"
                | "--ignore-case"
                | "-i"
                | "--line-number"
                | "-n"
                | "--with-filename"
                | "-H"
                | "--no-heading"
                | "--color=never"
                | "--json"
                | "--heading"
                | "--trim"
                | "--context"
                | "--after-context"
                | "--before-context"
                | "-A"
                | "-B"
        )
    }

    /// Runs ripgrep with the default arguments plus caller-supplied
    /// `extra_args`, returning the decoded JSON-lines messages.
    ///
    /// # Errors
    /// Fails on an invalid needle, an extra argument outside the allow-list,
    /// a spawn/I/O failure, a ripgrep exit status other than 0 ("matches")
    /// or 1 ("no matches"), or output that does not decode as ripgrep JSON.
    pub async fn run_with_extra_args(
        &self,
        needle: &str,
        haystack: &Path,
        extra_args: &[String],
    ) -> Result<Vec<Message>> {
        Self::validate_needle(needle)?;
        for arg in extra_args {
            if arg.starts_with('-') && !self.is_safe_ripgrep_flag(arg) {
                log::warn!("Potentially unsafe ripgrep argument rejected: {}", arg);
                return Err(crate::Error::Validation(format!(
                    "Unsafe ripgrep argument: {}",
                    arg
                )));
            }
        }
        // "--" terminates flag parsing so the needle can never be interpreted
        // as an option, even if needle validation is ever relaxed.
        let args: Vec<String> = self
            .default_args
            .iter()
            .cloned()
            .chain(extra_args.iter().cloned())
            .chain(vec![
                "--".to_string(),
                needle.to_string(),
                haystack.to_string_lossy().to_string(),
            ])
            .collect();
        log::debug!("Running ripgrep with args: {:?}", args);
        log::info!("🚀 Executing: {} {}", &self.command, args.join(" "));
        let child = Command::new(&self.command)
            .args(args)
            .stdout(Stdio::piped())
            .spawn()?;
        // wait_with_output() both drains stdout and reaps the child; the
        // previous code read stdout to EOF but never wait()ed, leaking a
        // zombie process per invocation.
        let output = child.wait_with_output().await?;
        // ripgrep exits 0 when matches are found and 1 when none are found;
        // anything else (e.g. 2) is a genuine failure that would otherwise
        // surface as a silently empty result set.
        if !output.status.success() && output.status.code() != Some(1) {
            // NOTE(review): Validation is the only message-carrying error
            // constructor visible here; a dedicated variant may fit better.
            return Err(crate::Error::Validation(format!(
                "ripgrep exited with status: {}",
                output.status
            )));
        }
        // Lossy conversion keeps the search usable even if a matched file
        // smuggles invalid UTF-8 into the output (read_to_string would have
        // aborted the whole run).
        let stdout = String::from_utf8_lossy(&output.stdout);
        // Preview is truncated by characters, not bytes: slicing at a fixed
        // byte offset can split a UTF-8 code point and panic.
        let preview: String = stdout.chars().take(500).collect();
        log::debug!("Raw ripgrep output ({} bytes): {}", stdout.len(), preview);
        let messages = json_decode(&stdout)?;
        log::debug!("JSON decode produced {} messages", messages.len());
        Ok(messages)
    }

    /// Validates a single `parse_extra_parameters` value: bounded length,
    /// no leading dash (flag injection), and numeric where the target flag
    /// requires a number.
    fn validate_parameter_value(key: &str, value: &str) -> Result<()> {
        if value.len() > 200 {
            return Err(crate::Error::Validation(format!(
                "Parameter value too long for {}: max 200 characters",
                key
            )));
        }
        if value.starts_with('-') {
            return Err(crate::Error::Validation(format!(
                "Parameter value cannot start with dash for {}",
                key
            )));
        }
        match key {
            "max_count" | "context" => {
                if value.parse::<u32>().is_err() {
                    return Err(crate::Error::Validation(format!(
                        "Parameter {} must be a positive integer",
                        key
                    )));
                }
            }
            _ => {}
        }
        Ok(())
    }

    /// Translates a high-level parameter map (`tag`, `glob`, `type`,
    /// `max_count`, `context`, `case_sensitive`) into ripgrep CLI arguments.
    ///
    /// Invalid or unknown entries are logged and skipped rather than failing
    /// the whole search. Note: iteration order over the map is unspecified,
    /// so the relative order of arguments from different keys may vary.
    pub fn parse_extra_parameters(
        &self,
        extra_params: &std::collections::HashMap<String, String>,
    ) -> Vec<String> {
        let mut args = Vec::new();
        if extra_params.is_empty() {
            log::debug!("No extra parameters to process");
            return args;
        }
        log::debug!(
            "Processing {} extra parameters: {:?}",
            extra_params.len(),
            extra_params
        );
        for (key, value) in extra_params {
            // Best-effort: a bad value drops that one parameter only.
            if let Err(e) = Self::validate_parameter_value(key, value) {
                log::warn!("Invalid parameter {}: {}", key, e);
                continue;
            }
            match key.as_str() {
                "tag" => {
                    log::info!("🏷️ Processing tag filter: '{}'", value);
                    // --all-match requires every -e pattern to appear in a
                    // file for it to be reported; add it once.
                    if !args.contains(&"--all-match".to_string()) {
                        args.push("--all-match".to_string());
                        log::debug!("Added --all-match flag for tag filtering");
                    }
                    // Tags may be separated by commas and/or whitespace.
                    let tags: Vec<&str> = value
                        .split(|c: char| c == ',' || c.is_whitespace())
                        .filter(|s| !s.is_empty())
                        .collect();
                    if tags.is_empty() {
                        // Value was all separators; fall back to the raw value.
                        args.push("-e".to_string());
                        args.push(value.clone());
                        log::info!("Added tag pattern: {}", value);
                    } else {
                        for t in tags {
                            args.push("-e".to_string());
                            args.push(t.to_string());
                            log::info!("Added tag pattern: {}", t);
                        }
                    }
                    log::info!(
                        "🔍 Tag filtering will require search results to contain ALL specified patterns"
                    );
                }
                "glob" => {
                    args.push("--glob".to_string());
                    args.push(value.clone());
                    log::debug!("Added glob pattern: {}", value);
                }
                "type" => {
                    args.push("-t".to_string());
                    args.push(value.clone());
                    log::debug!("Added type filter: {}", value);
                }
                "max_count" => {
                    args.push("--max-count".to_string());
                    args.push(value.clone());
                    log::debug!("Added max count: {}", value);
                }
                "context" => {
                    args.push("-C".to_string());
                    args.push(value.clone());
                    log::debug!("Added context lines: {}", value);
                }
                "case_sensitive" => {
                    // Only an explicit (case-insensitive) "true" enables it.
                    if value.to_lowercase() == "true" {
                        args.push("--case-sensitive".to_string());
                        log::debug!("Enabled case-sensitive search");
                    }
                }
                _ => {
                    log::warn!("Unknown ripgrep parameter: {} = {}", key, value);
                }
            }
        }
        args
    }
}