use crate::aggregation::{aggregate, AggregatedItem, AggregationStrategy};
use crate::decomposition::{decompose, SubQuery};
use crate::error::CliError;
use crate::parallel::execute_parallel_searches;
use crate::synthesis::{synthesize, SynthFormat, SynthesizedReport};
use crate::types::{Config, SearchOutput};
use serde::{Deserialize, Serialize};
use std::time::Instant;
use tokio_util::sync::CancellationToken;
/// Largest `max_sub_queries` value that `DeepResearchArgs::validate` accepts.
pub const MAX_SUB_QUERIES: usize = 12;
/// Value `DeepResearchArgs::default()` assigns to `max_sub_queries`.
pub const DEFAULT_MAX_SUB_QUERIES: usize = 5;
/// Largest `depth` value that `DeepResearchArgs::validate` accepts.
pub const MAX_DEPTH: u32 = 3;
/// Parameter handed to `AggregationStrategy::Rrf` by `run_deep_research`.
/// NOTE(review): presumably the `k` constant of reciprocal rank fusion —
/// confirm against the `aggregation` module.
pub const RRF_K: u32 = 60;
/// Strategy selector forwarded to `decompose` when splitting the original
/// query into sub-queries.
///
/// Serialized with `snake_case` variant names (`heuristic`, `manual`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SubQueryStrategy {
/// Default strategy (see `DeepResearchArgs::default`).
/// NOTE(review): presumably derives sub-queries automatically from the
/// query text — confirm in the `decomposition` module.
Heuristic,
/// NOTE(review): presumably takes sub-queries from
/// `DeepResearchArgs::sub_queries_file` — confirm in the `decomposition` module.
Manual,
}
/// User-facing choice of how per-sub-query results are merged.
///
/// `run_deep_research` maps each variant onto an `AggregationStrategy` and
/// onto the label reported in the output metadata. Serialized with
/// `snake_case` variant names (`rrf`, `dedupe_by_url`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AggregationStrategyKind {
/// Mapped to `AggregationStrategy::Rrf(RRF_K)`; reported as `"rrf"`.
Rrf,
/// Mapped to `AggregationStrategy::DedupeByUrl`; reported as `"dedupe_by_url"`.
DedupeByUrl,
}
/// Arguments for a single `run_deep_research` invocation.
///
/// Call `validate` (done automatically by `run_deep_research`) to check the
/// numeric limits before use.
#[derive(Debug, Clone)]
pub struct DeepResearchArgs {
/// Original query: forwarded to `decompose` and `synthesize`, and echoed
/// back in the output metadata.
pub query: String,
/// Sub-query limit forwarded to `decompose`; `validate` requires
/// `1..=MAX_SUB_QUERIES`.
pub max_sub_queries: usize,
/// Decomposition strategy forwarded to `decompose`.
pub sub_query_strategy: SubQueryStrategy,
/// Optional path forwarded to `decompose`.
/// NOTE(review): presumably only consulted for `SubQueryStrategy::Manual` — confirm.
pub sub_queries_file: Option<std::path::PathBuf>,
/// Selects the aggregation strategy applied to the per-sub-query results.
pub aggregation: AggregationStrategyKind,
/// Requested depth; `validate` requires `<= MAX_DEPTH`. In
/// `run_deep_research` a value above zero only appends a placeholder
/// outcome with status `"planejado"`.
pub depth: u32,
/// NOTE(review): not read by `run_deep_research` in this file — confirm
/// whether another consumer uses it.
pub fetch_content: bool,
/// When `true`, `run_deep_research` calls `synthesize` on the aggregated results.
pub synthesize: bool,
/// Budget forwarded to `synthesize`.
/// NOTE(review): presumably a token budget for the report — confirm in `synthesis`.
pub budget_tokens: usize,
/// Output format forwarded to `synthesize`.
pub synth_format: SynthFormat,
}
impl Default for DeepResearchArgs {
    /// Baseline arguments: empty query, heuristic decomposition capped at
    /// `DEFAULT_MAX_SUB_QUERIES`, RRF aggregation, depth 0, and every optional
    /// stage (content fetching, synthesis) switched off.
    fn default() -> Self {
        Self {
            // Decomposition inputs.
            query: String::default(),
            sub_query_strategy: SubQueryStrategy::Heuristic,
            sub_queries_file: None,
            max_sub_queries: DEFAULT_MAX_SUB_QUERIES,
            // Aggregation and depth.
            aggregation: AggregationStrategyKind::Rrf,
            depth: 0,
            // Optional stages and their settings.
            fetch_content: false,
            synthesize: false,
            synth_format: SynthFormat::Markdown,
            budget_tokens: 4_000,
        }
    }
}
impl DeepResearchArgs {
    /// Checks the numeric limits of the arguments.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message when `max_sub_queries` is zero, when
    /// it exceeds `MAX_SUB_QUERIES`, or when `depth` exceeds `MAX_DEPTH`.
    /// The sub-query count is checked before the depth.
    pub fn validate(&self) -> Result<(), String> {
        match self.max_sub_queries {
            0 => {
                return Err(format!(
                    "--max-sub-queries must be at least 1 (got {})",
                    self.max_sub_queries
                ))
            }
            requested if requested > MAX_SUB_QUERIES => {
                return Err(format!(
                    "--max-sub-queries cannot exceed {} (got {})",
                    MAX_SUB_QUERIES, requested
                ))
            }
            _ => {}
        }

        match self.depth {
            requested if requested > MAX_DEPTH => Err(format!(
                "--depth cannot exceed {} (got {})",
                MAX_DEPTH, requested
            )),
            _ => Ok(()),
        }
    }
}
/// Per-sub-query record reported in the output metadata.
///
/// Field names are serialized under the renamed keys given in the
/// `serde` attributes below.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubQueryOutcome {
/// Sub-query text (serialized as `texto`).
#[serde(rename = "texto")]
pub text: String,
/// Label of the strategy that produced the sub-query; `run_deep_research`
/// fills it from `SubQuery::strategy_label` (serialized as `estrategia`).
#[serde(rename = "estrategia")]
pub strategy: String,
/// Status string. `run_deep_research` writes `"ok"`, `"erro"` (the search
/// output carried an error) or `"planejado"` (depth placeholder).
#[serde(rename = "status")]
pub status: String,
/// Execution time reported by the search output's metadata, in
/// milliseconds (serialized as `tempo_ms`).
#[serde(rename = "tempo_ms")]
pub elapsed_ms: u64,
/// Error message copied from the search output, if any; the key
/// `mensagem_erro` is omitted from serialized output when this is `None`.
#[serde(rename = "mensagem_erro", skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
}
/// Top-level result returned by `run_deep_research`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeepResearchOutput {
/// Output discriminator; `run_deep_research` always sets `"deep_research"`
/// (serialized as `tipo`).
#[serde(rename = "tipo")]
pub kind: String,
/// Run metadata (serialized as `metadados`).
#[serde(rename = "metadados")]
pub metadata: DeepResearchMetadata,
/// Items returned by `aggregate` (serialized as `resultados`).
#[serde(rename = "resultados")]
pub results: Vec<AggregatedItem>,
/// Synthesized report, present only when synthesis was requested; the key
/// `sintese` is omitted from serialized output when this is `None`.
#[serde(rename = "sintese", skip_serializing_if = "Option::is_none")]
pub synth: Option<SynthesizedReport>,
}
/// Metadata describing one `run_deep_research` run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeepResearchMetadata {
/// The query as supplied in `DeepResearchArgs::query`.
#[serde(rename = "query_original")]
pub original_query: String,
/// One outcome per executed sub-query, plus a trailing placeholder entry
/// when `depth > 0` was requested.
#[serde(rename = "sub_queries")]
pub sub_queries: Vec<SubQueryOutcome>,
/// Label of the aggregation strategy used: `"rrf"` or `"dedupe_by_url"`
/// (serialized as `estrategia_agregacao`).
#[serde(rename = "estrategia_agregacao")]
pub aggregation_strategy: String,
/// Number of items in the aggregated result list
/// (serialized as `total_resultados_unicos`).
#[serde(rename = "total_resultados_unicos")]
pub unique_result_count: usize,
/// Time measured from just after argument validation until the output is
/// built, in milliseconds (serialized as `tempo_total_ms`).
#[serde(rename = "tempo_total_ms")]
pub total_elapsed_ms: u64,
/// Highest `cascade_level` reported by any search output, or `None` when
/// none reported one. There is no `skip_serializing_if`, so the key
/// `nivel_cascata` is always emitted.
#[serde(rename = "nivel_cascata")]
pub cascade_level: Option<u8>,
}
/// Runs the deep-research pipeline for one query.
///
/// Steps, in order:
/// 1. validate `args`;
/// 2. split the query into sub-queries with `decompose`;
/// 3. run one search per sub-query via `execute_parallel_searches`;
/// 4. merge the per-query outputs with `aggregate`;
/// 5. optionally build a report with `synthesize` (when `args.synthesize`).
///
/// A `depth` above zero does not trigger further work here; it only appends
/// a placeholder outcome with status `"planejado"` to the metadata.
///
/// # Errors
///
/// * `CliError::InvalidConfig` when `args.validate()` rejects the arguments.
/// * Any error propagated by `decompose` or `execute_parallel_searches`.
pub async fn run_deep_research(
    args: DeepResearchArgs,
    cfg: &Config,
    cancel: CancellationToken,
) -> Result<DeepResearchOutput, CliError> {
    args.validate()
        .map_err(|e| CliError::InvalidConfig { message: e })?;
    let start_total = Instant::now();

    let sub_queries: Vec<SubQuery> = decompose(
        &args.query,
        args.sub_query_strategy,
        args.sub_queries_file.as_deref(),
        args.max_sub_queries,
        &cancel,
    )
    .await?;

    // Last use of the owned token: move it instead of cloning.
    let per_query_outputs: Vec<SearchOutput> = execute_parallel_searches(
        sub_queries.iter().map(|q| q.text.clone()).collect(),
        cfg.clone(),
        cancel,
    )
    .await?
    .searches;

    // NOTE(review): `zip` stops at the shorter sequence, so this assumes the
    // search layer yields exactly one output per sub-query, in the same
    // order — confirm against `execute_parallel_searches`.
    let mut outcomes: Vec<SubQueryOutcome> = sub_queries
        .iter()
        .zip(per_query_outputs.iter())
        .map(|(q, o)| SubQueryOutcome {
            text: q.text.clone(),
            strategy: q.strategy_label().to_string(),
            status: if o.error.is_some() { "erro" } else { "ok" }.to_string(),
            elapsed_ms: o.metadata.execution_time_ms,
            error: o.error.clone(),
        })
        .collect();

    // Derive the strategy and its reported label from a single match so the
    // two can never drift apart when a variant is added.
    let (strategy, strategy_label) = match args.aggregation {
        AggregationStrategyKind::Rrf => (AggregationStrategy::Rrf(RRF_K), "rrf"),
        AggregationStrategyKind::DedupeByUrl => {
            (AggregationStrategy::DedupeByUrl, "dedupe_by_url")
        }
    };
    let aggregated = aggregate(&per_query_outputs, strategy);

    let synth = if args.synthesize {
        Some(synthesize(
            &aggregated,
            &args.query,
            args.synth_format,
            args.budget_tokens,
        ))
    } else {
        None
    };

    // NOTE(review): `as u8` truncates silently if the source field is wider
    // than `u8`; left unchanged because the field's type is declared
    // elsewhere — confirm and switch to a checked conversion if needed.
    let cascade_level = per_query_outputs
        .iter()
        .filter_map(|o| o.metadata.cascade_level)
        .max()
        .map(|v| v as u8);

    // Reflective depth is not implemented yet; surface the request as a
    // placeholder entry rather than dropping it silently.
    if args.depth > 0 {
        outcomes.push(SubQueryOutcome {
            text: format!("<reflective depth={} not implemented>", args.depth),
            strategy: "depth".to_string(),
            status: "planejado".to_string(),
            elapsed_ms: 0,
            error: None,
        });
    }

    // `as_millis` returns `u128`; saturate instead of truncating with `as`.
    // The fully qualified path keeps this independent of the edition prelude.
    let total_elapsed_ms =
        <u64 as std::convert::TryFrom<u128>>::try_from(start_total.elapsed().as_millis())
            .unwrap_or(u64::MAX);

    Ok(DeepResearchOutput {
        kind: "deep_research".to_string(),
        metadata: DeepResearchMetadata {
            original_query: args.query,
            sub_queries: outcomes,
            aggregation_strategy: strategy_label.to_string(),
            unique_result_count: aggregated.len(),
            total_elapsed_ms,
            cascade_level,
        },
        results: aggregated,
        synth,
    })
}