Skip to main content

llmgrep/
output.rs

//! Output formatting and response types for llmgrep.
//!
//! This module defines the public API types for serializing search results
//! in various formats (human-readable, JSON, pretty-printed JSON).

6use crate::ast::AstContext;
7use chrono::Utc;
8use clap::ValueEnum;
9use serde::Serialize;
10use std::fmt;
11
/// Version string placed in the `schema_version` field of the [`JsonResponse`]
/// values built by this module.
const SCHEMA_VERSION: &str = "1.0.0";
13
14/// Output format for search results.
15///
16/// Determines how search results are displayed to the user.
17#[derive(Clone, Copy, Debug, ValueEnum)]
18pub enum OutputFormat {
19    /// Human-readable formatted output with colors and indentation
20    Human,
21    /// Compact JSON output (single line)
22    Json,
23    /// Pretty-printed JSON with indentation
24    Pretty,
25}
26
27impl fmt::Display for OutputFormat {
28    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
29        let value = match self {
30            OutputFormat::Human => "human",
31            OutputFormat::Json => "json",
32            OutputFormat::Pretty => "pretty",
33        };
34        write!(f, "{}", value)
35    }
36}
37
38/// Performance metrics for search operations.
39///
40/// Tracks timing breakdown for different phases of search execution.
41/// Used internally for debugging and performance analysis.
42#[derive(Serialize, Clone, Debug, Default)]
43pub struct PerformanceMetrics {
44    /// Time taken to detect backend format (SQLite vs Native-V2) in milliseconds
45    pub backend_detection_ms: u64,
46    /// Time taken to execute the core search query in milliseconds
47    pub query_execution_ms: u64,
48    /// Time taken to format results for output in milliseconds
49    pub output_formatting_ms: u64,
50    /// Total time from start to finish in milliseconds
51    pub total_ms: u64,
52}
53
54impl PerformanceMetrics {
55    /// Create a new PerformanceMetrics instance with all fields set to zero.
56    pub fn new() -> Self {
57        Self::default()
58    }
59}
60
61/// JSON response wrapper with metadata.
62///
63/// All JSON responses from llmgrep follow this structure for consistency
64/// and versioning.
65///
66/// # Type Parameters
67///
68/// * `T` - The response data type (e.g., `SearchResponse`, `ReferenceSearchResponse`)
69///
70/// # Example
71///
72/// ```json
73/// {
74///   "schema_version": "1.0.0",
75///   "execution_id": "1234567890-abcd",
76///   "tool": "llmgrep",
77///   "timestamp": "2024-01-15T10:30:00Z",
78///   "partial": false,
79///   "data": { ... }
80/// }
81/// ```
82#[derive(Serialize)]
83pub struct JsonResponse<T> {
84    /// Schema version for response structure compatibility
85    pub schema_version: &'static str,
86    /// Unique execution identifier (timestamp + process ID)
87    pub execution_id: String,
88    /// Tool name (always "llmgrep")
89    pub tool: &'static str,
90    /// ISO 8601 timestamp of when the search was executed
91    pub timestamp: String,
92    /// Whether results are partial (e.g., candidates limit hit)
93    pub partial: bool,
94    /// Optional performance metrics (only included when requested)
95    #[serde(skip_serializing_if = "Option::is_none")]
96    pub performance: Option<PerformanceMetrics>,
97    /// The actual response data
98    pub data: T,
99}
100
101/// Error response structure for JSON output.
102///
103/// Provides structured error information with remediation hints.
104#[derive(Serialize)]
105pub struct ErrorResponse {
106    /// Error code (e.g., "LLM-E001", "LLM-E105")
107    pub code: String,
108    /// Error category/type
109    pub error: String,
110    /// Human-readable error message
111    pub message: String,
112    /// Optional span information for source-level errors
113    pub span: Option<Span>,
114    /// Suggested remediation steps
115    pub remediation: Option<String>,
116}
117
118/// Source code location information.
119///
120/// Represents a contiguous span of source code with line/column information
121/// for display and navigation.
122#[derive(Serialize)]
123pub struct Span {
124    /// Unique span identifier
125    pub span_id: String,
126    /// Absolute path to the source file
127    pub file_path: String,
128    /// Byte offset from file start (inclusive)
129    pub byte_start: u64,
130    /// Byte offset from file start (exclusive)
131    pub byte_end: u64,
132    /// 1-based line number of span start
133    pub start_line: u64,
134    /// 1-based column number of span start
135    pub start_col: u64,
136    /// 1-based line number of span end
137    pub end_line: u64,
138    /// 1-based column number of span end
139    pub end_col: u64,
140    /// Optional context lines before/after the span
141    #[serde(skip_serializing_if = "Option::is_none")]
142    pub context: Option<SpanContext>,
143}
144
145/// Context lines surrounding a span.
146///
147/// Provides before/after/selected lines for displaying search results
148/// with surrounding code context.
149#[derive(Serialize)]
150pub struct SpanContext {
151    /// Lines before the matched span
152    pub before: Vec<String>,
153    /// The matched lines (the span content)
154    pub selected: Vec<String>,
155    /// Lines after the matched span
156    pub after: Vec<String>,
157    /// Whether context was truncated due to size limits
158    pub truncated: bool,
159}
160
161/// A symbol match from a search operation.
162///
163/// Represents a single symbol (function, struct, enum, etc.) that matched
164/// the search criteria, with all available metadata.
165#[derive(Serialize)]
166pub struct SymbolMatch {
167    /// Unique match identifier
168    pub match_id: String,
169    /// Source code location
170    pub span: Span,
171    /// Symbol name (e.g., "function_name")
172    pub name: String,
173    /// Symbol kind (e.g., "function_item", "struct_item")
174    pub kind: String,
175    /// Parent symbol name (if nested)
176    pub parent: Option<String>,
177    /// 32-character BLAKE3 hash symbol ID
178    pub symbol_id: Option<String>,
179    /// Relevance score (higher = more relevant)
180    #[serde(skip_serializing_if = "Option::is_none")]
181    pub score: Option<u64>,
182    /// Fully-qualified name
183    #[serde(skip_serializing_if = "Option::is_none")]
184    pub fqn: Option<String>,
185    /// Canonical (normalized) fully-qualified name
186    #[serde(skip_serializing_if = "Option::is_none")]
187    pub canonical_fqn: Option<String>,
188    /// Display-friendly fully-qualified name
189    #[serde(skip_serializing_if = "Option::is_none")]
190    pub display_fqn: Option<String>,
191    /// SHA-256 hash of the symbol content
192    #[serde(skip_serializing_if = "Option::is_none")]
193    pub content_hash: Option<String>,
194    /// Symbol kind from code_chunks table (legacy field)
195    #[serde(skip_serializing_if = "Option::is_none")]
196    pub symbol_kind_from_chunk: Option<String>,
197    /// Source code snippet
198    #[serde(skip_serializing_if = "Option::is_none")]
199    pub snippet: Option<String>,
200    /// Whether the snippet was truncated due to size limits
201    #[serde(skip_serializing_if = "Option::is_none")]
202    pub snippet_truncated: Option<bool>,
203    // Label fields (language and normalized kind)
204    /// Programming language (rust, python, etc.)
205    #[serde(skip_serializing_if = "Option::is_none")]
206    pub language: Option<String>,
207    /// Normalized symbol kind (lowercase, standardized)
208    #[serde(skip_serializing_if = "Option::is_none")]
209    pub kind_normalized: Option<String>,
210    // Metrics fields (from symbol_metrics table)
211    /// AST complexity score
212    #[serde(skip_serializing_if = "Option::is_none")]
213    pub complexity_score: Option<u64>,
214    /// Number of incoming references (fan-in)
215    #[serde(skip_serializing_if = "Option::is_none")]
216    pub fan_in: Option<u64>,
217    /// Number of outgoing calls (fan-out)
218    #[serde(skip_serializing_if = "Option::is_none")]
219    pub fan_out: Option<u64>,
220    /// Cyclomatic complexity
221    #[serde(skip_serializing_if = "Option::is_none")]
222    pub cyclomatic_complexity: Option<u64>,
223    // AST fields (from ast_nodes table)
224    /// AST context (depth, parent_kind, children, decision_points)
225    #[serde(skip_serializing_if = "Option::is_none")]
226    pub ast_context: Option<AstContext>,
227    // Condense fields (SCC membership from magellan condense)
228    /// Supernode ID for strongly-connected component members
229    #[serde(skip_serializing_if = "Option::is_none")]
230    pub supernode_id: Option<String>,
231}
232
233/// A reference match from a reference search operation.
234///
235/// Represents a location where a symbol is referenced (used) in code.
236/// Used by the `--mode references` search mode.
237#[derive(Serialize)]
238pub struct ReferenceMatch {
239    /// Unique match identifier
240    pub match_id: String,
241    /// Source code location of the reference
242    pub span: Span,
243    /// Name of the symbol being referenced
244    pub referenced_symbol: String,
245    /// Kind of reference (read, write, call, etc.)
246    pub reference_kind: Option<String>,
247    /// Symbol ID of the referenced symbol
248    pub target_symbol_id: Option<String>,
249    /// Relevance score
250    #[serde(skip_serializing_if = "Option::is_none")]
251    pub score: Option<u64>,
252    /// SHA-256 hash of the content
253    #[serde(skip_serializing_if = "Option::is_none")]
254    pub content_hash: Option<String>,
255    /// Symbol kind from code_chunks table (legacy field)
256    #[serde(skip_serializing_if = "Option::is_none")]
257    pub symbol_kind_from_chunk: Option<String>,
258    /// Source code snippet at the reference location
259    #[serde(skip_serializing_if = "Option::is_none")]
260    pub snippet: Option<String>,
261    /// Whether the snippet was truncated
262    #[serde(skip_serializing_if = "Option::is_none")]
263    pub snippet_truncated: Option<bool>,
264}
265
266/// A call match from a call search operation.
267///
268/// Represents a function call relationship between a caller and callee.
269/// Used by the `--mode calls` search mode.
270#[derive(Serialize)]
271pub struct CallMatch {
272    /// Unique match identifier
273    pub match_id: String,
274    /// Source code location of the call
275    pub span: Span,
276    /// Name of the calling symbol
277    pub caller: String,
278    /// Name of the called symbol
279    pub callee: String,
280    /// Symbol ID of the caller
281    pub caller_symbol_id: Option<String>,
282    /// Symbol ID of the callee
283    pub callee_symbol_id: Option<String>,
284    /// Relevance score
285    #[serde(skip_serializing_if = "Option::is_none")]
286    pub score: Option<u64>,
287    /// SHA-256 hash of the content
288    #[serde(skip_serializing_if = "Option::is_none")]
289    pub content_hash: Option<String>,
290    /// Symbol kind from code_chunks table (legacy field)
291    #[serde(skip_serializing_if = "Option::is_none")]
292    pub symbol_kind_from_chunk: Option<String>,
293    /// Source code snippet showing the call
294    #[serde(skip_serializing_if = "Option::is_none")]
295    pub snippet: Option<String>,
296    /// Whether the snippet was truncated
297    #[serde(skip_serializing_if = "Option::is_none")]
298    pub snippet_truncated: Option<bool>,
299}
300
301/// Response from a symbol search operation.
302///
303/// Contains all matching symbols along with search metadata.
304#[derive(Serialize)]
305pub struct SearchResponse {
306    /// List of matching symbols
307    pub results: Vec<SymbolMatch>,
308    /// The search query string
309    pub query: String,
310    /// Path filter that was applied (if any)
311    #[serde(skip_serializing_if = "Option::is_none")]
312    pub path_filter: Option<String>,
313    /// Kind filter that was applied (if any)
314    #[serde(skip_serializing_if = "Option::is_none")]
315    pub kind_filter: Option<String>,
316    /// Total number of matches (may be greater than results.len() if limited)
317    pub total_count: u64,
318    /// Optional notice (e.g., results truncated, algorithm applied)
319    #[serde(skip_serializing_if = "Option::is_none")]
320    pub notice: Option<String>,
321}
322
323/// Response from a reference search operation.
324///
325/// Contains all locations where a symbol is referenced.
326#[derive(Serialize)]
327pub struct ReferenceSearchResponse {
328    /// List of reference locations
329    pub results: Vec<ReferenceMatch>,
330    /// The search query string
331    pub query: String,
332    /// Path filter that was applied (if any)
333    #[serde(skip_serializing_if = "Option::is_none")]
334    pub path_filter: Option<String>,
335    /// Total number of reference matches
336    pub total_count: u64,
337}
338
339/// Response from a call search operation.
340///
341/// Contains all function call relationships matching the search.
342#[derive(Serialize)]
343pub struct CallSearchResponse {
344    /// List of call relationships
345    pub results: Vec<CallMatch>,
346    /// The search query string
347    pub query: String,
348    /// Path filter that was applied (if any)
349    #[serde(skip_serializing_if = "Option::is_none")]
350    pub path_filter: Option<String>,
351    /// Total number of call matches
352    pub total_count: u64,
353}
354
355/// Combined response for searches that include symbols, references, and calls.
356///
357/// Used when `--mode combined` is specified, providing all three types of
358/// results in a single response.
359#[derive(Serialize)]
360pub struct CombinedSearchResponse {
361    /// The search query string
362    pub query: String,
363    /// Path filter that was applied (if any)
364    #[serde(skip_serializing_if = "Option::is_none")]
365    pub path_filter: Option<String>,
366    /// Symbol search results
367    pub symbols: SearchResponse,
368    /// Reference search results
369    pub references: ReferenceSearchResponse,
370    /// Call search results
371    pub calls: CallSearchResponse,
372    /// Total count across all search modes
373    pub total_count: u64,
374    /// Description of the limit mode applied (e.g., "unlimited", "per-mode")
375    pub limit_mode: String,
376}
377
378/// Create a JSON response with the provided data.
379///
380/// # Type Parameters
381///
382/// * `T` - The response data type
383///
384/// # Returns
385///
386/// A `JsonResponse` wrapper with metadata and the provided data.
387pub fn json_response<T>(data: T) -> JsonResponse<T> {
388    json_response_with_partial(data, false)
389}
390
391/// Create a JSON response with a partial flag.
392///
393/// # Type Parameters
394///
395/// * `T` - The response data type
396///
397/// # Arguments
398///
399/// * `data` - The response data
400/// * `partial` - Whether results are partial (e.g., hit candidate limit)
401///
402/// # Returns
403///
404/// A `JsonResponse` wrapper with the partial flag set.
405pub fn json_response_with_partial<T>(data: T, partial: bool) -> JsonResponse<T> {
406    json_response_with_partial_and_performance(data, partial, None)
407}
408
409/// Create a JSON response with partial flag and performance metrics.
410///
411/// # Type Parameters
412///
413/// * `T` - The response data type
414///
415/// # Arguments
416///
417/// * `data` - The response data
418/// * `partial` - Whether results are partial
419/// * `performance` - Optional performance metrics
420///
421/// # Returns
422///
423/// A fully-populated `JsonResponse` wrapper.
424pub fn json_response_with_partial_and_performance<T>(
425    data: T,
426    partial: bool,
427    performance: Option<PerformanceMetrics>,
428) -> JsonResponse<T> {
429    JsonResponse {
430        schema_version: SCHEMA_VERSION,
431        execution_id: execution_id(),
432        tool: "llmgrep",
433        timestamp: Utc::now().to_rfc3339(),
434        partial,
435        performance,
436        data,
437    }
438}
439
440/// Generate a unique execution ID.
441///
442/// Combines the current Unix timestamp with the process ID for uniqueness.
443///
444/// # Returns
445///
446/// A hexadecimal string in the format `{timestamp}-{pid}`.
447pub fn execution_id() -> String {
448    let timestamp = Utc::now().timestamp();
449    let pid = std::process::id();
450    format!("{:x}-{:x}", timestamp, pid)
451}