Skip to main content

coding_agent_search/
export.rs

//! Export functionality for search results.
//!
//! Provides conversion of search results to various output formats:
//! - Markdown - formatted with headers, code blocks, and metadata
//! - JSON - structured data for programmatic use
//! - Plain Text - simple, copy-paste friendly format
7
8use crate::search::query::SearchHit;
9use chrono::{DateTime, Utc};
10
/// Output formats that search results can be exported to.
///
/// `Markdown` is the default variant.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum ExportFormat {
    /// Markdown with headers, code blocks, and rich formatting.
    #[default]
    Markdown,
    /// JSON for programmatic consumption.
    Json,
    /// Plain text for simple copy-paste.
    PlainText,
}

impl ExportFormat {
    /// Per-variant table: `(display name, file extension, next format in the cycle)`.
    ///
    /// Keeping the three facts in one `match` means a new variant cannot be
    /// added without supplying all of them.
    fn metadata(self) -> (&'static str, &'static str, Self) {
        match self {
            ExportFormat::Markdown => ("Markdown", "md", ExportFormat::Json),
            ExportFormat::Json => ("JSON", "json", ExportFormat::PlainText),
            ExportFormat::PlainText => ("Plain Text", "txt", ExportFormat::Markdown),
        }
    }

    /// Human-readable name of this format.
    pub fn name(self) -> &'static str {
        let (label, _, _) = self.metadata();
        label
    }

    /// File extension (without a leading dot) for this format.
    pub fn extension(self) -> &'static str {
        let (_, ext, _) = self.metadata();
        ext
    }

    /// The format that follows this one; the sequence wraps around
    /// Markdown -> JSON -> Plain Text -> Markdown.
    pub fn next(self) -> Self {
        let (_, _, successor) = self.metadata();
        successor
    }

    /// Every available format, in cycling order.
    pub fn all() -> &'static [Self] {
        &[
            ExportFormat::Markdown,
            ExportFormat::Json,
            ExportFormat::PlainText,
        ]
    }
}
52
/// Knobs that control what an export contains.
#[derive(Clone, Debug)]
pub struct ExportOptions {
    /// Emit each hit's full content in addition to its snippet.
    pub include_content: bool,
    /// Emit each hit's relevance score.
    pub include_score: bool,
    /// Emit each hit's source path (and line number, when known).
    pub include_path: bool,
    /// Upper bound on snippet length; `0` disables truncation.
    pub max_snippet_len: usize,
    /// The query that produced the hits, shown in the export header/metadata.
    pub query: Option<String>,
}

impl Default for ExportOptions {
    /// Defaults: score and path on, full content off, snippets capped at 500,
    /// no query recorded.
    fn default() -> Self {
        const DEFAULT_MAX_SNIPPET_LEN: usize = 500;

        ExportOptions {
            query: None,
            max_snippet_len: DEFAULT_MAX_SNIPPET_LEN,
            include_path: true,
            include_score: true,
            include_content: false,
        }
    }
}
79
80/// Export search results to the specified format
81pub fn export_results(hits: &[SearchHit], format: ExportFormat, options: &ExportOptions) -> String {
82    match format {
83        ExportFormat::Markdown => export_markdown(hits, options),
84        ExportFormat::Json => export_json(hits, options),
85        ExportFormat::PlainText => export_plain_text(hits, options),
86    }
87}
88
/// Neutralise Markdown syntax in `text` so it renders literally.
///
/// The characters `\`, `|`, `*`, `_`, `[`, `]`, `<`, `>` and `` ` `` are each
/// prefixed with a backslash. Line feeds become a single space so the value
/// cannot break a table row, and carriage returns are dropped.
fn escape_markdown(text: &str) -> String {
    // Most inputs need few escapes, so the input length is a good starting size.
    let mut escaped = String::with_capacity(text.len());

    for ch in text.chars() {
        match ch {
            '\\' | '|' | '*' | '_' | '[' | ']' | '<' | '>' | '`' => {
                escaped.push('\\');
                escaped.push(ch);
            }
            '\n' => escaped.push(' '),
            '\r' => {}
            other => escaped.push(other),
        }
    }

    escaped
}
103
/// Pick a backtick fence that `content` cannot terminate early.
///
/// The fence is one backtick longer than the longest run of consecutive
/// backticks found anywhere in `content`, and never shorter than three.
fn get_code_block_delimiter(content: &str) -> String {
    // Splitting on every non-backtick character leaves only the backtick runs
    // (plus empty pieces); a backtick is one byte, so `len` is the run length.
    let longest_run = content
        .split(|c: char| c != '`')
        .map(str::len)
        .max()
        .unwrap_or(0);

    let fence_len = (longest_run + 1).max(3);
    "`".repeat(fence_len)
}
121
/// Pick a backtick delimiter for an inline code span around `content`.
///
/// The delimiter is one backtick longer than the longest run of consecutive
/// backticks in `content`, so no run inside the span can close it.
fn get_code_span_delimiter(content: &str) -> String {
    let (longest, trailing) = content.chars().fold((0usize, 0usize), |(best, run), c| {
        if c == '`' {
            (best, run + 1)
        } else {
            (best.max(run), 0)
        }
    });

    // A run that reaches the end of the string is only accounted for here.
    "`".repeat(longest.max(trailing) + 1)
}

/// Wrap `content` in a Markdown inline code span that renders it verbatim.
///
/// One space of padding is added on each side when the content would
/// otherwise be altered by the renderer:
/// - it starts or ends with a backtick, which would merge with the delimiter;
/// - it both starts and ends with a space without being all spaces, in which
///   case CommonMark strips one space from each side of the span.
///
/// In both cases the renderer removes exactly the padding, leaving `content`
/// intact.
fn markdown_code_span(content: &str) -> String {
    let delim = get_code_span_delimiter(content);

    let touches_backtick = content.starts_with('`') || content.ends_with('`');
    let would_lose_spaces = content.starts_with(' ')
        && content.ends_with(' ')
        && content.chars().any(|c| c != ' ');

    if touches_backtick || would_lose_spaces {
        format!("{delim} {content} {delim}")
    } else {
        format!("{delim}{content}{delim}")
    }
}
146
147/// Export to Markdown format
148fn export_markdown(hits: &[SearchHit], options: &ExportOptions) -> String {
149    let mut output = String::new();
150
151    // Header
152    output.push_str("# Search Results\n\n");
153
154    if let Some(query) = &options.query {
155        output.push_str(&format!("**Query:** {}\n\n", markdown_code_span(query)));
156    }
157
158    output.push_str(&format!(
159        "**Results:** {} | **Exported:** {}\n\n",
160        hits.len(),
161        Utc::now().format("%Y-%m-%d %H:%M:%S UTC")
162    ));
163
164    output.push_str("---\n\n");
165
166    // Results
167    for (i, hit) in hits.iter().enumerate() {
168        let safe_title = escape_markdown(&hit.title);
169        output.push_str(&format!("## {}. {}\n\n", i + 1, safe_title));
170
171        // Metadata table
172        output.push_str("| Field | Value |\n");
173        output.push_str("|-------|-------|\n");
174        output.push_str(&format!("| Agent | {} |\n", escape_markdown(&hit.agent)));
175        output.push_str(&format!(
176            "| Workspace | {} |\n",
177            escape_markdown(&hit.workspace)
178        ));
179
180        if options.include_score {
181            output.push_str(&format!("| Score | {:.2} |\n", hit.score));
182        }
183
184        if let Some(ts) = hit.created_at
185            && let Some(dt) = DateTime::from_timestamp_millis(ts)
186        {
187            output.push_str(&format!("| Date | {} |\n", dt.format("%Y-%m-%d %H:%M")));
188        }
189
190        if options.include_path {
191            let path_display = if hit.source_path.chars().count() > 60 {
192                let skip = hit.source_path.chars().count() - 57;
193                format!(
194                    "...{}",
195                    hit.source_path.chars().skip(skip).collect::<String>()
196                )
197            } else {
198                hit.source_path.clone()
199            };
200            output.push_str(&format!(
201                "| Source | {} |\n",
202                escape_markdown(&path_display)
203            ));
204
205            if let Some(line) = hit.line_number {
206                output.push_str(&format!("| Line | {line} |\n"));
207            }
208        }
209
210        output.push('\n');
211
212        // Snippet
213        output.push_str("### Snippet\n\n");
214        let snippet = truncate_text(&hit.snippet, options.max_snippet_len);
215        let delim = get_code_block_delimiter(&snippet);
216        output.push_str(&format!("{}\n", delim));
217        output.push_str(&snippet);
218        if !snippet.ends_with('\n') {
219            output.push('\n');
220        }
221        output.push_str(&format!("{}\n\n", delim));
222
223        // Full content (optional)
224        if options.include_content && !hit.content.is_empty() {
225            output.push_str("<details>\n<summary>Full Content</summary>\n\n");
226            let content_delim = get_code_block_delimiter(&hit.content);
227            output.push_str(&format!("{}\n", content_delim));
228            output.push_str(&hit.content);
229            if !hit.content.ends_with('\n') {
230                output.push('\n');
231            }
232            output.push_str(&format!("{}\n\n", content_delim));
233            output.push_str("</details>\n\n");
234        }
235
236        output.push_str("---\n\n");
237    }
238
239    output
240}
241
242/// Export to JSON format
243fn export_json(hits: &[SearchHit], options: &ExportOptions) -> String {
244    let exported_at = Utc::now().to_rfc3339();
245    let export_data = export_json_value(hits, options, &exported_at);
246
247    serde_json::to_string_pretty(&export_data).unwrap_or_else(|_| "{}".to_string())
248}
249
250fn export_json_value(
251    hits: &[SearchHit],
252    options: &ExportOptions,
253    exported_at: &str,
254) -> serde_json::Value {
255    serde_json::json!({
256        "query": options.query,
257        "count": hits.len(),
258        "exported_at": exported_at,
259        "hits": hits
260            .iter()
261            .map(|hit| export_hit_json(hit, options))
262            .collect::<Vec<_>>()
263    })
264}
265
266fn export_hit_json(hit: &SearchHit, options: &ExportOptions) -> serde_json::Value {
267    let mut obj = export_hit_base_json(hit, options);
268
269    if options.include_score {
270        let score = if hit.score.is_finite() {
271            hit.score
272        } else {
273            0.0
274        };
275        obj["score"] = serde_json::json!(score);
276    }
277
278    if options.include_path {
279        obj["source_path"] = serde_json::json!(hit.source_path);
280        if let Some(line) = hit.line_number {
281            obj["line_number"] = serde_json::json!(line);
282        }
283    }
284
285    if let Some(ts) = hit.created_at {
286        obj["created_at"] = serde_json::json!(ts);
287        if let Some(dt) = DateTime::from_timestamp_millis(ts) {
288            obj["created_at_formatted"] = serde_json::json!(dt.to_rfc3339());
289        }
290    }
291
292    if options.include_content && !hit.content.is_empty() {
293        obj["content"] = serde_json::json!(hit.content);
294    }
295
296    obj
297}
298
299fn export_hit_base_json(hit: &SearchHit, options: &ExportOptions) -> serde_json::Value {
300    serde_json::json!({
301        "title": hit.title,
302        "agent": hit.agent,
303        "workspace": hit.workspace,
304        "snippet": truncate_text(&hit.snippet, options.max_snippet_len),
305    })
306}
307
308/// Export to plain text format
309fn export_plain_text(hits: &[SearchHit], options: &ExportOptions) -> String {
310    let mut output = String::new();
311
312    // Header
313    output.push_str("SEARCH RESULTS\n");
314    output.push_str(&"=".repeat(60));
315    output.push('\n');
316
317    if let Some(query) = &options.query {
318        output.push_str(&format!("Query: {query}\n"));
319    }
320
321    output.push_str(&format!(
322        "Results: {} | Exported: {}\n",
323        hits.len(),
324        Utc::now().format("%Y-%m-%d %H:%M:%S UTC")
325    ));
326
327    output.push_str(&"=".repeat(60));
328    output.push_str("\n\n");
329
330    // Results
331    for (i, hit) in hits.iter().enumerate() {
332        output.push_str(&format!("[{}] {}\n", i + 1, hit.title));
333        output.push_str(&"-".repeat(60));
334        output.push('\n');
335
336        output.push_str(&format!("Agent: {}\n", hit.agent));
337        output.push_str(&format!("Workspace: {}\n", hit.workspace));
338
339        if options.include_score {
340            output.push_str(&format!("Score: {:.2}\n", hit.score));
341        }
342
343        if let Some(ts) = hit.created_at
344            && let Some(dt) = DateTime::from_timestamp_millis(ts)
345        {
346            output.push_str(&format!("Date: {}\n", dt.format("%Y-%m-%d %H:%M")));
347        }
348
349        if options.include_path {
350            output.push_str(&format!("Source: {}\n", hit.source_path));
351            if let Some(line) = hit.line_number {
352                output.push_str(&format!("Line: {line}\n"));
353            }
354        }
355
356        output.push('\n');
357        output.push_str("Snippet:\n");
358        let snippet = truncate_text(&hit.snippet, options.max_snippet_len);
359        for line in snippet.lines() {
360            output.push_str(&format!("  {line}\n"));
361        }
362
363        if options.include_content && !hit.content.is_empty() {
364            output.push_str("\nFull Content:\n");
365            for line in hit.content.lines() {
366                output.push_str(&format!("  {line}\n"));
367            }
368        }
369
370        output.push('\n');
371    }
372
373    output
374}
375
/// Shorten `text` to at most `max_len` characters.
///
/// - `max_len == 0` means "no limit": the text is returned unchanged.
/// - Text that already fits is returned unchanged.
/// - Otherwise the result is the first `max_len - 3` characters followed by
///   `...`, except when `max_len <= 3`, where there is no room for an
///   ellipsis and the text is simply cut at `max_len` characters.
///
/// Lengths are counted in `char`s, so multi-byte characters are never split.
fn truncate_text(text: &str, max_len: usize) -> String {
    if max_len == 0 {
        return text.to_string();
    }

    // Byte offset of the first character past the limit; absent when the
    // whole text fits.
    let Some((limit_offset, _)) = text.char_indices().nth(max_len) else {
        return text.to_string();
    };

    if max_len <= 3 {
        return text[..limit_offset].to_string();
    }

    let kept_chars = max_len - 3;
    let kept_offset = text
        .char_indices()
        .nth(kept_chars)
        .map_or(text.len(), |(offset, _)| offset);

    let mut shortened = String::with_capacity(kept_offset + 3);
    shortened.push_str(&text[..kept_offset]);
    shortened.push_str("...");
    shortened
}
401
#[cfg(test)]
mod tests {
    use super::*;

    /// A fully-populated hit that individual tests tweak as needed.
    fn sample_hit() -> SearchHit {
        let content = "Full content here";
        SearchHit {
            title: "Test Result".to_string(),
            snippet: "This is a test snippet".to_string(),
            content: content.to_string(),
            content_hash: crate::search::query::stable_content_hash(content),
            conversation_id: None,
            score: 8.5,
            source_path: "/path/to/file.jsonl".to_string(),
            agent: "claude_code".to_string(),
            workspace: "/projects/test".to_string(),
            workspace_original: None,
            created_at: Some(1700000000000),
            line_number: Some(42),
            match_type: crate::search::query::MatchType::Exact,
            source_id: "local".to_string(),
            origin_kind: "local".to_string(),
            origin_host: None,
        }
    }

    /// Assert that `value[key]` exists and equals `expected`.
    fn assert_json_field(value: &serde_json::Value, key: &str, expected: serde_json::Value) {
        let actual = value.get(key);
        assert_eq!(
            actual,
            Some(&expected),
            "unexpected JSON field `{key}` in {value}"
        );
    }

    #[test]
    fn test_export_format_cycle() {
        let first = ExportFormat::Markdown;
        let second = first.next();
        let third = second.next();

        assert_eq!(second, ExportFormat::Json);
        assert_eq!(third, ExportFormat::PlainText);
        // The cycle wraps back to where it started.
        assert_eq!(third.next(), ExportFormat::Markdown);
    }

    #[test]
    fn test_export_format_extension() {
        let expectations = [
            (ExportFormat::Markdown, "md"),
            (ExportFormat::Json, "json"),
            (ExportFormat::PlainText, "txt"),
        ];
        for (format, extension) in expectations {
            assert_eq!(format.extension(), extension);
        }
    }

    #[test]
    fn test_truncate_text() {
        assert_eq!(truncate_text("short", 100), "short");
        assert_eq!(truncate_text("this is long text", 10), "this is...");
        assert_eq!(truncate_text("any", 0), "any");

        // Edge case: very small max_len should not exceed limit
        // (no ellipsis for max_len <= 3).
        for (limit, expected) in [(3, "hel"), (2, "he"), (1, "h")] {
            assert_eq!(truncate_text("hello", limit), expected);
        }
        // max_len > 3 gets ellipsis
        assert_eq!(truncate_text("hello", 4), "h...");
    }

    #[test]
    fn test_export_markdown() {
        let rendered = export_markdown(&[sample_hit()], &ExportOptions::default());

        assert!(rendered.contains("# Search Results"));
        assert!(rendered.contains("Test Result"));
        // underscores are escaped in markdown
        assert!(rendered.contains("claude\\_code"));
        assert!(rendered.contains("```"));
    }

    #[test]
    fn test_export_markdown_preserves_backticks_in_query() {
        let options = ExportOptions {
            query: Some("literal `foo` search".to_string()),
            ..ExportOptions::default()
        };
        let output = export_markdown(&[], &options);

        assert!(output.contains("**Query:** ``literal `foo` search``"));
        assert!(
            !output.contains("literal foo search"),
            "query backticks must not be stripped from Markdown export: {output}"
        );
    }

    #[test]
    fn test_export_json() {
        let rendered = export_json(&[sample_hit()], &ExportOptions::default());

        assert!(rendered.contains("\"count\": 1"));
        assert!(rendered.contains("\"agent\": \"claude_code\""));
    }

    #[test]
    fn test_export_json_value_shape() {
        let options = ExportOptions {
            query: Some("authentication error".to_string()),
            ..ExportOptions::default()
        };
        let hits = vec![sample_hit()];

        // A fixed timestamp makes the whole document comparable.
        let actual = export_json_value(&hits, &options, "2026-04-26T17:26:00Z");

        let expected = serde_json::json!({
            "query": "authentication error",
            "count": 1,
            "exported_at": "2026-04-26T17:26:00Z",
            "hits": [{
                "title": "Test Result",
                "agent": "claude_code",
                "workspace": "/projects/test",
                "snippet": "This is a test snippet",
                "score": 8.5,
                "source_path": "/path/to/file.jsonl",
                "line_number": 42,
                "created_at": 1700000000000i64,
                "created_at_formatted": "2023-11-14T22:13:20+00:00"
            }]
        });
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_export_hit_json_shape() {
        let hit = SearchHit {
            score: f32::NAN,
            ..sample_hit()
        };
        let options = ExportOptions {
            include_content: true,
            include_score: true,
            include_path: true,
            max_snippet_len: 10,
            query: Some("ignored by hit projection".to_string()),
        };

        let projected = export_hit_json(&hit, &options);

        let expected_fields = [
            ("title", serde_json::json!("Test Result")),
            ("agent", serde_json::json!("claude_code")),
            ("workspace", serde_json::json!("/projects/test")),
            ("snippet", serde_json::json!("This is...")),
            ("score", serde_json::json!(0.0)),
            ("source_path", serde_json::json!("/path/to/file.jsonl")),
            ("line_number", serde_json::json!(42)),
            ("created_at", serde_json::json!(1700000000000i64)),
            (
                "created_at_formatted",
                serde_json::json!("2023-11-14T22:13:20+00:00"),
            ),
            ("content", serde_json::json!("Full content here")),
        ];
        for (key, expected) in expected_fields {
            assert_json_field(&projected, key, expected);
        }
        // No fields beyond the ones checked above.
        assert_eq!(projected.as_object().expect("object").len(), 10);
    }

    #[test]
    fn test_export_plain_text() {
        let rendered = export_plain_text(&[sample_hit()], &ExportOptions::default());

        assert!(rendered.contains("SEARCH RESULTS"));
        assert!(rendered.contains("[1] Test Result"));
        assert!(rendered.contains("Agent: claude_code"));
    }

    #[test]
    fn test_export_markdown_escapes_special_chars() {
        let hit = SearchHit {
            title: "[Link](javascript:alert(1))".to_string(),
            agent: "agent|pipe".to_string(),
            content: "Contains ``` backticks".to_string(),
            ..sample_hit()
        };
        let options = ExportOptions {
            include_content: true,
            ..ExportOptions::default()
        };

        let output = export_markdown(&[hit], &options);

        assert!(output.contains("\\[Link\\](javascript:alert(1))"));
        assert!(output.contains("agent\\|pipe"));
        // Should use 4 backticks because content has 3
        assert!(output.contains("````\nContains ``` backticks"));
    }
}