Skip to main content

coding_agent_search/
export.rs

1//! Export functionality for search results.
2//!
3//! Provides conversion of search results to various output formats:
4//! - Markdown - formatted with headers, code blocks, and metadata
5//! - JSON - structured data for programmatic use
6//! - Plain Text - simple, copy-paste friendly format
7
8use chrono::{DateTime, Utc};
9use serde_json::{Map, Value};
10use std::borrow::Cow;
11
12use crate::search::query::SearchHit;
13
/// Output formats that search results can be exported to.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum ExportFormat {
    /// Rich Markdown: headings, metadata tables, and fenced code blocks.
    #[default]
    Markdown,
    /// Structured JSON intended for other programs.
    Json,
    /// Unadorned text that pastes cleanly anywhere.
    PlainText,
}

impl ExportFormat {
    /// Human-readable label for this format.
    pub fn name(self) -> &'static str {
        match self {
            Self::Markdown => "Markdown",
            Self::Json => "JSON",
            Self::PlainText => "Plain Text",
        }
    }

    /// File extension (without the leading dot) used when saving this format.
    pub fn extension(self) -> &'static str {
        match self {
            Self::Markdown => "md",
            Self::Json => "json",
            Self::PlainText => "txt",
        }
    }

    /// The format that follows this one; the last format wraps back to the first.
    pub fn next(self) -> Self {
        match self {
            Self::Markdown => Self::Json,
            Self::Json => Self::PlainText,
            Self::PlainText => Self::Markdown,
        }
    }

    /// Every format, in cycling order.
    pub fn all() -> &'static [Self] {
        &[Self::Markdown, Self::Json, Self::PlainText]
    }
}
55
/// Knobs controlling what an export contains.
#[derive(Debug, Clone)]
pub struct ExportOptions {
    /// Emit each hit's full content in addition to its snippet.
    pub include_content: bool,
    /// Emit each hit's relevance score.
    pub include_score: bool,
    /// Emit each hit's source path (and line number, when known).
    pub include_path: bool,
    /// Snippet length cap in characters; `0` disables truncation.
    pub max_snippet_len: usize,
    /// The query that produced the hits, shown in the export header/metadata.
    pub query: Option<String>,
}

impl Default for ExportOptions {
    /// Snippet-only export with score and path shown, snippets capped at 500
    /// characters, and no query recorded.
    fn default() -> Self {
        const DEFAULT_SNIPPET_CAP: usize = 500;

        ExportOptions {
            query: None,
            max_snippet_len: DEFAULT_SNIPPET_CAP,
            include_path: true,
            include_score: true,
            include_content: false,
        }
    }
}
82
83/// Export search results to the specified format
84pub fn export_results(hits: &[SearchHit], format: ExportFormat, options: &ExportOptions) -> String {
85    match format {
86        ExportFormat::Markdown => export_markdown(hits, options),
87        ExportFormat::Json => export_json(hits, options),
88        ExportFormat::PlainText => export_plain_text(hits, options),
89    }
90}
91
/// Neutralize Markdown-significant characters so `text` renders literally.
///
/// Backslash, `|`, `*`, `_`, `[`, `]`, `<`, `>` and backtick are each prefixed
/// with a backslash. Line feeds become a single space (so the text cannot
/// break out of a table row) and carriage returns are dropped.
fn escape_markdown(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '\\' | '|' | '*' | '_' | '[' | ']' | '<' | '>' | '`' => {
                escaped.push('\\');
                escaped.push(ch);
            }
            // Keep table rows on one line.
            '\n' => escaped.push(' '),
            // Carriage returns carry no meaning once newlines are flattened.
            '\r' => {}
            plain => escaped.push(plain),
        }
    }
    escaped
}
106
/// Pick a fence for a fenced code block that `content` cannot terminate early.
///
/// The fence is one backtick longer than the longest run of consecutive
/// backticks found anywhere in `content`, and never shorter than three.
fn get_code_block_delimiter(content: &str) -> String {
    // Splitting on every non-backtick character leaves only the backtick runs
    // (possibly empty); a backtick is one byte, so `len` is the run length.
    let longest_run = content
        .split(|c: char| c != '`')
        .map(str::len)
        .max()
        .unwrap_or(0);

    "`".repeat((longest_run + 1).max(3))
}
124
/// Pick a delimiter for an inline code span that `content` cannot terminate.
///
/// Returns a backtick string one longer than the longest run of consecutive
/// backticks in `content` (a single backtick when there are none).
fn get_code_span_delimiter(content: &str) -> String {
    let mut max_backticks = 0;
    let mut current = 0;
    for c in content.chars() {
        if c == '`' {
            current += 1;
        } else {
            max_backticks = max_backticks.max(current);
            current = 0;
        }
    }
    // Account for a run that extends to the end of the string.
    max_backticks = max_backticks.max(current);

    "`".repeat(max_backticks + 1)
}

/// Wrap `content` in a Markdown inline code span so it renders verbatim.
///
/// One space of padding is inserted on each side when it is needed to keep
/// the rendered text identical to `content`:
/// - content that starts or ends with a backtick would otherwise merge with
///   the delimiter;
/// - content that both starts and ends with a space (and is not only spaces)
///   would otherwise lose one space on each side, because CommonMark strips
///   exactly that pair when rendering a code span.
///
/// In both cases the padded span begins and ends with a space, so the
/// renderer strips the padding and leaves the original content intact.
fn markdown_code_span(content: &str) -> String {
    let delim = get_code_span_delimiter(content);

    let touches_backtick = content.starts_with('`') || content.ends_with('`');
    let spaces_would_be_stripped = content.starts_with(' ')
        && content.ends_with(' ')
        && content.chars().any(|c| c != ' ');

    if touches_backtick || spaces_would_be_stripped {
        format!("{delim} {content} {delim}")
    } else {
        format!("{delim}{content}{delim}")
    }
}
149
150/// Export to Markdown format
151fn export_markdown(hits: &[SearchHit], options: &ExportOptions) -> String {
152    let mut output = String::new();
153
154    // Header
155    output.push_str("# Search Results\n\n");
156
157    if let Some(query) = &options.query {
158        output.push_str(&format!("**Query:** {}\n\n", markdown_code_span(query)));
159    }
160
161    output.push_str(&format!(
162        "**Results:** {} | **Exported:** {}\n\n",
163        hits.len(),
164        Utc::now().format("%Y-%m-%d %H:%M:%S UTC")
165    ));
166
167    output.push_str("---\n\n");
168
169    // Results
170    for (i, hit) in hits.iter().enumerate() {
171        let safe_title = escape_markdown(&hit.title);
172        output.push_str(&format!("## {}. {}\n\n", i + 1, safe_title));
173
174        // Metadata table
175        output.push_str("| Field | Value |\n");
176        output.push_str("|-------|-------|\n");
177        output.push_str(&format!("| Agent | {} |\n", escape_markdown(&hit.agent)));
178        output.push_str(&format!(
179            "| Workspace | {} |\n",
180            escape_markdown(&hit.workspace)
181        ));
182
183        if options.include_score {
184            output.push_str(&format!("| Score | {:.2} |\n", hit.score));
185        }
186
187        if let Some(ts) = hit.created_at
188            && let Some(dt) = DateTime::from_timestamp_millis(ts)
189        {
190            output.push_str(&format!("| Date | {} |\n", dt.format("%Y-%m-%d %H:%M")));
191        }
192
193        if options.include_path {
194            let source_path_chars = hit.source_path.chars().count();
195            let path_display: Cow<'_, str> = if source_path_chars > 60 {
196                let skip = source_path_chars - 57;
197                Cow::Owned(format!(
198                    "...{}",
199                    hit.source_path.chars().skip(skip).collect::<String>()
200                ))
201            } else {
202                Cow::Borrowed(hit.source_path.as_str())
203            };
204            output.push_str(&format!(
205                "| Source | {} |\n",
206                escape_markdown(path_display.as_ref())
207            ));
208
209            if let Some(line) = hit.line_number {
210                output.push_str(&format!("| Line | {line} |\n"));
211            }
212        }
213
214        output.push('\n');
215
216        // Snippet
217        output.push_str("### Snippet\n\n");
218        let snippet = truncate_text(&hit.snippet, options.max_snippet_len);
219        let delim = get_code_block_delimiter(&snippet);
220        output.push_str(&format!("{}\n", delim));
221        output.push_str(&snippet);
222        if !snippet.ends_with('\n') {
223            output.push('\n');
224        }
225        output.push_str(&format!("{}\n\n", delim));
226
227        // Full content (optional)
228        if options.include_content && !hit.content.is_empty() {
229            output.push_str("<details>\n<summary>Full Content</summary>\n\n");
230            let content_delim = get_code_block_delimiter(&hit.content);
231            output.push_str(&format!("{}\n", content_delim));
232            output.push_str(&hit.content);
233            if !hit.content.ends_with('\n') {
234                output.push('\n');
235            }
236            output.push_str(&format!("{}\n\n", content_delim));
237            output.push_str("</details>\n\n");
238        }
239
240        output.push_str("---\n\n");
241    }
242
243    output
244}
245
246/// Export to JSON format
247fn export_json(hits: &[SearchHit], options: &ExportOptions) -> String {
248    let exported_at = Utc::now().to_rfc3339();
249    let export_data = export_json_value(hits, options, &exported_at);
250
251    serde_json::to_string_pretty(&export_data).unwrap_or_else(|_| "{}".to_string())
252}
253
254fn export_json_value(
255    hits: &[SearchHit],
256    options: &ExportOptions,
257    exported_at: &str,
258) -> serde_json::Value {
259    serde_json::json!({
260        "query": options.query,
261        "count": hits.len(),
262        "exported_at": exported_at,
263        "hits": hits
264            .iter()
265            .map(|hit| export_hit_json(hit, options))
266            .collect::<Vec<_>>()
267    })
268}
269
270fn export_hit_json(hit: &SearchHit, options: &ExportOptions) -> serde_json::Value {
271    let mut obj = export_hit_base_json(hit, options);
272
273    if options.include_score {
274        let score = if hit.score.is_finite() {
275            hit.score
276        } else {
277            0.0
278        };
279        obj.insert("score".to_string(), serde_json::json!(score));
280    }
281
282    if options.include_path {
283        obj.insert(
284            "source_path".to_string(),
285            serde_json::json!(hit.source_path),
286        );
287        if let Some(line) = hit.line_number {
288            obj.insert("line_number".to_string(), serde_json::json!(line));
289        }
290    }
291
292    if let Some(ts) = hit.created_at {
293        obj.insert("created_at".to_string(), serde_json::json!(ts));
294        if let Some(dt) = DateTime::from_timestamp_millis(ts) {
295            obj.insert(
296                "created_at_formatted".to_string(),
297                serde_json::json!(dt.to_rfc3339()),
298            );
299        }
300    }
301
302    if options.include_content && !hit.content.is_empty() {
303        obj.insert("content".to_string(), serde_json::json!(hit.content));
304    }
305
306    Value::Object(obj)
307}
308
309fn export_hit_base_json(hit: &SearchHit, options: &ExportOptions) -> Map<String, Value> {
310    Map::from_iter([
311        ("title".to_string(), serde_json::json!(hit.title)),
312        ("agent".to_string(), serde_json::json!(hit.agent)),
313        ("workspace".to_string(), serde_json::json!(hit.workspace)),
314        (
315            "snippet".to_string(),
316            serde_json::json!(truncate_text(&hit.snippet, options.max_snippet_len)),
317        ),
318    ])
319}
320
321/// Export to plain text format
322fn export_plain_text(hits: &[SearchHit], options: &ExportOptions) -> String {
323    let mut output = String::new();
324
325    // Header
326    output.push_str("SEARCH RESULTS\n");
327    output.push_str(&"=".repeat(60));
328    output.push('\n');
329
330    if let Some(query) = &options.query {
331        output.push_str(&format!("Query: {query}\n"));
332    }
333
334    output.push_str(&format!(
335        "Results: {} | Exported: {}\n",
336        hits.len(),
337        Utc::now().format("%Y-%m-%d %H:%M:%S UTC")
338    ));
339
340    output.push_str(&"=".repeat(60));
341    output.push_str("\n\n");
342
343    // Results
344    for (i, hit) in hits.iter().enumerate() {
345        output.push_str(&format!("[{}] {}\n", i + 1, hit.title));
346        output.push_str(&"-".repeat(60));
347        output.push('\n');
348
349        output.push_str(&format!("Agent: {}\n", hit.agent));
350        output.push_str(&format!("Workspace: {}\n", hit.workspace));
351
352        if options.include_score {
353            output.push_str(&format!("Score: {:.2}\n", hit.score));
354        }
355
356        if let Some(ts) = hit.created_at
357            && let Some(dt) = DateTime::from_timestamp_millis(ts)
358        {
359            output.push_str(&format!("Date: {}\n", dt.format("%Y-%m-%d %H:%M")));
360        }
361
362        if options.include_path {
363            output.push_str(&format!("Source: {}\n", hit.source_path));
364            if let Some(line) = hit.line_number {
365                output.push_str(&format!("Line: {line}\n"));
366            }
367        }
368
369        output.push('\n');
370        output.push_str("Snippet:\n");
371        let snippet = truncate_text(&hit.snippet, options.max_snippet_len);
372        for line in snippet.lines() {
373            output.push_str(&format!("  {line}\n"));
374        }
375
376        if options.include_content && !hit.content.is_empty() {
377            output.push_str("\nFull Content:\n");
378            for line in hit.content.lines() {
379                output.push_str(&format!("  {line}\n"));
380            }
381        }
382
383        output.push('\n');
384    }
385
386    output
387}
388
/// Shorten `text` to at most `max_len` characters.
///
/// - `max_len == 0` means "no limit": the text is returned unchanged.
/// - Text that already fits is returned unchanged.
/// - Otherwise the result is the first `max_len - 3` characters followed by
///   `...`, except when `max_len <= 3`, where there is no room for an
///   ellipsis and the first `max_len` characters are returned as-is.
///
/// Lengths are counted in `char`s, so multi-byte text is never split inside
/// a character.
fn truncate_text(text: &str, max_len: usize) -> String {
    const ELLIPSIS: &str = "...";

    if max_len == 0 {
        return text.to_string();
    }

    // Byte offset at which the character with the given index starts, or
    // `None` when the text has no such character.
    let byte_offset_of =
        |char_index: usize| text.char_indices().nth(char_index).map(|(offset, _)| offset);

    // No character at index `max_len` means the whole text fits.
    let limit = match byte_offset_of(max_len) {
        Some(offset) => offset,
        None => return text.to_string(),
    };

    // Too short for an ellipsis: hard cut at the limit.
    if max_len <= ELLIPSIS.len() {
        return text[..limit].to_string();
    }

    let kept = byte_offset_of(max_len - ELLIPSIS.len()).unwrap_or(limit);
    let mut shortened = String::with_capacity(kept + ELLIPSIS.len());
    shortened.push_str(&text[..kept]);
    shortened.push_str(ELLIPSIS);
    shortened
}
414
#[cfg(test)]
mod tests {
    use super::*;

    /// A fully populated hit shared by the export tests.
    fn sample_hit() -> SearchHit {
        let content = "Full content here";
        SearchHit {
            title: "Test Result".to_string(),
            snippet: "This is a test snippet".to_string(),
            content: content.to_string(),
            content_hash: crate::search::query::stable_content_hash(content),
            conversation_id: None,
            score: 8.5,
            source_path: "/path/to/file.jsonl".to_string(),
            agent: "claude_code".to_string(),
            workspace: "/projects/test".to_string(),
            workspace_original: None,
            created_at: Some(1700000000000),
            line_number: Some(42),
            match_type: crate::search::query::MatchType::Exact,
            source_id: "local".to_string(),
            origin_kind: "local".to_string(),
            origin_host: None,
        }
    }

    /// Assert that `value[key]` exists and equals `expected`.
    fn assert_json_field(value: &serde_json::Value, key: &str, expected: serde_json::Value) {
        let actual = value.get(key);
        assert_eq!(
            actual,
            Some(&expected),
            "unexpected JSON field `{key}` in {value}"
        );
    }

    #[test]
    fn test_export_format_cycle() {
        let once = ExportFormat::Markdown.next();
        let twice = once.next();
        let thrice = twice.next();

        assert_eq!(once, ExportFormat::Json);
        assert_eq!(twice, ExportFormat::PlainText);
        assert_eq!(thrice, ExportFormat::Markdown);
    }

    #[test]
    fn test_export_format_extension() {
        let expectations = [
            (ExportFormat::Markdown, "md"),
            (ExportFormat::Json, "json"),
            (ExportFormat::PlainText, "txt"),
        ];
        for (format, extension) in expectations {
            assert_eq!(format.extension(), extension);
        }
    }

    #[test]
    fn test_truncate_text() {
        let cases = [
            ("short", 100, "short"),
            ("this is long text", 10, "this is..."),
            ("any", 0, "any"),
            // Very small limits must never be exceeded, so no ellipsis is added.
            ("hello", 3, "hel"),
            ("hello", 2, "he"),
            ("hello", 1, "h"),
            // Limits above 3 leave room for the ellipsis.
            ("hello", 4, "h..."),
        ];
        for (input, max_len, expected) in cases {
            assert_eq!(truncate_text(input, max_len), expected);
        }
    }

    #[test]
    fn test_export_markdown() {
        let output = export_markdown(&[sample_hit()], &ExportOptions::default());

        // Underscores in the agent name are escaped for Markdown.
        for needle in ["# Search Results", "Test Result", "claude\\_code", "```"] {
            assert!(output.contains(needle));
        }
    }

    #[test]
    fn test_export_markdown_preserves_backticks_in_query() {
        let mut options = ExportOptions::default();
        options.query = Some("literal `foo` search".to_string());

        let output = export_markdown(&[], &options);

        assert!(output.contains("**Query:** ``literal `foo` search``"));
        assert!(
            !output.contains("literal foo search"),
            "query backticks must not be stripped from Markdown export: {output}"
        );
    }

    #[test]
    fn test_export_json() {
        let output = export_json(&[sample_hit()], &ExportOptions::default());

        assert!(output.contains("\"count\": 1"));
        assert!(output.contains("\"agent\": \"claude_code\""));
    }

    #[test]
    fn test_export_json_value_shape() {
        let mut options = ExportOptions::default();
        options.query = Some("authentication error".to_string());

        let expected = serde_json::json!({
            "query": "authentication error",
            "count": 1,
            "exported_at": "2026-04-26T17:26:00Z",
            "hits": [{
                "title": "Test Result",
                "agent": "claude_code",
                "workspace": "/projects/test",
                "snippet": "This is a test snippet",
                "score": 8.5,
                "source_path": "/path/to/file.jsonl",
                "line_number": 42,
                "created_at": 1700000000000i64,
                "created_at_formatted": "2023-11-14T22:13:20+00:00"
            }]
        });

        let projected = export_json_value(&[sample_hit()], &options, "2026-04-26T17:26:00Z");

        assert_eq!(projected, expected);
    }

    #[test]
    fn test_export_hit_json_shape() {
        let hit = SearchHit {
            score: f32::NAN,
            ..sample_hit()
        };
        let options = ExportOptions {
            include_content: true,
            include_score: true,
            include_path: true,
            max_snippet_len: 10,
            query: Some("ignored by hit projection".to_string()),
        };

        let projected = export_hit_json(&hit, &options);

        let expected_fields = [
            ("title", serde_json::json!("Test Result")),
            ("agent", serde_json::json!("claude_code")),
            ("workspace", serde_json::json!("/projects/test")),
            ("snippet", serde_json::json!("This is...")),
            ("score", serde_json::json!(0.0)),
            ("source_path", serde_json::json!("/path/to/file.jsonl")),
            ("line_number", serde_json::json!(42)),
            ("created_at", serde_json::json!(1700000000000i64)),
            (
                "created_at_formatted",
                serde_json::json!("2023-11-14T22:13:20+00:00"),
            ),
            ("content", serde_json::json!("Full content here")),
        ];
        for (key, expected) in expected_fields {
            assert_json_field(&projected, key, expected);
        }

        // No fields beyond the ten checked above.
        let field_count = projected.as_object().expect("object").len();
        assert_eq!(field_count, 10);
    }

    #[test]
    fn test_export_plain_text() {
        let output = export_plain_text(&[sample_hit()], &ExportOptions::default());

        for needle in ["SEARCH RESULTS", "[1] Test Result", "Agent: claude_code"] {
            assert!(output.contains(needle));
        }
    }

    #[test]
    fn test_export_markdown_escapes_special_chars() {
        let hit = SearchHit {
            title: "[Link](javascript:alert(1))".to_string(),
            agent: "agent|pipe".to_string(),
            content: "Contains ``` backticks".to_string(),
            ..sample_hit()
        };
        let options = ExportOptions {
            include_content: true,
            ..ExportOptions::default()
        };

        let output = export_markdown(&[hit], &options);

        assert!(output.contains("\\[Link\\](javascript:alert(1))"));
        assert!(output.contains("agent\\|pipe"));
        // The content contains a run of three backticks, so the fence needs four.
        assert!(output.contains("````\nContains ``` backticks"));
    }
}