Skip to main content

tokmd_format/
lib.rs

//! # tokmd-format
//!
//! **Tier 2 (Formatting)**
//!
//! This crate handles the rendering and serialization of `tokmd` receipts.
//! It supports Markdown, TSV, JSON, JSONL, CSV, and CycloneDX formats.
//!
//! ## What belongs here
//! * Serialization logic (JSON/CSV/CycloneDX)
//! * Markdown and TSV table rendering
//! * Output file writing
//! * Redaction integration (via tokmd-redact re-exports)
//! * ScanArgs construction (single source of truth)
//!
//! ## What does NOT belong here
//! * Business logic (calculating stats)
//! * CLI argument parsing
//! * Analysis-specific formatting (use tokmd-analysis-format)
19
20use std::borrow::Cow;
21use std::fmt::Write as FmtWrite;
22use std::fs::File;
23use std::io::{self, BufWriter, Write};
24use std::path::Path;
25use std::path::PathBuf;
26use std::time::{SystemTime, UNIX_EPOCH};
27
28use anyhow::Result;
29use serde::Serialize;
30use time::OffsetDateTime;
31use time::format_description::well_known::Rfc3339;
32
33use tokmd_settings::ScanOptions;
34use tokmd_types::{
35    ExportArgs, ExportArgsMeta, ExportData, ExportFormat, ExportReceipt, FileKind, FileRow,
36    LangArgs, LangArgsMeta, LangReceipt, LangReport, ModuleArgs, ModuleArgsMeta, ModuleReceipt,
37    ModuleReport, RedactMode, ScanArgs, ScanStatus, TableFormat, ToolInfo,
38};
39
/// Milliseconds elapsed since the Unix epoch.
///
/// If the system clock reads earlier than the epoch, the result is `0`
/// instead of an error.
fn now_ms() -> u128 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis(),
        Err(_) => 0,
    }
}
46
/// Normalize a path to forward slashes and strip leading `./` for cross-platform stability.
///
/// This is the canonical normalization function for scan inputs. Use this
/// before storing paths in receipts to ensure consistent output across OS.
///
/// Every backslash is replaced with `/`, any number of leading `./` segments
/// is removed, and an input that normalizes to the empty string is returned
/// as `"."`. The path is rendered through [`Path::display`], so non-UTF-8
/// bytes are converted lossily.
pub fn normalize_scan_input(p: &Path) -> String {
    let unified = p.display().to_string().replace('\\', "/");
    // `trim_start_matches` removes every repeated `./` prefix in one pass,
    // so no `unwrap` and no re-allocation per stripped segment is needed.
    match unified.trim_start_matches("./") {
        "" => ".".to_string(),
        rest => rest.to_string(),
    }
}
58
59/// Construct `ScanArgs` with optional redaction applied.
60///
61/// This is the single source of truth for building `ScanArgs` from CLI inputs.
62/// All commands that produce receipts should use this function to ensure
63/// consistent redaction and normalization behavior.
64///
65/// # Redaction Behavior
66///
67/// - `None` or `Some(RedactMode::None)`: Paths shown as-is (normalized only)
68/// - `Some(RedactMode::Paths)`: Hash file paths, preserve extension
69/// - `Some(RedactMode::All)`: Hash paths and excluded patterns
70pub fn scan_args(paths: &[PathBuf], global: &ScanOptions, redact: Option<RedactMode>) -> ScanArgs {
71    let should_redact = redact == Some(RedactMode::Paths) || redact == Some(RedactMode::All);
72    let excluded_redacted = should_redact && !global.excluded.is_empty();
73
74    let mut args = ScanArgs {
75        paths: paths.iter().map(|p| normalize_scan_input(p)).collect(),
76        excluded: if should_redact {
77            global.excluded.iter().map(|p| short_hash(p)).collect()
78        } else {
79            global.excluded.clone()
80        },
81        excluded_redacted,
82        config: global.config,
83        hidden: global.hidden,
84        no_ignore: global.no_ignore,
85        no_ignore_parent: global.no_ignore || global.no_ignore_parent,
86        no_ignore_dot: global.no_ignore || global.no_ignore_dot,
87        no_ignore_vcs: global.no_ignore || global.no_ignore_vcs,
88        treat_doc_strings_as_comments: global.treat_doc_strings_as_comments,
89    };
90
91    if should_redact {
92        args.paths = args.paths.iter().map(|p| redact_path(p)).collect();
93    }
94    args
95}
96
97// -----------------------
98// Language summary output
99// -----------------------
100
101/// Write a language report to a writer.
102///
103/// This is the core implementation that can be tested with any `Write` sink.
104pub fn write_lang_report_to<W: Write>(
105    mut out: W,
106    report: &LangReport,
107    global: &ScanOptions,
108    args: &LangArgs,
109) -> Result<()> {
110    match args.format {
111        TableFormat::Md => {
112            out.write_all(render_lang_md(report).as_bytes())?;
113        }
114        TableFormat::Tsv => {
115            out.write_all(render_lang_tsv(report).as_bytes())?;
116        }
117        TableFormat::Json => {
118            let receipt = LangReceipt {
119                schema_version: tokmd_types::SCHEMA_VERSION,
120                generated_at_ms: now_ms(),
121                tool: ToolInfo::current(),
122                mode: "lang".to_string(),
123                status: ScanStatus::Complete,
124                warnings: vec![],
125                scan: scan_args(&args.paths, global, None),
126                args: LangArgsMeta {
127                    format: "json".to_string(),
128                    top: report.top,
129                    with_files: report.with_files,
130                    children: report.children,
131                },
132                report: report.clone(),
133            };
134            writeln!(out, "{}", serde_json::to_string(&receipt)?)?;
135        }
136    }
137    Ok(())
138}
139
140/// Print a language report to stdout.
141///
142/// Thin wrapper around [`write_lang_report_to`] for stdout.
143pub fn print_lang_report(report: &LangReport, global: &ScanOptions, args: &LangArgs) -> Result<()> {
144    let stdout = io::stdout();
145    let out = stdout.lock();
146    write_lang_report_to(out, report, global, args)
147}
148
149fn render_lang_md(report: &LangReport) -> String {
150    let mut s = String::new();
151
152    if report.with_files {
153        s.push_str("|Lang|Code|Lines|Files|Bytes|Tokens|Avg|\n");
154        s.push_str("|---|---:|---:|---:|---:|---:|---:|\n");
155        for r in &report.rows {
156            let _ = writeln!(
157                s,
158                "|{}|{}|{}|{}|{}|{}|{}|",
159                r.lang, r.code, r.lines, r.files, r.bytes, r.tokens, r.avg_lines
160            );
161        }
162        let _ = writeln!(
163            s,
164            "|**Total**|{}|{}|{}|{}|{}|{}|",
165            report.total.code,
166            report.total.lines,
167            report.total.files,
168            report.total.bytes,
169            report.total.tokens,
170            report.total.avg_lines
171        );
172    } else {
173        s.push_str("|Lang|Code|Lines|Bytes|Tokens|\n");
174        s.push_str("|---|---:|---:|---:|---:|\n");
175        for r in &report.rows {
176            let _ = writeln!(
177                s,
178                "|{}|{}|{}|{}|{}|",
179                r.lang, r.code, r.lines, r.bytes, r.tokens
180            );
181        }
182        let _ = writeln!(
183            s,
184            "|**Total**|{}|{}|{}|{}|",
185            report.total.code, report.total.lines, report.total.bytes, report.total.tokens
186        );
187    }
188
189    s
190}
191
192fn render_lang_tsv(report: &LangReport) -> String {
193    let mut s = String::new();
194
195    if report.with_files {
196        s.push_str("Lang\tCode\tLines\tFiles\tBytes\tTokens\tAvg\n");
197        for r in &report.rows {
198            let _ = writeln!(
199                s,
200                "{}\t{}\t{}\t{}\t{}\t{}\t{}",
201                r.lang, r.code, r.lines, r.files, r.bytes, r.tokens, r.avg_lines
202            );
203        }
204        let _ = writeln!(
205            s,
206            "Total\t{}\t{}\t{}\t{}\t{}\t{}",
207            report.total.code,
208            report.total.lines,
209            report.total.files,
210            report.total.bytes,
211            report.total.tokens,
212            report.total.avg_lines
213        );
214    } else {
215        s.push_str("Lang\tCode\tLines\tBytes\tTokens\n");
216        for r in &report.rows {
217            let _ = writeln!(
218                s,
219                "{}\t{}\t{}\t{}\t{}",
220                r.lang, r.code, r.lines, r.bytes, r.tokens
221            );
222        }
223        let _ = writeln!(
224            s,
225            "Total\t{}\t{}\t{}\t{}",
226            report.total.code, report.total.lines, report.total.bytes, report.total.tokens
227        );
228    }
229
230    s
231}
232
233// ---------------------
234// Module summary output
235// ---------------------
236
237/// Write a module report to a writer.
238///
239/// This is the core implementation that can be tested with any `Write` sink.
240pub fn write_module_report_to<W: Write>(
241    mut out: W,
242    report: &ModuleReport,
243    global: &ScanOptions,
244    args: &ModuleArgs,
245) -> Result<()> {
246    match args.format {
247        TableFormat::Md => {
248            out.write_all(render_module_md(report).as_bytes())?;
249        }
250        TableFormat::Tsv => {
251            out.write_all(render_module_tsv(report).as_bytes())?;
252        }
253        TableFormat::Json => {
254            let receipt = ModuleReceipt {
255                schema_version: tokmd_types::SCHEMA_VERSION,
256                generated_at_ms: now_ms(),
257                tool: ToolInfo::current(),
258                mode: "module".to_string(),
259                status: ScanStatus::Complete,
260                warnings: vec![],
261                scan: scan_args(&args.paths, global, None),
262                args: ModuleArgsMeta {
263                    format: "json".to_string(),
264                    top: report.top,
265                    module_roots: report.module_roots.clone(),
266                    module_depth: report.module_depth,
267                    children: report.children,
268                },
269                report: report.clone(),
270            };
271            writeln!(out, "{}", serde_json::to_string(&receipt)?)?;
272        }
273    }
274    Ok(())
275}
276
277/// Print a module report to stdout.
278///
279/// Thin wrapper around [`write_module_report_to`] for stdout.
280pub fn print_module_report(
281    report: &ModuleReport,
282    global: &ScanOptions,
283    args: &ModuleArgs,
284) -> Result<()> {
285    let stdout = io::stdout();
286    let out = stdout.lock();
287    write_module_report_to(out, report, global, args)
288}
289
290fn render_module_md(report: &ModuleReport) -> String {
291    let mut s = String::new();
292    s.push_str("|Module|Code|Lines|Files|Bytes|Tokens|Avg|\n");
293    s.push_str("|---|---:|---:|---:|---:|---:|---:|\n");
294    for r in &report.rows {
295        let _ = writeln!(
296            s,
297            "|{}|{}|{}|{}|{}|{}|{}|",
298            r.module, r.code, r.lines, r.files, r.bytes, r.tokens, r.avg_lines
299        );
300    }
301    let _ = writeln!(
302        s,
303        "|**Total**|{}|{}|{}|{}|{}|{}|",
304        report.total.code,
305        report.total.lines,
306        report.total.files,
307        report.total.bytes,
308        report.total.tokens,
309        report.total.avg_lines
310    );
311    s
312}
313
314fn render_module_tsv(report: &ModuleReport) -> String {
315    let mut s = String::new();
316    s.push_str("Module\tCode\tLines\tFiles\tBytes\tTokens\tAvg\n");
317    for r in &report.rows {
318        let _ = writeln!(
319            s,
320            "{}\t{}\t{}\t{}\t{}\t{}\t{}",
321            r.module, r.code, r.lines, r.files, r.bytes, r.tokens, r.avg_lines
322        );
323    }
324    let _ = writeln!(
325        s,
326        "Total\t{}\t{}\t{}\t{}\t{}\t{}",
327        report.total.code,
328        report.total.lines,
329        report.total.files,
330        report.total.bytes,
331        report.total.tokens,
332        report.total.avg_lines
333    );
334    s
335}
336
337// -----------------
338// Export (datasets)
339// -----------------
340
341#[derive(Debug, Clone, Serialize)]
342struct ExportMeta {
343    #[serde(rename = "type")]
344    ty: &'static str,
345    schema_version: u32,
346    generated_at_ms: u128,
347    tool: ToolInfo,
348    mode: String,
349    status: ScanStatus,
350    warnings: Vec<String>,
351    scan: ScanArgs,
352    args: ExportArgsMeta,
353}
354
355#[derive(Debug, Clone, Serialize)]
356struct JsonlRow<'a> {
357    #[serde(rename = "type")]
358    ty: &'static str,
359    #[serde(flatten)]
360    row: &'a FileRow,
361}
362
363pub fn write_export(export: &ExportData, global: &ScanOptions, args: &ExportArgs) -> Result<()> {
364    match &args.output {
365        Some(path) => {
366            let file = File::create(path)?;
367            let mut out = BufWriter::new(file);
368            write_export_to(&mut out, export, global, args)?;
369            out.flush()?;
370        }
371        None => {
372            let stdout = io::stdout();
373            let mut out = stdout.lock();
374            write_export_to(&mut out, export, global, args)?;
375            out.flush()?;
376        }
377    }
378    Ok(())
379}
380
381fn write_export_to<W: Write>(
382    out: &mut W,
383    export: &ExportData,
384    global: &ScanOptions,
385    args: &ExportArgs,
386) -> Result<()> {
387    match args.format {
388        ExportFormat::Csv => write_export_csv(out, export, args),
389        ExportFormat::Jsonl => write_export_jsonl(out, export, global, args),
390        ExportFormat::Json => write_export_json(out, export, global, args),
391        ExportFormat::Cyclonedx => write_export_cyclonedx(out, export, args.redact),
392    }
393}
394
395fn write_export_csv<W: Write>(out: &mut W, export: &ExportData, args: &ExportArgs) -> Result<()> {
396    let mut wtr = csv::WriterBuilder::new().has_headers(true).from_writer(out);
397    wtr.write_record([
398        "path", "module", "lang", "kind", "code", "comments", "blanks", "lines", "bytes", "tokens",
399    ])?;
400
401    for r in redact_rows(&export.rows, args.redact) {
402        let code = r.code.to_string();
403        let comments = r.comments.to_string();
404        let blanks = r.blanks.to_string();
405        let lines = r.lines.to_string();
406        let bytes = r.bytes.to_string();
407        let tokens = r.tokens.to_string();
408        let kind = match r.kind {
409            FileKind::Parent => "parent",
410            FileKind::Child => "child",
411        };
412
413        wtr.write_record([
414            r.path.as_str(),
415            r.module.as_str(),
416            r.lang.as_str(),
417            kind,
418            &code,
419            &comments,
420            &blanks,
421            &lines,
422            &bytes,
423            &tokens,
424        ])?;
425    }
426
427    wtr.flush()?;
428    Ok(())
429}
430
431fn write_export_jsonl<W: Write>(
432    out: &mut W,
433    export: &ExportData,
434    global: &ScanOptions,
435    args: &ExportArgs,
436) -> Result<()> {
437    if args.meta {
438        let should_redact = args.redact == RedactMode::Paths || args.redact == RedactMode::All;
439        let strip_prefix_redacted = should_redact && args.strip_prefix.is_some();
440
441        let meta = ExportMeta {
442            ty: "meta",
443            schema_version: tokmd_types::SCHEMA_VERSION,
444            generated_at_ms: now_ms(),
445            tool: ToolInfo::current(),
446            mode: "export".to_string(),
447            status: ScanStatus::Complete,
448            warnings: vec![],
449            scan: scan_args(&args.paths, global, Some(args.redact)),
450            args: ExportArgsMeta {
451                format: args.format,
452                module_roots: export.module_roots.clone(),
453                module_depth: export.module_depth,
454                children: export.children,
455                min_code: args.min_code,
456                max_rows: args.max_rows,
457                redact: args.redact,
458                strip_prefix: if should_redact {
459                    args.strip_prefix
460                        .as_ref()
461                        .map(|p| redact_path(&p.display().to_string().replace('\\', "/")))
462                } else {
463                    args.strip_prefix
464                        .as_ref()
465                        .map(|p| p.display().to_string().replace('\\', "/"))
466                },
467                strip_prefix_redacted,
468            },
469        };
470        writeln!(out, "{}", serde_json::to_string(&meta)?)?;
471    }
472
473    for row in redact_rows(&export.rows, args.redact) {
474        let wrapper = JsonlRow {
475            ty: "row",
476            row: &row,
477        };
478        writeln!(out, "{}", serde_json::to_string(&wrapper)?)?;
479    }
480    Ok(())
481}
482
483fn write_export_json<W: Write>(
484    out: &mut W,
485    export: &ExportData,
486    global: &ScanOptions,
487    args: &ExportArgs,
488) -> Result<()> {
489    if args.meta {
490        let should_redact = args.redact == RedactMode::Paths || args.redact == RedactMode::All;
491        let strip_prefix_redacted = should_redact && args.strip_prefix.is_some();
492
493        let receipt = ExportReceipt {
494            schema_version: tokmd_types::SCHEMA_VERSION,
495            generated_at_ms: now_ms(),
496            tool: ToolInfo::current(),
497            mode: "export".to_string(),
498            status: ScanStatus::Complete,
499            warnings: vec![],
500            scan: scan_args(&args.paths, global, Some(args.redact)),
501            args: ExportArgsMeta {
502                format: args.format,
503                module_roots: export.module_roots.clone(),
504                module_depth: export.module_depth,
505                children: export.children,
506                min_code: args.min_code,
507                max_rows: args.max_rows,
508                redact: args.redact,
509                strip_prefix: if should_redact {
510                    args.strip_prefix
511                        .as_ref()
512                        .map(|p| redact_path(&p.display().to_string().replace('\\', "/")))
513                } else {
514                    args.strip_prefix
515                        .as_ref()
516                        .map(|p| p.display().to_string().replace('\\', "/"))
517                },
518                strip_prefix_redacted,
519            },
520            data: ExportData {
521                rows: redact_rows(&export.rows, args.redact)
522                    .map(|c| c.into_owned())
523                    .collect(),
524                module_roots: export.module_roots.clone(),
525                module_depth: export.module_depth,
526                children: export.children,
527            },
528        };
529        writeln!(out, "{}", serde_json::to_string(&receipt)?)?;
530    } else {
531        writeln!(
532            out,
533            "{}",
534            serde_json::to_string(&redact_rows(&export.rows, args.redact).collect::<Vec<_>>())?
535        )?;
536    }
537    Ok(())
538}
539
540fn redact_rows(rows: &[FileRow], mode: RedactMode) -> impl Iterator<Item = Cow<'_, FileRow>> {
541    rows.iter().map(move |r| {
542        if mode == RedactMode::None {
543            Cow::Borrowed(r)
544        } else {
545            let mut owned = r.clone();
546            if mode == RedactMode::Paths || mode == RedactMode::All {
547                owned.path = redact_path(&owned.path);
548            }
549            if mode == RedactMode::All {
550                owned.module = short_hash(&owned.module);
551            }
552            Cow::Owned(owned)
553        }
554    })
555}
556
557// Re-export redaction functions for backwards compatibility
558pub use tokmd_redact::{redact_path, short_hash};
559
560// -----------------
561// CycloneDX SBOM
562// -----------------
563
564#[derive(Debug, Clone, Serialize)]
565#[serde(rename_all = "camelCase")]
566struct CycloneDxBom {
567    bom_format: &'static str,
568    spec_version: &'static str,
569    serial_number: String,
570    version: u32,
571    metadata: CycloneDxMetadata,
572    components: Vec<CycloneDxComponent>,
573}
574
575#[derive(Debug, Clone, Serialize)]
576struct CycloneDxMetadata {
577    timestamp: String,
578    tools: Vec<CycloneDxTool>,
579}
580
581#[derive(Debug, Clone, Serialize)]
582struct CycloneDxTool {
583    vendor: &'static str,
584    name: &'static str,
585    version: String,
586}
587
588#[derive(Debug, Clone, Serialize)]
589struct CycloneDxComponent {
590    #[serde(rename = "type")]
591    ty: &'static str,
592    name: String,
593    #[serde(skip_serializing_if = "Option::is_none")]
594    group: Option<String>,
595    #[serde(skip_serializing_if = "Vec::is_empty")]
596    properties: Vec<CycloneDxProperty>,
597}
598
599#[derive(Debug, Clone, Serialize)]
600struct CycloneDxProperty {
601    name: String,
602    value: String,
603}
604
605fn write_export_cyclonedx<W: Write>(
606    out: &mut W,
607    export: &ExportData,
608    redact: RedactMode,
609) -> Result<()> {
610    let timestamp = OffsetDateTime::now_utc()
611        .format(&Rfc3339)
612        .unwrap_or_else(|_| "1970-01-01T00:00:00Z".to_string());
613
614    // Apply redaction to rows before generating components
615    let components: Vec<CycloneDxComponent> = redact_rows(&export.rows, redact)
616        .map(|row| {
617            let mut properties = vec![
618                CycloneDxProperty {
619                    name: "tokmd:lang".to_string(),
620                    value: row.lang.clone(),
621                },
622                CycloneDxProperty {
623                    name: "tokmd:code".to_string(),
624                    value: row.code.to_string(),
625                },
626                CycloneDxProperty {
627                    name: "tokmd:comments".to_string(),
628                    value: row.comments.to_string(),
629                },
630                CycloneDxProperty {
631                    name: "tokmd:blanks".to_string(),
632                    value: row.blanks.to_string(),
633                },
634                CycloneDxProperty {
635                    name: "tokmd:lines".to_string(),
636                    value: row.lines.to_string(),
637                },
638                CycloneDxProperty {
639                    name: "tokmd:bytes".to_string(),
640                    value: row.bytes.to_string(),
641                },
642                CycloneDxProperty {
643                    name: "tokmd:tokens".to_string(),
644                    value: row.tokens.to_string(),
645                },
646            ];
647
648            // Add kind if it's a child
649            if row.kind == FileKind::Child {
650                properties.push(CycloneDxProperty {
651                    name: "tokmd:kind".to_string(),
652                    value: "child".to_string(),
653                });
654            }
655
656            CycloneDxComponent {
657                ty: "file",
658                name: row.path.clone(),
659                group: if row.module.is_empty() {
660                    None
661                } else {
662                    Some(row.module.clone())
663                },
664                properties,
665            }
666        })
667        .collect();
668
669    let bom = CycloneDxBom {
670        bom_format: "CycloneDX",
671        spec_version: "1.6",
672        serial_number: format!("urn:uuid:{}", uuid::Uuid::new_v4()),
673        version: 1,
674        metadata: CycloneDxMetadata {
675            timestamp,
676            tools: vec![CycloneDxTool {
677                vendor: "tokmd",
678                name: "tokmd",
679                version: env!("CARGO_PKG_VERSION").to_string(),
680            }],
681        },
682        components,
683    };
684
685    writeln!(out, "{}", serde_json::to_string_pretty(&bom)?)?;
686    Ok(())
687}
688
689// -----------------
690// Run command helpers
691// -----------------
692
693/// Write a lang report as JSON to a file path.
694///
695/// This is a convenience function for the `run` command that accepts
696/// pre-constructed `ScanArgs` and `LangArgsMeta` rather than requiring
697/// the full CLI args structs.
698pub fn write_lang_json_to_file(
699    path: &Path,
700    report: &LangReport,
701    scan: &ScanArgs,
702    args_meta: &LangArgsMeta,
703) -> Result<()> {
704    let receipt = LangReceipt {
705        schema_version: tokmd_types::SCHEMA_VERSION,
706        generated_at_ms: now_ms(),
707        tool: ToolInfo::current(),
708        mode: "lang".to_string(),
709        status: ScanStatus::Complete,
710        warnings: vec![],
711        scan: scan.clone(),
712        args: args_meta.clone(),
713        report: report.clone(),
714    };
715    let file = File::create(path)?;
716    serde_json::to_writer(file, &receipt)?;
717    Ok(())
718}
719
720/// Write a module report as JSON to a file path.
721///
722/// This is a convenience function for the `run` command that accepts
723/// pre-constructed `ScanArgs` and `ModuleArgsMeta` rather than requiring
724/// the full CLI args structs.
725pub fn write_module_json_to_file(
726    path: &Path,
727    report: &ModuleReport,
728    scan: &ScanArgs,
729    args_meta: &ModuleArgsMeta,
730) -> Result<()> {
731    let receipt = ModuleReceipt {
732        schema_version: tokmd_types::SCHEMA_VERSION,
733        generated_at_ms: now_ms(),
734        tool: ToolInfo::current(),
735        mode: "module".to_string(),
736        status: ScanStatus::Complete,
737        warnings: vec![],
738        scan: scan.clone(),
739        args: args_meta.clone(),
740        report: report.clone(),
741    };
742    let file = File::create(path)?;
743    serde_json::to_writer(file, &receipt)?;
744    Ok(())
745}
746
747/// Write export data as JSONL to a file path.
748///
749/// This is a convenience function for the `run` command that accepts
750/// pre-constructed `ScanArgs` and `ExportArgsMeta` rather than requiring
751/// the full `ScanOptions` and `ExportArgs` structs.
752pub fn write_export_jsonl_to_file(
753    path: &Path,
754    export: &ExportData,
755    scan: &ScanArgs,
756    args_meta: &ExportArgsMeta,
757) -> Result<()> {
758    let file = File::create(path)?;
759    let mut out = BufWriter::new(file);
760
761    let meta = ExportMeta {
762        ty: "meta",
763        schema_version: tokmd_types::SCHEMA_VERSION,
764        generated_at_ms: now_ms(),
765        tool: ToolInfo::current(),
766        mode: "export".to_string(),
767        status: ScanStatus::Complete,
768        warnings: vec![],
769        scan: scan.clone(),
770        args: args_meta.clone(),
771    };
772    writeln!(out, "{}", serde_json::to_string(&meta)?)?;
773
774    for row in redact_rows(&export.rows, args_meta.redact) {
775        let wrapper = JsonlRow {
776            ty: "row",
777            row: &row,
778        };
779        writeln!(out, "{}", serde_json::to_string(&wrapper)?)?;
780    }
781
782    out.flush()?;
783    Ok(())
784}
785
786// -----------------
787// Diff output
788// -----------------
789
790use tokmd_types::{DiffReceipt, DiffRow, DiffTotals, LangRow};
791
792/// Compute diff rows from two lang reports.
793pub fn compute_diff_rows(from_report: &LangReport, to_report: &LangReport) -> Vec<DiffRow> {
794    // Collect all languages from both reports
795    let mut all_langs: Vec<String> = from_report
796        .rows
797        .iter()
798        .chain(to_report.rows.iter())
799        .map(|r| r.lang.clone())
800        .collect();
801    all_langs.sort();
802    all_langs.dedup();
803
804    all_langs
805        .into_iter()
806        .filter_map(|lang_name| {
807            let old_row = from_report.rows.iter().find(|r| r.lang == lang_name);
808            let new_row = to_report.rows.iter().find(|r| r.lang == lang_name);
809
810            let old = old_row.cloned().unwrap_or_else(|| LangRow {
811                lang: lang_name.clone(),
812                code: 0,
813                lines: 0,
814                files: 0,
815                bytes: 0,
816                tokens: 0,
817                avg_lines: 0,
818            });
819            let new = new_row.cloned().unwrap_or_else(|| LangRow {
820                lang: lang_name.clone(),
821                code: 0,
822                lines: 0,
823                files: 0,
824                bytes: 0,
825                tokens: 0,
826                avg_lines: 0,
827            });
828
829            // Skip if no change
830            if old.code == new.code
831                && old.lines == new.lines
832                && old.files == new.files
833                && old.bytes == new.bytes
834                && old.tokens == new.tokens
835            {
836                return None;
837            }
838
839            Some(DiffRow {
840                lang: lang_name,
841                old_code: old.code,
842                new_code: new.code,
843                delta_code: new.code as i64 - old.code as i64,
844                old_lines: old.lines,
845                new_lines: new.lines,
846                delta_lines: new.lines as i64 - old.lines as i64,
847                old_files: old.files,
848                new_files: new.files,
849                delta_files: new.files as i64 - old.files as i64,
850                old_bytes: old.bytes,
851                new_bytes: new.bytes,
852                delta_bytes: new.bytes as i64 - old.bytes as i64,
853                old_tokens: old.tokens,
854                new_tokens: new.tokens,
855                delta_tokens: new.tokens as i64 - old.tokens as i64,
856            })
857        })
858        .collect()
859}
860
861/// Compute totals from diff rows.
862pub fn compute_diff_totals(rows: &[DiffRow]) -> DiffTotals {
863    let mut totals = DiffTotals {
864        old_code: 0,
865        new_code: 0,
866        delta_code: 0,
867        old_lines: 0,
868        new_lines: 0,
869        delta_lines: 0,
870        old_files: 0,
871        new_files: 0,
872        delta_files: 0,
873        old_bytes: 0,
874        new_bytes: 0,
875        delta_bytes: 0,
876        old_tokens: 0,
877        new_tokens: 0,
878        delta_tokens: 0,
879    };
880
881    for row in rows {
882        totals.old_code += row.old_code;
883        totals.new_code += row.new_code;
884        totals.delta_code += row.delta_code;
885        totals.old_lines += row.old_lines;
886        totals.new_lines += row.new_lines;
887        totals.delta_lines += row.delta_lines;
888        totals.old_files += row.old_files;
889        totals.new_files += row.new_files;
890        totals.delta_files += row.delta_files;
891        totals.old_bytes += row.old_bytes;
892        totals.new_bytes += row.new_bytes;
893        totals.delta_bytes += row.delta_bytes;
894        totals.old_tokens += row.old_tokens;
895        totals.new_tokens += row.new_tokens;
896        totals.delta_tokens += row.delta_tokens;
897    }
898
899    totals
900}
901
/// Format a signed delta: positive values get a leading `+`, zero and
/// negative values are printed as-is.
fn format_delta(delta: i64) -> String {
    let sign = if delta > 0 { "+" } else { "" };
    format!("{}{}", sign, delta)
}
909
/// Whether diff deltas are wrapped in ANSI color escape sequences.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DiffColorMode {
    /// Plain text, no escape sequences.
    Off,
    /// Wrap deltas in ANSI color escape sequences.
    Ansi,
}

/// Options that control how a diff is rendered.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct DiffRenderOptions {
    /// Selects the compact rendering.
    pub compact: bool,
    /// Color mode applied to delta values.
    pub color: DiffColorMode,
}

impl Default for DiffRenderOptions {
    /// Non-compact rendering with colors off.
    fn default() -> Self {
        DiffRenderOptions {
            color: DiffColorMode::Off,
            compact: false,
        }
    }
}
930
931fn format_delta_colored(delta: i64, mode: DiffColorMode) -> String {
932    let raw = format_delta(delta);
933    if mode == DiffColorMode::Off {
934        return raw;
935    }
936    if delta > 0 {
937        format!("\x1b[32m{}\x1b[0m", raw)
938    } else if delta < 0 {
939        format!("\x1b[31m{}\x1b[0m", raw)
940    } else {
941        format!("\x1b[33m{}\x1b[0m", raw)
942    }
943}
944
945fn format_pct_delta_colored(delta_pct: f64, mode: DiffColorMode) -> String {
946    let raw = format!("{:+.1}%", delta_pct);
947    if mode == DiffColorMode::Off {
948        return raw;
949    }
950    if delta_pct > 0.0 {
951        format!("\x1b[32m{}\x1b[0m", raw)
952    } else if delta_pct < 0.0 {
953        format!("\x1b[31m{}\x1b[0m", raw)
954    } else {
955        format!("\x1b[33m{}\x1b[0m", raw)
956    }
957}
958
/// Percentage change from `old` to `new`.
///
/// When `old` is zero the ratio is undefined, so the function reports `100.0`
/// if anything appeared (`new > 0`) and `0.0` if both sides are zero.
fn percent_change(old: usize, new: usize) -> f64 {
    match (old, new) {
        (0, 0) => 0.0,
        (0, _) => 100.0,
        _ => {
            let before = old as f64;
            let after = new as f64;
            ((after - before) / before) * 100.0
        }
    }
}
968
969/// Render diff as Markdown table with optional compact/color behavior.
970pub fn render_diff_md_with_options(
971    from_source: &str,
972    to_source: &str,
973    rows: &[DiffRow],
974    totals: &DiffTotals,
975    options: DiffRenderOptions,
976) -> String {
977    let mut s = String::new();
978
979    let _ = writeln!(s, "## Diff: {} → {}", from_source, to_source);
980    s.push('\n');
981
982    let languages_added = rows
983        .iter()
984        .filter(|r| r.old_code == 0 && r.new_code > 0)
985        .count();
986    let languages_removed = rows
987        .iter()
988        .filter(|r| r.old_code > 0 && r.new_code == 0)
989        .count();
990    let languages_modified = rows
991        .len()
992        .saturating_sub(languages_added + languages_removed);
993
994    if options.compact {
995        s.push_str("### Summary\n\n");
996        s.push_str("|Metric|Value|\n");
997        s.push_str("|---|---:|\n");
998        let _ = writeln!(s, "|From LOC|{}|", totals.old_code);
999        let _ = writeln!(s, "|To LOC|{}|", totals.new_code);
1000        let _ = writeln!(
1001            s,
1002            "|Delta LOC|{}|",
1003            format_delta_colored(totals.delta_code, options.color)
1004        );
1005        let _ = writeln!(
1006            s,
1007            "|LOC Change|{}|",
1008            format_pct_delta_colored(
1009                percent_change(totals.old_code, totals.new_code),
1010                options.color
1011            )
1012        );
1013        let _ = writeln!(
1014            s,
1015            "|Delta Lines|{}|",
1016            format_delta_colored(totals.delta_lines, options.color)
1017        );
1018        let _ = writeln!(
1019            s,
1020            "|Delta Files|{}|",
1021            format_delta_colored(totals.delta_files, options.color)
1022        );
1023        let _ = writeln!(
1024            s,
1025            "|Delta Bytes|{}|",
1026            format_delta_colored(totals.delta_bytes, options.color)
1027        );
1028        let _ = writeln!(
1029            s,
1030            "|Delta Tokens|{}|",
1031            format_delta_colored(totals.delta_tokens, options.color)
1032        );
1033        let _ = writeln!(s, "|Languages changed|{}|", rows.len());
1034        let _ = writeln!(s, "|Languages added|{}|", languages_added);
1035        let _ = writeln!(s, "|Languages removed|{}|", languages_removed);
1036        let _ = writeln!(s, "|Languages modified|{}|", languages_modified);
1037        return s;
1038    }
1039
1040    // Summary comparison table
1041    s.push_str("### Summary\n\n");
1042    s.push_str("|Metric|From|To|Delta|Change|\n");
1043    s.push_str("|---|---:|---:|---:|---:|\n");
1044
1045    let _ = writeln!(
1046        s,
1047        "|LOC|{}|{}|{}|{}|",
1048        totals.old_code,
1049        totals.new_code,
1050        format_delta_colored(totals.delta_code, options.color),
1051        format_pct_delta_colored(
1052            percent_change(totals.old_code, totals.new_code),
1053            options.color
1054        )
1055    );
1056    let _ = writeln!(
1057        s,
1058        "|Lines|{}|{}|{}|{}|",
1059        totals.old_lines,
1060        totals.new_lines,
1061        format_delta_colored(totals.delta_lines, options.color),
1062        format_pct_delta_colored(
1063            percent_change(totals.old_lines, totals.new_lines),
1064            options.color
1065        )
1066    );
1067    let _ = writeln!(
1068        s,
1069        "|Files|{}|{}|{}|{}|",
1070        totals.old_files,
1071        totals.new_files,
1072        format_delta_colored(totals.delta_files, options.color),
1073        format_pct_delta_colored(
1074            percent_change(totals.old_files, totals.new_files),
1075            options.color
1076        )
1077    );
1078    let _ = writeln!(
1079        s,
1080        "|Bytes|{}|{}|{}|{}|",
1081        totals.old_bytes,
1082        totals.new_bytes,
1083        format_delta_colored(totals.delta_bytes, options.color),
1084        format_pct_delta_colored(
1085            percent_change(totals.old_bytes, totals.new_bytes),
1086            options.color
1087        )
1088    );
1089    let _ = writeln!(
1090        s,
1091        "|Tokens|{}|{}|{}|{}|",
1092        totals.old_tokens,
1093        totals.new_tokens,
1094        format_delta_colored(totals.delta_tokens, options.color),
1095        format_pct_delta_colored(
1096            percent_change(totals.old_tokens, totals.new_tokens),
1097            options.color
1098        )
1099    );
1100    s.push('\n');
1101
1102    s.push_str("### Language Movement\n\n");
1103    s.push_str("|Type|Count|\n");
1104    s.push_str("|---|---:|\n");
1105    let _ = writeln!(s, "|Changed|{}|", rows.len());
1106    let _ = writeln!(s, "|Added|{}|", languages_added);
1107    let _ = writeln!(s, "|Removed|{}|", languages_removed);
1108    let _ = writeln!(s, "|Modified|{}|", languages_modified);
1109    s.push('\n');
1110
1111    // Detailed language breakdown
1112    s.push_str("### Language Breakdown\n\n");
1113    s.push_str("|Language|Old LOC|New LOC|Delta|\n");
1114    s.push_str("|---|---:|---:|---:|\n");
1115
1116    for row in rows {
1117        let _ = writeln!(
1118            s,
1119            "|{}|{}|{}|{}|",
1120            row.lang,
1121            row.old_code,
1122            row.new_code,
1123            format_delta_colored(row.delta_code, options.color)
1124        );
1125    }
1126
1127    let _ = writeln!(
1128        s,
1129        "|**Total**|{}|{}|{}|",
1130        totals.old_code,
1131        totals.new_code,
1132        format_delta_colored(totals.delta_code, options.color)
1133    );
1134
1135    s
1136}
1137
1138/// Render diff as Markdown table.
1139pub fn render_diff_md(
1140    from_source: &str,
1141    to_source: &str,
1142    rows: &[DiffRow],
1143    totals: &DiffTotals,
1144) -> String {
1145    render_diff_md_with_options(
1146        from_source,
1147        to_source,
1148        rows,
1149        totals,
1150        DiffRenderOptions::default(),
1151    )
1152}
1153
1154/// Create a DiffReceipt for JSON output.
1155pub fn create_diff_receipt(
1156    from_source: &str,
1157    to_source: &str,
1158    rows: Vec<DiffRow>,
1159    totals: DiffTotals,
1160) -> DiffReceipt {
1161    DiffReceipt {
1162        schema_version: tokmd_types::SCHEMA_VERSION,
1163        generated_at_ms: now_ms(),
1164        tool: ToolInfo::current(),
1165        mode: "diff".to_string(),
1166        from_source: from_source.to_string(),
1167        to_source: to_source.to_string(),
1168        diff_rows: rows,
1169        totals,
1170    }
1171}
1172
1173// =============================================================================
1174// Public test helpers - expose internal functions for integration tests
1175// =============================================================================
1176
1177/// Write CSV export to a writer (exposed for testing).
1178#[doc(hidden)]
1179pub fn write_export_csv_to<W: Write>(
1180    out: &mut W,
1181    export: &ExportData,
1182    args: &ExportArgs,
1183) -> Result<()> {
1184    write_export_csv(out, export, args)
1185}
1186
1187/// Write JSONL export to a writer (exposed for testing).
1188#[doc(hidden)]
1189pub fn write_export_jsonl_to<W: Write>(
1190    out: &mut W,
1191    export: &ExportData,
1192    global: &ScanOptions,
1193    args: &ExportArgs,
1194) -> Result<()> {
1195    write_export_jsonl(out, export, global, args)
1196}
1197
1198/// Write JSON export to a writer (exposed for testing).
1199#[doc(hidden)]
1200pub fn write_export_json_to<W: Write>(
1201    out: &mut W,
1202    export: &ExportData,
1203    global: &ScanOptions,
1204    args: &ExportArgs,
1205) -> Result<()> {
1206    write_export_json(out, export, global, args)
1207}
1208
1209/// Write CycloneDX export to a writer (exposed for testing).
1210#[doc(hidden)]
1211pub fn write_export_cyclonedx_to<W: Write>(
1212    out: &mut W,
1213    export: &ExportData,
1214    redact: RedactMode,
1215) -> Result<()> {
1216    write_export_cyclonedx(out, export, redact)
1217}
1218
1219#[cfg(test)]
1220mod tests {
1221    use super::*;
1222    use proptest::prelude::*;
1223    use tokmd_settings::ChildrenMode;
1224    use tokmd_types::{LangRow, ModuleRow, Totals};
1225
1226    fn sample_lang_report(with_files: bool) -> LangReport {
1227        LangReport {
1228            rows: vec![
1229                LangRow {
1230                    lang: "Rust".to_string(),
1231                    code: 1000,
1232                    lines: 1200,
1233                    files: 10,
1234                    bytes: 50000,
1235                    tokens: 2500,
1236                    avg_lines: 120,
1237                },
1238                LangRow {
1239                    lang: "TOML".to_string(),
1240                    code: 50,
1241                    lines: 60,
1242                    files: 2,
1243                    bytes: 1000,
1244                    tokens: 125,
1245                    avg_lines: 30,
1246                },
1247            ],
1248            total: Totals {
1249                code: 1050,
1250                lines: 1260,
1251                files: 12,
1252                bytes: 51000,
1253                tokens: 2625,
1254                avg_lines: 105,
1255            },
1256            with_files,
1257            children: ChildrenMode::Collapse,
1258            top: 0,
1259        }
1260    }
1261
1262    fn sample_module_report() -> ModuleReport {
1263        ModuleReport {
1264            rows: vec![
1265                ModuleRow {
1266                    module: "crates/foo".to_string(),
1267                    code: 800,
1268                    lines: 950,
1269                    files: 8,
1270                    bytes: 40000,
1271                    tokens: 2000,
1272                    avg_lines: 119,
1273                },
1274                ModuleRow {
1275                    module: "crates/bar".to_string(),
1276                    code: 200,
1277                    lines: 250,
1278                    files: 2,
1279                    bytes: 10000,
1280                    tokens: 500,
1281                    avg_lines: 125,
1282                },
1283            ],
1284            total: Totals {
1285                code: 1000,
1286                lines: 1200,
1287                files: 10,
1288                bytes: 50000,
1289                tokens: 2500,
1290                avg_lines: 120,
1291            },
1292            module_roots: vec!["crates".to_string()],
1293            module_depth: 2,
1294            children: tokmd_settings::ChildIncludeMode::Separate,
1295            top: 0,
1296        }
1297    }
1298
1299    fn sample_file_rows() -> Vec<FileRow> {
1300        vec![
1301            FileRow {
1302                path: "src/lib.rs".to_string(),
1303                module: "src".to_string(),
1304                lang: "Rust".to_string(),
1305                kind: FileKind::Parent,
1306                code: 100,
1307                comments: 20,
1308                blanks: 10,
1309                lines: 130,
1310                bytes: 1000,
1311                tokens: 250,
1312            },
1313            FileRow {
1314                path: "tests/test.rs".to_string(),
1315                module: "tests".to_string(),
1316                lang: "Rust".to_string(),
1317                kind: FileKind::Parent,
1318                code: 50,
1319                comments: 5,
1320                blanks: 5,
1321                lines: 60,
1322                bytes: 500,
1323                tokens: 125,
1324            },
1325        ]
1326    }
1327
1328    // ========================
1329    // Language Markdown Render Tests
1330    // ========================
1331
#[test]
fn render_lang_md_without_files() {
    let md = render_lang_md(&sample_lang_report(false));

    // Five-column header.
    assert!(md.contains("|Lang|Code|Lines|Bytes|Tokens|"));
    // The Files and Avg columns must not appear when `with_files` is off.
    for absent in ["|Files|", "|Avg|"] {
        assert!(!md.contains(absent));
    }
    // Both language rows, then the total row.
    for row in [
        "|Rust|1000|1200|50000|2500|",
        "|TOML|50|60|1000|125|",
        "|**Total**|1050|1260|51000|2625|",
    ] {
        assert!(md.contains(row));
    }
}
1348
#[test]
fn render_lang_md_with_files() {
    let md = render_lang_md(&sample_lang_report(true));

    // Header, per-language rows, and the total row all carry the Files and Avg columns.
    let expected_fragments = [
        "|Lang|Code|Lines|Files|Bytes|Tokens|Avg|",
        "|Rust|1000|1200|10|50000|2500|120|",
        "|TOML|50|60|2|1000|125|30|",
        "|**Total**|1050|1260|12|51000|2625|105|",
    ];
    for fragment in expected_fragments {
        assert!(md.contains(fragment));
    }
}
1362
#[test]
fn render_lang_md_table_structure() {
    let report = sample_lang_report(true);
    let output = render_lang_md(&report);

    // A well-formed table for the two-language fixture has at least five lines:
    // header, separator, two data rows, and the total row.
    let lines: Vec<&str> = output.lines().collect();
    assert!(lines.len() >= 5);

    // The second line is the separator; the colon marks right-aligned columns.
    assert!(lines[1].contains("|---|"));
    assert!(lines[1].contains(":"));
}
1376
1377    // ========================
1378    // Language TSV Render Tests
1379    // ========================
1380
#[test]
fn render_lang_tsv_without_files() {
    let tsv = render_lang_tsv(&sample_lang_report(false));

    // Five-column header comes first.
    assert!(tsv.starts_with("Lang\tCode\tLines\tBytes\tTokens\n"));
    // The Files and Avg columns must not appear when `with_files` is off.
    for absent in ["\tFiles\t", "\tAvg"] {
        assert!(!tsv.contains(absent));
    }
    // Both language rows, then the total row.
    for row in [
        "Rust\t1000\t1200\t50000\t2500",
        "TOML\t50\t60\t1000\t125",
        "Total\t1050\t1260\t51000\t2625",
    ] {
        assert!(tsv.contains(row));
    }
}
1397
#[test]
fn render_lang_tsv_with_files() {
    let tsv = render_lang_tsv(&sample_lang_report(true));

    // Header includes the Files and Avg columns.
    assert!(tsv.starts_with("Lang\tCode\tLines\tFiles\tBytes\tTokens\tAvg\n"));
    // Language rows include the file count and average.
    for row in [
        "Rust\t1000\t1200\t10\t50000\t2500\t120",
        "TOML\t50\t60\t2\t1000\t125\t30",
    ] {
        assert!(tsv.contains(row));
    }
}
1409
#[test]
fn render_lang_tsv_tab_separated() {
    let tsv = render_lang_tsv(&sample_lang_report(false));

    // Skip the header, then keep only the known data rows (two languages + total).
    let data_lines = tsv.lines().skip(1).filter(|line| {
        ["Total", "Rust", "TOML"]
            .iter()
            .any(|prefix| line.starts_with(*prefix))
    });

    // Five columns means exactly four tab separators per data line.
    for line in data_lines {
        assert_eq!(line.matches('\t').count(), 4);
    }
}
1423
1424    // ========================
1425    // Module Markdown Render Tests
1426    // ========================
1427
#[test]
fn render_module_md_structure() {
    let md = render_module_md(&sample_module_report());

    // Header, both module rows, then the total row.
    let expected_fragments = [
        "|Module|Code|Lines|Files|Bytes|Tokens|Avg|",
        "|crates/foo|800|950|8|40000|2000|119|",
        "|crates/bar|200|250|2|10000|500|125|",
        "|**Total**|1000|1200|10|50000|2500|120|",
    ];
    for fragment in expected_fragments {
        assert!(md.contains(fragment));
    }
}
1441
#[test]
fn render_module_md_table_format() {
    let md = render_module_md(&sample_module_report());
    let table: Vec<&str> = md.lines().collect();

    // Exactly: header, separator, two module rows, total.
    assert_eq!(table.len(), 5);
    // The separator row carries right-alignment markers.
    assert!(table[1].contains("---:"));
}
1453
1454    // ========================
1455    // Module TSV Render Tests
1456    // ========================
1457
#[test]
fn render_module_tsv_structure() {
    let tsv = render_module_tsv(&sample_module_report());

    // Seven-column header comes first.
    assert!(tsv.starts_with("Module\tCode\tLines\tFiles\tBytes\tTokens\tAvg\n"));
    // Both module rows, then the total row.
    for row in [
        "crates/foo\t800\t950\t8\t40000\t2000\t119",
        "crates/bar\t200\t250\t2\t10000\t500\t125",
        "Total\t1000\t1200\t10\t50000\t2500\t120",
    ] {
        assert!(tsv.contains(row));
    }
}
1471
#[test]
fn render_module_tsv_tab_count() {
    let tsv = render_module_tsv(&sample_module_report());

    // Seven columns means exactly six tab separators on every line, header included.
    for tab_count in tsv.lines().map(|line| line.matches('\t').count()) {
        assert_eq!(tab_count, 6);
    }
}
1482
1483    // ========================
1484    // Redaction Tests
1485    // ========================
1486
#[test]
fn redact_rows_none_mode() {
    let input = sample_file_rows();
    let output: Vec<_> = redact_rows(&input, RedactMode::None).collect();

    // With no redaction the rows pass through untouched.
    assert_eq!(output.len(), input.len());
    let first = &output[0];
    assert_eq!(first.path, "src/lib.rs");
    assert_eq!(first.module, "src");
}
1497
#[test]
fn redact_rows_paths_mode() {
    let input = sample_file_rows();
    let output: Vec<_> = redact_rows(&input, RedactMode::Paths).collect();
    let first = &output[0];

    // The path becomes a 16-character hash followed by the original extension.
    assert_ne!(first.path, "src/lib.rs");
    assert!(first.path.ends_with(".rs"));
    assert_eq!(first.path.len(), 16 + 3); // hash + ".rs"

    // The module name is left alone in Paths mode.
    assert_eq!(first.module, "src");
}
1511
#[test]
fn redact_rows_all_mode() {
    let input = sample_file_rows();
    let output: Vec<_> = redact_rows(&input, RedactMode::All).collect();
    let first = &output[0];

    // The path is redacted but keeps its extension.
    assert_ne!(first.path, "src/lib.rs");
    assert!(first.path.ends_with(".rs"));

    // In All mode the module is redacted too, down to a 16-character hash.
    assert_ne!(first.module, "src");
    assert_eq!(first.module.len(), 16);
}
1525
#[test]
fn redact_rows_preserves_other_fields() {
    let input = sample_file_rows();
    let output: Vec<_> = redact_rows(&input, RedactMode::All).collect();
    let first = &output[0];

    // Even the strictest mode must leave every non-path, non-module field intact.
    assert_eq!(first.lang, "Rust");
    assert_eq!(first.kind, FileKind::Parent);
    assert_eq!(first.code, 100);
    assert_eq!(first.comments, 20);
    assert_eq!(first.blanks, 10);
    assert_eq!(first.lines, 130);
    assert_eq!(first.bytes, 1000);
    assert_eq!(first.tokens, 250);
}
1541
1542    // ========================
1543    // Path Normalization Tests
1544    // ========================
1545
#[test]
fn normalize_scan_input_forward_slash() {
    // A path that is already normalized comes back unchanged.
    assert_eq!(normalize_scan_input(Path::new("src/lib.rs")), "src/lib.rs");
}
1552
#[test]
fn normalize_scan_input_backslash_to_forward() {
    // Backslash separators are rewritten to forward slashes.
    assert_eq!(normalize_scan_input(Path::new("src\\lib.rs")), "src/lib.rs");
}
1559
#[test]
fn normalize_scan_input_strips_dot_slash() {
    // A leading `./` is dropped.
    assert_eq!(
        normalize_scan_input(Path::new("./src/lib.rs")),
        "src/lib.rs"
    );
}
1566
#[test]
fn normalize_scan_input_current_dir() {
    // The bare current-directory marker is preserved as-is.
    assert_eq!(normalize_scan_input(Path::new(".")), ".");
}
1573
1574    // ========================
1575    // Property-Based Tests
1576    // ========================
1577
1578    proptest! {
1579        #[test]
1580        fn normalize_scan_input_no_backslash(s in "[a-zA-Z0-9_/\\\\.]+") {
1581            let p = Path::new(&s);
1582            let normalized = normalize_scan_input(p);
1583            prop_assert!(!normalized.contains('\\'), "Should not contain backslash: {}", normalized);
1584        }
1585
1586        #[test]
1587        fn normalize_scan_input_no_leading_dot_slash(s in "[a-zA-Z0-9_/\\\\.]+") {
1588            let p = Path::new(&s);
1589            let normalized = normalize_scan_input(p);
1590            prop_assert!(!normalized.starts_with("./"), "Should not start with ./: {}", normalized);
1591        }
1592
1593        #[test]
1594        fn redact_rows_preserves_count(
1595            code in 0usize..10000,
1596            comments in 0usize..1000,
1597            blanks in 0usize..500
1598        ) {
1599            let rows = vec![FileRow {
1600                path: "test/file.rs".to_string(),
1601                module: "test".to_string(),
1602                lang: "Rust".to_string(),
1603                kind: FileKind::Parent,
1604                code,
1605                comments,
1606                blanks,
1607                lines: code + comments + blanks,
1608                bytes: 1000,
1609                tokens: 250,
1610            }];
1611
1612            for mode in [RedactMode::None, RedactMode::Paths, RedactMode::All] {
1613                let redacted: Vec<_> = redact_rows(&rows, mode).collect();
1614                prop_assert_eq!(redacted.len(), 1);
1615                prop_assert_eq!(redacted[0].code, code);
1616                prop_assert_eq!(redacted[0].comments, comments);
1617                prop_assert_eq!(redacted[0].blanks, blanks);
1618            }
1619        }
1620
1621        #[test]
1622        fn redact_rows_paths_end_with_extension(ext in "[a-z]{1,4}") {
1623            let path = format!("some/path/file.{}", ext);
1624            let rows = vec![FileRow {
1625                path: path.clone(),
1626                module: "some".to_string(),
1627                lang: "Test".to_string(),
1628                kind: FileKind::Parent,
1629                code: 100,
1630                comments: 10,
1631                blanks: 5,
1632                lines: 115,
1633                bytes: 1000,
1634                tokens: 250,
1635            }];
1636
1637            let redacted: Vec<_> = redact_rows(&rows, RedactMode::Paths).collect();
1638            prop_assert!(redacted[0].path.ends_with(&format!(".{}", ext)),
1639                "Redacted path '{}' should end with .{}", redacted[0].path, ext);
1640        }
1641    }
1642
1643    // ========================
1644    // Snapshot Tests
1645    // ========================
1646
#[test]
fn snapshot_lang_md_with_files() {
    // Pins the full Markdown rendering of the fixture with the Files/Avg columns.
    let output = render_lang_md(&sample_lang_report(true));
    insta::assert_snapshot!(output);
}
1653
#[test]
fn snapshot_lang_md_without_files() {
    // Pins the full Markdown rendering of the fixture without the Files/Avg columns.
    let output = render_lang_md(&sample_lang_report(false));
    insta::assert_snapshot!(output);
}
1660
#[test]
fn snapshot_lang_tsv_with_files() {
    // Pins the full TSV rendering of the fixture with the Files/Avg columns.
    let output = render_lang_tsv(&sample_lang_report(true));
    insta::assert_snapshot!(output);
}
1667
#[test]
fn snapshot_module_md() {
    // Pins the full Markdown rendering of the module fixture.
    let output = render_module_md(&sample_module_report());
    insta::assert_snapshot!(output);
}
1674
#[test]
fn snapshot_module_tsv() {
    // Pins the full TSV rendering of the module fixture.
    let output = render_module_tsv(&sample_module_report());
    insta::assert_snapshot!(output);
}
1681
1682    // ========================
1683    // Diff Render Tests
1684    // ========================
1685
#[test]
fn test_render_diff_md_smoke() {
    // Kills mutants: render_diff_md -> String::new() / "xyzzy".into()
    let rust_before = LangRow {
        lang: "Rust".to_string(),
        code: 10,
        lines: 10,
        files: 1,
        bytes: 100,
        tokens: 20,
        avg_lines: 10,
    };
    let before = LangReport {
        rows: vec![rust_before],
        total: Totals {
            code: 10,
            lines: 10,
            files: 1,
            bytes: 100,
            tokens: 20,
            avg_lines: 10,
        },
        with_files: false,
        children: ChildrenMode::Collapse,
        top: 0,
    };

    let rust_after = LangRow {
        lang: "Rust".to_string(),
        code: 12,
        lines: 12,
        files: 1,
        bytes: 120,
        tokens: 24,
        avg_lines: 12,
    };
    let after = LangReport {
        rows: vec![rust_after],
        total: Totals {
            code: 12,
            lines: 12,
            files: 1,
            bytes: 120,
            tokens: 24,
            avg_lines: 12,
        },
        with_files: false,
        children: ChildrenMode::Collapse,
        top: 0,
    };

    // One language changed by +2 LOC, and the totals agree.
    let diff_rows = compute_diff_rows(&before, &after);
    assert_eq!(diff_rows.len(), 1);
    assert_eq!(diff_rows[0].lang, "Rust");
    assert_eq!(diff_rows[0].delta_code, 2);

    let diff_totals = compute_diff_totals(&diff_rows);
    assert_eq!(diff_totals.delta_code, 2);

    let md = render_diff_md("from", "to", &diff_rows, &diff_totals);

    assert!(!md.trim().is_empty(), "diff markdown must not be empty");
    // Source labels, the language, every summary metric, and the movement section.
    for needle in [
        "from",
        "to",
        "Rust",
        "|LOC|",
        "|Lines|",
        "|Files|",
        "|Bytes|",
        "|Tokens|",
        "### Language Movement",
    ] {
        assert!(md.contains(needle));
    }
}
1756
#[test]
fn test_render_diff_md_compact_includes_movement_counts() {
    // Before: Rust only.
    let rust_before = LangRow {
        lang: "Rust".to_string(),
        code: 10,
        lines: 10,
        files: 1,
        bytes: 100,
        tokens: 20,
        avg_lines: 10,
    };
    let before = LangReport {
        rows: vec![rust_before],
        total: Totals {
            code: 10,
            lines: 10,
            files: 1,
            bytes: 100,
            tokens: 20,
            avg_lines: 10,
        },
        with_files: false,
        children: ChildrenMode::Collapse,
        top: 0,
    };

    // After: Rust grew and Python appeared.
    let rust_after = LangRow {
        lang: "Rust".to_string(),
        code: 12,
        lines: 12,
        files: 1,
        bytes: 120,
        tokens: 24,
        avg_lines: 12,
    };
    let python_after = LangRow {
        lang: "Python".to_string(),
        code: 8,
        lines: 8,
        files: 1,
        bytes: 80,
        tokens: 16,
        avg_lines: 8,
    };
    let after = LangReport {
        rows: vec![rust_after, python_after],
        total: Totals {
            code: 20,
            lines: 20,
            files: 2,
            bytes: 200,
            tokens: 40,
            avg_lines: 10,
        },
        with_files: false,
        children: ChildrenMode::Collapse,
        top: 0,
    };

    let diff_rows = compute_diff_rows(&before, &after);
    let diff_totals = compute_diff_totals(&diff_rows);
    let compact_plain = DiffRenderOptions {
        compact: true,
        color: DiffColorMode::Off,
    };
    let md = render_diff_md_with_options("from", "to", &diff_rows, &diff_totals, compact_plain);

    // The compact table lists every delta metric plus the movement counts.
    for needle in [
        "|Delta Lines|",
        "|Delta Files|",
        "|Delta Bytes|",
        "|Delta Tokens|",
        "|Languages added|1|",
        "|Languages modified|1|",
    ] {
        assert!(md.contains(needle));
    }
}
1834
#[test]
fn test_compute_diff_rows_language_added() {
    // A language that is absent before and present after shows up as a pure addition.
    let empty = LangReport {
        rows: vec![],
        total: Totals {
            code: 0,
            lines: 0,
            files: 0,
            bytes: 0,
            tokens: 0,
            avg_lines: 0,
        },
        with_files: false,
        children: ChildrenMode::Collapse,
        top: 0,
    };

    let python = LangRow {
        lang: "Python".to_string(),
        code: 100,
        lines: 120,
        files: 5,
        bytes: 5000,
        tokens: 250,
        avg_lines: 24,
    };
    let with_python = LangReport {
        rows: vec![python],
        total: Totals {
            code: 100,
            lines: 120,
            files: 5,
            bytes: 5000,
            tokens: 250,
            avg_lines: 24,
        },
        with_files: false,
        children: ChildrenMode::Collapse,
        top: 0,
    };

    let diff_rows = compute_diff_rows(&empty, &with_python);
    assert_eq!(diff_rows.len(), 1);

    let added = &diff_rows[0];
    assert_eq!(added.lang, "Python");
    assert_eq!(added.old_code, 0);
    assert_eq!(added.new_code, 100);
    assert_eq!(added.delta_code, 100);
}
1883
#[test]
fn test_compute_diff_rows_language_removed() {
    // A language that is present before and absent after shows up as a pure removal.
    let go = LangRow {
        lang: "Go".to_string(),
        code: 50,
        lines: 60,
        files: 2,
        bytes: 2000,
        tokens: 125,
        avg_lines: 30,
    };
    let with_go = LangReport {
        rows: vec![go],
        total: Totals {
            code: 50,
            lines: 60,
            files: 2,
            bytes: 2000,
            tokens: 125,
            avg_lines: 30,
        },
        with_files: false,
        children: ChildrenMode::Collapse,
        top: 0,
    };

    let empty = LangReport {
        rows: vec![],
        total: Totals {
            code: 0,
            lines: 0,
            files: 0,
            bytes: 0,
            tokens: 0,
            avg_lines: 0,
        },
        with_files: false,
        children: ChildrenMode::Collapse,
        top: 0,
    };

    let diff_rows = compute_diff_rows(&with_go, &empty);
    assert_eq!(diff_rows.len(), 1);

    let removed = &diff_rows[0];
    assert_eq!(removed.lang, "Go");
    assert_eq!(removed.old_code, 50);
    assert_eq!(removed.new_code, 0);
    assert_eq!(removed.delta_code, -50);
}
1932
/// Diffing a report against itself must yield no rows: a language whose
/// numbers did not change is left out of the diff.
#[test]
fn test_compute_diff_rows_unchanged_excluded() {
    let rust = LangRow {
        lang: String::from("Rust"),
        code: 100,
        lines: 100,
        files: 1,
        bytes: 1000,
        tokens: 250,
        avg_lines: 100,
    };
    let snapshot = LangReport {
        rows: vec![rust],
        total: Totals {
            code: 100,
            lines: 100,
            files: 1,
            bytes: 1000,
            tokens: 250,
            avg_lines: 100,
        },
        with_files: false,
        children: ChildrenMode::Collapse,
        top: 0,
    };

    // The same report is used as both sides of the comparison.
    let diff = compute_diff_rows(&snapshot, &snapshot);
    assert!(diff.is_empty(), "unchanged languages should be excluded");
}
1962
/// Pins the rendering of `format_delta` for a positive, a zero and a negative
/// input (mutation-testing guard).
#[test]
fn test_format_delta() {
    // (input, expected rendering)
    for (delta, expected) in [(5, "+5"), (0, "0"), (-3, "-3")] {
        assert_eq!(format_delta(delta), expected);
    }
}
1970
1971    // ========================
1972    // write_*_to Tests (mutation killers)
1973    // ========================
1974
1975    fn sample_global_args() -> ScanOptions {
1976        ScanOptions::default()
1977    }
1978
1979    fn sample_lang_args(format: TableFormat) -> LangArgs {
1980        LangArgs {
1981            paths: vec![PathBuf::from(".")],
1982            format,
1983            top: 0,
1984            files: false,
1985            children: ChildrenMode::Collapse,
1986        }
1987    }
1988
1989    fn sample_module_args(format: TableFormat) -> ModuleArgs {
1990        ModuleArgs {
1991            paths: vec![PathBuf::from(".")],
1992            format,
1993            top: 0,
1994            module_roots: vec!["crates".to_string()],
1995            module_depth: 2,
1996            children: tokmd_settings::ChildIncludeMode::Separate,
1997        }
1998    }
1999
/// Writing a language report as Markdown must emit the header, the Rust row
/// and the bold total row.
#[test]
fn write_lang_report_to_md_writes_content() {
    let mut sink: Vec<u8> = Vec::new();
    write_lang_report_to(
        &mut sink,
        &sample_lang_report(true),
        &sample_global_args(),
        &sample_lang_args(TableFormat::Md),
    )
    .unwrap();
    let rendered = String::from_utf8(sink).unwrap();

    assert!(!rendered.is_empty(), "output must not be empty");
    // (expected fragment, failure message)
    let expectations = [
        ("|Lang|", "must contain markdown header"),
        ("|Rust|", "must contain Rust row"),
        ("|**Total**|", "must contain total row"),
    ];
    for (needle, why) in expectations {
        assert!(rendered.contains(needle), "{why}");
    }
}
2015
/// Writing a language report as TSV must emit the tab-separated header, the
/// Rust row and the total row.
#[test]
fn write_lang_report_to_tsv_writes_content() {
    let mut sink: Vec<u8> = Vec::new();
    write_lang_report_to(
        &mut sink,
        &sample_lang_report(false),
        &sample_global_args(),
        &sample_lang_args(TableFormat::Tsv),
    )
    .unwrap();
    let rendered = String::from_utf8(sink).unwrap();

    assert!(!rendered.is_empty(), "output must not be empty");
    // (expected fragment, failure message)
    let expectations = [
        ("Lang\t", "must contain TSV header"),
        ("Rust\t", "must contain Rust row"),
        ("Total\t", "must contain total row"),
    ];
    for (needle, why) in expectations {
        assert!(rendered.contains(needle), "{why}");
    }
}
2031
/// Writing a language report as JSON must emit a document that deserializes
/// into a `LangReceipt` in `lang` mode, carrying the sample's rows and total.
#[test]
fn write_lang_report_to_json_writes_receipt() {
    let mut sink: Vec<u8> = Vec::new();
    write_lang_report_to(
        &mut sink,
        &sample_lang_report(true),
        &sample_global_args(),
        &sample_lang_args(TableFormat::Json),
    )
    .unwrap();
    let json = String::from_utf8(sink).unwrap();

    assert!(!json.is_empty(), "output must not be empty");

    // Round-trip through serde to prove the output is a well-formed receipt.
    let parsed = serde_json::from_str::<LangReceipt>(&json).unwrap();
    assert_eq!(parsed.mode, "lang");
    assert_eq!(parsed.report.rows.len(), 2);
    assert_eq!(parsed.report.total.code, 1050);
}
2049
/// Writing a module report as Markdown must emit the header, the
/// `crates/foo` row and the bold total row.
#[test]
fn write_module_report_to_md_writes_content() {
    let mut sink: Vec<u8> = Vec::new();
    write_module_report_to(
        &mut sink,
        &sample_module_report(),
        &sample_global_args(),
        &sample_module_args(TableFormat::Md),
    )
    .unwrap();
    let rendered = String::from_utf8(sink).unwrap();

    assert!(!rendered.is_empty(), "output must not be empty");
    // (expected fragment, failure message)
    let expectations = [
        ("|Module|", "must contain markdown header"),
        ("|crates/foo|", "must contain module row"),
        ("|**Total**|", "must contain total row"),
    ];
    for (needle, why) in expectations {
        assert!(rendered.contains(needle), "{why}");
    }
}
2065
/// Writing a module report as TSV must emit the tab-separated header, the
/// `crates/foo` row and the total row.
#[test]
fn write_module_report_to_tsv_writes_content() {
    let mut sink: Vec<u8> = Vec::new();
    write_module_report_to(
        &mut sink,
        &sample_module_report(),
        &sample_global_args(),
        &sample_module_args(TableFormat::Tsv),
    )
    .unwrap();
    let rendered = String::from_utf8(sink).unwrap();

    assert!(!rendered.is_empty(), "output must not be empty");
    // (expected fragment, failure message)
    let expectations = [
        ("Module\t", "must contain TSV header"),
        ("crates/foo\t", "must contain module row"),
        ("Total\t", "must contain total row"),
    ];
    for (needle, why) in expectations {
        assert!(rendered.contains(needle), "{why}");
    }
}
2081
/// Writing a module report as JSON must emit a document that deserializes
/// into a `ModuleReceipt` in `module` mode, carrying the sample's rows and total.
#[test]
fn write_module_report_to_json_writes_receipt() {
    let mut sink: Vec<u8> = Vec::new();
    write_module_report_to(
        &mut sink,
        &sample_module_report(),
        &sample_global_args(),
        &sample_module_args(TableFormat::Json),
    )
    .unwrap();
    let json = String::from_utf8(sink).unwrap();

    assert!(!json.is_empty(), "output must not be empty");

    // Round-trip through serde to prove the output is a well-formed receipt.
    let parsed = serde_json::from_str::<ModuleReceipt>(&json).unwrap();
    assert_eq!(parsed.mode, "module");
    assert_eq!(parsed.report.rows.len(), 2);
    assert_eq!(parsed.report.total.code, 1000);
}
2099}