// wsl-clip-core 0.5.1
//
// Core library for wsl-clip clipboard bridge
// Documentation
// <FILE>src/text_processor.rs</FILE> - <DESC>Merged Sanitizer + Tree Renderer</DESC>
// <VERS>VERSION: 3.2.1 - 2025-12-08T00:00:00Z</VERS>
// <WCTX>Align sanitizer dependency name with workspace.</WCTX>
// <CLOG>Switched sanitizer import to clip_sanitize.</CLOG>

use crate::debug_logger::create_logger;
use crate::fnc_tree_renderer;
use anyhow::{Context, Result};
use chrono::Utc;
use regex::Regex;
use std::fs::File;
use std::io::{self, BufRead, BufReader, Write};
use std::path::PathBuf;
use clip_sanitize::{FlowDirection, HygieneOptions, LineEnding, Sanitizer};
/// Options controlling text sanitization, formatting, and tree rendering.
///
/// All flags are read by [`process_input`]; each one is independent of the
/// others.
#[derive(Debug, Clone)]
pub struct TextOptions {
    /// When `true`, suppresses the per-file `# FILE: ... READ: ...` header,
    /// the blank spacer line after each file, and the closing
    /// `# End of FILES.` footer.
    pub no_header: bool,
    /// When `true`, ANSI escape sequences are removed from every line and the
    /// sanitizer runs with repair and its default hygiene options enabled.
    pub strip_ansi: bool,
    /// When `true`, the file tree and each file's content are wrapped in
    /// fenced Markdown code blocks (the fence is tagged with the file
    /// extension).
    pub use_markdown: bool,
    /// When `true`, all emitted line breaks are `\r\n` instead of `\n`.
    pub use_crlf: bool,
    /// When `true`, a "File Hierarchy" tree of the input paths is written
    /// before any file content.
    pub show_tree: bool,
}
/// Stream sanitized text from files or stdin into the provided writer.
///
/// * `files = Some(list)` — the list is sorted, an optional file tree is
///   written first (`opts.show_tree`), then every path that is a regular file
///   is streamed line by line, optionally framed by a `# FILE:` header, a
///   Markdown fence, and a trailing spacer. Paths that are not regular files
///   are logged and skipped. An empty list returns `(0, 0)` without writing.
/// * `files = None` — lines are read from stdin; no headers or fences are
///   written.
///
/// Every content line goes through optional ANSI stripping followed by the
/// configured `Sanitizer`, and is terminated with `\n` (or `\r\n` when
/// `opts.use_crlf` is set) regardless of how the source line ended.
///
/// Returns `(bytes_written, files_processed)`. `files_processed` is always
/// `0` in stdin mode.
///
/// # Errors
///
/// Returns an error if stdin mode is selected while stdin is a terminal, if a
/// file cannot be opened, if a line cannot be read (including input that is
/// not valid UTF-8), if the sanitizer rejects a line, or if writing to
/// `writer` fails.
pub fn process_input<W: Write>(
    files: Option<Vec<PathBuf>>,
    opts: &TextOptions,
    writer: &mut W,
) -> Result<(usize, usize)> {
    let log = create_logger("text_processor");
    let mut total_bytes = 0;
    let mut files_processed = 0;
    let use_crlf = opts.use_crlf;

    // Configure the sanitizer: full repair/hygiene when stripping is requested,
    // otherwise a pass-through configuration that only honours the CRLF flag.
    let sanitizer = if opts.strip_ansi {
        Sanitizer::new(FlowDirection::LinuxToWindows)
            .line_ending(if use_crlf {
                LineEnding::CrLf
            } else {
                LineEnding::Lf
            })
            .repair(true)
            .hygiene(HygieneOptions::default())
    } else {
        Sanitizer::new(FlowDirection::Custom)
            .line_ending(if use_crlf {
                LineEnding::CrLf
            } else {
                LineEnding::None
            })
            .repair(false)
            .hygiene(HygieneOptions {
                replace_nbsps: false,
                fix_smart_quotes: false,
                strip_invisibles: false,
            })
    };

    // Local ANSI layer: matches `ESC [ <params> m` (SGR) and `ESC [ <params> K`
    // (erase in line). Parameters may be up to three digits so that 256-colour
    // and truecolour codes such as `ESC[38;5;196m` are matched. The final byte
    // class is `[mK]`; a `|` inside a character class would be a literal pipe.
    let ansi_regex = opts.strip_ansi.then(|| {
        Regex::new(r"\x1B\[([0-9]{1,3}(;[0-9]{1,3})*)?[mK]")
            .expect("hard-coded ANSI pattern is a valid regex")
    });

    let timestamp = Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string();

    // Line terminator appended after every sanitized content line.
    let eol: &[u8] = if use_crlf { b"\r\n" } else { b"\n" };

    // Writes framing text (headers, fences, spacers), converting `\n` to
    // `\r\n` when CRLF output is requested. Returns the bytes written.
    let emit = |w: &mut W, text: &str| -> Result<usize> {
        if use_crlf {
            let converted = text.replace('\n', "\r\n");
            w.write_all(converted.as_bytes())?;
            Ok(converted.len())
        } else {
            w.write_all(text.as_bytes())?;
            Ok(text.len())
        }
    };

    // Writes one content line: strip ANSI (if enabled), sanitize, then append
    // the line terminator. Returns the bytes written.
    let write_line = |w: &mut W, line: &str| -> Result<usize> {
        // Borrow the input unchanged when there is nothing to strip.
        let stripped = match &ansi_regex {
            Some(re) => re.replace_all(line, ""),
            None => std::borrow::Cow::Borrowed(line),
        };
        let (clean, _report) = sanitizer
            .process(stripped.as_bytes())
            .map_err(|e| anyhow::anyhow!("Sanitization failed: {}", e))?;
        w.write_all(&clean)?;
        w.write_all(eol)?;
        Ok(clean.len() + eol.len())
    };

    if let Some(mut file_list) = files {
        if file_list.is_empty() {
            return Ok((0, 0));
        }
        file_list.sort();

        // Optional file-hierarchy preamble.
        if opts.show_tree {
            log.debug("Rendering file tree");
            let tree = fnc_tree_renderer::render_tree(&file_list);
            let tree_header = format!("File Hierarchy:\n{}\n", tree);
            let chunk = if opts.use_markdown {
                format!("```text\n{}```\n\n", tree_header)
            } else {
                format!("{}\n", tree_header)
            };
            total_bytes += emit(writer, &chunk)?;
        }

        log.debug(&format!("Processing {} files (streaming)", file_list.len()));
        let total_files_in_list = file_list.len();
        let mut processed_list = Vec::new();

        for path in file_list {
            // `is_file()` is already false for paths that do not exist.
            if !path.is_file() {
                log.warn(&format!("Skipped invalid file: {:?}", path));
                continue;
            }
            files_processed += 1;
            processed_list.push(path.to_string_lossy().into_owned());

            // Header
            if !opts.no_header {
                let header = format!("# FILE: {} READ: {}\n", path.display(), timestamp);
                total_bytes += emit(writer, &header)?;
            }

            // Markdown start: fence tagged with the file extension (may be empty).
            if opts.use_markdown {
                let ext = path.extension().and_then(|s| s.to_str()).unwrap_or("");
                total_bytes += emit(writer, &format!("```{}\n", ext))?;
            }

            // Stream content line by line.
            let file =
                File::open(&path).with_context(|| format!("Failed to read file: {:?}", path))?;
            for line_res in BufReader::new(file).lines() {
                let line = line_res.context("Failed to read line")?;
                total_bytes += write_line(writer, &line)?;
            }

            // Markdown end
            if opts.use_markdown {
                total_bytes += emit(writer, "```\n")?;
            }

            // Spacer between files
            if !opts.no_header {
                total_bytes += emit(writer, "\n")?;
            }
        }

        // The footer is gated on the number of paths requested, not on how
        // many were actually processed.
        if !opts.no_header && total_files_in_list > 1 {
            let footer = format!("# End of FILES. SENT: {}\n", processed_list.join(" "));
            total_bytes += emit(writer, &footer)?;
        }
    } else {
        // Stdin mode
        log.debug("Reading from Stdin (Streaming)");
        if atty::is(atty::Stream::Stdin) {
            anyhow::bail!("No input provided. Pipe data or specify files.");
        }
        let stdin = io::stdin();
        let reader = stdin.lock();
        for line_res in reader.lines() {
            let line = line_res.context("Failed to read line from stdin")?;
            total_bytes += write_line(writer, &line)?;
        }
    }

    Ok((total_bytes, files_processed))
}

// <FILE>src/text_processor.rs</FILE> - <DESC>Merged Sanitizer + Tree Renderer</DESC>
// <VERS>END OF VERSION: 3.2.1 - 2025-12-08T00:00:00Z</VERS>