use anyhow::{Context, Result};
use camino::Utf8PathBuf;
use std::fs::File;
use std::io::{self, Read, Write};
use std::path::Path;
use crate::input::FileOrStdin;
use crate::{Extractor, Tag, Tagged, TextData};
/// Tags every match of `extractor` in `content` and emits the result as one JSON line.
///
/// Each match yielded by `extractor.match_iter(content)` is recorded as a [`Tag`]
/// carrying the match's range. If at least one tag was recorded, the input text is
/// attached (decoded lossily, so invalid UTF-8 sequences become U+FFFD) and the
/// tagged value is written to `output` through `write_json`, followed by a newline.
/// Content without any match produces no output at all.
///
/// # Errors
///
/// Returns an error if `write_json` or the trailing newline write to `output` fails.
fn tag_content(content: &[u8], extractor: &Extractor, output: &mut dyn Write) -> Result<()> {
    let mut tagged = Tagged::new(content);
    for m in extractor.match_iter(content) {
        tagged = tagged.tag(Tag::new(m.as_matched_str(), m.as_str()).with_range(m.range()));
    }

    // Nothing matched: skip the text conversion and emit no line.
    if tagged.tags().is_empty() {
        return Ok(());
    }

    // `into_owned` hands back the already-allocated `String` when the lossy
    // conversion had to replace bytes, instead of copying it a second time.
    let text = String::from_utf8_lossy(content).into_owned();
    tagged.set_text_data(TextData { text });
    tagged.write_json(output)?;
    writeln!(output)?;
    Ok(())
}
pub fn tag_file(path: &Path, extractor: &Extractor, output: &mut dyn Write) -> Result<()> {
let mut content = Vec::new();
let mut file =
File::open(path).with_context(|| format!("Failed to open file: {}", path.display()))?;
file.read_to_end(&mut content)
.with_context(|| format!("Failed to read file: {}", path.display()))?;
tag_content(&content, extractor, output)
}
/// Tags each input in `paths`, in order, writing results to `output`.
///
/// Every path is classified with `FileOrStdin::from_path`: file entries are
/// processed by [`tag_file`], while the stdin variant reads all of standard
/// input and tags those bytes directly.
///
/// # Errors
///
/// Stops at the first failure and returns it; inputs after the failing one are
/// not processed.
pub fn tag_files(
    paths: &[Utf8PathBuf],
    extractor: &Extractor,
    output: &mut dyn Write,
) -> Result<()> {
    paths.iter().try_for_each(|entry| -> Result<()> {
        match FileOrStdin::from_path(entry.clone()) {
            FileOrStdin::File(file_path) => {
                tag_file(file_path.as_std_path(), extractor, &mut *output)
            }
            FileOrStdin::Stdin => {
                let mut buffer = Vec::new();
                io::stdin()
                    .read_to_end(&mut buffer)
                    .context("Failed to read from stdin")?;
                tag_content(&buffer, extractor, &mut *output)
            }
        }
    })
}