hygg 0.1.20

Simplifying the way you read
use crate::{args::Args, binary_lookup::which};
use hygg_shared::normalize_file_path;
use std::io::{self, IsTerminal, Read};
use std::process::{Command, Stdio};

/// Result of preparing the reader's input, whether it came from stdin or
/// from a file.
pub(crate) struct PreparedInput {
  /// Text lines produced by `cli_justify` for the requested column width.
  /// Empty when no input source could be resolved.
  pub(crate) lines: Vec<String>,
  /// Temp-file path string of the form `<input file>-<uuid>`, set only when
  /// the input came from a file; `None` for stdin or when there is no input.
  /// NOTE(review): this module only builds the name and later removes the
  /// file — whoever creates it lives elsewhere; confirm against callers.
  pub(crate) temp_file: Option<String>,
  /// The text as read or extracted, before justification. `None` when no
  /// input source could be resolved.
  pub(crate) raw_content: Option<String>,
}

/// Reads everything piped into stdin.
///
/// Yields `None` when stdin is attached to a terminal (nothing was piped),
/// when the read fails, or when the piped data turns out to be empty.
pub(crate) fn read_stdin_content() -> Option<String> {
  let mut stdin = io::stdin();
  if stdin.is_terminal() {
    return None;
  }

  let mut piped = String::new();
  // A failed read is treated the same as "no piped input".
  stdin.read_to_string(&mut piped).ok()?;

  if piped.is_empty() { None } else { Some(piped) }
}

/// Builds the [`PreparedInput`] for this run.
///
/// Piped stdin content wins over any file argument: when `stdin_content` is
/// present it is justified to `args.col` and returned without touching the
/// file system. Otherwise the input file is resolved and processed; if no
/// file can be resolved either, an empty `PreparedInput` is returned.
///
/// # Errors
/// Propagates any error returned while processing the input file.
pub(crate) fn prepare_input(
  args: &Args,
  stdin_content: Option<String>,
  ocr_enabled: bool,
) -> Result<PreparedInput, Box<dyn std::error::Error>> {
  match stdin_content {
    Some(piped) => {
      let lines = cli_justify::justify(&piped, args.col);
      Ok(PreparedInput { lines, temp_file: None, raw_content: Some(piped) })
    }
    None => match resolve_input_file(args.file.clone()) {
      Some(path) => process_file_input(args, &path, ocr_enabled),
      // Neither stdin nor a file: hand back an empty document.
      None => Ok(PreparedInput {
        lines: Vec::new(),
        temp_file: None,
        raw_content: None,
      }),
    },
  }
}

/// Removes the temporary file at `temp_file`, if one was recorded.
///
/// Passing `None` is a no-op. A file that is already gone is also treated as
/// success, since the goal ("the file does not exist") is already met.
///
/// # Errors
/// Returns the underlying I/O error when removal fails for any reason other
/// than the file not being found (e.g. insufficient permissions, or the path
/// being a directory).
pub(crate) fn cleanup_temp_file(
  temp_file: Option<&str>,
) -> Result<(), Box<dyn std::error::Error>> {
  let Some(path) = temp_file else {
    return Ok(());
  };

  // Remove directly instead of checking `exists()` first: a separate check
  // leaves a window in which the file can disappear, turning a harmless
  // cleanup into a spurious error.
  match std::fs::remove_file(path) {
    Ok(()) => Ok(()),
    Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
    Err(err) => Err(err.into()),
  }
}

/// Returns the resolved input file path when its extension is `pdf`
/// (compared case-insensitively via lowercasing).
///
/// Yields `None` when no input file can be resolved, when the path has no
/// UTF-8 extension, or when the extension is anything other than `pdf`.
pub(crate) fn resolve_pdf_path(args: &Args) -> Option<String> {
  let candidate = resolve_input_file(args.file.clone())?;

  let is_pdf = std::path::Path::new(&candidate)
    .extension()
    .and_then(|ext| ext.to_str())
    .is_some_and(|ext| ext.to_lowercase() == "pdf");

  is_pdf.then_some(candidate)
}

/// Picks the input file: the explicitly parsed `args_file` when present,
/// otherwise the last command-line argument after the program name.
///
/// Returns `None` when neither is available.
fn resolve_input_file(args_file: Option<String>) -> Option<String> {
  args_file.or_else(|| {
    let mut argv = std::env::args();
    // The first entry is the program name; it never counts as an input file.
    argv.next()?;
    argv.last()
  })
}

/// Extracts the text of `file`, justifies it to `args.col`, and packages the
/// result as a [`PreparedInput`].
///
/// PDFs go through the OCR extractor when `ocr_enabled` is set and are
/// justified with the PDF-specific routine; every other file uses the
/// non-OCR readers and the regular justifier.
///
/// If justification yields no lines, or a single whitespace-only line, an
/// error is printed to stderr and the process exits with status 1.
///
/// # Errors
/// Propagates errors returned by the text-extraction step.
fn process_file_input(
  args: &Args,
  file: &str,
  ocr_enabled: bool,
) -> Result<PreparedInput, Box<dyn std::error::Error>> {
  // Only the name is built here; this function does not create the file.
  let temp_path = format!("{file}-{}", uuid::Uuid::new_v4());

  let lowered_ext = std::path::Path::new(file)
    .extension()
    .and_then(|ext| ext.to_str())
    .map(str::to_lowercase);
  let ext = lowered_ext.as_deref();
  let is_pdf = matches!(ext, Some("pdf"));

  let content = match (is_pdf, ocr_enabled) {
    (true, true) => extract_pdf_text_with_ocr(file)?,
    _ => read_content_without_ocr(file, ext)?,
  };

  let justified = if is_pdf {
    cli_justify::justify_pdf_hybrid(&content, args.col)
  } else {
    cli_justify::justify(&content, args.col)
  };

  // Zero lines, or exactly one blank line, both count as "nothing to read".
  let has_text = match justified.as_slice() {
    [] => false,
    [only] => !only.trim().is_empty(),
    _ => true,
  };
  if !has_text {
    eprintln!("Error: No readable content found in file '{file}'");
    eprintln!("The file may be empty, corrupted, or in an unsupported format.");
    std::process::exit(1);
  }

  Ok(PreparedInput {
    lines: justified,
    temp_file: Some(temp_path),
    raw_content: Some(content),
  })
}

/// Extracts the text of the PDF at `file` by delegating to
/// `cli_pdf_to_text::pdf_to_text_with_bundled_ocr`.
///
/// # Errors
/// Returns whatever error the underlying extractor reports.
fn extract_pdf_text_with_ocr(
  file: &str,
) -> Result<String, Box<dyn std::error::Error>> {
  cli_pdf_to_text::pdf_to_text_with_bundled_ocr(file)
}

/// Reads `file` as text without OCR, choosing the reader by `extension`.
///
/// `extension` is matched against the lowercase literals `"epub"` and
/// `"pdf"`; anything else (including `None`) falls through to the
/// best-effort reader chain.
///
/// A failure of the EPUB or PDF reader is reported on stderr and terminates
/// the process with status 1 rather than being returned to the caller.
fn read_content_without_ocr(
  file: &str,
  extension: Option<&str>,
) -> Result<String, Box<dyn std::error::Error>> {
  match extension {
    Some("epub") => {
      Ok(cli_epub_to_text::epub_to_text(file).unwrap_or_else(|err| {
        eprintln!("Error:\nUnable to read EPUB file '{file}'\n");
        eprintln!("Details:\n{err}\n");
        std::process::exit(1)
      }))
    }
    Some("pdf") => {
      Ok(cli_pdf_to_text::pdf_to_text(file).unwrap_or_else(|err| {
        eprintln!("Error:\nUnable to read PDF file '{file}'\n");
        eprintln!("Details:\n{err}\n");
        std::process::exit(1)
      }))
    }
    _ => read_via_best_effort(file),
  }
}

/// Tries each available reader in turn for a file of unknown type: pandoc
/// first, then the EPUB reader, then the PDF reader.
///
/// The first success is returned. If all three fail, the error from the last
/// attempt is printed to stderr (earlier errors are discarded), a pandoc
/// install hint is added when pandoc is not found, and the process exits
/// with status 1.
fn read_via_best_effort(
  file: &str,
) -> Result<String, Box<dyn std::error::Error>> {
  let attempt = pandoc_to_text(file)
    .or_else(|_| cli_epub_to_text::epub_to_text(file))
    .or_else(|_| cli_pdf_to_text::pdf_to_text(file));

  let failure = match attempt {
    Ok(text) => return Ok(text),
    Err(failure) => failure,
  };

  eprintln!("Error:\nUnable to read file '{file}'\n");
  eprintln!("Details:\n{failure}\n");

  let pandoc_missing = which("pandoc").is_none();
  if pandoc_missing {
    eprintln!(
      "pandoc not installed!\n\nFor additional formats, install pandoc:\nsudo apt install pandoc\n# scoop install pandoc\n# brew install pandoc"
    );
  }

  std::process::exit(1)
}

/// Converts the document at `file_path` to plain text by running
/// `pandoc --to=plain --wrap=none -- <path>` and capturing its stdout.
///
/// # Errors
/// Fails when pandoc cannot be located, when the path cannot be normalized,
/// when the process cannot be spawned, when pandoc exits unsuccessfully (its
/// stderr is included in the message), or when its output is not valid
/// UTF-8.
fn pandoc_to_text(
  file_path: &str,
) -> Result<String, Box<dyn std::error::Error>> {
  if which("pandoc").is_none() {
    return Err(
      "pandoc not found. Install with:\nsudo apt install pandoc\n# scoop install pandoc\n# brew install pandoc".into(),
    );
  }

  let resolved = normalize_file_path(file_path)?;

  let mut pandoc = Command::new("pandoc");
  pandoc
    // `--` ends option parsing so the path is never read as a flag.
    .args(["--to=plain", "--wrap=none", "--"])
    .arg(resolved)
    .stdin(Stdio::null())
    .stdout(Stdio::piped())
    .stderr(Stdio::piped());

  let result = pandoc.output()?;
  if result.status.success() {
    return Ok(String::from_utf8(result.stdout)?);
  }

  let stderr = String::from_utf8_lossy(&result.stderr);
  Err(format!("pandoc failed: {stderr}").into())
}