// img2text 0.2.0 - Docs.rs

use anyhow::{anyhow, bail, Context, Result};
use clap::{Parser, ValueHint};
use std::{convert::TryInto, io::prelude::*, path::PathBuf, str::FromStr, unreachable};

mod imageops;

// Command-line options of the `img2text` binary, parsed in `main` via
// `Opts::parse()`.
//
// NOTE(review): with clap's derive API the `///` comments on the fields are
// presumably what ends up in the `--help` output, so treat them as
// user-facing text. Maintainer notes are therefore written as plain `//`
// comments.
//
// Range and consistency checks for the numeric options (`cell_width`, the
// Canny thresholds, `--dither` vs. `-i edge-canny`) are performed in `main`,
// not here.
#[derive(Parser, Debug)]
#[clap(long_about = r"
Image-to-text converter

                 ⠀⠀⠀⠀⠀⠀⠀⢠⣄⣠⣶⣤⣿⣤⣶⣄⣠⡄⠀⠀⠀⠀⠀⠀⠀
                 ⠀⠀⠀⠀⣄⣸⣿⣾⡿⠿⠛⢿⣀⡿⠛⠿⢿⣷⣿⣇⣠⠀⠀⠀⠀
                 ⠀⠀⢠⣤⣿⣿⣛⣁⣀⣀⣀⣀⣉⣀⣀⣀⡀⠈⠛⢿⣿⣤⡄⠀⠀
                 ⠀⠲⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⡄⠈⣿⣿⣷⠖⠀
                 ⠐⢾⣿⣤⠼⠇⢸⣿⣿⣿⣇⣀⣀⣀⣹⣿⣿⣿⠇⠸⢧⣤⣿⡷⠂
    powered by:  ⠰⢿⣿⡇⠀⠀⢸⣿⣿⣿⡿⠿⠿⢿⣿⣿⣿⣦⠀⠀⢀⣸⣿⡿⠆
                 ⠐⠿⣿⣷⣤⣤⣼⣿⣿⣿⣧⣤⣄⠀⢻⣿⣿⣿⣦⣤⣾⣿⣿⠿⠂
                 ⠀⠚⢿⣿⣿⡿⠿⠿⠿⠿⠿⠿⠿⠀⠈⠻⠿⠿⠿⢿⣿⣿⡿⠓⠀
                 ⠀⠀⠘⠛⣿⣿⡟⢻⡆⠀⠀⠀⠀⠀⠀⠀⣸⠛⢳⣿⣿⠛⠃⠀⠀
                 ⠀⠀⠀⠀⠉⢹⡿⢿⣿⣷⣶⣶⣶⣶⣶⣾⣿⡿⢿⡏⠉⠀⠀⠀⠀
                 ⠀⠀⠀⠀⠀⠀⠀⠈⠉⠘⠟⠙⠿⠋⠻⠃⠉⠁⠀⠀⠀⠀⠀⠀⠀

(The above image was generated by this program with an option `-s 25`.)
")]
struct Opts {
    /// The image to process
    #[clap(name = "FILE", value_hint = ValueHint::AnyPath)]
    image_path: PathBuf,
    // Converted to an `img2text` glyph set with `Style::glyph_set`.
    /// The glyph set to use
    #[clap(short = 'g', default_value = "braille", value_enum)]
    style: Style,
    // `main` rejects values that are not finite, `<= 0.1` or `> 10.0`.
    /// The width of output characters, only used when `-s` is given without
    /// `!`
    #[clap(short = 'w', default_value = "0.45")]
    cell_width: f64,
    // `None` when `-s` is absent. `main` then substitutes the terminal size
    // (downscale-only) if stdout is attended and its size is known; otherwise
    // no resizing is performed.
    /// The output size, measured in character cells or percent (e.g., `80`,
    /// `80x40`, `80x40!`, `-80x40`, `100%`).
    /// [default: downscale to terminal size (if the output is a terminal) or
    /// 100% (otherwise)]
    ///
    ///  - 80: Fit within 80x80 character cells
    ///
    ///  - 80x40: Fit within 80x40 character cells, upscaling as necessary
    ///
    ///  - -80x40: Fit within 80x40 character cells, only downscaling
    ///
    ///  - 80x40!: Fit to 80x40 character cells, not maintaining the aspect
    ///    ratio
    ///
    ///  - 150%: Scale by 150%. The actual output size depends on the glyph set
    ///    being used; for example, `2x3` maps each 2x3 block to one character.
    ///
    #[clap(short = 's')]
    out_size: Option<SizeSpec>,

    // `InputTy::Auto` is resolved to a concrete variant inside `main`.
    /// Specifies how to interpret the input image.
    #[clap(short = 'i', default_value = "auto", value_enum)]
    input_ty: InputTy,
    // Both Canny thresholds must be finite, `> 0` and `<= 1150`, and the low
    // one must not exceed the high one (checked in `main`).
    /// A parameter for the Canny edge detector (`-i edge-canny`).
    ///
    /// Edges with a strength higher than the low threshold will appear in the
    /// output image if there are strong edges nearby.
    #[clap(long = "canny-low-threshold", default_value = "10")]
    edge_canny_low_threshold: f32,
    /// A parameter for the Canny edge detector (`-i edge-canny`).
    ///
    /// Edges with a strength higher than the high threshold will always appear
    /// as edges in the output image.
    #[clap(long = "canny-high-threshold", default_value = "20")]
    edge_canny_high_threshold: f32,
    // Combining this with an explicit `-i edge-canny` is an error in `main`;
    // when `-i auto` resolves to edge detection the flag is silently ignored.
    /// Apply dithering to preserve the gray shades. Incompatible with
    /// `-i edge-canny`.
    #[clap(short = 'd', long = "dither")]
    dither: bool,
    // Only consulted by `main` when dithering is actually applied.
    /// Choose the contrast enhancing technique to use for dithering.
    #[clap(long = "dither-contrast", default_value = "median-quant", value_enum)]
    dither_contrast: DitherContrast,
}

// Glyph sets selectable with `-g`. Each variant is translated to one of the
// `img2text::GLYPH_SET_*` constants by `Style::glyph_set`.
//
// The leading underscore on `_1x1` .. `_2x3` exists because a Rust identifier
// cannot start with a digit.
//
// NOTE(review): `///` comments on `clap::ValueEnum` variants presumably become
// help text for the possible values, so notes here are plain `//` comments.
#[derive(clap::ValueEnum, Clone, Debug)]
enum Style {
    Slc,
    Ms2x3,
    _1x1,
    _1x2,
    _2x2,
    _2x3,
    // The default for `-g` (see `Opts::style`); maps to `GLYPH_SET_BRAILLE8`.
    Braille,
}

impl Style {
    fn glyph_set(&self) -> &dyn img2text::GlyphSet {
        match self {
            Self::Slc => img2text::GLYPH_SET_SLC,
            Self::Ms2x3 => img2text::GLYPH_SET_MS_2X3,
            Self::_1x1 => img2text::GLYPH_SET_1X1,
            Self::_1x2 => img2text::GLYPH_SET_1X2,
            Self::_2x2 => img2text::GLYPH_SET_2X2,
            Self::_2x3 => img2text::GLYPH_SET_2X3,
            Self::Braille => img2text::GLYPH_SET_BRAILLE8,
        }
    }
}

// How the input image is interpreted (`-i`).
//
// `Auto` never survives the detection step in `main`: it is replaced by
// `Wob`, `Bow` or `EdgeCanny` based on the image histogram, and the later
// `match` on this type treats `Auto` as unreachable.
//
// NOTE(review): `///` comments on `clap::ValueEnum` variants presumably become
// help text for the possible values, so notes here are plain `//` comments.
#[derive(clap::ValueEnum, Clone, Debug, PartialEq)]
enum InputTy {
    /// Automatic detection
    Auto,
    // `main` does not invert the thresholded pixels for this variant.
    /// White-on-black
    Wob,
    // `main` inverts the thresholded pixels for this variant.
    /// Black-on-white
    Bow,
    // `main` runs `imageproc::edges::canny` on the image (skipped for
    // zero-sized images) and does not invert the result.
    /// Canny edge detection
    EdgeCanny,
}

// Contrast enhancement applied before dithering (`--dither-contrast`).
// Only consulted by `main` when `--dither` is in effect.
//
// NOTE(review): `///` comments on `clap::ValueEnum` variants presumably become
// help text for the possible values, so notes here are plain `//` comments.
#[derive(clap::ValueEnum, Clone, Debug)]
enum DitherContrast {
    // The dithering palette stays at `[0, 255]`.
    None,
    // The two palette entries are derived with `imageops::median` from the
    // histogram slices below and at-or-above the threshold.
    /// Quantize color values to the median of the dark or bright pixel set.
    MedianQuant,
    // Every pixel (and the threshold itself) is passed through the table
    // produced by `imageops::equalization_map`.
    /// Apply pre-equalization
    Equalize,
}

/// Output size requested with `-s` (or synthesized from the terminal size).
#[derive(Debug)]
enum SizeSpec {
    /// A size in character cells, `[width, height]`, plus the policy that
    /// says how the image is fitted to it.
    Absolute { dims: [usize; 2], mode: SizeMode },
    /// A scale factor relative to the input image; `1.0` corresponds to
    /// `100%`.
    Relative(f64),
}

/// Fitting policy of [`SizeSpec::Absolute`].
#[derive(Debug, PartialEq)]
enum SizeMode {
    /// Plain `WxH` / `N`: no modifier was given.
    Contain,
    /// `WxH!`: the `!` suffix was given.
    Fill,
    /// `-WxH`: the `-` prefix was given.
    ScaleDown,
}

impl FromStr for SizeSpec {
    type Err = String;

    /// Parses the textual forms accepted by `-s`:
    ///
    ///  - `N%` gives [`SizeSpec::Relative`] with `N / 100`; `N` must be a
    ///    finite, non-negative number.
    ///  - `N` or `WxH`, optionally with a `!` suffix or a `-` prefix (never
    ///    both), gives [`SizeSpec::Absolute`]; a lone `N` means `NxN`.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message naming the offending component.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Percentage form: everything before `%` must be a sane ratio.
        if let Some(percent) = s.strip_suffix('%') {
            return match percent.parse::<f64>() {
                Err(_) => Err(format!("bad ratio: '{}'", percent)),
                Ok(ratio) if ratio.is_finite() && ratio >= 0.0 => {
                    Ok(Self::Relative(ratio / 100.0))
                }
                Ok(_) => Err(format!("ratio out of range: '{}'", percent)),
            };
        }

        // Peel off the optional modifiers, suffix first and then prefix.
        let (s, force) = match s.strip_suffix('!') {
            Some(rest) => (rest, true),
            None => (s, false),
        };
        let (s, scale_down) = match s.strip_prefix('-') {
            Some(rest) => (rest, true),
            None => (s, false),
        };

        // Dimensions are validated before the modifier combination, so a
        // malformed number is reported in preference to a `!`/`-` conflict.
        let dims: [usize; 2] = match s.find('x') {
            Some(split_at) => {
                let (width, height) = (&s[..split_at], &s[split_at + 1..]);
                let w = width
                    .parse::<usize>()
                    .map_err(|_| format!("bad width: '{}'", width))?;
                let h = height
                    .parse::<usize>()
                    .map_err(|_| format!("bad height: '{}'", height))?;
                [w, h]
            }
            None => {
                let size = s
                    .parse::<usize>()
                    .map_err(|_| format!("bad size: '{}'", s))?;
                [size, size]
            }
        };

        let mode = match (force, scale_down) {
            (true, true) => return Err("cannot specify both `!` and `-`".to_owned()),
            (true, false) => SizeMode::Fill,
            (false, true) => SizeMode::ScaleDown,
            (false, false) => SizeMode::Contain,
        };

        Ok(Self::Absolute { dims, mode })
    }
}

fn main() -> Result<()> {
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("img2text=info"))
        .init();

    let mut opts = Opts::parse();
    log::debug!("opts = {:#?}", opts);

    // Open the image
    let img = image::open(&opts.image_path).with_context(|| {
        format!(
            "Failed to read an input image from '{}'",
            opts.image_path.display()
        )
    })?;
    let mut img = img.into_luma8();

    // Options
    let mut b2t_opts = img2text::Bmp2textOpts::new();
    b2t_opts.glyph_set = opts.style.glyph_set();

    if !opts.cell_width.is_finite() || opts.cell_width <= 0.1 || opts.cell_width > 10.0 {
        bail!("cell_width is out of range");
    }

    if !opts.edge_canny_low_threshold.is_finite()
        || opts.edge_canny_low_threshold <= 0.0
        || opts.edge_canny_low_threshold > 1150.0
    {
        bail!("edge_canny_low_threshold is out of range");
    }

    if !opts.edge_canny_high_threshold.is_finite()
        || opts.edge_canny_high_threshold <= 0.0
        || opts.edge_canny_high_threshold > 1150.0
    {
        bail!("edge_canny_high_threshold is out of range");
    }

    if opts.edge_canny_low_threshold > opts.edge_canny_high_threshold {
        bail!("edge_canny_low_threshold mustn't be greater than edge_canny_high_threshold");
    }

    if opts.dither && opts.input_ty == InputTy::EdgeCanny {
        bail!("`--dither` and `-i edge-canny` are incompatible");
    }

    // Resize the image to the terminal size if the size is not specified
    let console_stdout = console::Term::stdout();
    if opts.out_size.is_none() && console_stdout.features().is_attended() {
        if let Some((h, w)) = console_stdout.size_checked() {
            let h = h.saturating_sub(3);
            log::info!(
                "downscaling to `{}x{}` (tty size minus some) because stdout is tty, and `-s` is unspecified",
                w,
                h
            );
            opts.out_size = Some(SizeSpec::Absolute {
                mode: SizeMode::ScaleDown,
                dims: [w as _, h as _],
            });
        }
    }

    // Resize the image if requested
    if let Some(out_size) = &opts.out_size {
        let in_dims = match out_size {
            SizeSpec::Absolute {
                dims,
                mode: SizeMode::Fill,
            } => img2text::adjust_image_size_for_output_size(*dims, &b2t_opts)
                .ok_or_else(|| anyhow!("requested size is too large"))?,

            SizeSpec::Absolute {
                dims,
                mode: SizeMode::Contain,
            } => img2text::adjust_image_size_for_output_size_preserving_aspect_ratio(
                [img.width() as _, img.height() as _],
                *dims,
                true,
                false, // contain
                opts.cell_width,
                &b2t_opts,
            )
            .ok_or_else(|| anyhow!("requested size is too large"))?,

            SizeSpec::Absolute {
                dims,
                mode: SizeMode::ScaleDown,
            } => img2text::adjust_image_size_for_output_size_preserving_aspect_ratio(
                [img.width() as _, img.height() as _],
                *dims,
                false,
                false, // contain
                opts.cell_width,
                &b2t_opts,
            )
            .ok_or_else(|| anyhow!("requested size is too large"))?,

            SizeSpec::Relative(ratio) => {
                let w = img.width() as f64 * ratio;
                let h = img.height() as f64 * ratio;
                if w > u32::MAX as f64 || h > u32::MAX as f64 {
                    bail!("requested size is too large");
                }
                [w as usize, h as usize]
            }
        };

        let in_dims = [in_dims[0] as u32, in_dims[1] as u32];

        if img.dimensions() != (in_dims[0], in_dims[1]) {
            log::debug!(
                "resampling the image from {:?} to {:?}",
                match img.dimensions() {
                    (x, y) => [x, y],
                },
                in_dims
            );

            img = image::imageops::resize(
                &img,
                in_dims[0],
                in_dims[1],
                image::imageops::FilterType::CatmullRom,
            );
        } else {
            log::debug!(
                "refusing to resample the image to the identical size ({:?})",
                in_dims
            );
        }
    }

    log::debug!(
        "expected output size for image of size {:?} is {:?}",
        match img.dimensions() {
            (x, y) => [x, y],
        },
        [
            img2text::num_glyphs_for_image_width(img.width() as _, &b2t_opts),
            img2text::num_lines_for_image_height(img.height() as _, &b2t_opts),
        ]
    );

    // Auto-threshold
    let mut histogram = [0; 256];
    imageops::accumulate_histogram(
        &mut histogram,
        img.pixels().map(|&image::Luma([luma])| luma),
    );
    log::trace!("histogram = {:?}", histogram);
    let mut threshold = if let Some(x) = imageops::find_threshold(&histogram) {
        log::debug!("threshold = {}", x);
        x
    } else {
        log::debug!("couldn't find the threshold, using the default value 128");
        128
    };

    // black-on-white/white-on-black detection
    let omega0: u32 = histogram[..threshold].iter().sum();
    let omega1: u32 = histogram[threshold..].iter().sum();
    if opts.input_ty == InputTy::Auto {
        let omega_min = omega0.min(omega1);
        let omega_max = omega0.max(omega1);
        log::debug!("[omega_min, omega_max] = {:?}", [omega_min, omega_max]);

        // TODO: probably should take line thickness into account when detecting
        //       line art
        opts.input_ty = if omega_min * 4 > omega_max {
            log::debug!(
                "there are roughly the same numbers of black and white \
                pixels. this indicates the input image is not a line art, so \
                we will use `edge-canny` (the Canny edge detector)."
            );
            InputTy::EdgeCanny
        } else {
            log::debug!(
                "the numbers of black and white pixels are remarkably different.
                this indicates the input image is a line art, so \
                we will not use the edge detector."
            );
            if omega1 > omega0 {
                InputTy::Bow
            } else {
                InputTy::Wob
            }
        };
        log::debug!("guessed input_ty = {:?}", opts.input_ty);
    }

    let invert = match opts.input_ty {
        InputTy::Bow => true,
        InputTy::Wob => false,
        InputTy::Auto => unreachable!(),
        InputTy::EdgeCanny => {
            if img.width() != 0 && img.height() != 0 {
                img = imageproc::edges::canny(
                    &img,
                    opts.edge_canny_low_threshold,
                    opts.edge_canny_high_threshold,
                );
            }
            false
        }
    };

    // Apply dithering.
    // `-i auto` can imply `-i edge-canny`, in which case just ignore `--dither`.
    if opts.dither && opts.input_ty != InputTy::EdgeCanny {
        let mut palette = [0, 255];

        match opts.dither_contrast {
            DitherContrast::None => {}
            DitherContrast::MedianQuant => {
                palette[0] = imageops::median(&histogram[0..threshold]) as u8;
                palette[1] = (imageops::median(&histogram[threshold..]) + threshold) as u8;
            }
            DitherContrast::Equalize => {
                let mut map = [0; 256];
                imageops::equalization_map(&mut map, &histogram);
                log::debug!("equalization map = {:?}", map);
                for luma in img.iter_mut() {
                    *luma = map[*luma as usize];
                }
                threshold = map[threshold] as usize;
            }
        }
        log::debug!("dithering palette = {:?}", palette);
        log::debug!("dithering quantization threshold = {:?}", threshold);

        image::imageops::colorops::dither(
            &mut img,
            &BlackWhiteColorMap {
                threshold: threshold as u8,
                palette,
            },
        );
    }

    // Process the image
    use img2text::ImageRead;
    let img_proxy = GrayImageRead {
        image: &img,
        threshold,
        invert,
    };
    let mut out_buffer = String::with_capacity(
        img2text::max_output_len_for_image_dims(img_proxy.dims(), &b2t_opts)
            .ok_or_else(|| anyhow!("image is too large"))?,
    );

    img2text::Bmp2text::new()
        .transform_and_write(&img_proxy, &b2t_opts, &mut out_buffer)
        .unwrap();

    std::io::stdout()
        .write(out_buffer.as_bytes())
        .with_context(|| "Failed to write the output to the standard output")?;

    Ok(())
}

/// Borrowed view of a grayscale image that exposes it to `img2text` as a
/// bi-level image (see the `img2text::ImageRead` impl for this type).
struct GrayImageRead<'a> {
    /// The image whose pixels are read.
    image: &'a image::GrayImage,
    /// A pixel is reported as `true` when its luma is `>= threshold`
    /// (before `invert` is applied).
    threshold: usize,
    /// When set, the thresholded value of every pixel is flipped.
    invert: bool,
}

impl img2text::ImageRead for GrayImageRead<'_> {
    /// Returns the image size as `[width, height]`.
    ///
    /// # Panics
    ///
    /// Panics if a dimension does not fit in `usize`.
    fn dims(&self) -> [usize; 2] {
        let (width, height) = self.image.dimensions();
        let width: usize = width.try_into().unwrap();
        let height: usize = height.try_into().unwrap();
        [width, height]
    }

    /// Fills `out` with the spans of row `y`, delegating the span
    /// construction to `img2text::set_spans_by_fn`.
    ///
    /// A pixel is reported as `true` when its luma is at least `threshold`,
    /// with the result flipped if `invert` is set.
    fn copy_line_as_spans_to(&self, y: usize, out: &mut [img2text::Span]) {
        // Copy the fields out so the closure does not have to capture `self`.
        let pixels = self.image;
        let cutoff = self.threshold;
        let flip = self.invert;
        let row_len = self.dims()[0];

        img2text::set_spans_by_fn(out, row_len, move |x| {
            let luma = pixels[(x as u32, y as u32)].0[0] as usize;
            // For booleans `a != b` is the same as `a ^ b`.
            (luma >= cutoff) != flip
        });
    }
}

/// Two-entry color map handed to `image::imageops::colorops::dither` in
/// `main`.
struct BlackWhiteColorMap {
    /// Lumas `>= threshold` map to `palette[1]`, all others to `palette[0]`.
    threshold: u8,
    /// The two output luma values: `[low, high]`.
    palette: [u8; 2],
}

impl image::imageops::colorops::ColorMap for BlackWhiteColorMap {
    type Color = image::Luma<u8>;

    /// Returns `1` for lumas at or above the threshold and `0` otherwise.
    #[inline]
    fn index_of(&self, color: &Self::Color) -> usize {
        usize::from(color.0[0] >= self.threshold)
    }

    /// Replaces `color` with the palette entry selected by [`Self::index_of`].
    #[inline]
    fn map_color(&self, color: &mut Self::Color) {
        // `index_of` only ever yields 0 or 1, so this index is always in
        // bounds of the two-entry palette.
        *color = image::Luma([self.palette[self.index_of(color)]]);
    }

    /// Returns the palette entry at `index`, or `None` when `index` is not a
    /// valid palette index.
    ///
    /// The palette is accessed with `get` rather than direct indexing so
    /// that an out-of-range index yields `None` instead of a panic.
    #[inline]
    fn lookup(&self, index: usize) -> Option<Self::Color> {
        self.palette.get(index).map(|&luma| image::Luma([luma]))
    }

    /// Signals that [`Self::lookup`] is implemented by this color map.
    fn has_lookup(&self) -> bool {
        true
    }
}