zh_num 0.4.8

Convert between ASCII numbers and Chinese (zh) numeral words
Documentation
use std::{
    io::{
        self, stderr, stdin, stdout, BufRead, StdinLock, StdoutLock, Write,
    },
    process::exit,
};

use zh_num::{
    parser::{hard_number, number},
    Number, ZhNum, ZhNumUpper,
};

/// Windows-style line terminator.
const CRLF: &str = "\r\n";
/// Unix-style line terminator.
const LF: &str = "\n";

/// Returns the line terminator that `s` ends with.
///
/// Yields [`CRLF`] when `s` ends with `"\r\n"`, [`LF`] when it ends with a
/// bare `"\n"`, and the empty string when `s` has no trailing terminator.
fn get_eol(s: &str) -> &str {
    // CRLF has to be checked first: a CRLF-terminated string also ends with LF.
    if s.ends_with(CRLF) {
        CRLF
    } else if s.ends_with(LF) {
        LF
    } else {
        ""
    }
}

/// The Unicode byte-order mark (U+FEFF); `Processor::fetch_line` strips it
/// from the start of the first line when BOM removal is enabled.
const BOMB: char = '\u{feff}';

/// Error type returned by `Config::parse`.
///
/// Implements [`std::fmt::Debug`], [`std::fmt::Display`] and
/// [`std::error::Error`] so it can be used with `Box<dyn Error>`, `{:?}`
/// formatting and the usual error-handling helpers.
#[derive(Debug)]
enum Error {
    /// A free-form, already formatted message.
    String(String),
    /// A failure to parse an integer value.
    ParseError(std::num::ParseIntError),
}

impl std::fmt::Display for Error {
    /// Writes the message itself, or the wrapped parse error's message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::String(s) => write!(f, "{s}"),
            Error::ParseError(e) => write!(f, "{e}"),
        }
    }
}

// `Display` already forwards the wrapped error's message, so no separate
// `source()` is exposed; that keeps error-chain reporters from printing the
// same text twice.
impl std::error::Error for Error {}

impl From<String> for Error {
    /// Wraps a plain message.
    fn from(v: String) -> Self {
        Self::String(v)
    }
}

impl From<std::num::ParseIntError> for Error {
    /// Wraps an integer parse failure so it can be propagated with `?`.
    fn from(v: std::num::ParseIntError) -> Self {
        Self::ParseError(v)
    }
}

/// Command-line configuration; each field mirrors one option declared in
/// `Config::options`.
#[derive(Debug, Default)]
struct Config {
    /// `-d`: reverse conversion, i.e. turn ASCII digits into Chinese numerals.
    /// Also forced on by `init_dependenices` when `is_upper` is set.
    dump: bool,
    /// `-D`: like `-d`, but the Chinese numerals are formatted via `ZhNumUpper`.
    is_upper: bool,
    /// `-r`: keep the text surrounding the converted number in the output.
    rem: bool,
    /// `-a`: parse with `hard_number` instead of `number`.
    hard: bool,
    /// `-s COUNT`: number of leading characters to skip before recognising a
    /// number (they are echoed back only when `rem` is set).
    skip_ch: usize,
    /// `-q`: do not report parse failures on stderr.
    quiet: bool,
    /// `-B`: strip a leading byte-order mark from the first line.
    remove_bomb: bool,
}
impl Config {
    /// Builds the option table understood by the program.
    ///
    /// Each row of the macro lists a short flag, a long name (with a `=COUNT`
    /// value placeholder for `--skip-ch`) and the help text shown to the user.
    /// The long names are the keys later queried in `Config::parse`.
    fn options() -> getopts_macro::getopts::Options {
        getopts_macro::getopts_options! {
            -d, --dump              "反向转换, 也就是将ASCII数字转换成中文数字";
            -D, --is-upper          "类似 -d, 但是中文数字是大写";
            -r, --rem               "转换时保留结果之外的文本";
            -a, --hard              "转换硬数字, 如 `千零二三` `一零零十三`";
            -s, --skip-ch=COUNT     "识别时跳过一部分字符, 如果给定了-r则会留在结果中";
            -q, --quiet             "不对解析失败进行报错";
            -B, --remove-bomb       "移除 UTF-8 BOM 标记";
            -v, --version           "Print version";
            -h, --help              "Print help";
        }
    }

    fn parse() -> Result<Self, Error> {
        let desc = "将ASCII数字和中文数字相互转换, 从标准输入";
        let matches = getopts_macro::simple_parse(&Self::options(), desc, 0, "");

        if let Some(first) = matches.free.first() {
            return Err(format!("unexpected argument {first:?} found").into());
        }

        if matches.opt_present("version") {
            let name = env!("CARGO_BIN_NAME");
            let version = env!("CARGO_PKG_VERSION");
            println!("{name} {version}");
            exit(0)
        }

        let skip_ch = matches.opt_get_default("skip-ch", 0).map_err(|e| {
            format!("invalid value for option '-s': {e}")
        })?;
        Ok(Self {
            dump: matches.opt_present("dump"),
            is_upper: matches.opt_present("is-upper"),
            rem: matches.opt_present("rem"),
            hard: matches.opt_present("hard"),
            skip_ch,
            quiet: matches.opt_present("quiet"),
            remove_bomb: matches.opt_present("remove-bomb"),
        })
    }

    /// Selects the routine used to print a [`Number`] as Chinese numerals.
    ///
    /// Returns a writer based on `ZhNumUpper` when `is_upper` is set and one
    /// based on `ZhNum` otherwise.
    fn num_fmt(&self) -> fn(&mut io::StdoutLock, Number) -> io::Result<()> {
        fn lower(out: &mut io::StdoutLock, value: Number) -> io::Result<()> {
            write!(out, "{}", ZhNum(value))
        }

        fn upper(out: &mut io::StdoutLock, value: Number) -> io::Result<()> {
            write!(out, "{}", ZhNumUpper(value))
        }

        if self.is_upper {
            upper
        } else {
            lower
        }
    }

    /// Resolves interactions between options after parsing.
    ///
    /// Warns on stderr when `-a` is combined with `-d` and/or `-D` (it is
    /// ignored in those modes), then makes `-D` imply `-d`.
    fn init_dependenices(mut self) -> Self {
        if self.hard {
            if self.dump {
                eprintln!("警告: 在指定 -d 时 -a 被忽略");
            }
            if self.is_upper {
                eprintln!("警告: 在指定 -D 时 -a 被忽略");
            }
        }

        // Upper-case output is a flavour of dump mode.
        self.dump = self.dump || self.is_upper;
        self
    }
}

/// Line-by-line converter that reads from stdin and writes to stdout.
struct Processor {
    /// Number of the line currently held in `line`; advanced by `fetch_line`.
    lnum: u64,
    /// Whether the most recently read line lacked a trailing line terminator.
    noeol: bool,
    /// Options controlling the conversion.
    cfg: Config,
    /// Reusable buffer holding the current input line, terminator included.
    line: String,
    /// Locked standard input the lines are read from.
    stdin: StdinLock<'static>,
    /// Locked standard output the results are written to.
    stdout: StdoutLock<'static>,
}

impl Default for Processor {
    fn default() -> Self {
        Self {
            lnum: Default::default(),
            noeol: Default::default(),
            cfg: Default::default(),
            line: Default::default(),
            stdin: stdin().lock(),
            stdout: stdout().lock(),
        }
    }
}

impl Processor {
    fn skip_ch_line<'a>(&self, line: &'a str) -> (&'a str, &'a str) {
        line.char_indices()
            .nth(self.cfg.skip_ch)
            .map_or((line, ""), |(i, _)| {
                let s = line[i..].len() - line[i..].trim_start().len();
                line.split_at(i+s)
            })
    }

    /// Reads the next input line into `self.line`.
    ///
    /// Returns `Ok(true)` when the buffer holds data to process and
    /// `Ok(false)` when it is empty after the read (end of input).
    ///
    /// # Errors
    ///
    /// Propagates any error reported while reading from stdin.
    fn fetch_line(&mut self) -> io::Result<bool> {
        // The counter only advances when the previous line was terminated.
        self.lnum += u64::from(!self.noeol);

        self.line.clear();
        self.stdin.read_line(&mut self.line)?;
        self.noeol = get_eol(&self.line).is_empty();

        let on_first_line = self.lnum <= 1;
        if on_first_line && self.cfg.remove_bomb && self.line.starts_with(BOMB) {
            // Drop the byte-order mark, which is the first character.
            self.line.remove(0);
        }

        Ok(!self.line.is_empty())
    }

    fn run_dump_lines(&mut self) -> io::Result<()> {
        while self.fetch_line()? {
            let (prefix, line) = self.skip_ch_line(&self.line);
            let rem_idx = line
                .find(|ch| !char::is_ascii_digit(&ch))
                .unwrap_or(line.len());
            let (part, rem_str) = line.split_at(rem_idx);
            let num = part
                .parse()
                .map(Some)
                .or_else(|e| {
                    if !self.cfg.quiet {
                        writeln!(
                            stderr(),
                            "`{part}` ({}) {}: {e}",
                            rem_str.trim_end(),
                            self.lnum,
                        )?;
                    }
                    io::Result::Ok(None)
                })?;

            if self.cfg.rem {
                write!(self.stdout, "{prefix}")?;
            }
            if let Some(num) = num {
                self.cfg.num_fmt()(&mut self.stdout, num)?;
            } else {
                write!(self.stdout, "{part}")?;
            }
            if self.cfg.rem {
                write!(self.stdout, "{rem_str}")?;
            } else {
                write!(self.stdout, "{}", get_eol(rem_str))?;
            }
        }
        Ok(())
    }

    /// Converts the leading Chinese numeral of every input line to its
    /// numeric form.
    ///
    /// Each line is split with `skip_ch_line`, then parsed with `hard_number`
    /// when `hard` is set and with `number` otherwise. On success the parsed
    /// value is written; on failure nothing is written for the number and,
    /// unless `quiet` is set, a diagnostic of the form
    /// `` `text` line:column expected ... `` goes to stderr. With `rem` set
    /// the prefix and the unparsed remainder are echoed; otherwise only the
    /// remainder's line terminator is written.
    ///
    /// The reported column refers to the original input line: it is the
    /// parser's column plus the number of characters actually held in the
    /// skipped prefix (skipped characters and trimmed whitespace), not merely
    /// the configured `skip_ch`.
    ///
    /// # Errors
    ///
    /// Propagates I/O errors from reading stdin or writing stdout/stderr.
    fn run_make_lines(&mut self) -> io::Result<()> {
        while self.fetch_line()? {
            let (prefix, line) = self.skip_ch_line(&self.line);

            let result = if self.cfg.hard {
                hard_number(line)
            } else {
                number(line)
            };

            let (n, rem_str) = match result {
                Ok((n, rest)) => (Some(n), rest),
                Err(e) => {
                    if !self.cfg.quiet {
                        // Characters preceding the parsed text on this line; a
                        // terminator that ended up in the prefix is not counted.
                        let skipped = prefix
                            .strip_suffix(get_eol(prefix))
                            .unwrap_or(prefix)
                            .chars()
                            .count();
                        writeln!(
                            stderr(),
                            "`{}` {}:{} expected {}",
                            line.trim_end(),
                            self.lnum,
                            e.location.column + skipped,
                            e.expected,
                        )?;
                    }
                    // Nothing was consumed, so the whole text is the remainder.
                    (None, line)
                }
            };

            if self.cfg.rem {
                write!(self.stdout, "{prefix}")?;
            }
            if let Some(n) = n {
                write!(self.stdout, "{n}")?;
            }
            if self.cfg.rem {
                write!(self.stdout, "{rem_str}")?;
            } else {
                write!(self.stdout, "{}", get_eol(rem_str))?;
            }
        }
        Ok(())
    }

    /// Processes all of stdin in the direction selected by `cfg.dump`:
    /// ASCII digits to Chinese numerals when set, the reverse otherwise.
    ///
    /// # Errors
    ///
    /// Propagates any I/O error raised by the selected conversion loop.
    fn run_lines(&mut self) -> io::Result<()> {
        if !self.cfg.dump {
            return self.run_make_lines();
        }
        self.run_dump_lines()
    }
}

/// Program entry point.
///
/// Exits with status 2 when the command line is invalid and with status 1
/// when an I/O error interrupts processing.
fn main() {
    let cfg = match Config::parse() {
        Ok(parsed) => parsed.init_dependenices(),
        Err(e) => {
            eprintln!("error: {e}");
            exit(2)
        }
    };

    let mut processor = Processor {
        cfg,
        ..Default::default()
    };

    if let Err(e) = processor.run_lines() {
        eprintln!("Error at line {}: {e}", processor.lnum);
        exit(1);
    }
}