gpt_sovits/text/
num.rs

1use {
2    crate::{error::GSVError, text::Lang},
3    pest::Parser,
4};
5
6#[derive(pest_derive::Parser)]
7#[grammar = "src/text/rule.pest"]
8pub struct ExprParser;
9
10pub mod zh {
11    use super::*;
12    use pest::iterators::Pair;
13
14    fn parse_pn(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
15        assert_eq!(pair.as_rule(), Rule::pn);
16        match pair.as_str() {
17            "+" => dst_string.push_str("加"),
18            "-" => dst_string.push_str("减"),
19            "*" | "×" => dst_string.push_str("乘"),
20            "/" | "÷" => dst_string.push_str("除以"),
21            "=" => dst_string.push_str("等于"),
22            _ => return Err(GSVError::UnknownOperator(pair.as_str().to_owned())),
23        }
24        Ok(())
25    }
26
27    fn parse_flag(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
28        assert_eq!(pair.as_rule(), Rule::flag);
29        match pair.as_str() {
30            "+" => dst_string.push_str("正"),
31            "-" => dst_string.push_str("负"),
32            _ => return Err(GSVError::UnknownFlag(pair.as_str().to_owned())),
33        }
34        Ok(())
35    }
36
37    fn parse_percent(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
38        assert_eq!(pair.as_rule(), Rule::percent);
39        dst_string.push_str("百分之");
40        let inner = pair.into_inner();
41        for pair in inner {
42            match pair.as_rule() {
43                Rule::decimals => parse_decimals(pair, dst_string)?,
44                Rule::integer => parse_integer(pair, dst_string, true)?,
45                _ => return Err(GSVError::UnknownRuleInPercent(pair.as_str().to_owned())),
46            }
47        }
48        Ok(())
49    }
50
51    static UNITS: [&str; 4] = ["", "十", "百", "千"];
52    static BASE_UNITS: [&str; 4] = ["", "万", "亿", "万"];
53
54    fn parse_integer(
55        pair: Pair<Rule>,
56        dst_string: &mut String,
57        unit: bool,
58    ) -> Result<(), GSVError> {
59        assert_eq!(pair.as_rule(), Rule::integer);
60
61        let digits: Vec<_> = pair.into_inner().collect(); // Remove .rev() to process left-to-right
62        let mut result = String::new();
63        let mut has_non_zero = false;
64
65        for (i, pair) in digits.iter().enumerate() {
66            let txt = match pair.as_str() {
67                "0" => "零",
68                "1" => "一",
69                "2" => "二",
70                "3" => "三",
71                "4" => "四",
72                "5" => "五",
73                "6" => "六",
74                "7" => "七",
75                "8" => "八",
76                "9" => "九",
77                _ => return Err(GSVError::UnknownDigit(pair.as_str().to_owned())),
78            };
79            // Calculate the position from most significant digit
80            let pos = digits.len() - 1 - i;
81            let u = if pos % 4 != 0 {
82                UNITS[pos % 4]
83            } else {
84                BASE_UNITS[(pos / 4) % 4]
85            };
86
87            if txt != "零" {
88                has_non_zero = true;
89                // Skip "一" for tens place (pos == 1) when the digit is 1
90                if !(pos == 1 && txt == "一") {
91                    result.push_str(txt);
92                }
93                if unit {
94                    result.push_str(u);
95                }
96            } else if has_non_zero && unit && pos > 0 {
97                result.push_str(txt);
98            }
99        }
100
101        if result.is_empty() {
102            dst_string.push_str("零");
103        } else {
104            if result.ends_with("零") {
105                result.truncate(result.len() - "零".len());
106            }
107            dst_string.push_str(&result);
108        }
109
110        Ok(())
111    }
112
113    fn parse_decimals(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
114        assert_eq!(pair.as_rule(), Rule::decimals);
115
116        let mut inner = pair.into_inner().rev();
117        let f_part = inner.next().unwrap();
118        if let Some(i_part) = inner.next() {
119            parse_integer(i_part, dst_string, true)?;
120        } else {
121            dst_string.push_str("零");
122        }
123        dst_string.push_str("点");
124        parse_integer(f_part, dst_string, false)?;
125
126        Ok(())
127    }
128
129    fn parse_fractional(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
130        assert_eq!(pair.as_rule(), Rule::fractional);
131
132        let mut inner = pair.into_inner();
133        let numerator = inner.next().unwrap();
134        let denominator = inner.next().unwrap();
135        parse_integer(denominator, dst_string, true)?;
136        dst_string.push_str("分之");
137        parse_integer(numerator, dst_string, true)?;
138        Ok(())
139    }
140
141    fn parse_num(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
142        assert_eq!(pair.as_rule(), Rule::num);
143
144        let inner = pair.into_inner();
145        for pair in inner {
146            match pair.as_rule() {
147                Rule::flag => parse_flag(pair, dst_string)?,
148                Rule::percent => parse_percent(pair, dst_string)?,
149                Rule::decimals => parse_decimals(pair, dst_string)?,
150                Rule::fractional => parse_fractional(pair, dst_string)?,
151                Rule::integer => parse_integer(pair, dst_string, true)?,
152                _ => return Err(GSVError::UnknownRuleInNum(pair.as_str().to_owned())),
153            }
154        }
155        Ok(())
156    }
157
158    fn parse_signs(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
159        assert_eq!(pair.as_rule(), Rule::signs);
160
161        let inner = pair.into_inner();
162        for pair in inner {
163            log::debug!("{:?}", pair);
164            match pair.as_rule() {
165                Rule::num => parse_num(pair, dst_string)?,
166                Rule::pn => parse_pn(pair, dst_string)?,
167                Rule::word => {
168                    log::warn!("word: {:?}", pair.as_str());
169                }
170                _ => return Err(GSVError::UnknownRuleInSigns(pair.as_str().to_owned())),
171            }
172        }
173        Ok(())
174    }
175
176    fn parse_link(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
177        assert_eq!(pair.as_rule(), Rule::link);
178        if pair.as_str() == "-" {
179            dst_string.push_str("杠");
180        }
181        Ok(())
182    }
183
184    fn parse_word(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
185        assert_eq!(pair.as_rule(), Rule::word);
186        let inner = pair.into_inner();
187        for pair in inner {
188            match pair.as_rule() {
189                Rule::digit => {
190                    let txt = match pair.as_str() {
191                        "0" => "零",
192                        "1" => "一",
193                        "2" => "二",
194                        "3" => "三",
195                        "4" => "四",
196                        "5" => "五",
197                        "6" => "六",
198                        "7" => "七",
199                        "8" => "八",
200                        "9" => "九",
201                        _ => return Err(GSVError::UnknownDigit(pair.as_str().to_owned())),
202                    };
203                    dst_string.push_str(txt);
204                }
205                Rule::alpha => {
206                    dst_string.push_str(pair.as_str());
207                }
208                Rule::greek => {
209                    let txt = match pair.as_str() {
210                        "α" | "Α" => "阿尔法",
211                        "β" | "Β" => "贝塔",
212                        "γ" | "Γ" => "伽马",
213                        "δ" | "Δ" => "德尔塔",
214                        "ε" | "Ε" => "艾普西龙",
215                        "ζ" | "Ζ" => "泽塔",
216                        "η" | "Η" => "艾塔",
217                        "θ" | "Θ" => "西塔",
218                        "ι" | "Ι" => "约塔",
219                        "κ" | "Κ" => "卡帕",
220                        "λ" | "Λ" => "兰姆达",
221                        "μ" | "Μ" => "缪",
222                        "ν" | "Ν" => "纽",
223                        "ξ" | "Ξ" => "克西",
224                        "ο" | "Ο" => "欧米克戈",
225                        "π" | "Π" => "派",
226                        "ρ" | "Ρ" => "罗",
227                        "σ" | "Σ" => "西格玛",
228                        "τ" | "Τ" => "套",
229                        "υ" | "Υ" => "宇普西龙",
230                        "φ" | "Φ" => "斐",
231                        "χ" | "Χ" => "希",
232                        "ψ" | "Ψ" => "普西",
233                        "ω" | "Ω" => "欧米伽",
234                        _ => return Err(GSVError::UnknownGreekLetter(pair.as_str().to_owned())),
235                    };
236                    dst_string.push_str(txt);
237                }
238                _ => return Err(GSVError::UnknownRuleWord(pair.as_str().to_owned())),
239            }
240        }
241        Ok(())
242    }
243
244    fn parse_ident(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
245        assert_eq!(pair.as_rule(), Rule::ident);
246        let inner = pair.into_inner();
247        for pair in inner {
248            match pair.as_rule() {
249                Rule::word => parse_word(pair, dst_string)?,
250                Rule::link => parse_link(pair, dst_string)?,
251                _ => return Err(GSVError::UnknownRuleIdent(pair.as_str().to_owned())),
252            }
253        }
254        Ok(())
255    }
256
257    pub fn parse_all(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
258        assert_eq!(pair.as_rule(), Rule::all);
259        let inner = pair.into_inner();
260        for pair in inner {
261            match pair.as_rule() {
262                Rule::signs => parse_signs(pair, dst_string)?,
263                Rule::ident => parse_ident(pair, dst_string)?,
264                _ => return Err(GSVError::UnknownRuleAll(pair.as_str().to_owned())),
265            }
266        }
267        Ok(())
268    }
269}
270
271pub mod en {
272    use {super::*, crate::error::GSVError, pest::iterators::Pair};
273
274    const SEPARATOR: &str = " ";
275
276    fn parse_pn(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
277        assert_eq!(pair.as_rule(), Rule::pn);
278        if !dst_string.is_empty() {
279            dst_string.push_str(SEPARATOR);
280        }
281        match pair.as_str() {
282            "+" => dst_string.push_str("plus"),
283            "-" => dst_string.push_str("minus"),
284            "*" | "×" => dst_string.push_str("times"),
285            "/" | "÷" => {
286                dst_string.push_str("divided by");
287            }
288            "=" => dst_string.push_str("is"),
289            _ => return Err(GSVError::UnknownOperator(pair.as_str().to_owned())),
290        }
291        Ok(())
292    }
293
294    fn parse_flag(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
295        assert_eq!(pair.as_rule(), Rule::flag);
296        if !dst_string.is_empty() {
297            dst_string.push_str(SEPARATOR);
298        }
299        match pair.as_str() {
300            "-" => dst_string.push_str("negative"),
301            _ => return Err(GSVError::UnknownFlag(pair.as_str().to_owned())),
302        }
303        Ok(())
304    }
305
306    fn parse_percent(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
307        assert_eq!(pair.as_rule(), Rule::percent);
308        let inner = pair.into_inner();
309        for pair in inner {
310            match pair.as_rule() {
311                Rule::decimals => parse_decimals(pair, dst_string)?,
312                Rule::integer => parse_integer(pair, dst_string, true)?,
313                _ => return Err(GSVError::UnknownRuleInPercent(pair.as_str().to_owned())),
314            }
315        }
316        if !dst_string.is_empty() {
317            dst_string.push_str(SEPARATOR);
318        }
319        dst_string.push_str("percent");
320        Ok(())
321    }
322
323    fn parse_integer(
324        pair: Pair<Rule>,
325        dst_string: &mut String,
326        unit: bool,
327    ) -> Result<(), GSVError> {
328        assert_eq!(pair.as_rule(), Rule::integer);
329        if !dst_string.is_empty() {
330            dst_string.push_str(SEPARATOR);
331        }
332
333        // Note: Replace with proper num2en::str_to_words if available
334        let digits: Vec<_> = pair.into_inner().collect();
335        for pair in digits {
336            let txt = match pair.as_str() {
337                "0" => "zero",
338                "1" => "one",
339                "2" => "two",
340                "3" => "three",
341                "4" => "four",
342                "5" => "five",
343                "6" => "six",
344                "7" => "seven",
345                "8" => "eight",
346                "9" => "nine",
347                _ => return Err(GSVError::UnknownDigit(pair.as_str().to_owned())),
348            };
349            dst_string.push_str(txt);
350            if unit && !dst_string.is_empty() {
351                dst_string.push_str(SEPARATOR);
352            }
353        }
354        Ok(())
355    }
356
357    fn parse_decimals(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
358        assert_eq!(pair.as_rule(), Rule::decimals);
359        if !dst_string.is_empty() {
360            dst_string.push_str(SEPARATOR);
361        }
362
363        let mut inner = pair.into_inner().rev();
364        let f_part = inner.next().unwrap();
365        if let Some(i_part) = inner.next() {
366            parse_integer(i_part, dst_string, true)?;
367        } else {
368            dst_string.push_str("zero");
369        }
370        if !dst_string.is_empty() {
371            dst_string.push_str(SEPARATOR);
372        }
373        dst_string.push_str("point");
374        if !dst_string.is_empty() {
375            dst_string.push_str(SEPARATOR);
376        }
377        parse_integer(f_part, dst_string, false)?;
378        Ok(())
379    }
380
381    fn parse_fractional(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
382        assert_eq!(pair.as_rule(), Rule::fractional);
383        let mut inner = pair.into_inner();
384        let numerator = inner.next().unwrap();
385        let denominator = inner.next().unwrap();
386        parse_integer(numerator, dst_string, true)?;
387        if !dst_string.is_empty() {
388            dst_string.push_str(SEPARATOR);
389        }
390        dst_string.push_str("over");
391        if !dst_string.is_empty() {
392            dst_string.push_str(SEPARATOR);
393        }
394        parse_integer(denominator, dst_string, true)?;
395        Ok(())
396    }
397
398    fn parse_num(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
399        assert_eq!(pair.as_rule(), Rule::num);
400        let inner = pair.into_inner();
401        for pair in inner {
402            match pair.as_rule() {
403                Rule::flag => parse_flag(pair, dst_string)?,
404                Rule::percent => parse_percent(pair, dst_string)?,
405                Rule::decimals => parse_decimals(pair, dst_string)?,
406                Rule::fractional => parse_fractional(pair, dst_string)?,
407                Rule::integer => parse_integer(pair, dst_string, true)?,
408                _ => return Err(GSVError::UnknownRuleInNum(pair.as_str().to_owned())),
409            }
410        }
411        Ok(())
412    }
413
414    fn parse_signs(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
415        assert_eq!(pair.as_rule(), Rule::signs);
416        let inner = pair.into_inner();
417        for pair in inner {
418            match pair.as_rule() {
419                Rule::num => parse_num(pair, dst_string)?,
420                Rule::pn => parse_pn(pair, dst_string)?,
421                Rule::word => {}
422                _ => return Err(GSVError::UnknownRuleInSigns(pair.as_str().to_owned())),
423            }
424        }
425        Ok(())
426    }
427
428    fn parse_link(pair: Pair<Rule>) -> Result<(), GSVError> {
429        assert_eq!(pair.as_rule(), Rule::link);
430        Ok(())
431    }
432
433    fn parse_word(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
434        assert_eq!(pair.as_rule(), Rule::word);
435        let inner = pair.into_inner();
436        for pair in inner {
437            match pair.as_rule() {
438                Rule::digit => {
439                    let txt = match pair.as_str() {
440                        "0" => "zero",
441                        "1" => "one",
442                        "2" => "two",
443                        "3" => "three",
444                        "4" => "four",
445                        "5" => "five",
446                        "6" => "six",
447                        "7" => "seven",
448                        "8" => "eight",
449                        "9" => "nine",
450                        _ => return Err(GSVError::UnknownDigit(pair.as_str().to_owned())),
451                    };
452                    if !dst_string.is_empty() {
453                        dst_string.push_str(SEPARATOR);
454                    }
455                    dst_string.push_str(txt);
456                }
457                Rule::alpha => {
458                    if !dst_string.is_empty() {
459                        dst_string.push_str(SEPARATOR);
460                    }
461                    dst_string.push_str(pair.as_str());
462                }
463                Rule::greek => {
464                    let txt = match pair.as_str() {
465                        "α" | "Α" => "alpha",
466                        "β" | "Β" => "beta",
467                        "γ" | "Γ" => "gamma",
468                        "δ" | "Δ" => "delta",
469                        "ε" | "Ε" => "epsilon",
470                        "ζ" | "Ζ" => "zeta",
471                        "η" | "Η" => "eta",
472                        "θ" | "Θ" => "theta",
473                        "ι" | "Ι" => "iota",
474                        "κ" | "Κ" => "kappa",
475                        "λ" | "Λ" => "lambda",
476                        "μ" | "Μ" => "mu",
477                        "ν" | "Ν" => "nu",
478                        "ξ" | "Ξ" => "xi",
479                        "ο" | "Ο" => "omicron",
480                        "π" | "Π" => "pi",
481                        "ρ" | "Ρ" => "rho",
482                        "σ" | "Σ" => "sigma",
483                        "τ" | "Τ" => "tau",
484                        "υ" | "Υ" => "upsilon",
485                        "φ" | "Φ" => "phi",
486                        "χ" | "Χ" => "chi",
487                        "ψ" | "Ψ" => "psi",
488                        "ω" | "Ω" => "omega",
489                        _ => return Err(GSVError::UnknownGreekLetter(pair.as_str().to_owned())),
490                    };
491                    if !dst_string.is_empty() {
492                        dst_string.push_str(SEPARATOR);
493                    }
494                    dst_string.push_str(txt);
495                }
496                _ => return Err(GSVError::UnknownRuleWord(pair.as_str().to_owned())),
497            }
498        }
499        Ok(())
500    }
501
502    fn parse_ident(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
503        assert_eq!(pair.as_rule(), Rule::ident);
504        let inner = pair.into_inner();
505        for pair in inner {
506            match pair.as_rule() {
507                Rule::word => parse_word(pair, dst_string)?,
508                Rule::link => parse_link(pair)?,
509                _ => return Err(GSVError::UnknownRuleIdent(pair.as_str().to_owned())),
510            }
511        }
512        Ok(())
513    }
514
515    pub fn parse_all(pair: Pair<Rule>, dst_string: &mut String) -> Result<(), GSVError> {
516        assert_eq!(pair.as_rule(), Rule::all);
517        let inner = pair.into_inner();
518        for pair in inner {
519            match pair.as_rule() {
520                Rule::signs => parse_signs(pair, dst_string)?,
521                Rule::ident => parse_ident(pair, dst_string)?,
522                _ => return Err(GSVError::UnknownRuleAll(pair.as_str().to_owned())),
523            }
524        }
525
526        Ok(())
527    }
528}
529
530#[derive(Debug)]
531pub struct NumSentence {
532    pub text: String,
533    pub lang: Lang,
534}
535
536static NUM_OP: [char; 8] = ['+', '-', '*', '×', '/', '÷', '=', '%'];
537
538impl NumSentence {
539    pub fn need_drop(&self) -> bool {
540        let num_text = self.text.trim();
541        num_text.is_empty() || num_text.chars().all(|c| NUM_OP.contains(&c))
542    }
543
544    pub fn is_link_symbol(&self) -> bool {
545        self.text == "-"
546    }
547
548    pub fn to_lang_text(&self) -> Result<String, GSVError> {
549        let mut dst_string = String::new();
550        let pairs = ExprParser::parse(Rule::all, &self.text)?;
551        for pair in pairs {
552            match self.lang {
553                Lang::Zh => zh::parse_all(pair, &mut dst_string)?,
554                Lang::En => en::parse_all(pair, &mut dst_string)?,
555            }
556        }
557        Ok(dst_string.trim().to_string())
558    }
559}
560
561pub fn is_numeric(p: &str) -> bool {
562    p.chars().any(|c| c.is_numeric())
563        || p.contains(&NUM_OP)
564        || p.to_lowercase().contains(&[
565            'α', 'β', 'γ', 'δ', 'ε', 'ζ', 'η', 'θ', 'ι', 'κ', 'λ', 'μ', 'ν', 'ξ', 'ο', 'π', 'ρ',
566            'σ', 'ς', 'τ', 'υ', 'φ', 'χ', 'ψ', 'ω',
567        ])
568}