varurl/
lib.rs

1#![feature(let_chains)]
2
3use daachorse::{CharwiseDoubleArrayAhoCorasick, CharwiseDoubleArrayAhoCorasickBuilder, MatchKind};
4use mdli::{Kind, MdLi};
5
6#[derive(Clone)]
7pub struct VarUrl {
8  pub ac: Option<CharwiseDoubleArrayAhoCorasick<usize>>,
9}
10
11impl VarUrl {
12  pub fn new<I, S: AsRef<str>>(prefix_li: I) -> Self
13  where
14    I: IntoIterator<Item = S>,
15  {
16    match CharwiseDoubleArrayAhoCorasickBuilder::new()
17      .match_kind(MatchKind::LeftmostLongest)
18      .build(prefix_li.into_iter().filter(|i| !i.as_ref().is_empty()))
19    {
20      Ok(ac) => VarUrl { ac: Some(ac) },
21      Err(err) => {
22        match err {
23          daachorse::errors::DaachorseError::InvalidArgument(_) => {}
24          _ => {
25            tracing::error!("{}", err);
26          }
27        };
28        VarUrl { ac: None }
29      }
30    }
31  }
32
33  fn find_end<'a>(&self, before: &'a str, after: &'a str) -> Option<(usize, &'a str)> {
34    if before.ends_with("src=\"") || before.ends_with("href=\"") {
35      if let Some(url_end) = after.find('"') {
36        return Some((url_end, &after[..url_end]));
37      }
38    }
39    None
40  }
41
42  // from_lang: &str, to_lang: &str
43  pub fn replace(
44    &self,
45    mdli: &mut MdLi,
46    from_to: impl Fn(usize) -> Option<(&'static str, &'static str)>,
47  ) {
48    if let Some(ac) = &self.ac {
49      // let from_lang = format!("/{from_lang}/");
50      // let to_lang = format!("/{to_lang}/");
51
52      // 就地修改每个 Md 元素
53      for i in 0..mdli.li.len() {
54        if ![Kind::Img, Kind::Url, Kind::HtmOpen].contains(&mdli.li[i].kind) {
55          continue;
56        }
57
58        let md = &mdli.li[i].str;
59        let mut pre_pos = 0;
60        let mut new_str = String::new();
61        let mut last_end = 0; // 记录上一次匹配的结束位置
62
63        for m in ac.leftmost_find_iter(md) {
64          if let Some((from_lang, to_lang)) = from_to(m.value()) {
65            let start = m.start();
66            let end = m.end() - 1;
67            let val = &md[start..end];
68
69            if start == 0 {
70              new_str += val;
71              new_str += &md[end..].replace(from_lang, to_lang);
72              pre_pos = md.len();
73              continue;
74            }
75
76            // 如果当前开始位置小于上一次的结束位置,跳过这次匹配
77            if start < last_end {
78              continue;
79            }
80
81            let before = &md[..start];
82            let after = &md[end..];
83
84            if let Some((url_end, url_part)) = self.find_end(before, after) {
85              if url_part.contains(from_lang) {
86                let new_url = format!("{}{}", val, url_part.replace(from_lang, to_lang));
87
88                // 添加前面的文本和新URL
89                new_str.push_str(&md[pre_pos..start]);
90                new_str.push_str(&new_url);
91                pre_pos = end + url_end;
92                last_end = pre_pos; // 更新上一次的结束位置
93                continue;
94              }
95            }
96
97            // 添加未匹配的部分
98            new_str.push_str(&md[pre_pos..end]);
99            pre_pos = end;
100            last_end = end; // 更新上一次的结束位置
101          }
102        }
103
104        // 添加剩余的文本
105        if pre_pos < md.len() {
106          new_str.push_str(&md[pre_pos..]);
107        }
108
109        // 只有当文本有变化时才替换
110        if new_str != *md {
111          mdli.li[i].str = new_str;
112        }
113      }
114    }
115  }
116}