#[allow(warnings)]
#[cfg(feature = "ctf")]
pub mod ctf {
pub fn convert_to_pinyin_with_non_chinese(input: &str) -> String {
use crate::*;
use pinyin::ToPinyin;
input
.split_to_vec(" ")
.iter()
.map(|s| {
if contains_chinese(s) {
s.as_str()
.to_pinyin()
.map(|s| s.unwrap().plain())
.collect::<Vec<_>>()
.join("_")
} else {
s.to_string()
}
})
.collect::<Vec<String>>()
.join("_")
}
/// Returns `true` when `text` contains at least one character in the Unicode `Han` script.
///
/// The regular expression is compiled once and reused by later calls, because this function
/// is invoked once per word by the pinyin conversion in this module.
pub fn contains_chinese(text: &str) -> bool {
    use regex::Regex;
    static HAN_PATTERN: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
    HAN_PATTERN
        .get_or_init(|| Regex::new(r"[\p{Han}]").expect("static Han pattern is a valid regex"))
        .is_match(text)
}
/// Parses `html` as a document and extracts data from every element matching `selectors`.
///
/// * With `attr == Some(name)`, returns the value of attribute `name` for each matching
///   element that has it; matching elements without the attribute are skipped.
/// * With `attr == None`, returns the HTML serialization of each matching element.
///
/// # Panics
///
/// Panics with `"Failed to parse selector"` when `selectors` is not a valid selector.
pub fn parse_html<T: ToString>(html: T, selectors: T, attr: Option<T>) -> Vec<String> {
    use scraper::{Html, Selector};
    let markup = html.to_string();
    let css = selectors.to_string();
    let dom = Html::parse_document(&markup);
    let selector = Selector::parse(&css).expect("Failed to parse selector");
    match attr {
        Some(name) => {
            let name = name.to_string();
            dom.select(&selector)
                .filter_map(|node| node.value().attr(&name).map(|value| value.to_string()))
                .collect()
        }
        None => dom.select(&selector).map(|node| node.html()).collect(),
    }
}
/// Parses `html` as a document and returns the text of every element matching `selectors`.
///
/// For each matching element, the string pieces yielded by its `text()` iterator are
/// trimmed individually and joined with a tab character. Despite the function's name, the
/// result is built from text pieces, not from HTML markup.
///
/// # Panics
///
/// Panics with `"Failed to parse selector"` when `selectors` is not a valid selector.
pub fn parse_html_get_innner_html<T: ToString>(html: T, selectors: T) -> Vec<String> {
    use scraper::{Html, Selector};
    let markup = html.to_string();
    let css = selectors.to_string();
    let dom = Html::parse_document(&markup);
    let selector = Selector::parse(&css).expect("Failed to parse selector");
    dom.select(&selector)
        .map(|node| {
            let pieces: Vec<String> = node.text().map(|piece| piece.trim().to_string()).collect();
            pieces.join("\t")
        })
        .collect()
}
/// Returns every `http://` or `https://` URL in `text` that ends in `.png` or `.PNG`.
///
/// A URL is a run of characters containing no whitespace, `"` or `'`; matches are returned
/// in the order they occur.
pub fn find_png_urls<T: ToString>(text: T) -> Vec<String> {
    use regex::Regex;
    let haystack = text.to_string();
    let pattern =
        Regex::new(r#"https?://[^\s\"']+\.(?:png|PNG)"#).expect("Failed to compile regex");
    let mut urls = Vec::new();
    for hit in pattern.find_iter(&haystack) {
        urls.push(hit.as_str().to_owned());
    }
    urls
}
/// Returns every `http://` or `https://` URL found in `text`, in order of occurrence.
///
/// A URL extends until the first whitespace character, `"` or `'`.
pub fn find_urls<T: ToString>(text: T) -> Vec<String> {
    use regex::Regex;
    let haystack = text.to_string();
    let pattern = Regex::new(r#"https?://[^\s"']+"#).expect("Failed to compile regex");
    let mut urls = Vec::new();
    for hit in pattern.find_iter(&haystack) {
        urls.push(hit.as_str().to_owned());
    }
    urls
}
/// Encodes the bytes of `data` as a hexadecimal string via `hex::encode`.
pub fn hex_encode<T: AsRef<[u8]>>(data: T) -> String {
    let bytes: &[u8] = data.as_ref();
    hex::encode(bytes)
}
/// Decodes a hexadecimal string into raw bytes via `hex::decode`.
///
/// # Errors
///
/// Returns the error reported by `hex::decode` (boxed) when `data` is not valid hex,
/// instead of panicking.
pub fn hex_decode<T: AsRef<[u8]>>(data: T) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    // `?` boxes the decode error; the previous `unwrap()` made the `Result` return type a lie.
    Ok(hex::decode(data)?)
}
/// Encodes `s` with the standard base64 alphabet, omitting trailing `=` padding.
pub fn base64_encode<T: AsRef<[u8]>>(s: T) -> String {
    use base64::engine::general_purpose::STANDARD_NO_PAD;
    use base64::Engine as _;
    let bytes: &[u8] = s.as_ref();
    STANDARD_NO_PAD.encode(bytes)
}
/// Decodes standard-alphabet base64, accepting input both with and without `=` padding.
///
/// Unpadded input (as produced by `base64_encode`) decodes exactly as before. Canonically
/// padded input, which the strict no-padding engine rejected, is now accepted too.
///
/// # Panics
///
/// Panics when `s` is not valid base64 (invalid characters, bad length, ...). The return
/// type leaves no way to report the error to the caller.
pub fn base64_decode<T: AsRef<[u8]>>(s: T) -> Vec<u8> {
    use base64::alphabet;
    use base64::engine::general_purpose::{GeneralPurpose, GeneralPurposeConfig};
    use base64::engine::DecodePaddingMode;
    use base64::Engine as _;
    // `Indifferent` allows canonical padding as well as none, so ordinary padded base64
    // no longer triggers a panic.
    let config =
        GeneralPurposeConfig::new().with_decode_padding_mode(DecodePaddingMode::Indifferent);
    let engine = GeneralPurpose::new(&alphabet::STANDARD, config);
    engine.decode(s).expect("input is not valid base64")
}
pub fn url_encode(url: &str) -> String {
use percent_encoding::{percent_decode_str, utf8_percent_encode};
let encoded = utf8_percent_encode(url, percent_encoding::NON_ALPHANUMERIC);
encoded.to_string()
}
/// Decodes percent-escapes in `url` and returns the result as a string.
///
/// Decoded byte sequences that are not valid UTF-8 (for example `"%FF"`) are replaced with
/// U+FFFD REPLACEMENT CHARACTER instead of causing a panic. Input that decodes to valid
/// UTF-8 is returned unchanged from the previous behavior.
pub fn url_decode(url: &str) -> String {
    use percent_encoding::percent_decode_str;
    percent_decode_str(url).decode_utf8_lossy().into_owned()
}
/// Decodes a string of `\u`-prefixed hexadecimal escapes (e.g. `\u4f60\u597d`) into text.
///
/// The input is split on the two-character sequence `\u`; empty pieces are ignored and each
/// remaining piece is parsed as a hexadecimal number. A UTF-16 high surrogate immediately
/// followed by a low surrogate (e.g. `\ud83d\ude00`) is combined into the single character
/// it encodes. Any other value that is not a valid Unicode scalar value (such as a lone
/// surrogate) becomes U+FFFD REPLACEMENT CHARACTER.
///
/// Returns `None` if any piece is not a valid hexadecimal `u32`. Empty input yields
/// `Some(String::new())`.
pub fn unicode_to_chinese(unicode_str: impl ToString) -> Option<String> {
    let values = unicode_str
        .to_string()
        .split("\\u")
        .filter(|segment| !segment.is_empty())
        .map(|segment| u32::from_str_radix(segment, 16))
        .collect::<Result<Vec<u32>, _>>()
        .ok()?;

    let mut decoded = String::with_capacity(values.len());
    let mut pending = values.iter().copied().peekable();
    // `while let` rather than `for`: a surrogate pair consumes two values in one step.
    while let Some(value) = pending.next() {
        let scalar = match (value, pending.peek().copied()) {
            (0xD800..=0xDBFF, Some(low @ 0xDC00..=0xDFFF)) => {
                pending.next();
                0x10000 + ((value - 0xD800) << 10) + (low - 0xDC00)
            }
            _ => value,
        };
        decoded.push(char::from_u32(scalar).unwrap_or('\u{FFFD}'));
    }
    Some(decoded)
}
/// Escapes every character of `chinese_str` as `\u` followed by its code point in
/// lowercase hexadecimal without leading zeros (e.g. `"你好"` becomes `\u4f60\u597d`).
///
/// All characters are escaped, including ASCII. The result is always `Some`.
pub fn chinese_to_unicode(chinese_str: &str) -> Option<String> {
    let escaped: String = chinese_str
        .chars()
        .map(|ch| format!("\\u{:x}", u32::from(ch)))
        .collect();
    Some(escaped)
}
}
#[cfg(feature = "ctf")]
pub use ctf::*;