/// Recursively searches a JSON value for its longest string.
///
/// Only strings whose length is at least `min_len` are considered. Length is
/// measured with `String::len`, i.e. in bytes, not in characters. Objects and
/// arrays are searched recursively; numbers, booleans and `null` never match.
///
/// When several candidates share the maximum length, the one encountered last
/// in iteration order is returned (the behavior of `Iterator::max_by_key`).
///
/// Returns `None` when no string in the tree satisfies `min_len`.
pub fn find_longest_string(value: &serde_json::Value, min_len: usize) -> Option<String> {
    /// Borrowing worker: walks the tree without allocating so that only the
    /// final winner has to be cloned by the public entry point.
    fn longest<'a>(value: &'a serde_json::Value, min_len: usize) -> Option<&'a str> {
        match value {
            serde_json::Value::String(s) => {
                if s.len() >= min_len {
                    Some(s.as_str())
                } else {
                    None
                }
            }
            serde_json::Value::Object(map) => map
                .values()
                .filter_map(|v| longest(v, min_len))
                .max_by_key(|s| s.len()),
            serde_json::Value::Array(arr) => arr
                .iter()
                .filter_map(|v| longest(v, min_len))
                .max_by_key(|s| s.len()),
            _ => None,
        }
    }

    // Clone exactly once, and only if a candidate was found.
    longest(value, min_len).map(str::to_owned)
}
/// Looks up the first string stored under `key` anywhere inside a JSON value.
///
/// The search is depth-first. At each object, a string value stored directly
/// under `key` wins over anything nested deeper; if `key` is absent or maps to
/// a non-string value, the object's values are searched in iteration order.
/// Arrays are searched element by element. Scalars never match.
///
/// Returns a clone of the first matching string, or `None` if there is none.
pub fn find_content_by_key(value: &serde_json::Value, key: &str) -> Option<String> {
    match value {
        serde_json::Value::Object(fields) => match fields.get(key) {
            // Direct hit on this object takes priority over nested matches.
            Some(serde_json::Value::String(direct)) => Some(direct.clone()),
            _ => fields
                .values()
                .find_map(|child| find_content_by_key(child, key)),
        },
        serde_json::Value::Array(items) => items
            .iter()
            .find_map(|element| find_content_by_key(element, key)),
        _ => None,
    }
}
/// Renders extracted content as plain text or Markdown.
///
/// If `content` contains both a `<` and a `>` character it is treated as HTML:
/// it is converted with `html2md::parse_html`, every resulting line is
/// trimmed, blank lines are dropped, and the remainder is joined with `\n`.
/// Otherwise the input is returned unchanged as an owned `String`.
pub(super) fn render_spa_content(content: &str) -> String {
    let has_angle_brackets = content.contains('<') && content.contains('>');
    if !has_angle_brackets {
        return content.to_string();
    }

    let markdown = html2md::parse_html(content);
    let kept_lines: Vec<&str> = markdown
        .lines()
        .map(|line| line.trim())
        .filter(|line| !line.is_empty())
        .collect();
    kept_lines.join("\n")
}
/// Appends every "text-like" string found in a JSON value to `texts`.
///
/// The tree is walked recursively through objects and arrays. A string is
/// pushed (as a clone) only when all of the following hold:
/// - its byte length is at least `MIN_TEXT_LEN` (50);
/// - it does not start with `"http"` or `"urn:"`;
/// - it is not made up solely of ASCII hex digits and `-` characters
///   (presumably IDs or hashes; verify against callers).
///
/// Non-string scalars are ignored.
pub(super) fn collect_text_from_json(value: &serde_json::Value, texts: &mut Vec<String>) {
    const MIN_TEXT_LEN: usize = 50;

    /// True when the string consists only of ASCII hex digits and dashes.
    fn only_hex_and_dashes(text: &str) -> bool {
        text.chars().all(|ch| ch == '-' || ch.is_ascii_hexdigit())
    }

    match value {
        serde_json::Value::String(text) => {
            let too_short = text.len() < MIN_TEXT_LEN;
            let is_reference = text.starts_with("http") || text.starts_with("urn:");
            if too_short || is_reference || only_hex_and_dashes(text) {
                return;
            }
            texts.push(text.clone());
        }
        serde_json::Value::Object(fields) => fields
            .values()
            .for_each(|child| collect_text_from_json(child, texts)),
        serde_json::Value::Array(items) => items
            .iter()
            .for_each(|child| collect_text_from_json(child, texts)),
        _ => {}
    }
}
/// Removes a surrounding HTML comment wrapper (`<!--` … `-->`) from `s`.
///
/// Surrounding whitespace is trimmed both before and after the markers are
/// removed, so input such as `"\n<!-- text -->\n"` yields `"text"`. Each
/// marker is stripped independently: a lone `<!--` prefix or `-->` suffix is
/// removed even if the other is missing. Input without either marker is
/// returned trimmed.
///
/// The result borrows from `s`; nothing is allocated.
pub(super) fn strip_html_comment_wrapper(s: &str) -> &str {
    // Trim first: the markers only match at the very edges of the slice, so
    // leading/trailing whitespace would otherwise leave the wrapper intact.
    let s = s.trim();
    let s = s.strip_prefix("<!--").unwrap_or(s);
    let s = s.strip_suffix("-->").unwrap_or(s);
    s.trim()
}