use std::path::Path;
pub fn extract_attr(tag: &str, attr: &str) -> Option<String> {
let needle = format!("{}=\"", attr);
let start = tag.find(&needle)? + needle.len();
let rest = &tag[start..];
let end = rest.find('"')?;
Some(rest[..end].to_string())
}
pub fn try_parse_xml(content: &str) -> Result<(), String> {
use quick_xml::Reader;
use quick_xml::events::Event;
let mut reader = Reader::from_str(content);
reader.config_mut().trim_text(false);
let mut buf = Vec::new();
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Eof) => return Ok(()),
Err(e) => return Err(format!("{}", e)),
Ok(_) => {}
}
buf.clear();
}
}
pub fn contains_tag(line: &str, name: &str) -> bool {
let lower = line.to_ascii_lowercase();
let needle_open = format!("<{}", name);
let mut search = lower.as_str();
while let Some(idx) = search.find(&needle_open) {
let after = &search[idx + needle_open.len()..];
if let Some(c) = after.chars().next() {
if c == ' ' || c == '>' || c == '/' || c == '\t' || c == '\n' {
return true;
}
} else {
return true;
}
search = &search[idx + needle_open.len()..];
}
false
}
pub fn line_of(content: &str, byte_offset: usize) -> usize {
content[..byte_offset.min(content.len())]
.bytes()
.filter(|b| *b == b'\n')
.count()
+ 1
}
pub fn find_nested_p(content: &str) -> Option<usize> {
let mut depth = 0i32;
let mut pos = 0usize;
let lower = content.to_ascii_lowercase();
while pos < lower.len() {
let rest = &lower[pos..];
let open = rest.find("<p");
let close = rest.find("</p>");
match (open, close) {
(Some(o), Some(c)) if o < c => {
let after_p = rest.as_bytes().get(o + 2).copied().unwrap_or(b' ');
if after_p == b' ' || after_p == b'>' || after_p == b'\t' || after_p == b'/' {
if depth >= 1 {
let abs = pos + o;
return Some(line_of(content, abs));
}
depth += 1;
}
pos += o + 2;
}
(Some(o), None) => {
let after_p = rest.as_bytes().get(o + 2).copied().unwrap_or(b' ');
if after_p == b' ' || after_p == b'>' || after_p == b'\t' || after_p == b'/' {
if depth >= 1 {
let abs = pos + o;
return Some(line_of(content, abs));
}
depth += 1;
}
pos += o + 2;
}
(_, Some(c)) => {
if depth > 0 {
depth -= 1;
}
pos += c + 4;
}
(None, None) => break,
}
}
None
}
pub fn has_negative_css(line: &str) -> bool {
let l = line.to_ascii_lowercase();
for prop in &["margin", "padding", "line-height"] {
let mut search = l.as_str();
while let Some(idx) = search.find(prop) {
let after = &search[idx + prop.len()..];
let colon = after.find(':');
if let Some(c) = colon {
let value = after[c + 1..].trim_start();
let bytes = value.as_bytes();
if bytes.len() >= 2 && bytes[0] == b'-' && bytes[1].is_ascii_digit() {
return true;
}
let end = value
.find(|c: char| c == ';' || c == '"' || c == '}')
.unwrap_or(value.len());
let vals = &value[..end];
let chars: Vec<char> = vals.chars().collect();
for i in 0..chars.len().saturating_sub(1) {
if chars[i] == '-' && chars[i + 1].is_ascii_digit() {
let prev = if i == 0 { ' ' } else { chars[i - 1] };
if prev == ' ' || prev == ':' || prev == '\t' {
return true;
}
}
}
}
search = &search[idx + prop.len()..];
}
}
false
}
pub fn heading_with_text_align(line: &str) -> Option<&'static str> {
let l = line.to_ascii_lowercase();
let tags: &[&'static str] = &["h1", "h2", "h3", "h4", "h5", "h6"];
for tag in tags {
let open = format!("<{}", tag);
if let Some(idx) = l.find(&open) {
if let Some(end) = l[idx..].find('>') {
let tag_content = &l[idx..idx + end];
if tag_content.contains("text-align") {
return Some(tag);
}
}
}
}
None
}
pub fn count_table_rows(content: &str) -> Vec<usize> {
let lower = content.to_ascii_lowercase();
let mut counts = Vec::new();
let mut pos = 0usize;
while let Some(start) = lower[pos..].find("<table") {
let abs = pos + start;
let end = match lower[abs..].find("</table>") {
Some(e) => abs + e,
None => break,
};
let table = &lower[abs..end];
let row_count = table.matches("<tr").count();
counts.push(row_count);
pos = end + 8;
}
counts
}
pub fn strip_tags_len(html: &str) -> usize {
let mut in_tag = false;
let mut count = 0usize;
for c in html.chars() {
match c {
'<' => in_tag = true,
'>' => in_tag = false,
_ if !in_tag => count += 1,
_ => {}
}
}
count
}
pub fn looks_like_html_cover_page(
base_dir: &Path,
idref: &str,
href: &str,
cover_image_href: &str,
) -> bool {
let href_lower = href.to_lowercase();
let idref_lower = idref.to_lowercase();
let file_stem = Path::new(&href_lower)
.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("");
let is_html = href_lower.ends_with(".html")
|| href_lower.ends_with(".xhtml")
|| href_lower.ends_with(".htm");
if !is_html {
return false;
}
let name_match = idref_lower.contains("cover") || file_stem.contains("cover");
if !name_match {
return false;
}
let full_path = base_dir.join(href);
let content = match std::fs::read_to_string(&full_path) {
Ok(s) => s,
Err(_) => return true,
};
let image_stem = Path::new(cover_image_href)
.file_name()
.and_then(|s| s.to_str())
.unwrap_or("");
content.contains("<img") && (image_stem.is_empty() || content.contains(image_stem))
}