use crate::client::SearchfoxClient;
use crate::types::{BlameInfo, CommitInfo, ParsedCommitInfo};
use crate::utils::searchfox_url_repo;
use anyhow::Result;
use regex::Regex;
use scraper::{Html, Selector};
use std::collections::HashMap;
impl SearchfoxClient {
/// Fetches the revision hash reported by the `commit-info/HEAD` endpoint.
///
/// The response is expected to be a JSON array; the string stored under the
/// `parent` key of its first element is returned.
///
/// # Errors
///
/// Returns an error when the request fails, when the body is not valid JSON,
/// or when the first array element has no string `parent` field.
pub async fn get_head_hash(&self) -> anyhow::Result<String> {
    let url = format!(
        "https://searchfox.org/{}/commit-info/HEAD",
        searchfox_url_repo(&self.repo)
    );
    let response = self.get_raw(&url).await?;

    // Keep the serde error text so a malformed response can be diagnosed
    // instead of being reduced to a generic message.
    let json: serde_json::Value = serde_json::from_str(&response)
        .map_err(|e| anyhow::anyhow!("Failed to parse HEAD commit info: {}", e))?;

    // NOTE(review): this reads the `parent` field of the HEAD entry, not a
    // field naming the entry itself — confirm that is the intended hash.
    json.as_array()
        .and_then(|entries| entries.first())
        .and_then(|commit| commit.get("parent"))
        .and_then(serde_json::Value::as_str)
        .map(str::to_owned)
        .ok_or_else(|| anyhow::anyhow!("Could not find HEAD revision hash in commit-info"))
}
pub async fn get_blame_for_lines(
&self,
path: &str,
lines: &[usize],
) -> Result<HashMap<usize, BlameInfo>> {
let url = format!("https://searchfox.org/{}/source/{}", self.repo, path);
let html = self.get_html(&url).await?;
let blame_map = Self::parse_blame_from_html(&html)?;
let filtered_blame: HashMap<usize, (String, String, usize)> = blame_map
.into_iter()
.filter(|(line_no, _)| lines.contains(line_no))
.collect();
if filtered_blame.is_empty() {
return Ok(HashMap::new());
}
let unique_commits: Vec<&str> = {
let mut commits: Vec<&str> = filtered_blame
.values()
.map(|(hash, _, _)| hash.as_str())
.collect();
commits.sort_unstable();
commits.dedup();
commits
};
let commit_infos = self.get_commit_info(&unique_commits).await?;
let commit_map: HashMap<String, CommitInfo> = unique_commits
.into_iter()
.zip(commit_infos.into_iter())
.map(|(hash, info)| (hash.to_string(), info))
.collect();
let result = filtered_blame
.into_iter()
.map(|(line_no, (hash, path, orig_line))| {
let commit_info = commit_map.get(&hash).cloned();
let blame_info = BlameInfo {
commit_hash: hash.clone(),
original_path: path,
original_line: orig_line,
commit_info,
};
(line_no, blame_info)
})
.collect();
Ok(result)
}
async fn get_commit_info(&self, revs: &[&str]) -> Result<Vec<CommitInfo>> {
if revs.is_empty() {
return Ok(Vec::new());
}
const BATCH_SIZE: usize = 50;
let mut all_infos = Vec::new();
for chunk in revs.chunks(BATCH_SIZE) {
let revs_str = chunk.join(",");
let url = format!(
"https://searchfox.org/{}/commit-info/{}",
self.repo, revs_str
);
let response = self.get_raw(&url).await?;
let mut commit_infos: Vec<CommitInfo> = serde_json::from_str(&response)?;
all_infos.append(&mut commit_infos);
}
Ok(all_infos)
}
/// Extracts blame entries from a source page.
///
/// Every `div[role='row']` element counts as one line, numbered from 1 in
/// document order. A row contributes an entry only when it contains a
/// `.blame-strip` element whose `data-blame` attribute parses successfully;
/// rows without one still advance the line counter.
fn parse_blame_from_html(html: &str) -> Result<HashMap<usize, (String, String, usize)>> {
    let document = Html::parse_document(html);
    let strip_selector = Selector::parse(".blame-strip").unwrap();
    let row_selector = Selector::parse("div[role='row']").unwrap();

    let entries: HashMap<usize, (String, String, usize)> = document
        .select(&row_selector)
        .enumerate()
        .filter_map(|(index, row)| {
            let strip = row.select(&strip_selector).next()?;
            let raw = strip.value().attr("data-blame")?;
            let parsed = Self::parse_data_blame(raw)?;
            // Rows are zero-indexed by `enumerate`; line numbers start at 1.
            Some((index + 1, parsed))
        })
        .collect();

    log::debug!("Parsed {} blame entries from HTML", entries.len());
    Ok(entries)
}
/// Parses a `data-blame` attribute of the form `hash#path#line`.
///
/// The hash is everything before the first `#` and the line number is
/// everything after the last `#`, so a path that itself contains `#` is kept
/// intact instead of causing the whole value to be rejected.
///
/// Returns `None` when fewer than two `#` separators are present or when the
/// trailing field is not a valid `usize`.
fn parse_data_blame(data: &str) -> Option<(String, String, usize)> {
    let (hash, rest) = data.split_once('#')?;
    let (path, line) = rest.rsplit_once('#')?;
    let line_no = line.parse::<usize>().ok()?;
    Some((hash.to_string(), path.to_string(), line_no))
}
}
/// Splits a commit header into bug number, message, author and date.
///
/// Markup is removed first. The bug number is searched for in the whole
/// stripped text. The first line supplies the message: when it contains a
/// `:`, only the trimmed text after the first `:` is kept, otherwise the
/// whole trimmed line. The second line, if any, is split into author and
/// date; without a second line both are empty strings.
///
/// NOTE(review): because the cut happens at the first `:`, a first line such
/// as `Fix Foo::bar` yields the message `:bar` — confirm this is acceptable.
pub fn parse_commit_header(header: &str) -> ParsedCommitInfo {
    let text = strip_html_tags(header);
    let bug_number = extract_bug_number(&text);

    let mut lines = text.split('\n');

    // `split` always yields at least one item; the fallback mirrors the
    // empty-message case.
    let summary = lines.next().unwrap_or("");
    let message = match summary.split_once(':') {
        Some((_, after_colon)) => after_colon.trim().to_string(),
        None => summary.trim().to_string(),
    };

    let (author, date) = match lines.next() {
        Some(byline) => parse_author_date(byline),
        None => (String::new(), String::new()),
    };

    ParsedCommitInfo {
        bug_number,
        message,
        author,
        date,
    }
}
fn strip_html_tags(html: &str) -> String {
let tag_re = Regex::new(r"<[^>]+>").unwrap();
let without_tags = tag_re.replace_all(html, "");
without_tags
.replace("<", "<")
.replace(">", ">")
.replace("&", "&")
.replace(""", "\"")
.replace("'", "'")
}
fn extract_bug_number(text: &str) -> Option<u64> {
let bug_re = Regex::new(r"[Bb]ug\s+(\d+)").unwrap();
bug_re
.captures(text)
.and_then(|cap| cap.get(1))
.and_then(|m| m.as_str().parse::<u64>().ok())
}
/// Splits an `author, date` byline at its first comma.
///
/// Everything before the first comma is the author and everything after it
/// (further commas included) is the date; both are trimmed. Without a comma
/// the whole trimmed text is the author and the date is empty.
fn parse_author_date(text: &str) -> (String, String) {
    match text.split_once(',') {
        Some((author, date)) => (author.trim().to_string(), date.trim().to_string()),
        None => (text.trim().to_string(), String::new()),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A `data-blame` value splits into revision, path marker and line.
    #[test]
    fn test_parse_data_blame() {
        let parsed =
            SearchfoxClient::parse_data_blame("88a286dcec9ba069397bd4c4c35b3e317bf66f4f#%#7");
        let expected = (
            "88a286dcec9ba069397bd4c4c35b3e317bf66f4f".to_string(),
            "%".to_string(),
            7,
        );
        assert_eq!(parsed, Some(expected));
    }

    /// A linkified header yields bug number, message, author and date.
    #[test]
    fn test_parse_commit_header() {
        let info = parse_commit_header(
            "Bug <a href=\"...\">123456</a>: Fix audio issue\n<br><i>John Doe, 2021-05-15</i>",
        );
        assert_eq!(
            (
                info.bug_number,
                info.message.as_str(),
                info.author.as_str(),
                info.date.as_str(),
            ),
            (Some(123456), "Fix audio issue", "John Doe", "2021-05-15")
        );
    }

    /// Anchor markup is removed while the surrounding text is preserved.
    #[test]
    fn test_strip_html_tags() {
        let stripped = strip_html_tags("Bug <a href=\"url\">123</a>: message");
        assert_eq!(stripped, "Bug 123: message");
    }
}