1use crate::client::SearchfoxClient;
2use crate::types::{BlameInfo, CommitInfo, ParsedCommitInfo};
3use crate::utils::searchfox_url_repo;
4use anyhow::Result;
5use regex::Regex;
6use scraper::{Html, Selector};
7use std::collections::HashMap;
8
9impl SearchfoxClient {
10 pub async fn get_head_hash(&self) -> anyhow::Result<String> {
11 let url = format!(
12 "https://searchfox.org/{}/commit-info/HEAD",
13 searchfox_url_repo(&self.repo)
14 );
15 let response = self.get_raw(&url).await?;
16 let json: serde_json::Value = serde_json::from_str(&response)
17 .map_err(|_| anyhow::anyhow!("Failed to parse HEAD commit info"))?;
18 json.as_array()
19 .and_then(|arr| arr.first())
20 .and_then(|commit| commit.get("parent"))
21 .and_then(|p| p.as_str())
22 .map(|s| s.to_string())
23 .ok_or_else(|| anyhow::anyhow!("Could not find HEAD revision hash in commit-info"))
24 }
25
26 pub async fn get_blame_for_lines(
28 &self,
29 path: &str,
30 lines: &[usize],
31 ) -> Result<HashMap<usize, BlameInfo>> {
32 let url = format!("https://searchfox.org/{}/source/{}", self.repo, path);
34 let html = self.get_html(&url).await?;
35
36 let blame_map = Self::parse_blame_from_html(&html)?;
38
39 let filtered_blame: HashMap<usize, (String, String, usize)> = blame_map
41 .into_iter()
42 .filter(|(line_no, _)| lines.contains(line_no))
43 .collect();
44
45 if filtered_blame.is_empty() {
46 return Ok(HashMap::new());
47 }
48
49 let unique_commits: Vec<&str> = {
51 let mut commits: Vec<&str> = filtered_blame
52 .values()
53 .map(|(hash, _, _)| hash.as_str())
54 .collect();
55 commits.sort_unstable();
56 commits.dedup();
57 commits
58 };
59
60 let commit_infos = self.get_commit_info(&unique_commits).await?;
62
63 let commit_map: HashMap<String, CommitInfo> = unique_commits
65 .into_iter()
66 .zip(commit_infos.into_iter())
67 .map(|(hash, info)| (hash.to_string(), info))
68 .collect();
69
70 let result = filtered_blame
72 .into_iter()
73 .map(|(line_no, (hash, path, orig_line))| {
74 let commit_info = commit_map.get(&hash).cloned();
75 let blame_info = BlameInfo {
76 commit_hash: hash.clone(),
77 original_path: path,
78 original_line: orig_line,
79 commit_info,
80 };
81 (line_no, blame_info)
82 })
83 .collect();
84
85 Ok(result)
86 }
87
88 async fn get_commit_info(&self, revs: &[&str]) -> Result<Vec<CommitInfo>> {
90 if revs.is_empty() {
91 return Ok(Vec::new());
92 }
93
94 const BATCH_SIZE: usize = 50;
97
98 let mut all_infos = Vec::new();
99
100 for chunk in revs.chunks(BATCH_SIZE) {
101 let revs_str = chunk.join(",");
102 let url = format!(
103 "https://searchfox.org/{}/commit-info/{}",
104 self.repo, revs_str
105 );
106
107 let response = self.get_raw(&url).await?;
108 let mut commit_infos: Vec<CommitInfo> = serde_json::from_str(&response)?;
109 all_infos.append(&mut commit_infos);
110 }
111
112 Ok(all_infos)
113 }
114
115 fn parse_blame_from_html(html: &str) -> Result<HashMap<usize, (String, String, usize)>> {
117 let document = Html::parse_document(html);
118 let blame_selector = Selector::parse(".blame-strip").unwrap();
119 let line_selector = Selector::parse("div[role='row']").unwrap();
120
121 let mut result = HashMap::new();
122 let mut line_number = 1;
123
124 for row in document.select(&line_selector) {
127 if let Some(blame_elem) = row.select(&blame_selector).next() {
129 if let Some(blame_data) = blame_elem.value().attr("data-blame") {
130 if let Some((hash, path, orig_line)) = Self::parse_data_blame(blame_data) {
131 result.insert(line_number, (hash, path, orig_line));
132 }
133 }
134 }
135 line_number += 1;
136 }
137
138 log::debug!("Parsed {} blame entries from HTML", result.len());
139 Ok(result)
140 }
141
/// Parses a `data-blame` attribute of the form `hash#path#line`.
///
/// The hash is everything before the first `#` and the line number is
/// everything after the last `#`, so a path that itself contains `#` is
/// kept intact instead of causing the whole entry to be rejected.
///
/// Returns `None` when fewer than two `#` separators are present or when
/// the trailing field is not a valid `usize`.
fn parse_data_blame(data: &str) -> Option<(String, String, usize)> {
    let (hash, rest) = data.split_once('#')?;
    let (path, line) = rest.rsplit_once('#')?;
    let line_no = line.parse::<usize>().ok()?;

    Some((hash.to_string(), path.to_string(), line_no))
}
156}
157
158pub fn parse_commit_header(header: &str) -> ParsedCommitInfo {
160 let text = strip_html_tags(header);
162
163 let bug_number = extract_bug_number(&text);
165
166 let parts: Vec<&str> = text.split('\n').collect();
168
169 let message = if let Some(first_part) = parts.first() {
170 if let Some(idx) = first_part.find(':') {
172 first_part[idx + 1..].trim().to_string()
173 } else {
174 first_part.trim().to_string()
175 }
176 } else {
177 String::new()
178 };
179
180 let (author, date) = if parts.len() > 1 {
182 parse_author_date(parts[1])
183 } else {
184 (String::new(), String::new())
185 };
186
187 ParsedCommitInfo {
188 bug_number,
189 message,
190 author,
191 date,
192 }
193}
194
/// Removes HTML tags from `html` and decodes a small set of entities.
///
/// A tag is a `<`, followed by at least one character other than `>`,
/// followed by `>` (the same language as the pattern `<[^>]+>`). A `<`
/// that is never closed, and the empty pair `<>`, are left in place.
///
/// After tag removal the entities `&lt;`, `&gt;`, `&quot;`, `&#39;` and
/// `&amp;` are decoded. `&amp;` is handled last so that an escaped entity
/// such as `&amp;lt;` becomes the literal text `&lt;` rather than `<`.
fn strip_html_tags(html: &str) -> String {
    let mut text = String::with_capacity(html.len());
    let mut rest = html;

    while let Some(open) = rest.find('<') {
        let after_open = &rest[open + 1..];
        match after_open.find('>') {
            // No closing bracket anywhere ahead: nothing left to strip.
            None => break,
            // "<>" has no tag content, so it is not a tag; keep the '<'
            // and continue scanning right after it.
            Some(0) => {
                text.push_str(&rest[..=open]);
                rest = after_open;
            }
            // Drop everything from '<' through the matching '>'.
            Some(close) => {
                text.push_str(&rest[..open]);
                rest = &after_open[close + 1..];
            }
        }
    }
    text.push_str(rest);

    text.replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#39;", "'")
        .replace("&amp;", "&")
}
207
208fn extract_bug_number(text: &str) -> Option<u64> {
209 let bug_re = Regex::new(r"[Bb]ug\s+(\d+)").unwrap();
210 bug_re
211 .captures(text)
212 .and_then(|cap| cap.get(1))
213 .and_then(|m| m.as_str().parse::<u64>().ok())
214}
215
/// Splits an `author, date` line at its first comma.
///
/// Both halves are trimmed; any further commas stay inside the date. If
/// the line contains no comma, the whole trimmed line is returned as the
/// author and the date is empty.
fn parse_author_date(text: &str) -> (String, String) {
    match text.split_once(',') {
        Some((author, date)) => (author.trim().to_string(), date.trim().to_string()),
        None => (text.trim().to_string(), String::new()),
    }
}
227
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed `hash#path#line` attribute yields all three fields.
    #[test]
    fn test_parse_data_blame() {
        let parsed =
            SearchfoxClient::parse_data_blame("88a286dcec9ba069397bd4c4c35b3e317bf66f4f#%#7");

        let expected = (
            String::from("88a286dcec9ba069397bd4c4c35b3e317bf66f4f"),
            String::from("%"),
            7usize,
        );
        assert_eq!(parsed, Some(expected));
    }

    /// Bug number, message, author and date are all recovered from a
    /// two-line HTML header.
    #[test]
    fn test_parse_commit_header() {
        let info = parse_commit_header(
            "Bug <a href=\"...\">123456</a>: Fix audio issue\n<br><i>John Doe, 2021-05-15</i>",
        );

        assert_eq!(info.bug_number, Some(123456));
        assert_eq!(info.message, "Fix audio issue");
        assert_eq!(info.author, "John Doe");
        assert_eq!(info.date, "2021-05-15");
    }

    /// Markup is removed while the surrounding text is preserved.
    #[test]
    fn test_strip_html_tags() {
        let stripped = strip_html_tags("Bug <a href=\"url\">123</a>: message");
        assert_eq!(stripped, "Bug 123: message");
    }
}