use crate::client::SearchfoxClient;
use crate::types::{BlameInfo, CommitInfo, ParsedCommitInfo};
use anyhow::Result;
use regex::Regex;
use scraper::{Html, Selector};
use std::collections::HashMap;

impl SearchfoxClient {
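    /// Fetch blame for the given 1-based `lines` of `path` by scraping the
    /// rendered searchfox source page, then attach commit metadata from the
    /// commit-info endpoint to each returned entry.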
    pub async fn get_blame_for_lines(
        &self,
        path: &str,
        lines: &[usize],
    ) -> Result<HashMap<usize, BlameInfo>> {
        let url = format!("https://searchfox.org/{}/source/{}", self.repo, path);
        let html = self.get_html(&url).await?;

        let blame_map = Self::parse_blame_from_html(&html)?;

        // Keep only the blame entries for the lines the caller asked about.
        let filtered_blame: HashMap<usize, (String, String, usize)> = blame_map
            .into_iter()
            .filter(|(line_no, _)| lines.contains(line_no))
            .collect();

        if filtered_blame.is_empty() {
            return Ok(HashMap::new());
        }

        // Deduplicate the commit hashes so each commit is fetched only once.
        let unique_commits: Vec<&str> = {
            let mut commits: Vec<&str> = filtered_blame
                .values()
                .map(|(hash, _, _)| hash.as_str())
                .collect();
            commits.sort_unstable();
            commits.dedup();
            commits
        };

        let commit_infos = self.get_commit_info(&unique_commits).await?;

        // Pair each hash with its metadata; the zip relies on the commit-info
        // endpoint returning entries in request order.
        let commit_map: HashMap<String, CommitInfo> = unique_commits
            .into_iter()
            .zip(commit_infos.into_iter())
            .map(|(hash, info)| (hash.to_string(), info))
            .collect();

        let result = filtered_blame
            .into_iter()
            .map(|(line_no, (hash, orig_path, orig_line))| {
                let commit_info = commit_map.get(&hash).cloned();
                let blame_info = BlameInfo {
                    commit_hash: hash.clone(),
                    original_path: orig_path,
                    original_line: orig_line,
                    commit_info,
                };
                (line_no, blame_info)
            })
            .collect();

        Ok(result)
    }

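    /// Fetch commit metadata for `revs` from searchfox's commit-info
    /// endpoint, batching the revisions to keep request URLs bounded.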
    async fn get_commit_info(&self, revs: &[&str]) -> Result<Vec<CommitInfo>> {
        if revs.is_empty() {
            return Ok(Vec::new());
        }

        // Number of revisions requested per commit-info call.
        const BATCH_SIZE: usize = 50;

        let mut all_infos = Vec::new();

        for chunk in revs.chunks(BATCH_SIZE) {
            let revs_str = chunk.join(",");
            let url = format!(
                "https://searchfox.org/{}/commit-info/{}",
                self.repo, revs_str
            );

            let response = self.get_raw(&url).await?;
            let mut commit_infos: Vec<CommitInfo> = serde_json::from_str(&response)?;
            all_infos.append(&mut commit_infos);
        }

        Ok(all_infos)
    }

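    /// Scan the rendered source page and record, per displayed line number,
    /// the `(commit hash, original path, original line)` triple encoded in
    /// each row's `data-blame` attribute.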
    fn parse_blame_from_html(html: &str) -> Result<HashMap<usize, (String, String, usize)>> {
        let document = Html::parse_document(html);
        // Both selectors are static and known to be valid, so unwrap is safe.
        let blame_selector = Selector::parse(".blame-strip").unwrap();
        let line_selector = Selector::parse("div[role='row']").unwrap();

        let mut result = HashMap::new();
        let mut line_number = 1;

        for row in document.select(&line_selector) {
            if let Some(blame_elem) = row.select(&blame_selector).next() {
                if let Some(blame_data) = blame_elem.value().attr("data-blame") {
                    if let Some((hash, path, orig_line)) = Self::parse_data_blame(blame_data) {
                        result.insert(line_number, (hash, path, orig_line));
                    }
                }
            }
            line_number += 1;
        }

        log::debug!("Parsed {} blame entries from HTML", result.len());
        Ok(result)
    }

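    /// Split a `data-blame` attribute of the form `hash#path#line`. The path
    /// component appears to be `%` when the line still lives in the same
    /// file (see the test below).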
    fn parse_data_blame(data: &str) -> Option<(String, String, usize)> {
        let parts: Vec<&str> = data.split('#').collect();
        if parts.len() != 3 {
            return None;
        }

        let hash = parts[0].to_string();
        let path = parts[1].to_string();
        let line_no = parts[2].parse::<usize>().ok()?;

        Some((hash, path, line_no))
    }
}

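/// Parse a commit header fragment like the one exercised in the tests below
/// ("Bug <a ...>NNNNNN</a>: message<br><i>Author, Date</i>") into its parts.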
pub fn parse_commit_header(header: &str) -> ParsedCommitInfo {
    let text = strip_html_tags(header);

    let bug_number = extract_bug_number(&text);

    let parts: Vec<&str> = text.split('\n').collect();

    // The first line is typically "Bug NNNNNN: message"; drop everything up
    // to and including the colon when one is present.
    let message = if let Some(first_part) = parts.first() {
        if let Some(idx) = first_part.find(':') {
            first_part[idx + 1..].trim().to_string()
        } else {
            first_part.trim().to_string()
        }
    } else {
        String::new()
    };

    // The second line, when present, carries "Author, Date".
    let (author, date) = if parts.len() > 1 {
        parse_author_date(parts[1])
    } else {
        (String::new(), String::new())
    };

    ParsedCommitInfo {
        bug_number,
        message,
        author,
        date,
    }
}

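/// Strip HTML tags with a regex and decode a few common entities. A full
/// HTML parser is overkill for the short header fragments this receives.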
fn strip_html_tags(html: &str) -> String {
    let tag_re = Regex::new(r"<[^>]+>").unwrap();
    let without_tags = tag_re.replace_all(html, "");

    // Decode the most common HTML entities. `&amp;` is decoded last so that
    // a literal `&amp;lt;` is not double-unescaped into `<`.
    without_tags
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#39;", "'")
        .replace("&amp;", "&")
}

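/// Extract the first "Bug NNNNNN" (or "bug NNNNNN") number from `text`.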
fn extract_bug_number(text: &str) -> Option<u64> {
    let bug_re = Regex::new(r"[Bb]ug\s+(\d+)").unwrap();
    bug_re
        .captures(text)
        .and_then(|cap| cap.get(1))
        .and_then(|m| m.as_str().parse::<u64>().ok())
}

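/// Split an "Author, Date" line on its first comma; everything after that
/// comma is treated as the date.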
fn parse_author_date(text: &str) -> (String, String) {
    let parts: Vec<&str> = text.split(',').collect();
    if parts.len() >= 2 {
        let author = parts[0].trim().to_string();
        // Rejoin the remainder so a date that itself contains commas survives.
        let date = parts[1..].join(",").trim().to_string();
        (author, date)
    } else {
        (text.trim().to_string(), String::new())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_data_blame() {
        let data = "88a286dcec9ba069397bd4c4c35b3e317bf66f4f#%#7";
        let result = SearchfoxClient::parse_data_blame(data);
        assert!(result.is_some());

        let (hash, path, line) = result.unwrap();
        assert_eq!(hash, "88a286dcec9ba069397bd4c4c35b3e317bf66f4f");
        assert_eq!(path, "%");
        assert_eq!(line, 7);
    }

    #[test]
    fn test_parse_commit_header() {
        let header =
            "Bug <a href=\"...\">123456</a>: Fix audio issue\n<br><i>John Doe, 2021-05-15</i>";
        let result = parse_commit_header(header);

        assert_eq!(result.bug_number, Some(123456));
        assert_eq!(result.message, "Fix audio issue");
        assert_eq!(result.author, "John Doe");
        assert_eq!(result.date, "2021-05-15");
    }

    #[test]
    fn test_strip_html_tags() {
        let html = "Bug <a href=\"url\">123</a>: message";
        let result = strip_html_tags(html);
        assert_eq!(result, "Bug 123: message");
    }
}