ci_manager/util.rs
//! Utility functions for parsing GitHub CLI output, plus general string, path and URL helpers.
2use super::*;
3
4/// Parse a path from a string
5/// # Example
6/// ```
7/// # use ci_manager::util::first_path_from_str;
8/// use std::path::PathBuf;
9///
10/// let haystack = r#"multi line
11/// test string with/path/file.txt is
12/// valid"#;
13/// let path = first_path_from_str(haystack).unwrap();
14/// assert_eq!(path, PathBuf::from("with/path/file.txt"));
15///
16/// // No path in string is an error
17/// let haystack = "Random string with no path";
18/// assert!(first_path_from_str(haystack).is_err());
19///
20/// // Path with no leading '/' and no file extension is OK
21/// let haystack = "foo app/3-_2/t/3 bar";
22/// let path = first_path_from_str(haystack).unwrap();
23/// assert_eq!(path, PathBuf::from("app/3-_2/t/3"));
24///
25/// // More realistic example
26/// let haystack = r#" ERROR: Logfile of failure stored in: /app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616"#;
27/// let path = first_path_from_str(haystack).unwrap();
28/// assert_eq!(
29/// path,
30/// PathBuf::from("/app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616")
31/// );
32/// ```
33/// # Errors
34/// This function returns an error if no valid path is found in the string
35pub fn first_path_from_str(s: &str) -> Result<PathBuf> {
36 static RE: Lazy<Regex> =
37 Lazy::new(|| Regex::new(r"[a-zA-Z0-9-_.\/]+\/[a-zA-Z0-9-_.]+").unwrap());
38
39 let path_str = RE.find(s).context("No path found in string")?.as_str();
40 Ok(PathBuf::from(path_str))
41}
42
43/// Take the lines with failed jobs from the output of `gh run view`
44pub fn take_lines_with_failed_jobs(output: String) -> Vec<String> {
45 static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"X.*ID [0-9]*\)").unwrap());
46
47 RE.find_iter(&output)
48 .map(|m| m.as_str().to_owned())
49 .collect()
50}
51
52/// Extract the job IDs from the lines with job information
53pub fn id_from_job_lines(lines: &[String]) -> Vec<String> {
54 static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"ID (?<JOB_ID>[0-9]*)").unwrap());
55
56 lines
57 .iter()
58 .map(|line| {
59 RE.captures(line)
60 .unwrap_or_else(|| {
61 panic!("Expected a line with a Job ID, but no ID found in line: {line}")
62 })
63 .name("JOB_ID")
64 .expect("Expected a Job ID")
65 .as_str()
66 .to_owned()
67 })
68 .collect()
69}
70
71/// Parse text for timestamps and IDs and remove them, returning the modified text without making a copy.
72///
73/// Some compromises are made to be able to remove timestamps in between other symbols e.g. '/83421321/'.
74/// but still avoid removing commit SHAs. That means that these symbols are also removed (any non-letter character
75/// preceding and following an ID).
76///
77/// # Example
78/// ```
79/// # use ci_manager::util::remove_timestamps_and_ids;
80/// # use pretty_assertions::assert_eq;
81/// let test_str = r"ID 21442749267 ";
82/// let modified = remove_timestamps_and_ids(test_str);
83/// assert_eq!(modified, "ID"); // Note that the space is removed
84///
85///
86/// let test_str = r#"ID 21442749267
87/// date: 2024-02-28 00:03:46
88/// other text"#;
89/// let modified = remove_timestamps_and_ids(test_str);
90/// assert_eq!(modified, "IDdate: \nother text");
91/// ```
92pub fn remove_timestamps_and_ids(text: &str) -> borrow::Cow<str> {
93 static RE: Lazy<Regex> = Lazy::new(|| {
94 Regex::new(
95 r"(?x)
96 # Timestamps like YYYY-MM-DD HH:MM:SS
97 ([0-9]{4}-[0-9]{2}-[0-9]{2}\x20[0-9]{2}:[0-9]{2}:[0-9]{2})
98 |
99 # IDs like 21442749267 but only if they are preceded and followed by non-letter characters
100 (?:[^[a-zA-Z]])([0-9]{10,11})(?:[^[a-zA-Z]])
101 ",
102 )
103 .unwrap()
104 });
105
106 RE.replace_all(text, "")
107}
108
/// Remove all non-ASCII characters from a string.
///
/// Text that is already pure ASCII is returned borrowed, without allocating. Otherwise a new
/// string is built that contains only the ASCII characters of the input, in their original
/// order.
///
/// # Example
/// ```
/// # use ci_manager::util::remove_non_ascii;
/// # use pretty_assertions::assert_eq;
/// let test_str = "stríng wøth nön-æscii chåråcters";
/// let modified = remove_non_ascii(test_str);
/// assert_eq!(modified, "strng wth nn-scii chrcters");
/// ```
pub fn remove_non_ascii(text: &str) -> borrow::Cow<str> {
    // Fast path: nothing to strip, so hand the input back untouched.
    if text.is_ascii() {
        return borrow::Cow::Borrowed(text);
    }
    // The standard library can filter by code point directly; no regex is needed for this.
    borrow::Cow::Owned(text.chars().filter(char::is_ascii).collect())
}
123
124/// Remove ANSI codes from a string
125/// # Example
126/// ```
127/// # use ci_manager::util::remove_ansi_codes;
128/// # use pretty_assertions::assert_eq;
129/// let test_str = r#"[1;31mERROR:[0m Logfile of failure stored in"#;
130/// let modified = remove_ansi_codes(test_str);
131/// assert_eq!(modified, "ERROR: Logfile of failure stored in");
132/// ```
133pub fn remove_ansi_codes(text: &str) -> borrow::Cow<str> {
134 static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\x1b\[[;\d]*[A-Za-z]").unwrap());
135
136 RE.replace_all(text, "")
137}
138
139/// Parse a log and remove line-prefixed timestamps in the format `YYYY-MM-DDTHH:MM:SS.0000000Z` (ISO 8601).
140/// # Example
141/// ```
142/// # use ci_manager::util::remove_timestamp_prefixes;
143/// # use pretty_assertions::assert_eq;
144/// let test_str = "2024-02-28T00:03:46.0000000Z [INFO] This is a log message";
145/// let modified = remove_timestamp_prefixes(test_str);
146/// assert_eq!(modified, "[INFO] This is a log message");
147/// ```
148/// ## Multiple lines
149/// ```
150/// # use ci_manager::util::remove_timestamp_prefixes;
151/// # use pretty_assertions::assert_eq;
152/// let test_str = "\
153/// 2024-02-28T00:03:46.0000000Z [INFO] This is a log message
154/// 2024-03-15T20:35:48.9824182Z [ERROR] This is another log message";
155/// let modified = remove_timestamp_prefixes(test_str);
156/// assert_eq!(modified, "\
157/// [INFO] This is a log message
158/// [ERROR] This is another log message");
159///
160pub fn remove_timestamp_prefixes(log: &str) -> borrow::Cow<str> {
161 // The fist group matches 0 or more newlines, and uses that group to replace the timestamp
162 // this way the newlines are preserved (making it agnostic to the type of newline used in the log)
163 static RE: Lazy<Regex> =
164 Lazy::new(|| Regex::new(r"([\r\n]*)\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{7}Z\s").unwrap());
165
166 RE.replace_all(log, "$1")
167}
168
169/// Parse an absolute path from a string. This assumes that the the first '/' found in the string is the start
170/// of the path.
171/// # Example
172/// ```
173/// # use ci_manager::util::first_abs_path_from_str;
174/// use std::path::PathBuf;
175///
176/// let test_str = r#" ERROR: Logfile of failure stored in: /app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616"#;
177/// let path = first_abs_path_from_str(test_str).unwrap();
178/// assert_eq!(
179/// path,
180/// PathBuf::from("/app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616")
181/// );
182/// ```
183///
184/// # Errors
185/// Returns an error if no '/' is found in the string or
186/// if the path is not a valid path.
187pub fn first_abs_path_from_str(s: &str) -> Result<PathBuf> {
188 let start = s.find('/').context("Path not found, no '/' in string")?;
189 let path = PathBuf::from(&s[start..]);
190 Ok(path)
191}
192
/// Make sure a URL starts with `https://`, modifying it in place.
///
/// * A URL that already starts with `https://` is left untouched.
/// * A URL that starts with `http://` has its scheme upgraded to `https://`.
/// * Any other string gets `https://` prepended.
///
/// # Example
/// ```
/// # use ci_manager::util::ensure_https_prefix;
/// # use pretty_assertions::assert_eq;
/// // If the URL does not have the https prefix, it is added
/// let mut url = String::from("github.com/docker/buildx/issues");
/// ensure_https_prefix(&mut url);
/// assert_eq!(url, "https://github.com/docker/buildx/issues");
///
/// // If the URL already has the https prefix, it is not modified
/// let mut url = String::from("https://gitlab.com/foo-org/foo-repo");
/// ensure_https_prefix(&mut url);
/// assert_eq!(url, String::from("https://gitlab.com/foo-org/foo-repo"));
///
/// // A plain http prefix is replaced instead of being prefixed a second time
/// let mut url = String::from("http://gitlab.com/foo-org/foo-repo");
/// ensure_https_prefix(&mut url);
/// assert_eq!(url, "https://gitlab.com/foo-org/foo-repo");
/// ```
pub fn ensure_https_prefix(url: &mut String) {
    const HTTPS: &str = "https://";
    const HTTP: &str = "http://";

    if url.starts_with(HTTPS) {
        return;
    }
    if url.starts_with(HTTP) {
        // Swap the scheme; prepending would produce the invalid `https://http://...`.
        url.replace_range(..HTTP.len(), HTTPS);
    } else {
        url.insert_str(0, HTTPS);
    }
}
214
/// Canonicalize a repository URL to the form `https://{host}/{repo}`
///
/// Only a *leading* `https://` or `{host}/` is rewritten; the same text occurring later in
/// `repo` is left untouched. A `repo` that starts with `https://` followed by a different
/// host is not re-hosted: the canonical prefix is placed in front of whatever follows the
/// scheme.
///
/// # Arguments
/// * `repo` - The repository URL e.g. `user1/user1-repo`
/// * `host` - The host for the repository e.g. `github.com`. If it contains no `.`, `.com`
///   is appended (`github` becomes `github.com`).
///
/// # Returns
/// The canonicalized URL as a new `String`.
///
/// # Example
/// ```
/// # use ci_manager::util::canonicalize_repo_url;
/// let repo = "bob/bobbys-repo";
/// let canonicalized = canonicalize_repo_url(repo, "github");
/// assert_eq!(canonicalized, "https://github.com/bob/bobbys-repo");
///
/// // If the host is already in the URL, only the protocol is added
/// let repo = "github.com/lisa/lisas-repo";
/// let canonicalized = canonicalize_repo_url(repo, "github.com");
/// assert_eq!(canonicalized, "https://github.com/lisa/lisas-repo");
///
/// // If the URL is already in the canonical form, it is returned as is
/// let repo = "https://gitlab.com/foo-org/foo-repo";
/// let canonicalized = canonicalize_repo_url(repo, "gitlab.com");
/// assert_eq!(canonicalized, repo);
/// ```
pub fn canonicalize_repo_url(repo: &str, host: &str) -> String {
    // A host without any '.' is treated as a bare name and gets `.com` as top-level domain.
    let host = if host.contains('.') {
        host.to_owned()
    } else {
        format!("{host}.com")
    };
    let canonical_prefix = format!("https://{host}/");

    if repo.starts_with(&canonical_prefix) {
        return repo.to_owned();
    }

    // Strip at most one leading scheme or host. `strip_prefix` only touches the start of the
    // string, whereas `str::replace` would also rewrite later occurrences inside the path.
    let host_prefix = format!("{host}/");
    let remainder = repo
        .strip_prefix("https://")
        .or_else(|| repo.strip_prefix(host_prefix.as_str()))
        .unwrap_or(repo);

    format!("{canonical_prefix}{remainder}")
}
258
259/// Parse a repository URL/identifier to owner and repo fragments
260/// # Example
261/// ```
262/// # use pretty_assertions::assert_eq;
263/// # use ci_manager::util::repo_to_owner_repo_fragments;
264/// let repo_url = "github.com/luftkode/distro-template";
265/// let (owner, repo) = repo_to_owner_repo_fragments(repo_url).unwrap();
266/// assert_eq!((owner.as_str(), repo.as_str()), ("luftkode", "distro-template"));
267///
268/// let repo_url = "luftkode/bifrost-app";
269/// let (owner, repo) = repo_to_owner_repo_fragments(repo_url).unwrap();
270/// assert_eq!((owner.as_str(), repo.as_str()), ("luftkode", "bifrost-app"));
271/// ```
272///
273/// # Errors
274/// Returns an error if the URL cannot be parsed
275/// # Example
276/// ```
277/// # use ci_manager::util::repo_to_owner_repo_fragments;
278/// let repo_url = "github.com/luftkode";
279/// let result = repo_to_owner_repo_fragments(repo_url);
280/// assert!(result.is_err());
281/// ```
282pub fn repo_to_owner_repo_fragments(repo_url: &str) -> Result<(String, String)> {
283 let parts: Vec<&str> = repo_url.split('/').collect();
284 // reverse the order of the parts and take the first two
285 let repo_and_owner = parts.into_iter().rev().take(2).collect::<Vec<&str>>();
286 // Check that there are 2 parts and that neither are empty or contain spaces or dots
287 if repo_and_owner.len() != 2
288 || repo_and_owner
289 .iter()
290 .any(|s| s.is_empty() || s.contains(' ') || s.contains('.'))
291 {
292 bail!("Could not parse owner and repo from URL: {repo_url}");
293 }
294 let (repo, owner) = (repo_and_owner[0], repo_and_owner[1]);
295 Ok((owner.to_string(), repo.to_string()))
296}
297
298/// Calculate the smallest levenshtein distance between an issue body and other issue bodies
299pub fn issue_text_similarity(issue_body: &str, other_issues: &[String]) -> usize {
300 let issue_body_without_timestamps = remove_timestamps_and_ids(issue_body);
301
302 let smallest_distance = other_issues
303 .iter()
304 .map(|other_issue_body| {
305 distance::levenshtein(
306 &issue_body_without_timestamps,
307 &remove_timestamps_and_ids(other_issue_body),
308 )
309 })
310 .min()
311 .unwrap_or(usize::MAX);
312
313 smallest_distance
314}
315
// Unit tests for the helpers in this module.
//
// NOTE: the raw string literals below are compared character for character, so the
// whitespace inside them (including the indentation of continuation lines) is significant.
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Everything from the first '/' of a log line to its end is returned as the path.
    #[test]
    fn test_absolute_path_from_str() {
        let test_str = r#" ERROR: Logfile of failure stored in: /app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616"#;
        let path = first_abs_path_from_str(test_str).unwrap();
        assert_eq!(
            path,
            PathBuf::from("/app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616")
        );
    }

    /// An `owner/repo` identifier is prefixed with `https://{host}/`.
    #[test]
    pub fn test_canonicalize_repo_url() {
        let repo = "luftkode/distro-template";
        let canonicalized = canonicalize_repo_url(repo, "github.com");
        assert_eq!(canonicalized, "https://github.com/luftkode/distro-template");
    }

    /// A 10-digit ID is removed together with the spaces on both sides of it.
    #[test]
    pub fn test_remove_timestamps_and_ids() {
        let test_str = "ID 8072883145 ";
        let modified = remove_timestamps_and_ids(test_str);
        assert_eq!(modified, "ID");
    }

    /// IDs are removed from an issue-body-like text, including the ones embedded in URLs;
    /// the character on each side of an ID (space, '/', ')' or newline) is removed with it.
    #[test]
    pub fn test_remove_timestamps_and_ids_log_text() {
        // Input text containing a run ID and a job ID, both standalone and inside URLs.
        const LOG_TEXT: &'static str = r#"**Run ID**: 8072883145 [LINK TO RUN](https://github.com/luftkode/distro-template/actions/runs/8072883145)

 **1 job failed:**
 - **`Test template xilinx`**

 ### `Test template xilinx` (ID 22055505284)
 **Step failed:** `📦 Build yocto image`
 \
 **Log:** https://github.com/luftkode/distro-template/actions/runs/8072883145/job/22055505284
 "#;

        // The same text with every ID and its two neighbouring characters removed. The last
        // line ends in `runsjob` followed by the indentation of the input's closing line,
        // because the newline after the final ID is removed along with it.
        const EXPECTED_MODIFIED: &'static str = r#"**Run ID**:[LINK TO RUN](https://github.com/luftkode/distro-template/actions/runs

 **1 job failed:**
 - **`Test template xilinx`**

 ### `Test template xilinx` (ID
 **Step failed:** `📦 Build yocto image`
 \
 **Log:** https://github.com/luftkode/distro-template/actions/runsjob "#;

        let modified = remove_timestamps_and_ids(LOG_TEXT);
        assert_eq!(
            modified, EXPECTED_MODIFIED,
            "Expected: {EXPECTED_MODIFIED}\nGot: {modified}"
        );
    }
}
374}