//! Utility functions for parsing and working with GitHub CLI output, plus other general-purpose helpers.
use super::*;
/// Extract the first path-like substring from a string.
///
/// A match is a run of ASCII letters, digits, `-`, `_`, `.` and `/` that contains at least one `/`
/// followed by a final component made of ASCII letters, digits, `-`, `_` or `.`. The path does not
/// need a leading `/` or a file extension.
///
/// # Example
/// ```
/// # use ci_manager::util::first_path_from_str;
/// use std::path::PathBuf;
///
/// let haystack = r#"multi line
/// test string with/path/file.txt is
/// valid"#;
/// let path = first_path_from_str(haystack).unwrap();
/// assert_eq!(path, PathBuf::from("with/path/file.txt"));
///
/// // No path in string is an error
/// let haystack = "Random string with no path";
/// assert!(first_path_from_str(haystack).is_err());
///
/// // Path with no leading '/' and no file extension is OK
/// let haystack = "foo app/3-_2/t/3 bar";
/// let path = first_path_from_str(haystack).unwrap();
/// assert_eq!(path, PathBuf::from("app/3-_2/t/3"));
///
/// // More realistic example
/// let haystack = r#" ERROR: Logfile of failure stored in: /app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616"#;
/// let path = first_path_from_str(haystack).unwrap();
/// assert_eq!(
///     path,
///     PathBuf::from("/app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616")
/// );
/// ```
///
/// # Errors
/// Returns an error if the string contains nothing that matches the path pattern.
pub fn first_path_from_str(s: &str) -> Result<PathBuf> {
    static RE: Lazy<Regex> =
        Lazy::new(|| Regex::new(r"[a-zA-Z0-9-_.\/]+\/[a-zA-Z0-9-_.]+").unwrap());

    // Only the leftmost match is of interest; later path-like substrings are ignored.
    RE.find(s)
        .map(|first_match| PathBuf::from(first_match.as_str()))
        .context("No path found in string")
}
/// Collect the failed-job entries from the output of `gh run view`.
///
/// Every match of the pattern `X.*ID [0-9]*\)` is returned, in order of appearance: the text from
/// an `X` (presumably the failure marker printed by `gh` — verify against real output) up to and
/// including the closing parenthesis that follows `ID <digits>`.
///
/// Returns an empty vector when nothing matches.
pub fn take_lines_with_failed_jobs(output: String) -> Vec<String> {
    static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"X.*ID [0-9]*\)").unwrap());

    let mut failed_job_lines = Vec::new();
    for found in RE.find_iter(&output) {
        failed_job_lines.push(found.as_str().to_owned());
    }
    failed_job_lines
}
/// Pull the job ID out of each line of job information.
///
/// For every line, the digits following the first `ID ` are returned as a string. The output has
/// one entry per input line, in the same order.
///
/// # Panics
/// Panics if a line does not contain `ID ` at all.
pub fn id_from_job_lines(lines: &[String]) -> Vec<String> {
    static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"ID (?<JOB_ID>[0-9]*)").unwrap());

    let mut job_ids = Vec::with_capacity(lines.len());
    for line in lines {
        let captures = match RE.captures(line) {
            Some(captures) => captures,
            None => panic!("Expected a line with a Job ID, but no ID found in line: {line}"),
        };
        // The named group is not optional in the pattern, so it is present whenever there is a match.
        let job_id = captures.name("JOB_ID").expect("Expected a Job ID");
        job_ids.push(job_id.as_str().to_owned());
    }
    job_ids
}
/// Strip timestamps and numeric IDs from a text.
///
/// Two kinds of substrings are removed:
/// * timestamps of the form `YYYY-MM-DD HH:MM:SS`
/// * runs of 10 or 11 digits that have a non-letter character directly before and after them
///
/// Requiring non-letter neighbours makes it possible to remove IDs wedged between other symbols,
/// e.g. `/21442749267/`, while leaving commit SHAs alone. The price of that compromise is that the
/// neighbouring characters are removed together with the ID, and that an ID at the very start or
/// end of the text (with no neighbour on one side) is left in place.
///
/// The result is whatever `Regex::replace_all` returns: the input is borrowed when nothing
/// matches, and a new `String` is only built when something is removed.
///
/// # Example
/// ```
/// # use ci_manager::util::remove_timestamps_and_ids;
/// # use pretty_assertions::assert_eq;
/// let test_str = r"ID 21442749267 ";
/// let modified = remove_timestamps_and_ids(test_str);
/// assert_eq!(modified, "ID"); // Note that the space is removed
///
///
/// let test_str = r#"ID 21442749267
/// date: 2024-02-28 00:03:46
/// other text"#;
/// let modified = remove_timestamps_and_ids(test_str);
/// assert_eq!(modified, "IDdate: \nother text");
/// ```
pub fn remove_timestamps_and_ids(text: &str) -> borrow::Cow<str> {
    // Verbose mode (`(?x)`): whitespace and `#` comments inside the pattern are ignored.
    const PATTERN: &str = r"(?x)
        # Timestamps like YYYY-MM-DD HH:MM:SS
        ([0-9]{4}-[0-9]{2}-[0-9]{2}\x20[0-9]{2}:[0-9]{2}:[0-9]{2})
        |
        # IDs like 21442749267 but only if they are preceded and followed by non-letter characters
        (?:[^[a-zA-Z]])([0-9]{10,11})(?:[^[a-zA-Z]])
        ";
    static RE: Lazy<Regex> = Lazy::new(|| Regex::new(PATTERN).unwrap());

    RE.replace_all(text, "")
}
/// Parse a log and remove line-prefixed timestamps in the format `YYYY-MM-DDTHH:MM:SS.0000000Z` (ISO 8601).
///
/// Each timestamp is removed together with the single whitespace character that follows it. Any
/// run of `\r`/`\n` characters directly before the timestamp is captured and written back, so the
/// line endings of the log are preserved as they were.
///
/// Note that the pattern is not anchored to the start of a line: a timestamp in this exact format
/// followed by whitespace is also removed when it appears in the middle of a line.
///
/// # Example
/// ```
/// # use ci_manager::util::remove_timestamp_prefixes;
/// # use pretty_assertions::assert_eq;
/// let test_str = "2024-02-28T00:03:46.0000000Z [INFO] This is a log message";
/// let modified = remove_timestamp_prefixes(test_str);
/// assert_eq!(modified, "[INFO] This is a log message");
/// ```
/// ## Multiple lines
/// ```
/// # use ci_manager::util::remove_timestamp_prefixes;
/// # use pretty_assertions::assert_eq;
/// let test_str = "\
/// 2024-02-28T00:03:46.0000000Z [INFO] This is a log message
/// 2024-03-15T20:35:48.9824182Z [ERROR] This is another log message";
/// let modified = remove_timestamp_prefixes(test_str);
/// assert_eq!(modified, "\
/// [INFO] This is a log message
/// [ERROR] This is another log message");
/// ```
pub fn remove_timestamp_prefixes(log: &str) -> borrow::Cow<str> {
    // The first group matches 0 or more newline characters and is used as the replacement for the
    // whole match. This way the newlines are preserved (making it agnostic to the type of newline
    // used in the log).
    //
    // The decimal point before the fractional seconds is escaped so that it only matches a
    // literal `.` and not an arbitrary character.
    static RE: Lazy<Regex> = Lazy::new(|| {
        Regex::new(r"([\r\n]*)\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{7}Z\s").unwrap()
    });
    RE.replace_all(log, "$1")
}
/// Extract an absolute path from a string.
///
/// The first `/` in the string is taken to be the start of the path, and everything from there to
/// the end of the string is returned as the path.
///
/// # Example
/// ```
/// # use ci_manager::util::first_abs_path_from_str;
/// use std::path::PathBuf;
///
/// let test_str = r#" ERROR: Logfile of failure stored in: /app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616"#;
/// let path = first_abs_path_from_str(test_str).unwrap();
/// assert_eq!(
///     path,
///     PathBuf::from("/app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616")
/// );
/// ```
///
/// # Errors
/// Returns an error if the string contains no `/`.
pub fn first_abs_path_from_str(s: &str) -> Result<PathBuf> {
    s.find('/')
        // `find` yields the byte offset of the `/`, so slicing from there is always in bounds.
        .map(|slash_idx| PathBuf::from(&s[slash_idx..]))
        .context("Path not found, no '/' in string")
}
/// Make sure a URL starts with `https://`, modifying it in place.
///
/// * A URL that already starts with `https://` is left untouched.
/// * A URL that starts with `http://` has its scheme upgraded to `https://`.
/// * Any other URL gets `https://` prepended.
///
/// # Example
/// ```
/// # use ci_manager::util::ensure_https_prefix;
/// # use pretty_assertions::assert_eq;
/// // If the URL does not have the https prefix, it is added
/// let mut url = String::from("github.com/docker/buildx/issues");
/// ensure_https_prefix(&mut url);
/// assert_eq!(url, "https://github.com/docker/buildx/issues");
///
/// // If the URL already has the https prefix, it is not modified
/// let mut url = String::from("https://gitlab.com/foo-org/foo-repo");
/// ensure_https_prefix(&mut url);
/// assert_eq!(url, String::from("https://gitlab.com/foo-org/foo-repo"));
///
/// // A plain http prefix is upgraded instead of being prefixed a second time
/// let mut url = String::from("http://gitlab.com/foo-org/foo-repo");
/// ensure_https_prefix(&mut url);
/// assert_eq!(url, "https://gitlab.com/foo-org/foo-repo");
/// ```
pub fn ensure_https_prefix(url: &mut String) {
    const HTTPS: &str = "https://";
    const HTTP: &str = "http://";

    if url.starts_with(HTTPS) {
        return;
    }
    if url.starts_with(HTTP) {
        // Swap the scheme in place; blindly prepending would yield `https://http://...`.
        url.replace_range(..HTTP.len(), HTTPS);
    } else {
        url.insert_str(0, HTTPS);
    }
}
/// Canonicalize a repository URL to the form `https://{host}/{repo}`
///
/// Only a *leading* `https://` or `{host}/` is treated as a prefix to be normalized; the same text
/// appearing later in `repo` is left untouched.
///
/// # Arguments
/// * `repo` - The repository URL e.g. `user1/user1-repo`
/// * `host` - The host for the repository e.g. `github.com`. If it contains no `.`, `.com` is
///   appended (`github` becomes `github.com`).
///
/// # Returns
/// The canonical URL as a new `String`.
///
/// # Example
/// ```
/// # use ci_manager::util::canonicalize_repo_url;
/// let repo = "bob/bobbys-repo";
/// let canonicalized = canonicalize_repo_url(repo, "github");
/// assert_eq!(canonicalized, "https://github.com/bob/bobbys-repo");
///
/// // If the host is already in the URL, only the protocol is added
/// let repo = "github.com/lisa/lisas-repo";
/// let canonicalized = canonicalize_repo_url(repo, "github.com");
/// assert_eq!(canonicalized, "https://github.com/lisa/lisas-repo");
///
/// // If the URL is already in the canonical form, it is returned as is
/// let repo = "https://gitlab.com/foo-org/foo-repo";
/// let canonicalized = canonicalize_repo_url(repo, "gitlab.com");
/// assert_eq!(canonicalized, repo);
/// ```
pub fn canonicalize_repo_url(repo: &str, host: &str) -> String {
    // Check if the host argument has a top-level domain and add `.com` if it doesn't
    let host = if host.contains('.') {
        host.to_owned()
    } else {
        format!("{host}.com")
    };
    let canonical_prefix = format!("https://{host}/");

    if repo.starts_with(&canonical_prefix) {
        return repo.to_owned();
    }

    // Strip at most one leading prefix. `str::replace` would rewrite every occurrence, which
    // corrupts URLs where the same text shows up again further into the path.
    let host_prefix = format!("{host}/");
    let path = repo
        .strip_prefix("https://")
        .or_else(|| repo.strip_prefix(host_prefix.as_str()))
        .unwrap_or(repo);

    format!("{canonical_prefix}{path}")
}
/// Split a repository URL/identifier into its owner and repository name.
///
/// The last two `/`-separated fragments of the input are used: the final one is the repository
/// and the one before it is the owner. They are returned as `(owner, repo)`.
///
/// # Example
/// ```
/// # use pretty_assertions::assert_eq;
/// # use ci_manager::util::repo_to_owner_repo_fragments;
/// let repo_url = "github.com/luftkode/distro-template";
/// let (owner, repo) = repo_to_owner_repo_fragments(repo_url).unwrap();
/// assert_eq!((owner.as_str(), repo.as_str()), ("luftkode", "distro-template"));
///
/// let repo_url = "luftkode/bifrost-app";
/// let (owner, repo) = repo_to_owner_repo_fragments(repo_url).unwrap();
/// assert_eq!((owner.as_str(), repo.as_str()), ("luftkode", "bifrost-app"));
/// ```
///
/// # Errors
/// Returns an error if the input has fewer than two `/`-separated fragments, or if either of the
/// last two fragments is empty or contains a space or a dot.
///
/// ```
/// # use ci_manager::util::repo_to_owner_repo_fragments;
/// let repo_url = "github.com/luftkode";
/// let result = repo_to_owner_repo_fragments(repo_url);
/// assert!(result.is_err());
/// ```
pub fn repo_to_owner_repo_fragments(repo_url: &str) -> Result<(String, String)> {
    // A fragment is usable when it is non-empty and free of spaces and dots.
    let is_usable = |fragment: &str| {
        !(fragment.is_empty() || fragment.contains(' ') || fragment.contains('.'))
    };

    // Walk the fragments back to front: the repo comes first, then the owner.
    let mut fragments = repo_url.rsplit('/');
    let repo = fragments.next();
    let owner = fragments.next();

    if let (Some(repo), Some(owner)) = (repo, owner) {
        if is_usable(repo) && is_usable(owner) {
            return Ok((owner.to_owned(), repo.to_owned()));
        }
    }
    bail!("Could not parse owner and repo from URL: {repo_url}");
}
/// Find the smallest Levenshtein distance between an issue body and a set of other issue bodies.
///
/// Timestamps and IDs are stripped from every body (via `remove_timestamps_and_ids`) before the
/// distances are computed.
///
/// Returns `usize::MAX` when `other_issues` is empty.
pub fn issue_text_similarity(issue_body: &str, other_issues: &[String]) -> usize {
    let normalized_body = remove_timestamps_and_ids(issue_body);

    let mut smallest = usize::MAX;
    for other_issue_body in other_issues {
        let normalized_other = remove_timestamps_and_ids(other_issue_body);
        let current = distance::levenshtein(&normalized_body, &normalized_other);
        smallest = smallest.min(current);
    }
    smallest
}
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// The path starts at the first `/` and runs to the end of the string.
    #[test]
    fn test_absolute_path_from_str() {
        let test_str = r#" ERROR: Logfile of failure stored in: /app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616"#;
        let path = first_abs_path_from_str(test_str).unwrap();
        assert_eq!(
            path,
            PathBuf::from("/app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616")
        );
    }

    /// A bare `owner/repo` gets the protocol and host prepended.
    #[test]
    fn test_canonicalize_repo_url() {
        let repo = "luftkode/distro-template";
        let canonicalized = canonicalize_repo_url(repo, "github.com");
        assert_eq!(canonicalized, "https://github.com/luftkode/distro-template");
    }

    /// A 10-digit ID is removed together with the characters on either side of it.
    #[test]
    fn test_remove_timestamps_and_ids() {
        let test_str = "ID 8072883145 ";
        let modified = remove_timestamps_and_ids(test_str);
        assert_eq!(modified, "ID");
    }

    /// IDs embedded in a realistic issue body are removed, including the neighbouring characters.
    #[test]
    fn test_remove_timestamps_and_ids_log_text() {
        // The fixtures are assembled line by line with explicit newlines, so the whitespace that
        // the regex sees never depends on how this file happens to be indented.
        const LOG_TEXT: &str = concat!(
            "**Run ID**: 8072883145 [LINK TO RUN](https://github.com/luftkode/distro-template/actions/runs/8072883145)\n",
            "**1 job failed:**\n",
            "- **`Test template xilinx`**\n",
            "### `Test template xilinx` (ID 22055505284)\n",
            "**Step failed:** `📦 Build yocto image`\n",
            "\\\n",
            "**Log:** https://github.com/luftkode/distro-template/actions/runs/8072883145/job/22055505284\n",
        );
        // The final newline is consumed as the character following the last ID.
        const EXPECTED_MODIFIED: &str = concat!(
            "**Run ID**:[LINK TO RUN](https://github.com/luftkode/distro-template/actions/runs\n",
            "**1 job failed:**\n",
            "- **`Test template xilinx`**\n",
            "### `Test template xilinx` (ID\n",
            "**Step failed:** `📦 Build yocto image`\n",
            "\\\n",
            "**Log:** https://github.com/luftkode/distro-template/actions/runsjob",
        );

        let modified = remove_timestamps_and_ids(LOG_TEXT);
        assert_eq!(
            modified, EXPECTED_MODIFIED,
            "Expected: {EXPECTED_MODIFIED}\nGot: {modified}"
        );
    }
}