#![cfg_attr(coverage_nightly, coverage(off))]
use anyhow::{anyhow, Result};
use regex::Regex;
use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, AUTHORIZATION, USER_AGENT};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::env;
use tracing::{debug, info, warn};
/// A GitHub issue as deserialized from the REST v3 issues endpoint.
///
/// Only the fields this module consumes are declared; extra fields in the
/// API response are ignored by serde's default behavior.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GitHubIssue {
    /// Issue number within the repository (the `123` in `#123`).
    pub number: u64,
    /// Issue title.
    pub title: String,
    /// Markdown body; `None` when the issue has no description.
    pub body: Option<String>,
    /// Issue state as reported by GitHub (e.g. "open" or "closed").
    pub state: String,
    /// Browser-facing URL of the issue.
    pub html_url: String,
    /// Creation timestamp string as returned by the API.
    pub created_at: String,
    /// Last-update timestamp string as returned by the API.
    pub updated_at: String,
    /// Labels attached to the issue.
    pub labels: Vec<Label>,
}
/// A label attached to a [`GitHubIssue`]; only the name is kept.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Label {
    /// Label text, e.g. "bug" or "enhancement".
    pub name: String,
}
/// Result of analyzing a [`GitHubIssue`]: the raw issue plus signals
/// extracted from its title and body by [`parse_issue`].
#[derive(Debug, Clone)]
pub struct ParsedIssue {
    /// The original issue as fetched from the API.
    pub issue: GitHubIssue,
    /// Candidate file paths mentioned in the issue text, sorted and deduplicated.
    pub file_paths: Vec<String>,
    /// Category name -> normalized weight derived from keyword matches.
    pub keywords: HashMap<String, f32>,
    /// Short human-readable summary (title plus truncated first paragraph).
    pub summary: String,
}
/// Keyword heuristics as `(trigger words, category label, base weight)`.
///
/// A higher base weight lets a category dominate before weights are
/// normalized in `extract_keywords`. Matching is plain substring search on
/// lowercased text, so e.g. "bug" also matches inside "debug".
const KEYWORD_MAPPINGS: &[(&[&str], &str, f32)] = &[
    (
        &[
            "performance",
            "slow",
            "optimize",
            "speed",
            "latency",
            "throughput",
        ],
        "Performance",
        3.0,
    ),
    (
        &[
            "bug",
            "error",
            "fix",
            "crash",
            "panic",
            "broken",
            "incorrect",
        ],
        "Correctness",
        3.0,
    ),
    (
        &[
            "unreadable",
            "confusing",
            "cleanup",
            "refactor",
            "complex",
            "complicated",
            "simplify",
        ],
        "Complexity",
        2.5,
    ),
    (
        &[
            "security",
            "vulnerability",
            "exploit",
            "injection",
            "unsafe",
        ],
        "Security",
        4.0,
    ),
    (
        &["debt", "todo", "fixme", "hack", "workaround", "temporary"],
        "TechnicalDebt",
        2.0,
    ),
    (
        &["maintain", "maintenance", "coupling", "cohesion", "modular"],
        "Maintainability",
        2.0,
    ),
];
/// Thin wrapper around a preconfigured `reqwest::Client` for the GitHub
/// REST API. Construct via [`GitHubClient::new`].
pub struct GitHubClient {
    // Client built with default headers (Accept, User-Agent, optional
    // Authorization) and a 30-second request timeout.
    client: reqwest::Client,
    // Kept for reference; auth is already baked into the default headers.
    _token: Option<String>,
}
impl GitHubClient {
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub fn new() -> Result<Self> {
let token = env::var("GITHUB_TOKEN")
.ok()
.or_else(|| env::var("GH_TOKEN").ok());
if token.is_none() {
warn!("No GitHub token found. API rate limits will be restrictive.");
}
let mut headers = HeaderMap::new();
headers.insert(
ACCEPT,
HeaderValue::from_static("application/vnd.github.v3+json"),
);
headers.insert(USER_AGENT, HeaderValue::from_static("pmat/0.1"));
if let Some(ref token) = token {
let auth_value = format!("Bearer {token}");
headers.insert(AUTHORIZATION, HeaderValue::from_str(&auth_value)?);
}
let client = reqwest::Client::builder()
.default_headers(headers)
.timeout(std::time::Duration::from_secs(30))
.build()?;
Ok(Self {
client,
_token: token,
})
}
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub async fn fetch_issue(&self, url: &str) -> Result<GitHubIssue> {
let (owner, repo, issue_number) = Self::parse_issue_url(url)?;
info!(
"Fetching GitHub issue: {}/{} #{}",
owner, repo, issue_number
);
let api_url = format!("https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}");
let response = self.client.get(&api_url).send().await?;
if !response.status().is_success() {
let status = response.status();
let body = response.text().await.unwrap_or_default();
return Err(anyhow!("GitHub API error {status}: {body}"));
}
let issue: GitHubIssue = response.json().await?;
if issue.state == "closed" {
warn!("Issue #{} is closed", issue.number);
}
Ok(issue)
}
fn parse_issue_url(url: &str) -> Result<(String, String, u64)> {
let re = Regex::new(r"github\.com/([^/]+)/([^/]+)/issues/(\d+)")?;
let captures = re
.captures(url)
.ok_or_else(|| anyhow!("Invalid GitHub issue URL: {url}"))?;
let owner = captures[1].to_string();
let repo = captures[2].to_string();
let issue_number = captures[3].parse::<u64>()?;
Ok((owner, repo, issue_number))
}
}
/// Analyzes a fetched issue: extracts candidate file paths and weighted
/// keyword categories from the title and body, and builds a short summary.
#[must_use]
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub fn parse_issue(issue: GitHubIssue) -> ParsedIssue {
    // Scan title and body together; a missing body contributes nothing.
    // `as_deref` avoids allocating a throwaway empty String.
    let text = format!("{} {}", issue.title, issue.body.as_deref().unwrap_or(""));
    let file_paths = extract_file_paths(&text);
    let mut keywords = HashMap::new();
    extract_keywords(&text, &mut keywords);
    let summary = generate_summary(&issue);
    ParsedIssue {
        issue,
        file_paths,
        keywords,
        summary,
    }
}
fn extract_file_paths(text: &str) -> Vec<String> {
let mut paths = HashSet::new();
let patterns = vec![
Regex::new(r"`([a-zA-Z0-9_\-./]+\.[a-zA-Z0-9]+)`").expect("Invalid regex"),
Regex::new(r"\b(?:[a-zA-Z0-9_\-]+/)*[a-zA-Z0-9_\-]+\.[a-zA-Z0-9]+\b")
.expect("Invalid regex"),
Regex::new(r"\b[a-zA-Z0-9_]+(?:::[a-zA-Z0-9_]+)+\b").expect("Invalid regex"),
];
for pattern in patterns {
for capture in pattern.captures_iter(text) {
if let Some(path) = capture.get(1).or(capture.get(0)) {
let path_str = path.as_str();
if path_str.contains("::") {
let file_path = path_str.replace("::", "/");
paths.insert(format!("src/{file_path}.rs"));
paths.insert(format!("server/src/{file_path}.rs"));
} else {
paths.insert(path_str.to_string());
}
}
}
}
let mut sorted_paths: Vec<_> = paths.into_iter().collect();
sorted_paths.sort();
debug!("Extracted {} file paths from issue", sorted_paths.len());
sorted_paths
}
/// Scores keyword categories found in `text` and writes them into `keywords`.
///
/// Each trigger word found adds its category's base weight, boosted by 20%
/// per extra occurrence (multiplier capped at 2x); the accumulated category
/// score is capped at twice the base weight. Finally all weights are
/// normalized so the strongest category is exactly 1.0. Matching is
/// case-insensitive substring search, so "bug" also matches inside "debug".
fn extract_keywords(text: &str, keywords: &mut HashMap<String, f32>) {
    let text_lower = text.to_lowercase();
    for (keyword_list, category, weight) in KEYWORD_MAPPINGS {
        for keyword in *keyword_list {
            // Single scan: count occurrences and skip absent keywords
            // (the original `contains` pre-check scanned the text twice).
            let count = text_lower.matches(keyword).count();
            if count == 0 {
                continue;
            }
            // 20% boost per repeat mention, multiplier capped at 2x.
            let adjusted_weight = weight * (1.0 + (count as f32 - 1.0) * 0.2).min(2.0);
            let entry = keywords.entry((*category).to_string()).or_insert(0.0);
            // Cap the accumulated category score at twice the base weight.
            *entry = (*entry + adjusted_weight).min(weight * 2.0);
            debug!(
                "Found keyword '{}' in category {} (weight: {:.1})",
                keyword, category, adjusted_weight
            );
        }
    }
    // Normalize so the highest-scoring category has weight 1.0.
    if !keywords.is_empty() {
        let max_weight = keywords.values().fold(0.0f32, |a, &b| a.max(b));
        if max_weight > 0.0 {
            for weight in keywords.values_mut() {
                *weight /= max_weight;
            }
        }
    }
}
/// Builds a compact summary: the issue title, plus (when the body yields a
/// non-empty snippet) the body's first paragraph truncated to 200 characters.
fn generate_summary(issue: &GitHubIssue) -> String {
    match issue.body.as_deref() {
        Some(body) if !body.is_empty() => {
            // Keep only the first paragraph, truncated with an ellipsis.
            let first_paragraph = body.split("\n\n").next().unwrap_or(body);
            let snippet =
                crate::utils::string_truncate::truncate_with_ellipsis(first_paragraph, 200);
            if snippet.is_empty() {
                issue.title.clone()
            } else {
                format!("{}\n\n{}", issue.title, snippet)
            }
        }
        // No body (or an empty one): the title stands alone.
        _ => issue.title.clone(),
    }
}
#[cfg_attr(coverage_nightly, coverage(off))]
#[cfg(test)]
mod tests {
    use super::*;

    // Covers all three extraction shapes: backtick-quoted paths, Rust
    // module paths (expanded into src/... guesses), and bare paths.
    #[test]
    fn test_extract_file_paths() {
        let text = r#"
The issue is in `src/services/complexity.rs` and also affects
the module services::ast_rust::analyze. Additionally, check
server/src/handlers/mod.rs for related code.
"#;
        let paths = extract_file_paths(text);
        assert!(paths.contains(&"src/services/complexity.rs".to_string()));
        assert!(paths.contains(&"server/src/handlers/mod.rs".to_string()));
        assert!(paths.iter().any(|p| p.contains("services/ast_rust")));
    }

    // "performance" and "slow" hit the Performance category, "confusing"
    // hits Complexity; after normalization Performance should dominate.
    #[test]
    fn test_extract_keywords() {
        let text = "This function has terrible performance and is very slow.
It's also confusing and needs optimization.";
        let mut keywords = HashMap::new();
        extract_keywords(text, &mut keywords);
        assert!(keywords.contains_key("Performance"));
        assert!(keywords.contains_key("Complexity"));
        assert!(keywords["Performance"] > 0.5);
    }

    // A canonical browser URL should decompose into its three components.
    #[test]
    fn test_parse_issue_url() {
        let url = "https://github.com/owner/repo/issues/123";
        let (owner, repo, number) = GitHubClient::parse_issue_url(url).unwrap();
        assert_eq!(owner, "owner");
        assert_eq!(repo, "repo");
        assert_eq!(number, 123);
    }
}
#[cfg_attr(coverage_nightly, coverage(off))]
#[cfg(test)]
mod property_tests {
    use super::*;
    use proptest::prelude::*;

    // The previous properties were vacuous (`prop_assert!(true)` and a
    // tautological range check); these exercise real invariants instead.
    proptest! {
        /// `extract_file_paths` always returns a sorted, duplicate-free
        /// list, whatever text it is fed.
        #[test]
        fn extracted_paths_sorted_and_unique(input in ".{0,200}") {
            let paths = extract_file_paths(&input);
            let mut expected = paths.clone();
            expected.sort();
            expected.dedup();
            prop_assert_eq!(paths, expected);
        }

        /// After normalization every keyword weight lies in (0.0, 1.0].
        #[test]
        fn keyword_weights_normalized(input in ".{0,200}") {
            let mut keywords = HashMap::new();
            extract_keywords(&input, &mut keywords);
            for &w in keywords.values() {
                prop_assert!(w > 0.0 && w <= 1.0);
            }
        }
    }
}