sampo_core/
enrichment.rs

1//! Enrichment module for changeset messages with commit information and author acknowledgments.
2//!
3//! Enriches changeset messages with commit links and author thanks using a fallback strategy:
4//! GitHub API (with token) → GitHub public API → Git author name.
5//!
6//! Repository detection: config override → GITHUB_REPOSITORY env → git remote origin.
7
8use serde::Deserialize;
9use std::path::Path;
10use std::process::Command;
11
12const USER_AGENT: &str = concat!("sampo/", env!("CARGO_PKG_VERSION"));
13
/// Identity of a single Git commit as read from the local repository.
///
/// Implements `PartialEq`/`Eq` so values can be compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommitInfo {
    /// Full commit hash.
    pub sha: String,
    /// Abbreviated commit hash.
    pub short_sha: String,
    /// Author name recorded in the commit.
    pub author_name: String,
}
20
/// GitHub account associated with a commit.
///
/// Implements `PartialEq`/`Eq` so values can be compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitHubUserInfo {
    /// GitHub username (login)
    pub login: String,
    /// Whether this appears to be the user's first contribution to the repository
    pub is_first_contribution: bool,
}
28
29/// GitHub API response structures
30#[derive(Deserialize)]
31struct CommitAuthor {
32    login: String,
33}
34
35#[derive(Deserialize)]
36struct CommitApiResponse {
37    author: Option<CommitAuthor>,
38}
39
40#[derive(Deserialize)]
41struct Contributor {
42    login: Option<String>,
43    contributions: u64,
44}
45
/// Returns the hash of the most recent commit that touched `file_path`.
///
/// Runs `git log -1 --format=%H -- <file_path>` with `repo_root` as the working directory.
/// The path is handed to Git as a raw OS string, so non-UTF-8 paths are passed through
/// unmodified instead of being lossily re-encoded.
///
/// Returns `None` when Git cannot be spawned, exits unsuccessfully, or prints nothing.
pub fn get_commit_hash_for_path(repo_root: &Path, file_path: &Path) -> Option<String> {
    let output = Command::new("git")
        .current_dir(repo_root)
        .args(["log", "-1", "--format=%H", "--"])
        .arg(file_path)
        .output()
        .ok()?;

    if !output.status.success() {
        return None;
    }

    let hash = String::from_utf8_lossy(&output.stdout).trim().to_string();
    (!hash.is_empty()).then_some(hash)
}
67
68/// Detect GitHub repository slug from Git remote
69pub fn detect_github_repo_slug(repo_root: &Path) -> Option<String> {
70    detect_github_repo_slug_with_config(repo_root, None)
71}
72
73/// Detect GitHub repository slug with optional config override
74pub fn detect_github_repo_slug_with_config(
75    repo_root: &Path,
76    config_repo: Option<&str>,
77) -> Option<String> {
78    // 1. If explicitly configured, use that
79    if let Some(repo) = config_repo {
80        return Some(repo.to_string());
81    }
82
83    // 2. Try GITHUB_REPOSITORY environment variable (useful in GitHub Actions)
84    if let Ok(github_repo) = std::env::var("GITHUB_REPOSITORY")
85        && !github_repo.is_empty()
86    {
87        return Some(github_repo);
88    }
89
90    // 3. Try to extract from git remote
91    let output = Command::new("git")
92        .current_dir(repo_root)
93        .args(["remote", "get-url", "origin"])
94        .output()
95        .ok()?;
96
97    if !output.status.success() {
98        return None;
99    }
100
101    let binding = String::from_utf8_lossy(&output.stdout);
102    let url = binding.trim();
103
104    // Parse GitHub URLs (both HTTPS and SSH)
105    parse_github_url(url)
106}
107
/// Parse a GitHub repository slug (`owner/repo`) from a remote URL.
///
/// Recognized forms:
/// - HTTPS: `https://github.com/owner/repo(.git)`
/// - SCP-like SSH: `git@github.com:owner/repo(.git)`
/// - SSH URL: `ssh://git@github.com/owner/repo(.git)`
///
/// Trailing slashes and a trailing `.git` are removed. The remainder must consist of at least
/// two `/`-separated segments, none of them empty; otherwise `None` is returned. Additional
/// path segments after `owner/repo` are kept as-is.
fn parse_github_url(url: &str) -> Option<String> {
    const PREFIXES: [&str; 3] = [
        "https://github.com/",
        "git@github.com:",
        "ssh://git@github.com/",
    ];

    let rest = PREFIXES
        .iter()
        .find_map(|prefix| url.strip_prefix(*prefix))?;

    // Normalize before validating so `owner/repo/` and `owner/repo.git/` resolve cleanly.
    let rest = rest.trim_end_matches('/');
    let slug = rest.strip_suffix(".git").unwrap_or(rest);

    // Reject empty segments (`owner/`, `/repo`, `owner//repo`): they cannot name a repository
    // and would produce malformed API URLs downstream.
    let mut segment_count = 0usize;
    for segment in slug.split('/') {
        if segment.is_empty() {
            return None;
        }
        segment_count += 1;
    }

    (segment_count >= 2).then(|| slug.to_string())
}
128
129/// Enrich a changeset message with commit information and author acknowledgments
130pub fn enrich_changeset_message(
131    message: &str,
132    commit_hash: &str,
133    workspace: &Path,
134    repo_slug: Option<&str>,
135    github_token: Option<&str>,
136    show_commit_hash: bool,
137    show_acknowledgments: bool,
138) -> String {
139    // Create a tokio runtime for this blocking call
140    let rt = tokio::runtime::Builder::new_current_thread()
141        .enable_all()
142        .build()
143        .unwrap();
144    rt.block_on(enrich_changeset_message_async(
145        message,
146        commit_hash,
147        workspace,
148        repo_slug,
149        github_token,
150        show_commit_hash,
151        show_acknowledgments,
152    ))
153}
154
155/// Async version of enrich_changeset_message for internal use
156async fn enrich_changeset_message_async(
157    message: &str,
158    commit_hash: &str,
159    workspace: &Path,
160    repo_slug: Option<&str>,
161    github_token: Option<&str>,
162    show_commit_hash: bool,
163    show_acknowledgments: bool,
164) -> String {
165    let commit = get_commit_info_for_hash(workspace, commit_hash);
166
167    let commit_prefix = if show_commit_hash {
168        build_commit_prefix(&commit, repo_slug)
169    } else {
170        String::new()
171    };
172
173    let acknowledgment_suffix = if show_acknowledgments {
174        build_acknowledgment_suffix(&commit, repo_slug, github_token).await
175    } else {
176        String::new()
177    };
178
179    crate::markdown::compose_markdown_with_affixes(message, &commit_prefix, &acknowledgment_suffix)
180}
181
182/// Get commit information for a specific commit hash
183fn get_commit_info_for_hash(repo_root: &Path, commit_hash: &str) -> Option<CommitInfo> {
184    // Use \x1f (Unit Separator) to avoid conflicts with user content
185    let format_arg = "--format=%H\x1f%h\x1f%an";
186    let output = Command::new("git")
187        .current_dir(repo_root)
188        .args(["show", "--no-patch", format_arg, commit_hash])
189        .output()
190        .ok()?;
191
192    if !output.status.success() {
193        return None;
194    }
195
196    let stdout = String::from_utf8_lossy(&output.stdout);
197    let parts: Vec<&str> = stdout.trim().split('\x1f').collect();
198    if parts.len() != 3 {
199        return None;
200    }
201
202    Some(CommitInfo {
203        sha: parts[0].to_string(),
204        short_sha: parts[1].to_string(),
205        author_name: parts[2].to_string(),
206    })
207}
208
209/// Build commit prefix for enhanced messages
210fn build_commit_prefix(commit: &Option<CommitInfo>, repo_slug: Option<&str>) -> String {
211    if let Some(commit) = commit {
212        if let Some(slug) = repo_slug {
213            format!(
214                "[{}](https://github.com/{}/commit/{}) ",
215                commit.short_sha, slug, commit.sha
216            )
217        } else {
218            format!("{} ", commit.short_sha)
219        }
220    } else {
221        String::new()
222    }
223}
224
225/// Build acknowledgment suffix for enhanced messages
226async fn build_acknowledgment_suffix(
227    commit: &Option<CommitInfo>,
228    repo_slug: Option<&str>,
229    github_token: Option<&str>,
230) -> String {
231    let Some(commit) = commit else {
232        return String::new();
233    };
234
235    // If we have both GitHub repo and token, try to get GitHub user info with first contribution detection
236    if let (Some(slug), Some(token)) = (repo_slug, github_token)
237        && let Some(github_user) = get_github_user_for_commit(slug, &commit.sha, token).await
238    {
239        return if github_user.is_first_contribution {
240            format!(
241                " — Thanks @{} for your first contribution 🎉!",
242                github_user.login
243            )
244        } else {
245            format!(" — Thanks @{}!", github_user.login)
246        };
247    }
248
249    // If we have repo_slug but no token, we can still try to get the GitHub user from commit API
250    // (public commits are accessible without auth for public repos)
251    if let Some(slug) = repo_slug
252        && let Some(github_user) = get_github_user_for_commit_public(slug, &commit.sha).await
253    {
254        return format!(" — Thanks @{}!", github_user.login);
255    }
256
257    // Fallback to just the Git author name
258    format!(" — Thanks {}!", commit.author_name)
259}
260
261/// Get GitHub user information for a commit
262async fn get_github_user_for_commit(
263    repo_slug: &str,
264    commit_sha: &str,
265    token: &str,
266) -> Option<GitHubUserInfo> {
267    let commit_url = format!(
268        "https://api.github.com/repos/{}/commits/{}",
269        repo_slug, commit_sha
270    );
271
272    let commit_json = github_api_get(&commit_url, token).await?;
273    let commit: CommitApiResponse = serde_json::from_str(&commit_json).ok()?;
274    let login = commit.author?.login;
275
276    // Check if first contribution when we have a token
277    let is_first_contribution = check_first_contribution(repo_slug, &login, token).await;
278
279    Some(GitHubUserInfo {
280        login,
281        is_first_contribution,
282    })
283}
284
285/// Get GitHub user information for a commit from public API (no token required)
286async fn get_github_user_for_commit_public(
287    repo_slug: &str,
288    commit_sha: &str,
289) -> Option<GitHubUserInfo> {
290    let commit_url = format!(
291        "https://api.github.com/repos/{}/commits/{}",
292        repo_slug, commit_sha
293    );
294
295    let commit_json = github_api_get_public(&commit_url).await?;
296    let commit: CommitApiResponse = serde_json::from_str(&commit_json).ok()?;
297    let login = commit.author?.login;
298
299    Some(GitHubUserInfo {
300        login,
301        is_first_contribution: false, // Cannot detect without token
302    })
303}
304
305/// Check if a user is making their first contribution to a repository
306async fn check_first_contribution(repo_slug: &str, login: &str, token: &str) -> bool {
307    const PER_PAGE: u32 = 100;
308    const MAX_PAGES: u32 = 20; // Safety bound to avoid excessive paging
309
310    for page in 1..=MAX_PAGES {
311        let contributors_url = format!(
312            "https://api.github.com/repos/{}/contributors?per_page={}&page={}&anon=true",
313            repo_slug, PER_PAGE, page
314        );
315
316        let Some(body) = github_api_get(&contributors_url, token).await else {
317            break;
318        };
319
320        let Ok(contributors): Result<Vec<Contributor>, _> = serde_json::from_str(&body) else {
321            break;
322        };
323
324        if contributors.is_empty() {
325            break;
326        }
327
328        if let Some(contributor) = contributors
329            .into_iter()
330            .find(|c| c.login.as_deref() == Some(login))
331        {
332            return contributor.contributions == 1;
333        }
334    }
335
336    // If we can't find the user in contributors, assume it's not their first contribution
337    // This is a conservative approach for cases where the API might have issues
338    false
339}
340
341/// Perform a GET request to GitHub API and return the response body as String
342///
343/// Uses reqwest to make HTTP requests to the GitHub API with proper authentication
344/// and headers. Returns None if the request fails or returns empty content.
345async fn github_api_get(url: &str, token: &str) -> Option<String> {
346    let client = reqwest::Client::new();
347
348    let response = client
349        .get(url)
350        .header("Authorization", format!("Bearer {}", token))
351        .header("Accept", "application/vnd.github+json")
352        .header("X-GitHub-Api-Version", "2022-11-28")
353        .header("User-Agent", USER_AGENT)
354        .timeout(std::time::Duration::from_secs(30))
355        .send()
356        .await
357        .ok()?;
358
359    if !response.status().is_success() {
360        return None;
361    }
362
363    let body = response.text().await.ok()?;
364    if body.trim().is_empty() {
365        None
366    } else {
367        Some(body)
368    }
369}
370
371/// Perform a GET request to GitHub API without authentication (for public repos)
372///
373/// Similar to github_api_get but without authorization header.
374/// Only works with public repositories and endpoints.
375async fn github_api_get_public(url: &str) -> Option<String> {
376    let client = reqwest::Client::new();
377
378    let response = client
379        .get(url)
380        .header("Accept", "application/vnd.github+json")
381        .header("X-GitHub-Api-Version", "2022-11-28")
382        .header("User-Agent", USER_AGENT)
383        .timeout(std::time::Duration::from_secs(30))
384        .send()
385        .await
386        .ok()?;
387
388    if !response.status().is_success() {
389        return None;
390    }
391
392    let body = response.text().await.ok()?;
393    if body.trim().is_empty() {
394        None
395    } else {
396        Some(body)
397    }
398}
399
#[cfg(test)]
mod tests {
    use super::*;

    /// Commit fixture shared by the formatting tests.
    fn sample_commit(author_name: &str) -> Option<CommitInfo> {
        Some(CommitInfo {
            sha: "abcd1234".to_string(),
            short_sha: "abcd".to_string(),
            author_name: author_name.to_string(),
        })
    }

    /// Runs `git <args>` in `dir` and fails the test right away if the command does not
    /// succeed, so a broken fixture is reported at its source rather than by a later assert.
    fn run_git(dir: &std::path::Path, args: &[&str]) {
        let output = std::process::Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .expect("failed to spawn git");
        assert!(
            output.status.success(),
            "git {:?} failed: {}",
            args,
            String::from_utf8_lossy(&output.stderr)
        );
    }

    #[test]
    fn parse_github_url_https() {
        assert_eq!(
            parse_github_url("https://github.com/owner/repo.git"),
            Some("owner/repo".to_string())
        );
        assert_eq!(
            parse_github_url("https://github.com/owner/repo"),
            Some("owner/repo".to_string())
        );
    }

    #[test]
    fn parse_github_url_ssh() {
        assert_eq!(
            parse_github_url("git@github.com:owner/repo.git"),
            Some("owner/repo".to_string())
        );
    }

    #[test]
    fn parse_github_url_invalid() {
        assert_eq!(parse_github_url("https://gitlab.com/owner/repo.git"), None);
        assert_eq!(parse_github_url("not-a-url"), None);
    }

    #[test]
    fn build_commit_prefix_with_repo() {
        let commit = sample_commit("Author");

        let prefix = build_commit_prefix(&commit, Some("owner/repo"));
        assert_eq!(
            prefix,
            "[abcd](https://github.com/owner/repo/commit/abcd1234) "
        );
    }

    #[test]
    fn build_commit_prefix_without_repo() {
        let commit = sample_commit("Author");

        let prefix = build_commit_prefix(&commit, None);
        assert_eq!(prefix, "abcd ");
    }

    // Placeholder: composing the final message is exercised by the markdown module's tests.
    #[test]
    fn format_enriched_message_complete_is_covered_in_markdown_tests() {}

    #[test]
    fn enrich_changeset_message_integration() {
        use std::fs;
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let repo_path = temp_dir.path();

        // Initialize a git repo with a known author identity
        run_git(repo_path, &["init"]);
        run_git(repo_path, &["config", "user.name", "Test User"]);
        run_git(repo_path, &["config", "user.email", "test@example.com"]);

        // Create a test file and commit it
        let test_file = repo_path.join("test.md");
        fs::write(&test_file, "initial content").unwrap();
        run_git(repo_path, &["add", "test.md"]);
        // Disable signing so a developer's global `commit.gpgsign` cannot break the fixture.
        run_git(
            repo_path,
            &["-c", "commit.gpgsign=false", "commit", "-m", "initial commit"],
        );

        // Get the commit hash
        let commit_hash = get_commit_hash_for_path(repo_path, &test_file)
            .expect("Should find commit hash for test file");

        // Test enrichment with all features enabled
        let enriched = enrich_changeset_message(
            "fix: resolve critical bug",
            &commit_hash,
            repo_path,
            Some("owner/repo"),
            None, // no GitHub token for this test
            true, // show commit hash
            true, // show acknowledgments
        );

        // Should contain the commit link and author thanks
        assert!(
            enriched.contains(&format!(
                "https://github.com/owner/repo/commit/{}",
                commit_hash
            )),
            "Should link to the full commit hash"
        );
        assert!(
            enriched.contains("Thanks Test User!"),
            "Should contain author thanks"
        );
        assert!(
            enriched.contains("fix: resolve critical bug"),
            "Should contain original message"
        );

        // Test with features disabled
        let plain = enrich_changeset_message(
            "fix: resolve critical bug",
            &commit_hash,
            repo_path,
            Some("owner/repo"),
            None,
            false, // no commit hash
            false, // no acknowledgments
        );

        assert_eq!(
            plain, "fix: resolve critical bug",
            "Should be unchanged when features disabled"
        );
    }

    #[tokio::test]
    async fn test_github_api_get_with_invalid_token() {
        // Test with invalid token should return None (graceful failure)
        let result = github_api_get(
            "https://api.github.com/repos/bruits/sampo/commits/invalid",
            "invalid_token",
        )
        .await;
        assert!(result.is_none(), "Should return None for invalid requests");
    }

    #[test]
    fn test_parse_github_url_edge_cases() {
        // Test edge cases for GitHub URL parsing
        assert_eq!(parse_github_url(""), None);
        assert_eq!(parse_github_url("https://github.com/"), None);
        assert_eq!(parse_github_url("git@github.com:"), None);
        assert_eq!(parse_github_url("https://github.com/user"), None); // Missing repo
        assert_eq!(
            parse_github_url("https://github.com/user/repo/extra/path"),
            Some("user/repo/extra/path".to_string())
        );
    }

    #[tokio::test]
    async fn test_check_first_contribution_no_token() {
        // Test check_first_contribution without valid token
        let result = check_first_contribution("bruits/sampo", "testuser", "invalid_token").await;
        // Should return false (conservative default) when API calls fail
        assert!(!result, "Should return false when API calls fail");
    }

    #[tokio::test]
    async fn test_build_acknowledgment_suffix_fallback() {
        // Test that acknowledgment falls back to Git author when GitHub API fails
        let commit = sample_commit("Local Developer");

        // Test without GitHub repo/token (should use Git author)
        let result = build_acknowledgment_suffix(&commit, None, None).await;
        assert_eq!(result, " — Thanks Local Developer!");

        // Test with empty commit
        let result = build_acknowledgment_suffix(&None, Some("owner/repo"), Some("token")).await;
        assert_eq!(result, "");
    }

    #[test]
    fn test_detect_github_repo_slug_with_config_override() {
        // Test that explicit config overrides git remote detection
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let repo_path = temp_dir.path();

        // Even without git repo, explicit config should work
        let result = detect_github_repo_slug_with_config(repo_path, Some("explicit/repo"));
        assert_eq!(result, Some("explicit/repo".to_string()));

        // Without a config value the outcome depends on the environment the tests run in.
        let result = detect_github_repo_slug_with_config(repo_path, None);
        let env_slug = std::env::var("GITHUB_REPOSITORY")
            .ok()
            .filter(|value| !value.is_empty());
        match env_slug {
            // In CI the environment variable must be returned as-is.
            Some(expected) => assert_eq!(result, Some(expected)),
            // Otherwise only a parsed remote URL can produce a value, and that always
            // contains an owner/repo separator.
            None => assert!(
                result.map_or(true, |slug| slug.contains('/')),
                "Any detected slug should look like owner/repo"
            ),
        }
    }

    #[tokio::test]
    async fn test_get_github_user_for_commit_public() {
        // Test public API access (should fail gracefully with invalid repo)
        let result = get_github_user_for_commit_public("invalid/repo", "invalid_sha").await;
        assert!(result.is_none(), "Should return None for invalid requests");
    }

    #[tokio::test]
    async fn test_github_api_get_public_with_invalid_url() {
        // Test public API with invalid URL should return None
        let result = github_api_get_public("https://api.github.com/invalid/endpoint").await;
        assert!(result.is_none(), "Should return None for invalid requests");
    }

    #[tokio::test]
    async fn test_build_acknowledgment_suffix_with_public_repo() {
        let commit = sample_commit("Test Author");

        // Test with repo_slug but no token (should try public API, fall back to Git author)
        let result = build_acknowledgment_suffix(&commit, Some("invalid/repo"), None).await;
        assert_eq!(result, " — Thanks Test Author!");

        // Test with neither repo nor token
        let result = build_acknowledgment_suffix(&commit, None, None).await;
        assert_eq!(result, " — Thanks Test Author!");
    }

    #[tokio::test]
    async fn test_reqwest_timeout_behavior() {
        // Test that unreachable hosts are reported as `None` rather than as an error.
        // The address is non-routable, so the call ends either with an immediate network
        // error or when the 30-second request timeout set by the helper elapses.
        let result = github_api_get_public("http://10.255.255.1/timeout-test").await;
        assert!(
            result.is_none(),
            "Should return None for timeout/unreachable requests"
        );
    }
}