sampo_core/
enrichment.rs

1//! Enrichment module for changeset messages with commit information and author acknowledgments.
2//!
3//! Enriches changeset messages with commit links and author thanks using a fallback strategy:
4//! GitHub API (with token) → GitHub public API → Git author name.
5//!
6//! Repository detection: config override → GITHUB_REPOSITORY env → git remote origin.
7
8use serde::Deserialize;
9use std::path::Path;
10use std::process::Command;
11
/// Identity of a single Git commit as used by the enrichment helpers.
#[derive(Clone, Debug)]
pub struct CommitInfo {
    /// Full commit hash.
    pub sha: String,
    /// Abbreviated commit hash, used as the visible text of commit links.
    pub short_sha: String,
    /// Author name recorded in the commit; the last-resort name to thank.
    pub author_name: String,
}
18
/// GitHub account resolved for a commit author.
#[derive(Clone, Debug)]
pub struct GitHubUserInfo {
    /// GitHub username (login), without the leading `@`.
    pub login: String,
    /// `true` when this looks like the user's first contribution to the repository.
    pub is_first_contribution: bool,
}
26
27/// GitHub API response structures
28#[derive(Deserialize)]
29struct CommitAuthor {
30    login: String,
31}
32
33#[derive(Deserialize)]
34struct CommitApiResponse {
35    author: Option<CommitAuthor>,
36}
37
38#[derive(Deserialize)]
39struct Contributor {
40    login: Option<String>,
41    contributions: u64,
42}
43
/// Get the hash of the most recent commit that touched `file_path`.
///
/// Runs `git log -1 --format=%H -- <file_path>` with `repo_root` as the working
/// directory. The path is handed to Git as an OS string, so paths that are not
/// valid UTF-8 are passed through unmodified.
///
/// Returns `None` when Git cannot be spawned, exits with a failure status, or
/// prints no hash.
pub fn get_commit_hash_for_path(repo_root: &Path, file_path: &Path) -> Option<String> {
    let output = Command::new("git")
        .current_dir(repo_root)
        .args(["log", "-1", "--format=%H", "--"])
        // Pass the path as an `OsStr`: a lossy UTF-8 conversion would replace
        // invalid bytes and make Git look up a different path.
        .arg(file_path)
        .output()
        .ok()?;

    if !output.status.success() {
        return None;
    }

    let hash = String::from_utf8_lossy(&output.stdout).trim().to_string();
    (!hash.is_empty()).then_some(hash)
}
65
66/// Detect GitHub repository slug from Git remote
67pub fn detect_github_repo_slug(repo_root: &Path) -> Option<String> {
68    detect_github_repo_slug_with_config(repo_root, None)
69}
70
71/// Detect GitHub repository slug with optional config override
72pub fn detect_github_repo_slug_with_config(
73    repo_root: &Path,
74    config_repo: Option<&str>,
75) -> Option<String> {
76    // 1. If explicitly configured, use that
77    if let Some(repo) = config_repo {
78        return Some(repo.to_string());
79    }
80
81    // 2. Try GITHUB_REPOSITORY environment variable (useful in GitHub Actions)
82    if let Ok(github_repo) = std::env::var("GITHUB_REPOSITORY")
83        && !github_repo.is_empty()
84    {
85        return Some(github_repo);
86    }
87
88    // 3. Try to extract from git remote
89    let output = Command::new("git")
90        .current_dir(repo_root)
91        .args(["remote", "get-url", "origin"])
92        .output()
93        .ok()?;
94
95    if !output.status.success() {
96        return None;
97    }
98
99    let binding = String::from_utf8_lossy(&output.stdout);
100    let url = binding.trim();
101
102    // Parse GitHub URLs (both HTTPS and SSH)
103    parse_github_url(url)
104}
105
/// Parse a GitHub repository slug (`owner/repo`) from a remote URL.
///
/// Supported forms:
/// - HTTPS: `https://github.com/owner/repo.git` or `https://github.com/owner/repo`
/// - SSH (scp-like): `git@github.com:owner/repo.git`
///
/// A trailing `/` and a trailing `.git` are ignored. Anything that follows the
/// `owner/repo` pair is kept as-is in the returned string. Returns `None` for
/// URLs on other hosts and when the owner or repository name is missing or empty.
fn parse_github_url(url: &str) -> Option<String> {
    const PREFIXES: [&str; 2] = ["https://github.com/", "git@github.com:"];

    PREFIXES
        .iter()
        .find_map(|prefix| url.strip_prefix(*prefix))
        .and_then(slug_from_remote_path)
}

/// Validate the part of a remote URL that follows the host and turn it into a slug.
fn slug_from_remote_path(path: &str) -> Option<String> {
    let path = path.trim_end_matches('/');
    let path = path.strip_suffix(".git").unwrap_or(path);

    // Counting segments alone would accept inputs such as `user/` or `/repo`;
    // both leading components must actually be present.
    let mut segments = path.split('/');
    let owner = segments.next()?;
    let repo = segments.next()?;
    if owner.is_empty() || repo.is_empty() {
        return None;
    }

    Some(path.to_string())
}
126
127/// Enrich a changeset message with commit information and author acknowledgments
128pub fn enrich_changeset_message(
129    message: &str,
130    commit_hash: &str,
131    workspace: &Path,
132    repo_slug: Option<&str>,
133    github_token: Option<&str>,
134    show_commit_hash: bool,
135    show_acknowledgments: bool,
136) -> String {
137    // Create a tokio runtime for this blocking call
138    let rt = tokio::runtime::Builder::new_current_thread()
139        .enable_all()
140        .build()
141        .unwrap();
142    rt.block_on(enrich_changeset_message_async(
143        message,
144        commit_hash,
145        workspace,
146        repo_slug,
147        github_token,
148        show_commit_hash,
149        show_acknowledgments,
150    ))
151}
152
153/// Async version of enrich_changeset_message for internal use
154async fn enrich_changeset_message_async(
155    message: &str,
156    commit_hash: &str,
157    workspace: &Path,
158    repo_slug: Option<&str>,
159    github_token: Option<&str>,
160    show_commit_hash: bool,
161    show_acknowledgments: bool,
162) -> String {
163    let commit = get_commit_info_for_hash(workspace, commit_hash);
164
165    let commit_prefix = if show_commit_hash {
166        build_commit_prefix(&commit, repo_slug)
167    } else {
168        String::new()
169    };
170
171    let acknowledgment_suffix = if show_acknowledgments {
172        build_acknowledgment_suffix(&commit, repo_slug, github_token).await
173    } else {
174        String::new()
175    };
176
177    format_enriched_message(message, &commit_prefix, &acknowledgment_suffix)
178}
179
180/// Get commit information for a specific commit hash
181fn get_commit_info_for_hash(repo_root: &Path, commit_hash: &str) -> Option<CommitInfo> {
182    // Use \x1f (Unit Separator) to avoid conflicts with user content
183    let format_arg = "--format=%H\x1f%h\x1f%an";
184    let output = Command::new("git")
185        .current_dir(repo_root)
186        .args(["show", "--no-patch", format_arg, commit_hash])
187        .output()
188        .ok()?;
189
190    if !output.status.success() {
191        return None;
192    }
193
194    let stdout = String::from_utf8_lossy(&output.stdout);
195    let parts: Vec<&str> = stdout.trim().split('\x1f').collect();
196    if parts.len() != 3 {
197        return None;
198    }
199
200    Some(CommitInfo {
201        sha: parts[0].to_string(),
202        short_sha: parts[1].to_string(),
203        author_name: parts[2].to_string(),
204    })
205}
206
207/// Build commit prefix for enhanced messages
208fn build_commit_prefix(commit: &Option<CommitInfo>, repo_slug: Option<&str>) -> String {
209    if let Some(commit) = commit {
210        if let Some(slug) = repo_slug {
211            format!(
212                "[{}](https://github.com/{}/commit/{}) ",
213                commit.short_sha, slug, commit.sha
214            )
215        } else {
216            format!("{} ", commit.short_sha)
217        }
218    } else {
219        String::new()
220    }
221}
222
223/// Build acknowledgment suffix for enhanced messages
224async fn build_acknowledgment_suffix(
225    commit: &Option<CommitInfo>,
226    repo_slug: Option<&str>,
227    github_token: Option<&str>,
228) -> String {
229    let Some(commit) = commit else {
230        return String::new();
231    };
232
233    // If we have both GitHub repo and token, try to get GitHub user info with first contribution detection
234    if let (Some(slug), Some(token)) = (repo_slug, github_token)
235        && let Some(github_user) = get_github_user_for_commit(slug, &commit.sha, token).await
236    {
237        return if github_user.is_first_contribution {
238            format!(
239                " — Thanks @{} for your first contribution 🎉!",
240                github_user.login
241            )
242        } else {
243            format!(" — Thanks @{}!", github_user.login)
244        };
245    }
246
247    // If we have repo_slug but no token, we can still try to get the GitHub user from commit API
248    // (public commits are accessible without auth for public repos)
249    if let Some(slug) = repo_slug
250        && let Some(github_user) = get_github_user_for_commit_public(slug, &commit.sha).await
251    {
252        return format!(" — Thanks @{}!", github_user.login);
253    }
254
255    // Fallback to just the Git author name
256    format!(" — Thanks {}!", commit.author_name)
257}
258
/// Assemble the final message as prefix, then message, then suffix, with no separator added.
fn format_enriched_message(
    message: &str,
    commit_prefix: &str,
    acknowledgment_suffix: &str,
) -> String {
    [commit_prefix, message, acknowledgment_suffix].concat()
}
267
268/// Get GitHub user information for a commit
269async fn get_github_user_for_commit(
270    repo_slug: &str,
271    commit_sha: &str,
272    token: &str,
273) -> Option<GitHubUserInfo> {
274    let commit_url = format!(
275        "https://api.github.com/repos/{}/commits/{}",
276        repo_slug, commit_sha
277    );
278
279    let commit_json = github_api_get(&commit_url, token).await?;
280    let commit: CommitApiResponse = serde_json::from_str(&commit_json).ok()?;
281    let login = commit.author?.login;
282
283    // Check if first contribution when we have a token
284    let is_first_contribution = check_first_contribution(repo_slug, &login, token).await;
285
286    Some(GitHubUserInfo {
287        login,
288        is_first_contribution,
289    })
290}
291
292/// Get GitHub user information for a commit from public API (no token required)
293async fn get_github_user_for_commit_public(
294    repo_slug: &str,
295    commit_sha: &str,
296) -> Option<GitHubUserInfo> {
297    let commit_url = format!(
298        "https://api.github.com/repos/{}/commits/{}",
299        repo_slug, commit_sha
300    );
301
302    let commit_json = github_api_get_public(&commit_url).await?;
303    let commit: CommitApiResponse = serde_json::from_str(&commit_json).ok()?;
304    let login = commit.author?.login;
305
306    Some(GitHubUserInfo {
307        login,
308        is_first_contribution: false, // Cannot detect without token
309    })
310}
311
312/// Check if a user is making their first contribution to a repository
313async fn check_first_contribution(repo_slug: &str, login: &str, token: &str) -> bool {
314    const PER_PAGE: u32 = 100;
315    const MAX_PAGES: u32 = 20; // Safety bound to avoid excessive paging
316
317    for page in 1..=MAX_PAGES {
318        let contributors_url = format!(
319            "https://api.github.com/repos/{}/contributors?per_page={}&page={}&anon=true",
320            repo_slug, PER_PAGE, page
321        );
322
323        let Some(body) = github_api_get(&contributors_url, token).await else {
324            break;
325        };
326
327        let Ok(contributors): Result<Vec<Contributor>, _> = serde_json::from_str(&body) else {
328            break;
329        };
330
331        if contributors.is_empty() {
332            break;
333        }
334
335        if let Some(contributor) = contributors
336            .into_iter()
337            .find(|c| c.login.as_deref() == Some(login))
338        {
339            return contributor.contributions == 1;
340        }
341    }
342
343    // If we can't find the user in contributors, assume it's not their first contribution
344    // This is a conservative approach for cases where the API might have issues
345    false
346}
347
348/// Perform a GET request to GitHub API and return the response body as String
349///
350/// Uses reqwest to make HTTP requests to the GitHub API with proper authentication
351/// and headers. Returns None if the request fails or returns empty content.
352async fn github_api_get(url: &str, token: &str) -> Option<String> {
353    let client = reqwest::Client::new();
354
355    let response = client
356        .get(url)
357        .header("Authorization", format!("Bearer {}", token))
358        .header("Accept", "application/vnd.github+json")
359        .header("X-GitHub-Api-Version", "2022-11-28")
360        .header("User-Agent", "sampo/0.4.0")
361        .timeout(std::time::Duration::from_secs(30))
362        .send()
363        .await
364        .ok()?;
365
366    if !response.status().is_success() {
367        return None;
368    }
369
370    let body = response.text().await.ok()?;
371    if body.trim().is_empty() {
372        None
373    } else {
374        Some(body)
375    }
376}
377
378/// Perform a GET request to GitHub API without authentication (for public repos)
379///
380/// Similar to github_api_get but without authorization header.
381/// Only works with public repositories and endpoints.
382async fn github_api_get_public(url: &str) -> Option<String> {
383    let client = reqwest::Client::new();
384
385    let response = client
386        .get(url)
387        .header("Accept", "application/vnd.github+json")
388        .header("X-GitHub-Api-Version", "2022-11-28")
389        .header("User-Agent", "sampo/0.4.0")
390        .timeout(std::time::Duration::from_secs(30))
391        .send()
392        .await
393        .ok()?;
394
395    if !response.status().is_success() {
396        return None;
397    }
398
399    let body = response.text().await.ok()?;
400    if body.trim().is_empty() {
401        None
402    } else {
403        Some(body)
404    }
405}
406
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `git` with `args` inside `repo`, failing the test with Git's stderr when
    /// the command cannot be spawned or exits with an error.
    fn run_git(repo: &std::path::Path, args: &[&str]) {
        let output = std::process::Command::new("git")
            .args(args)
            .current_dir(repo)
            .output()
            .expect("git should be available to run this test");
        assert!(
            output.status.success(),
            "git {:?} failed: {}",
            args,
            String::from_utf8_lossy(&output.stderr)
        );
    }

    #[test]
    fn parse_github_url_https() {
        assert_eq!(
            parse_github_url("https://github.com/owner/repo.git"),
            Some("owner/repo".to_string())
        );
        assert_eq!(
            parse_github_url("https://github.com/owner/repo"),
            Some("owner/repo".to_string())
        );
    }

    #[test]
    fn parse_github_url_ssh() {
        assert_eq!(
            parse_github_url("git@github.com:owner/repo.git"),
            Some("owner/repo".to_string())
        );
    }

    #[test]
    fn parse_github_url_invalid() {
        assert_eq!(parse_github_url("https://gitlab.com/owner/repo.git"), None);
        assert_eq!(parse_github_url("not-a-url"), None);
    }

    #[test]
    fn build_commit_prefix_with_repo() {
        let commit = Some(CommitInfo {
            sha: "abcd1234".to_string(),
            short_sha: "abcd".to_string(),
            author_name: "Author".to_string(),
        });

        let prefix = build_commit_prefix(&commit, Some("owner/repo"));
        assert_eq!(
            prefix,
            "[abcd](https://github.com/owner/repo/commit/abcd1234) "
        );
    }

    #[test]
    fn build_commit_prefix_without_repo() {
        let commit = Some(CommitInfo {
            sha: "abcd1234".to_string(),
            short_sha: "abcd".to_string(),
            author_name: "Author".to_string(),
        });

        let prefix = build_commit_prefix(&commit, None);
        assert_eq!(prefix, "abcd ");
    }

    #[test]
    fn format_enriched_message_complete() {
        let message =
            format_enriched_message("feat: add new feature", "[abcd](link) ", " — Thanks @user!");
        assert_eq!(
            message,
            "[abcd](link) feat: add new feature — Thanks @user!"
        );
    }

    #[test]
    fn enrich_changeset_message_integration() {
        use std::fs;
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let repo_path = temp_dir.path();

        // Initialize a git repo and configure the author used for the commit
        run_git(repo_path, &["init"]);
        run_git(repo_path, &["config", "user.name", "Test User"]);
        run_git(repo_path, &["config", "user.email", "test@example.com"]);

        // Create a test file and commit it
        let test_file = repo_path.join("test.md");
        fs::write(&test_file, "initial content").unwrap();
        run_git(repo_path, &["add", "test.md"]);
        run_git(repo_path, &["commit", "-m", "initial commit"]);

        // Get the commit hash
        let commit_hash = get_commit_hash_for_path(repo_path, &test_file)
            .expect("Should find commit hash for test file");

        // Test enrichment with all features enabled.
        // With a repo slug and no token this goes through the public GitHub lookup
        // before falling back to the Git author name.
        let enriched = enrich_changeset_message(
            "fix: resolve critical bug",
            &commit_hash,
            repo_path,
            Some("owner/repo"),
            None, // no GitHub token for this test
            true, // show commit hash
            true, // show acknowledgments
        );

        // Should contain the commit hash link and author thanks
        assert!(
            enriched.contains(&commit_hash[..8]),
            "Should contain short commit hash"
        );
        assert!(
            enriched.contains("Thanks Test User!"),
            "Should contain author thanks"
        );
        assert!(
            enriched.contains("fix: resolve critical bug"),
            "Should contain original message"
        );

        // Test with features disabled
        let plain = enrich_changeset_message(
            "fix: resolve critical bug",
            &commit_hash,
            repo_path,
            Some("owner/repo"),
            None,
            false, // no commit hash
            false, // no acknowledgments
        );

        assert_eq!(
            plain, "fix: resolve critical bug",
            "Should be unchanged when features disabled"
        );
    }

    #[tokio::test]
    async fn test_github_api_get_with_invalid_token() {
        // Test with invalid token should return None (graceful failure)
        let result = github_api_get(
            "https://api.github.com/repos/bruits/sampo/commits/invalid",
            "invalid_token",
        )
        .await;
        assert!(result.is_none(), "Should return None for invalid requests");
    }

    #[test]
    fn test_parse_github_url_edge_cases() {
        // Test edge cases for GitHub URL parsing
        assert_eq!(parse_github_url(""), None);
        assert_eq!(parse_github_url("https://github.com/"), None);
        assert_eq!(parse_github_url("git@github.com:"), None);
        assert_eq!(parse_github_url("https://github.com/user"), None); // Missing repo
        assert_eq!(
            parse_github_url("https://github.com/user/repo/extra/path"),
            Some("user/repo/extra/path".to_string())
        );
    }

    #[tokio::test]
    async fn test_check_first_contribution_no_token() {
        // Test check_first_contribution without valid token
        let result = check_first_contribution("bruits/sampo", "testuser", "invalid_token").await;
        // Should return false (conservative default) when API calls fail
        assert!(!result, "Should return false when API calls fail");
    }

    #[tokio::test]
    async fn test_build_acknowledgment_suffix_fallback() {
        // Test that acknowledgment falls back to Git author when GitHub API fails
        let commit = Some(CommitInfo {
            sha: "abcd1234".to_string(),
            short_sha: "abcd".to_string(),
            author_name: "Local Developer".to_string(),
        });

        // Test without GitHub repo/token (should use Git author)
        let result = build_acknowledgment_suffix(&commit, None, None).await;
        assert_eq!(result, " — Thanks Local Developer!");

        // Test with empty commit
        let result = build_acknowledgment_suffix(&None, Some("owner/repo"), Some("token")).await;
        assert_eq!(result, "");
    }

    #[test]
    fn test_detect_github_repo_slug_with_config_override() {
        // Test that explicit config overrides git remote detection
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let repo_path = temp_dir.path();

        // Even without git repo, explicit config should work
        let result = detect_github_repo_slug_with_config(repo_path, Some("explicit/repo"));
        assert_eq!(result, Some("explicit/repo".to_string()));

        // Without an override the outcome depends on the environment. The only case
        // that can be pinned down is a non-empty GITHUB_REPOSITORY, which must win
        // over git remote detection.
        let result = detect_github_repo_slug_with_config(repo_path, None);
        if let Ok(env_repo) = std::env::var("GITHUB_REPOSITORY") {
            if !env_repo.is_empty() {
                assert_eq!(result, Some(env_repo));
            }
        }
    }

    #[tokio::test]
    async fn test_get_github_user_for_commit_public() {
        // Test public API access (should fail gracefully with invalid repo)
        let result = get_github_user_for_commit_public("invalid/repo", "invalid_sha").await;
        assert!(result.is_none(), "Should return None for invalid requests");
    }

    #[tokio::test]
    async fn test_github_api_get_public_with_invalid_url() {
        // Test public API with invalid URL should return None
        let result = github_api_get_public("https://api.github.com/invalid/endpoint").await;
        assert!(result.is_none(), "Should return None for invalid requests");
    }

    #[tokio::test]
    async fn test_build_acknowledgment_suffix_with_public_repo() {
        let commit = Some(CommitInfo {
            sha: "abcd1234".to_string(),
            short_sha: "abcd".to_string(),
            author_name: "Test Author".to_string(),
        });

        // Test with repo_slug but no token (should try public API, fall back to Git author)
        let result = build_acknowledgment_suffix(&commit, Some("invalid/repo"), None).await;
        assert_eq!(result, " — Thanks Test Author!");

        // Test with neither repo nor token
        let result = build_acknowledgment_suffix(&commit, None, None).await;
        assert_eq!(result, " — Thanks Test Author!");
    }

    #[tokio::test]
    async fn test_reqwest_timeout_behavior() {
        // Test that unreachable hosts are reported as None rather than an error.
        // The address is expected to be non-routable; depending on the network this
        // can take up to the request timeout configured in `github_api_get_public`.
        let result = github_api_get_public("http://10.255.255.1/timeout-test").await;
        assert!(
            result.is_none(),
            "Should return None for timeout/unreachable requests"
        );
    }
}