sampo_core/
enrichment.rs

1//! Enrichment module for changeset messages with commit information and author acknowledgments.
2//!
3//! Enriches changeset messages with commit links and author thanks using a fallback strategy:
4//! GitHub API (with token) → GitHub public API → Git author name.
5//!
6//! Repository detection: config override → GITHUB_REPOSITORY env → git remote origin.
7
8use serde::Deserialize;
9use std::path::Path;
10use std::process::Command;
11
/// Metadata about a single Git commit, as reported by `git show`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommitInfo {
    /// Full commit hash (git format placeholder `%H`).
    pub sha: String,
    /// Abbreviated commit hash (git format placeholder `%h`).
    pub short_sha: String,
    /// Author name recorded in the commit (git format placeholder `%an`).
    pub author_name: String,
}
18
/// GitHub identity resolved for a commit author.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitHubUserInfo {
    /// GitHub username (login)
    pub login: String,
    /// Whether this appears to be the user's first contribution to the repository
    pub is_first_contribution: bool,
}
26
/// `author` object embedded in a GitHub commit API response.
///
/// Only the `login` field is deserialized; all other fields are ignored.
#[derive(Deserialize)]
struct CommitAuthor {
    /// GitHub username of the commit author.
    login: String,
}
32
/// Subset of the JSON returned by `GET /repos/{slug}/commits/{sha}`.
#[derive(Deserialize)]
struct CommitApiResponse {
    /// GitHub account associated with the commit.
    /// NOTE(review): presumably `null`/absent when the commit email is not linked
    /// to a GitHub account — confirm against the GitHub API docs.
    author: Option<CommitAuthor>,
}
37
/// One entry of the JSON array returned by the repository contributors endpoint.
#[derive(Deserialize)]
struct Contributor {
    /// GitHub username of the contributor.
    /// NOTE(review): optional presumably because the list is requested with
    /// `anon=true`, and anonymous entries carry no login — confirm.
    login: Option<String>,
    /// Contribution count reported by the API for this contributor.
    contributions: u64,
}
43
/// Get the hash of the most recent commit that touched `file_path`.
///
/// Runs `git log -1 --format=%H -- <file_path>` with `repo_root` as the working
/// directory.
///
/// Returns `None` when git cannot be spawned, exits with a non-zero status, or
/// prints no hash (e.g. the path has never been committed).
pub fn get_commit_hash_for_path(repo_root: &Path, file_path: &Path) -> Option<String> {
    let output = Command::new("git")
        .current_dir(repo_root)
        .args(["log", "-1", "--format=%H", "--"])
        // Hand the path to git as an OsStr: a lossy UTF-8 conversion would replace
        // invalid bytes and make git look up a different (non-existent) path.
        .arg(file_path)
        .output()
        .ok()?;

    if !output.status.success() {
        return None;
    }

    let hash = String::from_utf8_lossy(&output.stdout).trim().to_string();
    (!hash.is_empty()).then_some(hash)
}
65
66/// Detect GitHub repository slug from Git remote
67pub fn detect_github_repo_slug(repo_root: &Path) -> Option<String> {
68    detect_github_repo_slug_with_config(repo_root, None)
69}
70
71/// Detect GitHub repository slug with optional config override
72pub fn detect_github_repo_slug_with_config(
73    repo_root: &Path,
74    config_repo: Option<&str>,
75) -> Option<String> {
76    // 1. If explicitly configured, use that
77    if let Some(repo) = config_repo {
78        return Some(repo.to_string());
79    }
80
81    // 2. Try GITHUB_REPOSITORY environment variable (useful in GitHub Actions)
82    if let Ok(github_repo) = std::env::var("GITHUB_REPOSITORY")
83        && !github_repo.is_empty()
84    {
85        return Some(github_repo);
86    }
87
88    // 3. Try to extract from git remote
89    let output = Command::new("git")
90        .current_dir(repo_root)
91        .args(["remote", "get-url", "origin"])
92        .output()
93        .ok()?;
94
95    if !output.status.success() {
96        return None;
97    }
98
99    let binding = String::from_utf8_lossy(&output.stdout);
100    let url = binding.trim();
101
102    // Parse GitHub URLs (both HTTPS and SSH)
103    parse_github_url(url)
104}
105
/// Parse a GitHub repository slug (`owner/repo`) from a remote URL.
///
/// Supported forms:
/// - HTTPS: `https://github.com/owner/repo.git` or `https://github.com/owner/repo`
/// - SSH: `git@github.com:owner/repo.git`
///
/// Trailing slashes and a trailing `.git` are stripped. Any additional path
/// segments after `owner/repo` are kept as-is in the returned string.
///
/// Returns `None` for non-GitHub URLs and for URLs whose owner or repository
/// segment is missing or empty (e.g. `https://github.com/owner/`).
fn parse_github_url(url: &str) -> Option<String> {
    const PREFIXES: [&str; 2] = ["https://github.com/", "git@github.com:"];

    let rest = PREFIXES
        .iter()
        .find_map(|prefix| url.strip_prefix(*prefix))?;

    // Drop trailing slashes first so `owner/repo.git/` and `owner/repo/` both normalize.
    let rest = rest.trim_end_matches('/');
    let slug = rest.strip_suffix(".git").unwrap_or(rest);

    // Both the owner and the repository segment must be present and non-empty.
    let mut segments = slug.split('/');
    let owner = segments.next()?;
    let repo = segments.next()?;
    if owner.is_empty() || repo.is_empty() {
        return None;
    }

    Some(slug.to_string())
}
126
127/// Enrich a changeset message with commit information and author acknowledgments
128pub fn enrich_changeset_message(
129    message: &str,
130    commit_hash: &str,
131    workspace: &Path,
132    repo_slug: Option<&str>,
133    github_token: Option<&str>,
134    show_commit_hash: bool,
135    show_acknowledgments: bool,
136) -> String {
137    // Create a tokio runtime for this blocking call
138    let rt = tokio::runtime::Builder::new_current_thread()
139        .enable_all()
140        .build()
141        .unwrap();
142    rt.block_on(enrich_changeset_message_async(
143        message,
144        commit_hash,
145        workspace,
146        repo_slug,
147        github_token,
148        show_commit_hash,
149        show_acknowledgments,
150    ))
151}
152
153/// Async version of enrich_changeset_message for internal use
154async fn enrich_changeset_message_async(
155    message: &str,
156    commit_hash: &str,
157    workspace: &Path,
158    repo_slug: Option<&str>,
159    github_token: Option<&str>,
160    show_commit_hash: bool,
161    show_acknowledgments: bool,
162) -> String {
163    let commit = get_commit_info_for_hash(workspace, commit_hash);
164
165    let commit_prefix = if show_commit_hash {
166        build_commit_prefix(&commit, repo_slug)
167    } else {
168        String::new()
169    };
170
171    let acknowledgment_suffix = if show_acknowledgments {
172        build_acknowledgment_suffix(&commit, repo_slug, github_token).await
173    } else {
174        String::new()
175    };
176
177    crate::markdown::compose_markdown_with_affixes(message, &commit_prefix, &acknowledgment_suffix)
178}
179
180/// Get commit information for a specific commit hash
181fn get_commit_info_for_hash(repo_root: &Path, commit_hash: &str) -> Option<CommitInfo> {
182    // Use \x1f (Unit Separator) to avoid conflicts with user content
183    let format_arg = "--format=%H\x1f%h\x1f%an";
184    let output = Command::new("git")
185        .current_dir(repo_root)
186        .args(["show", "--no-patch", format_arg, commit_hash])
187        .output()
188        .ok()?;
189
190    if !output.status.success() {
191        return None;
192    }
193
194    let stdout = String::from_utf8_lossy(&output.stdout);
195    let parts: Vec<&str> = stdout.trim().split('\x1f').collect();
196    if parts.len() != 3 {
197        return None;
198    }
199
200    Some(CommitInfo {
201        sha: parts[0].to_string(),
202        short_sha: parts[1].to_string(),
203        author_name: parts[2].to_string(),
204    })
205}
206
207/// Build commit prefix for enhanced messages
208fn build_commit_prefix(commit: &Option<CommitInfo>, repo_slug: Option<&str>) -> String {
209    if let Some(commit) = commit {
210        if let Some(slug) = repo_slug {
211            format!(
212                "[{}](https://github.com/{}/commit/{}) ",
213                commit.short_sha, slug, commit.sha
214            )
215        } else {
216            format!("{} ", commit.short_sha)
217        }
218    } else {
219        String::new()
220    }
221}
222
223/// Build acknowledgment suffix for enhanced messages
224async fn build_acknowledgment_suffix(
225    commit: &Option<CommitInfo>,
226    repo_slug: Option<&str>,
227    github_token: Option<&str>,
228) -> String {
229    let Some(commit) = commit else {
230        return String::new();
231    };
232
233    // If we have both GitHub repo and token, try to get GitHub user info with first contribution detection
234    if let (Some(slug), Some(token)) = (repo_slug, github_token)
235        && let Some(github_user) = get_github_user_for_commit(slug, &commit.sha, token).await
236    {
237        return if github_user.is_first_contribution {
238            format!(
239                " — Thanks @{} for your first contribution 🎉!",
240                github_user.login
241            )
242        } else {
243            format!(" — Thanks @{}!", github_user.login)
244        };
245    }
246
247    // If we have repo_slug but no token, we can still try to get the GitHub user from commit API
248    // (public commits are accessible without auth for public repos)
249    if let Some(slug) = repo_slug
250        && let Some(github_user) = get_github_user_for_commit_public(slug, &commit.sha).await
251    {
252        return format!(" — Thanks @{}!", github_user.login);
253    }
254
255    // Fallback to just the Git author name
256    format!(" — Thanks {}!", commit.author_name)
257}
258
259/// Get GitHub user information for a commit
260async fn get_github_user_for_commit(
261    repo_slug: &str,
262    commit_sha: &str,
263    token: &str,
264) -> Option<GitHubUserInfo> {
265    let commit_url = format!(
266        "https://api.github.com/repos/{}/commits/{}",
267        repo_slug, commit_sha
268    );
269
270    let commit_json = github_api_get(&commit_url, token).await?;
271    let commit: CommitApiResponse = serde_json::from_str(&commit_json).ok()?;
272    let login = commit.author?.login;
273
274    // Check if first contribution when we have a token
275    let is_first_contribution = check_first_contribution(repo_slug, &login, token).await;
276
277    Some(GitHubUserInfo {
278        login,
279        is_first_contribution,
280    })
281}
282
283/// Get GitHub user information for a commit from public API (no token required)
284async fn get_github_user_for_commit_public(
285    repo_slug: &str,
286    commit_sha: &str,
287) -> Option<GitHubUserInfo> {
288    let commit_url = format!(
289        "https://api.github.com/repos/{}/commits/{}",
290        repo_slug, commit_sha
291    );
292
293    let commit_json = github_api_get_public(&commit_url).await?;
294    let commit: CommitApiResponse = serde_json::from_str(&commit_json).ok()?;
295    let login = commit.author?.login;
296
297    Some(GitHubUserInfo {
298        login,
299        is_first_contribution: false, // Cannot detect without token
300    })
301}
302
303/// Check if a user is making their first contribution to a repository
304async fn check_first_contribution(repo_slug: &str, login: &str, token: &str) -> bool {
305    const PER_PAGE: u32 = 100;
306    const MAX_PAGES: u32 = 20; // Safety bound to avoid excessive paging
307
308    for page in 1..=MAX_PAGES {
309        let contributors_url = format!(
310            "https://api.github.com/repos/{}/contributors?per_page={}&page={}&anon=true",
311            repo_slug, PER_PAGE, page
312        );
313
314        let Some(body) = github_api_get(&contributors_url, token).await else {
315            break;
316        };
317
318        let Ok(contributors): Result<Vec<Contributor>, _> = serde_json::from_str(&body) else {
319            break;
320        };
321
322        if contributors.is_empty() {
323            break;
324        }
325
326        if let Some(contributor) = contributors
327            .into_iter()
328            .find(|c| c.login.as_deref() == Some(login))
329        {
330            return contributor.contributions == 1;
331        }
332    }
333
334    // If we can't find the user in contributors, assume it's not their first contribution
335    // This is a conservative approach for cases where the API might have issues
336    false
337}
338
339/// Perform a GET request to GitHub API and return the response body as String
340///
341/// Uses reqwest to make HTTP requests to the GitHub API with proper authentication
342/// and headers. Returns None if the request fails or returns empty content.
343async fn github_api_get(url: &str, token: &str) -> Option<String> {
344    let client = reqwest::Client::new();
345
346    let response = client
347        .get(url)
348        .header("Authorization", format!("Bearer {}", token))
349        .header("Accept", "application/vnd.github+json")
350        .header("X-GitHub-Api-Version", "2022-11-28")
351        .header("User-Agent", "sampo/0.4.0")
352        .timeout(std::time::Duration::from_secs(30))
353        .send()
354        .await
355        .ok()?;
356
357    if !response.status().is_success() {
358        return None;
359    }
360
361    let body = response.text().await.ok()?;
362    if body.trim().is_empty() {
363        None
364    } else {
365        Some(body)
366    }
367}
368
369/// Perform a GET request to GitHub API without authentication (for public repos)
370///
371/// Similar to github_api_get but without authorization header.
372/// Only works with public repositories and endpoints.
373async fn github_api_get_public(url: &str) -> Option<String> {
374    let client = reqwest::Client::new();
375
376    let response = client
377        .get(url)
378        .header("Accept", "application/vnd.github+json")
379        .header("X-GitHub-Api-Version", "2022-11-28")
380        .header("User-Agent", "sampo/0.4.0")
381        .timeout(std::time::Duration::from_secs(30))
382        .send()
383        .await
384        .ok()?;
385
386    if !response.status().is_success() {
387        return None;
388    }
389
390    let body = response.text().await.ok()?;
391    if body.trim().is_empty() {
392        None
393    } else {
394        Some(body)
395    }
396}
397
#[cfg(test)]
mod tests {
    use super::*;

    /// Run a git command in `repo` and fail the test loudly if it does not succeed,
    /// so setup problems are reported where they happen instead of as a later
    /// unrelated assertion failure.
    fn run_git(repo: &Path, args: &[&str]) {
        let output = Command::new("git")
            .args(args)
            .current_dir(repo)
            .output()
            .expect("git should be installed and runnable");
        assert!(
            output.status.success(),
            "git {:?} failed: {}",
            args,
            String::from_utf8_lossy(&output.stderr)
        );
    }

    #[test]
    fn parse_github_url_https() {
        assert_eq!(
            parse_github_url("https://github.com/owner/repo.git"),
            Some("owner/repo".to_string())
        );
        assert_eq!(
            parse_github_url("https://github.com/owner/repo"),
            Some("owner/repo".to_string())
        );
    }

    #[test]
    fn parse_github_url_ssh() {
        assert_eq!(
            parse_github_url("git@github.com:owner/repo.git"),
            Some("owner/repo".to_string())
        );
    }

    #[test]
    fn parse_github_url_invalid() {
        assert_eq!(parse_github_url("https://gitlab.com/owner/repo.git"), None);
        assert_eq!(parse_github_url("not-a-url"), None);
    }

    #[test]
    fn build_commit_prefix_with_repo() {
        let commit = Some(CommitInfo {
            sha: "abcd1234".to_string(),
            short_sha: "abcd".to_string(),
            author_name: "Author".to_string(),
        });

        let prefix = build_commit_prefix(&commit, Some("owner/repo"));
        assert_eq!(
            prefix,
            "[abcd](https://github.com/owner/repo/commit/abcd1234) "
        );
    }

    #[test]
    fn build_commit_prefix_without_repo() {
        let commit = Some(CommitInfo {
            sha: "abcd1234".to_string(),
            short_sha: "abcd".to_string(),
            author_name: "Author".to_string(),
        });

        let prefix = build_commit_prefix(&commit, None);
        assert_eq!(prefix, "abcd ");
    }

    // Placeholder: full message formatting is covered by the markdown module's tests.
    #[test]
    fn format_enriched_message_complete_is_covered_in_markdown_tests() {}

    #[test]
    fn enrich_changeset_message_integration() {
        use std::fs;
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let repo_path = temp_dir.path();

        // Initialize a git repo with a local identity; disable signing so a
        // developer's global gpg configuration cannot break the commit.
        run_git(repo_path, &["init"]);
        run_git(repo_path, &["config", "user.name", "Test User"]);
        run_git(repo_path, &["config", "user.email", "test@example.com"]);
        run_git(repo_path, &["config", "commit.gpgsign", "false"]);

        // Create a test file and commit it
        let test_file = repo_path.join("test.md");
        fs::write(&test_file, "initial content").unwrap();

        run_git(repo_path, &["add", "test.md"]);
        run_git(repo_path, &["commit", "-m", "initial commit"]);

        // Get the commit hash
        let commit_hash = get_commit_hash_for_path(repo_path, &test_file)
            .expect("Should find commit hash for test file");

        // Test enrichment with all features enabled.
        // "owner/repo" does not contain this commit, so the public API lookup is
        // expected to fail and the suffix falls back to the Git author name.
        let enriched = enrich_changeset_message(
            "fix: resolve critical bug",
            &commit_hash,
            repo_path,
            Some("owner/repo"),
            None, // no GitHub token for this test
            true, // show commit hash
            true, // show acknowledgments
        );

        // The full hash appears in the commit link, so its first 8 characters
        // are present regardless of how long git's abbreviated hash is.
        assert!(
            enriched.contains(&commit_hash[..8]),
            "Should contain short commit hash"
        );
        assert!(
            enriched.contains("Thanks Test User!"),
            "Should contain author thanks"
        );
        assert!(
            enriched.contains("fix: resolve critical bug"),
            "Should contain original message"
        );

        // Test with features disabled
        let plain = enrich_changeset_message(
            "fix: resolve critical bug",
            &commit_hash,
            repo_path,
            Some("owner/repo"),
            None,
            false, // no commit hash
            false, // no acknowledgments
        );

        assert_eq!(
            plain, "fix: resolve critical bug",
            "Should be unchanged when features disabled"
        );
    }

    #[tokio::test]
    async fn test_github_api_get_with_invalid_token() {
        // Test with invalid token should return None (graceful failure)
        let result = github_api_get(
            "https://api.github.com/repos/bruits/sampo/commits/invalid",
            "invalid_token",
        )
        .await;
        assert!(result.is_none(), "Should return None for invalid requests");
    }

    #[test]
    fn test_parse_github_url_edge_cases() {
        // Test edge cases for GitHub URL parsing
        assert_eq!(parse_github_url(""), None);
        assert_eq!(parse_github_url("https://github.com/"), None);
        assert_eq!(parse_github_url("git@github.com:"), None);
        assert_eq!(parse_github_url("https://github.com/user"), None); // Missing repo
        assert_eq!(
            parse_github_url("https://github.com/user/repo/extra/path"),
            Some("user/repo/extra/path".to_string())
        );
    }

    #[tokio::test]
    async fn test_check_first_contribution_no_token() {
        // Test check_first_contribution without valid token
        let result = check_first_contribution("bruits/sampo", "testuser", "invalid_token").await;
        // Should return false (conservative default) when API calls fail
        assert!(!result, "Should return false when API calls fail");
    }

    #[tokio::test]
    async fn test_build_acknowledgment_suffix_fallback() {
        // Test that acknowledgment falls back to Git author when GitHub API fails
        let commit = Some(CommitInfo {
            sha: "abcd1234".to_string(),
            short_sha: "abcd".to_string(),
            author_name: "Local Developer".to_string(),
        });

        // Test without GitHub repo/token (should use Git author)
        let result = build_acknowledgment_suffix(&commit, None, None).await;
        assert_eq!(result, " — Thanks Local Developer!");

        // Test with empty commit
        let result = build_acknowledgment_suffix(&None, Some("owner/repo"), Some("token")).await;
        assert_eq!(result, "");
    }

    #[test]
    fn test_detect_github_repo_slug_with_config_override() {
        // Test that explicit config overrides git remote detection
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let repo_path = temp_dir.path();

        // Even without git repo, explicit config should work
        let result = detect_github_repo_slug_with_config(repo_path, Some("explicit/repo"));
        assert_eq!(result, Some("explicit/repo".to_string()));

        // Without explicit config the environment variable is the next source.
        // Only assert when it is set: otherwise the outcome depends on whether
        // the temp directory happens to live inside a git repository.
        let result = detect_github_repo_slug_with_config(repo_path, None);
        if let Ok(env_repo) = std::env::var("GITHUB_REPOSITORY") {
            if !env_repo.is_empty() {
                assert_eq!(result, Some(env_repo));
            }
        }
    }

    #[tokio::test]
    async fn test_get_github_user_for_commit_public() {
        // Test public API access (should fail gracefully with invalid repo)
        let result = get_github_user_for_commit_public("invalid/repo", "invalid_sha").await;
        assert!(result.is_none(), "Should return None for invalid requests");
    }

    #[tokio::test]
    async fn test_github_api_get_public_with_invalid_url() {
        // Test public API with invalid URL should return None
        let result = github_api_get_public("https://api.github.com/invalid/endpoint").await;
        assert!(result.is_none(), "Should return None for invalid requests");
    }

    #[tokio::test]
    async fn test_build_acknowledgment_suffix_with_public_repo() {
        let commit = Some(CommitInfo {
            sha: "abcd1234".to_string(),
            short_sha: "abcd".to_string(),
            author_name: "Test Author".to_string(),
        });

        // Test with repo_slug but no token (should try public API, fall back to Git author)
        let result = build_acknowledgment_suffix(&commit, Some("invalid/repo"), None).await;
        assert_eq!(result, " — Thanks Test Author!");

        // Test with neither repo nor token
        let result = build_acknowledgment_suffix(&commit, None, None).await;
        assert_eq!(result, " — Thanks Test Author!");
    }

    #[tokio::test]
    async fn test_reqwest_timeout_behavior() {
        // A non-routable IP makes the request fail or time out. Note that on
        // networks that silently drop the packets this can take up to the
        // 30 second request timeout configured in the helper.
        let result = github_api_get_public("http://10.255.255.1/timeout-test").await;
        assert!(
            result.is_none(),
            "Should return None for timeout/unreachable requests"
        );
    }
}