Skip to main content

synwire_agent/middleware/
fetch_detector.rs

//! Repository fetch detector middleware.
//!
//! Monitors web fetch calls for GitHub URLs (`raw.githubusercontent.com`
//! and `github.com`). Once the number of fetches from the same repository
//! reaches a configurable threshold (for example 3), emits a `PromptSuggestion`
//! recommending that the user clone the repository for faster access.
6
7use std::collections::HashMap;
8
/// Tracks how many HTTP fetches have been made against each GitHub
/// repository so that a "clone it locally" suggestion can be offered once a
/// repository's fetch count reaches the configured threshold.
#[derive(Clone, Debug)]
pub struct RepoFetchDetector {
    /// Fetch count at which a repository becomes eligible for a suggestion.
    pub threshold: usize,
    /// Number of recorded fetches per repository; keys have the form `"owner/repo"`.
    counts: HashMap<String, usize>,
}
18
19impl RepoFetchDetector {
20    /// Construct a detector with the given `threshold`.
21    ///
22    /// A threshold of `3` means a suggestion is emitted on the third fetch
23    /// from the same repository.
24    #[must_use]
25    pub fn new(threshold: usize) -> Self {
26        Self {
27            threshold,
28            counts: HashMap::new(),
29        }
30    }
31
32    /// Record a single HTTP fetch from `url`.
33    ///
34    /// Recognised URL patterns:
35    /// - `raw.githubusercontent.com/<owner>/<repo>/…`
36    /// - `github.com/<owner>/<repo>/blob/…`
37    ///
38    /// Unrecognised URLs are silently ignored.
39    pub fn record_fetch(&mut self, url: &str) {
40        if let Some(owner_repo) = parse_github_owner_repo(url) {
41            *self.counts.entry(owner_repo).or_insert(0) += 1;
42        }
43    }
44
45    /// Return `true` if the fetch count for `owner_repo` meets or exceeds the threshold.
46    #[must_use]
47    pub fn should_suggest(&self, owner_repo: &str) -> bool {
48        self.counts
49            .get(owner_repo)
50            .is_some_and(|&c| c >= self.threshold)
51    }
52
53    /// Return clone suggestions for every repository that has reached the threshold.
54    #[must_use]
55    pub fn suggestions(&self) -> Vec<String> {
56        let mut suggestions: Vec<String> = self
57            .counts
58            .iter()
59            .filter(|&(_, &count)| count >= self.threshold)
60            .map(|(owner_repo, _)| format!("Consider cloning {owner_repo} for faster access"))
61            .collect();
62        suggestions.sort();
63        suggestions
64    }
65}
66
/// Parse a GitHub URL and return `"owner/repo"` if it matches a recognised pattern.
///
/// The scheme may be `https://`, `http://`, or absent. Recognised patterns:
/// - `raw.githubusercontent.com/<owner>/<repo>/…`
/// - `github.com/<owner>/<repo>/blob/…`
///
/// Any other URL yields `None`. In particular, `github.com` paths that are
/// not `blob` file views (for example `github.com/settings/profile`,
/// `github.com/orgs/<org>/people`, or a repository's issue pages) are
/// rejected, so pages that are not file fetches are never attributed to a
/// bogus `"owner/repo"` key.
fn parse_github_owner_repo(url: &str) -> Option<String> {
    // Drop an optional scheme so each host only has to be matched once.
    let hostpath = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"))
        .unwrap_or(url);

    // Pattern: raw.githubusercontent.com/<owner>/<repo>/
    if let Some(rest) = hostpath.strip_prefix("raw.githubusercontent.com/") {
        return extract_two_segments(rest);
    }

    // Pattern: github.com/<owner>/<repo>/blob/
    let rest = hostpath.strip_prefix("github.com/")?;
    // The segment after the repository must be exactly `blob` and must be
    // followed by a `/` (i.e. a fourth piece exists, even if empty).
    let mut after_repo = rest.splitn(4, '/').skip(2);
    let is_blob_view = after_repo.next() == Some("blob") && after_repo.next().is_some();
    if is_blob_view {
        extract_two_segments(rest)
    } else {
        None
    }
}

/// Extract the first two path segments as `"seg1/seg2"` from a URL suffix.
///
/// Returns `None` when either of the first two `/`-separated segments is
/// missing or empty. Anything after the second segment is ignored.
fn extract_two_segments(rest: &str) -> Option<String> {
    let mut segments = rest.splitn(3, '/');
    match (segments.next(), segments.next()) {
        (Some(owner), Some(repo)) if !owner.is_empty() && !repo.is_empty() => {
            Some(format!("{owner}/{repo}"))
        }
        _ => None,
    }
}
97
#[cfg(test)]
// Relax the panic-oriented clippy lints inside test code.
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {
    use super::*;

    /// Raw-content URL shared by the tests that target `owner/repo`.
    const RAW_FILE_URL: &str = "https://raw.githubusercontent.com/owner/repo/main/file.txt";

    /// Build a detector with `threshold` and record `times` fetches of `url`.
    fn detector_after(threshold: usize, url: &str, times: usize) -> RepoFetchDetector {
        let mut detector = RepoFetchDetector::new(threshold);
        for _ in 0..times {
            detector.record_fetch(url);
        }
        detector
    }

    #[test]
    fn three_fetches_from_raw_url_triggers_suggestion() {
        let detector = detector_after(3, RAW_FILE_URL, 3);

        assert!(detector.should_suggest("owner/repo"));

        let messages = detector.suggestions();
        assert!(!messages.is_empty());
        assert!(messages[0].contains("owner/repo"));
    }

    #[test]
    fn two_fetches_do_not_trigger_suggestion() {
        let detector = detector_after(3, RAW_FILE_URL, 2);

        assert!(!detector.should_suggest("owner/repo"));
        assert!(detector.suggestions().is_empty());
    }

    #[test]
    fn github_blob_url_is_recognised() {
        let detector = detector_after(1, "https://github.com/acme/widget/blob/main/src/lib.rs", 1);

        assert!(detector.should_suggest("acme/widget"));
    }

    #[test]
    fn unrecognised_url_is_ignored() {
        let detector = detector_after(1, "https://example.com/some/path/file.txt", 1);

        assert!(detector.suggestions().is_empty());
    }

    #[test]
    fn threshold_boundary_exactly_at_threshold() {
        let mut detector = RepoFetchDetector::new(2);

        // Below the threshold: one fetch is not enough.
        detector.record_fetch("https://raw.githubusercontent.com/x/y/main/a.txt");
        assert!(!detector.should_suggest("x/y"), "1 fetch should not trigger");

        // Exactly at the threshold: the second fetch flips the answer.
        detector.record_fetch("https://raw.githubusercontent.com/x/y/main/b.txt");
        assert!(detector.should_suggest("x/y"), "2 fetches should trigger");
    }
}