1use anyhow::{Context, Result};
2use chrono::Utc;
3use rusqlite::{Connection, OptionalExtension, params};
4use serde::{Deserialize, Serialize};
5use sha2::{Digest as ShaDigest, Sha256};
6use std::path::Path;
7use std::time::Duration;
8
9#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
10#[serde(rename_all = "snake_case")]
11pub enum SourceKind {
12 GithubReleases,
13 ChangelogUrl,
14 DocsUrl,
15}
16
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct SourceConfig {
19 pub name: String,
20 pub kind: SourceKind,
21 pub url: String,
22 #[serde(default)]
23 pub tags: Vec<String>,
24}
25
26#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct SourcesFile {
28 #[serde(default)]
29 pub source: Vec<SourceConfig>,
30}
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct IntelItem {
34 pub source: String,
35 pub version: String,
36 pub date: String,
37 pub body: String,
38 pub url: String,
39 pub etag: Option<String>,
40 pub tags: Vec<String>,
41}
42
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct IntelDigest {
45 pub source: String,
46 pub title: String,
47 pub summary: String,
48 pub version: String,
49 pub date: String,
50 pub url: String,
51 pub tags: Vec<String>,
52}
53
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct BacklogTicket {
56 pub title: String,
57 pub source: String,
58 pub score: u32,
59 pub reason: String,
60 pub url: String,
61 pub tags: Vec<String>,
62}
63
64#[derive(Debug, Clone, Serialize, Deserialize)]
65pub struct ScanReport {
66 pub scanned: usize,
67 pub inserted_or_updated: usize,
68 pub items: Vec<IntelItem>,
69}
70
71pub fn load_sources_file(path: &Path) -> Result<Vec<SourceConfig>> {
72 let raw = std::fs::read_to_string(path)
73 .with_context(|| format!("could not read intel sources file {}", path.display()))?;
74 let parsed: SourcesFile = toml::from_str(&raw)
75 .with_context(|| format!("could not parse intel sources file {}", path.display()))?;
76 Ok(parsed.source)
77}
78
/// Handle to the SQLite-backed intel cache.
///
/// Only the database path is kept; a connection is opened per operation.
pub struct IntelCache {
    /// Location of the SQLite database file.
    path: std::path::PathBuf,
}
82
83impl IntelCache {
84 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
85 let path = path.as_ref().to_path_buf();
86 if let Some(parent) = path.parent() {
87 std::fs::create_dir_all(parent)?;
88 }
89 let cache = Self { path };
90 cache.init()?;
91 Ok(cache)
92 }
93
94 fn conn(&self) -> Result<Connection> {
95 Ok(Connection::open(&self.path)?)
96 }
97
98 fn init(&self) -> Result<()> {
99 let conn = self.conn()?;
100 conn.execute_batch(
101 "CREATE TABLE IF NOT EXISTS intel_items (
102 source TEXT NOT NULL,
103 version TEXT NOT NULL,
104 date TEXT NOT NULL,
105 body TEXT NOT NULL,
106 url TEXT NOT NULL,
107 etag TEXT,
108 tags TEXT NOT NULL DEFAULT '[]',
109 updated_at INTEGER NOT NULL,
110 UNIQUE(source, version, url)
111 );",
112 )?;
113 Ok(())
114 }
115
116 pub fn upsert_items(&self, items: &[IntelItem]) -> Result<usize> {
117 let mut conn = self.conn()?;
118 let tx = conn.transaction()?;
119 let mut changed = 0usize;
120 for item in items {
121 let tags = serde_json::to_string(&item.tags)?;
122 changed += tx.execute(
123 "INSERT INTO intel_items (source, version, date, body, url, etag, tags, updated_at)
124 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, strftime('%s','now'))
125 ON CONFLICT(source, version, url) DO UPDATE SET
126 date=excluded.date,
127 body=excluded.body,
128 etag=excluded.etag,
129 tags=excluded.tags,
130 updated_at=excluded.updated_at",
131 params![
132 item.source,
133 item.version,
134 item.date,
135 item.body,
136 item.url,
137 item.etag,
138 tags
139 ],
140 )?;
141 }
142 tx.commit()?;
143 Ok(changed)
144 }
145
146 pub fn items(&self, limit: usize) -> Result<Vec<IntelItem>> {
147 let conn = self.conn()?;
148 let mut stmt = conn.prepare(
149 "SELECT source, version, date, body, url, etag, tags
150 FROM intel_items
151 ORDER BY date DESC, updated_at DESC
152 LIMIT ?1",
153 )?;
154 let rows = stmt.query_map([limit as i64], row_to_item)?;
155 Ok(rows.filter_map(|r| r.ok()).collect())
156 }
157
158 pub fn digests(&self, limit: usize) -> Result<Vec<IntelDigest>> {
159 Ok(self
160 .items(limit)?
161 .into_iter()
162 .map(|item| IntelDigest {
163 title: digest_title(&item),
164 summary: summarize(&item.body, 260),
165 version: item.version,
166 date: item.date,
167 url: item.url,
168 source: item.source,
169 tags: item.tags,
170 })
171 .collect())
172 }
173
174 pub fn backlog(&self, limit: usize) -> Result<Vec<BacklogTicket>> {
175 let mut tickets: Vec<_> = self
176 .digests(limit.saturating_mul(3).max(limit))?
177 .into_iter()
178 .map(|digest| {
179 let (score, reason) = score_digest(&digest);
180 BacklogTicket {
181 title: format!("Évaluer {} {}", digest.source, digest.version),
182 source: digest.source,
183 score,
184 reason,
185 url: digest.url,
186 tags: digest.tags,
187 }
188 })
189 .filter(|t| t.score > 0)
190 .collect();
191 tickets.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.title.cmp(&b.title)));
192 tickets.truncate(limit);
193 Ok(tickets)
194 }
195
196 pub fn etag_for_url(&self, url: &str) -> Result<Option<String>> {
197 let conn = self.conn()?;
198 Ok(conn
199 .query_row(
200 "SELECT etag FROM intel_items WHERE url=?1 AND etag IS NOT NULL LIMIT 1",
201 [url],
202 |row| row.get(0),
203 )
204 .optional()?)
205 }
206}
207
208fn row_to_item(row: &rusqlite::Row<'_>) -> rusqlite::Result<IntelItem> {
209 let tags_raw: String = row.get(6)?;
210 let tags = serde_json::from_str(&tags_raw).unwrap_or_default();
211 Ok(IntelItem {
212 source: row.get(0)?,
213 version: row.get(1)?,
214 date: row.get(2)?,
215 body: row.get(3)?,
216 url: row.get(4)?,
217 etag: row.get(5)?,
218 tags,
219 })
220}
221
222pub async fn scan_sources(
223 sources: &[SourceConfig],
224 cache_path: impl AsRef<Path>,
225 limit_per_source: usize,
226) -> Result<ScanReport> {
227 let cache = IntelCache::open(cache_path)?;
228 let client = reqwest::Client::builder()
229 .timeout(Duration::from_secs(20))
230 .user_agent(format!("sparrow-intel/{}", env!("CARGO_PKG_VERSION")))
231 .build()?;
232 let mut all = Vec::new();
233 for source in sources {
234 let mut items = fetch_source(&client, source, limit_per_source).await?;
235 all.append(&mut items);
236 }
237 let changed = cache.upsert_items(&all)?;
238 Ok(ScanReport {
239 scanned: sources.len(),
240 inserted_or_updated: changed,
241 items: all,
242 })
243}
244
245async fn fetch_source(
246 client: &reqwest::Client,
247 source: &SourceConfig,
248 limit: usize,
249) -> Result<Vec<IntelItem>> {
250 match source.kind {
251 SourceKind::GithubReleases => fetch_github_releases(client, source, limit).await,
252 SourceKind::ChangelogUrl | SourceKind::DocsUrl => fetch_text_url(client, source).await,
253 }
254}
255
256async fn fetch_github_releases(
257 client: &reqwest::Client,
258 source: &SourceConfig,
259 limit: usize,
260) -> Result<Vec<IntelItem>> {
261 let api = github_releases_api(&source.url)?;
262 let resp = client.get(api).send().await?.error_for_status()?;
263 let releases: Vec<serde_json::Value> = resp.json().await?;
264 Ok(releases
265 .into_iter()
266 .take(limit)
267 .map(|release| {
268 let version = release
269 .get("tag_name")
270 .and_then(|v| v.as_str())
271 .unwrap_or("release")
272 .to_string();
273 let name = release
274 .get("name")
275 .and_then(|v| v.as_str())
276 .unwrap_or(&version);
277 let body = release
278 .get("body")
279 .and_then(|v| v.as_str())
280 .unwrap_or("")
281 .to_string();
282 let date = release
283 .get("published_at")
284 .and_then(|v| v.as_str())
285 .unwrap_or("")
286 .to_string();
287 let url = release
288 .get("html_url")
289 .and_then(|v| v.as_str())
290 .unwrap_or(&source.url)
291 .to_string();
292 IntelItem {
293 source: source.name.clone(),
294 version: version.clone(),
295 date,
296 body: format!("{name}\n\n{body}"),
297 url,
298 etag: None,
299 tags: source.tags.clone(),
300 }
301 })
302 .collect())
303}
304
305async fn fetch_text_url(client: &reqwest::Client, source: &SourceConfig) -> Result<Vec<IntelItem>> {
306 let resp = client.get(&source.url).send().await?.error_for_status()?;
307 let etag = resp
308 .headers()
309 .get(reqwest::header::ETAG)
310 .and_then(|h| h.to_str().ok())
311 .map(str::to_string);
312 let body = resp.text().await?;
313 let version = short_hash(&body);
314 let date = Utc::now().to_rfc3339();
315 Ok(vec![IntelItem {
316 source: source.name.clone(),
317 version,
318 date,
319 body,
320 url: source.url.clone(),
321 etag,
322 tags: source.tags.clone(),
323 }])
324}
325
326fn github_releases_api(raw: &str) -> Result<String> {
327 if raw.contains("api.github.com/repos/") {
328 return Ok(raw.to_string());
329 }
330 let parsed = url::Url::parse(raw)?;
331 let host = parsed.host_str().unwrap_or_default();
332 if host != "github.com" {
333 anyhow::bail!("github_releases source must point at github.com or api.github.com");
334 }
335 let parts: Vec<_> = parsed
336 .path_segments()
337 .map(|s| s.collect::<Vec<_>>())
338 .unwrap_or_default();
339 if parts.len() < 2 {
340 anyhow::bail!("github_releases source must include owner/repo");
341 }
342 Ok(format!(
343 "https://api.github.com/repos/{}/{}/releases",
344 parts[0], parts[1]
345 ))
346}
347
348fn short_hash(body: &str) -> String {
349 let mut hasher = Sha256::new();
350 hasher.update(body.as_bytes());
351 format!("{:.12x}", hasher.finalize())
352}
353
354fn digest_title(item: &IntelItem) -> String {
355 item.body
356 .lines()
357 .find(|l| !l.trim().is_empty())
358 .map(|l| summarize(l, 90))
359 .unwrap_or_else(|| item.version.clone())
360}
361
/// Collapses all whitespace runs in `text` to single spaces and limits the
/// result to `max` characters.
///
/// Text that fits is returned unchanged (after compaction). Longer text is
/// cut to `max - 3` characters and suffixed with `...`. Lengths are measured
/// in characters, not bytes, so multi-byte text that already fits is never
/// truncated. When `max` is below 3 the result is just `...`.
fn summarize(text: &str, max: usize) -> String {
    let compact = text.split_whitespace().collect::<Vec<_>>().join(" ");

    // A character at index `max` exists only if there are more than `max`
    // characters; this avoids counting the whole string.
    if compact.chars().nth(max).is_none() {
        return compact;
    }

    let keep = max.saturating_sub(3);
    let mut shortened: String = compact.chars().take(keep).collect();
    shortened.push_str("...");
    shortened
}
376
377fn score_digest(digest: &IntelDigest) -> (u32, String) {
378 const SIGNALS: &[(&str, u32, &str)] = &[
379 ("agent", 20, "agentic workflow"),
380 ("tool", 14, "tooling/API"),
381 ("mcp", 18, "MCP compatibility"),
382 ("permission", 14, "permissions"),
383 ("sandbox", 14, "sandbox safety"),
384 ("approval", 12, "approvals"),
385 ("checkpoint", 12, "checkpoint/replay"),
386 ("replay", 12, "checkpoint/replay"),
387 ("webview", 10, "cockpit UI"),
388 ("performance", 10, "performance"),
389 ("routing", 10, "model routing"),
390 ("memory", 8, "memory/context"),
391 ("context", 8, "memory/context"),
392 ("release", 6, "release intelligence"),
393 ];
394 let hay = format!(
395 "{} {} {} {}",
396 digest.title,
397 digest.summary,
398 digest.source,
399 digest.tags.join(" ")
400 )
401 .to_lowercase();
402 let mut score = 0;
403 let mut reasons = Vec::new();
404 for (needle, weight, reason) in SIGNALS {
405 if hay.contains(needle) {
406 score += *weight;
407 if !reasons.contains(reason) {
408 reasons.push(*reason);
409 }
410 }
411 }
412 (score.min(100), reasons.join(", "))
413}
414
/// Returns the default cache location: `intel.sqlite` inside `state_dir`.
pub fn default_cache_path(state_dir: &Path) -> std::path::PathBuf {
    let mut location = state_dir.to_path_buf();
    location.push("intel.sqlite");
    location
}
418
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fixed test item whose only varying part is the body.
    fn item(body: &str) -> IntelItem {
        IntelItem {
            source: String::from("test"),
            version: String::from("v1"),
            date: String::from("2026-06-12T00:00:00Z"),
            body: body.to_string(),
            url: String::from("https://example.test/release"),
            etag: None,
            tags: vec![String::from("agent")],
        }
    }

    /// An upserted item is visible as a digest and as a scored backlog ticket.
    #[test]
    fn cache_round_trips_digests_and_backlog() {
        let workdir = tempfile::tempdir().unwrap();
        let db_path = workdir.path().join("intel.sqlite");
        let cache = IntelCache::open(db_path).unwrap();

        let stored = [item("Agent tool sandbox release with replay support")];
        cache.upsert_items(&stored).unwrap();

        let digests = cache.digests(10).unwrap();
        assert_eq!(digests.len(), 1);

        let tickets = cache.backlog(10).unwrap();
        assert_eq!(tickets.len(), 1);
        assert!(tickets[0].score >= 40);
    }

    /// A minimal sources file yields one source of the declared kind.
    #[test]
    fn parses_sources_file() {
        let workdir = tempfile::tempdir().unwrap();
        let file = workdir.path().join("sources.toml");
        let contents = r#"
[[source]]
name = "Codex"
kind = "github_releases"
url = "https://github.com/openai/codex"
tags = ["agent", "cli"]
"#;
        std::fs::write(&file, contents).unwrap();

        let loaded = load_sources_file(&file).unwrap();
        assert_eq!(loaded.len(), 1);
        assert_eq!(loaded[0].kind, SourceKind::GithubReleases);
    }

    /// A repository URL maps onto the REST releases endpoint.
    #[test]
    fn github_api_url_is_derived_from_repo_url() {
        let endpoint = github_releases_api("https://github.com/openai/codex").unwrap();
        assert_eq!(
            endpoint,
            "https://api.github.com/repos/openai/codex/releases"
        );
    }
}