Skip to main content

suno_core/
select.rs

1//! Pure clip selection and filtering.
2
3use std::cmp::Reverse;
4use std::collections::HashSet;
5
6use crate::model::Clip;
7
/// A recency filter specification.
///
/// Either a look-back window measured in seconds, or a marker meaning
/// "everything since the previous run". Build one directly, with
/// [`RecencySpec::parse`], or through `str::parse`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RecencySpec {
    /// Keep clips created within the last N seconds before `now`.
    Relative(u64),
    /// Keep clips created after the last-run timestamp.
    LastRun,
}

impl RecencySpec {
    /// Parse a spec string such as `"7d"`, `"2w"`, or `"last-run"`.
    ///
    /// The literal `"last-run"` maps to [`RecencySpec::LastRun`]. Anything
    /// else must be a run of ASCII digits followed by a unit: `d` (days) or
    /// `w` (weeks). The count is converted to seconds and returned as
    /// [`RecencySpec::Relative`].
    ///
    /// # Errors
    ///
    /// Returns a human-readable message when the numeric part is missing or
    /// does not fit in a `u64`, when the unit is not `d` or `w`, or when the
    /// resulting number of seconds overflows a `u64`.
    pub fn parse(spec: &str) -> Result<Self, String> {
        const SECS_PER_DAY: u64 = 86_400;
        const SECS_PER_WEEK: u64 = 7 * SECS_PER_DAY;

        if spec == "last-run" {
            return Ok(Self::LastRun);
        }

        // Everything up to the first non-digit is the count; the rest is the unit.
        let split = spec
            .find(|c: char| !c.is_ascii_digit())
            .unwrap_or(spec.len());
        let (digits, unit) = spec.split_at(split);

        // The count is validated before the unit so that inputs such as "wd"
        // are reported as an invalid spec rather than an unknown unit.
        let count: u64 = digits
            .parse()
            .map_err(|_| format!("invalid recency spec: {spec}"))?;

        let secs_per_unit = match unit {
            "d" => SECS_PER_DAY,
            "w" => SECS_PER_WEEK,
            _ => return Err(format!("unknown unit in recency spec: {spec}")),
        };

        count
            .checked_mul(secs_per_unit)
            .map(Self::Relative)
            .ok_or_else(|| format!("recency spec overflows: {spec}"))
    }
}

impl std::str::FromStr for RecencySpec {
    type Err = String;

    /// Delegates to [`RecencySpec::parse`] so `"7d".parse::<RecencySpec>()` works.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::parse(s)
    }
}
42
43/// Parameters for clip selection.
44pub struct SelectParams {
45    /// Keep at most the N most recent clips (by `created_at`).
46    pub limit: Option<usize>,
47    /// Keep only clips newer than this spec.
48    pub since: Option<RecencySpec>,
49    /// Always retain at least this many newest clips, regardless of the recency filter.
50    pub min_newest: usize,
51    /// Current Unix timestamp in seconds; used for relative recency specs.
52    pub now: u64,
53    /// Last-run Unix timestamp in seconds; used when `since` is `RecencySpec::LastRun`.
54    pub last_run: Option<u64>,
55}
56
57impl Default for SelectParams {
58    fn default() -> Self {
59        Self {
60            limit: None,
61            since: None,
62            min_newest: 1,
63            now: 0,
64            last_run: None,
65        }
66    }
67}
68
69/// Produce the final ordered selection from a slice of clips.
70///
71/// Deduplicates by ID (first occurrence wins), applies recency and limit
72/// filters, and enforces the min-newest floor. The original input order is
73/// always preserved in the output.
74pub fn select<'a>(clips: &'a [Clip], params: &SelectParams) -> Vec<&'a Clip> {
75    let mut seen: HashSet<&str> = HashSet::new();
76    let deduped: Vec<&Clip> = clips
77        .iter()
78        .filter(|c| seen.insert(c.id.as_str()))
79        .collect();
80
81    let threshold: Option<u64> = match &params.since {
82        None => None,
83        Some(RecencySpec::Relative(secs)) => Some(params.now.saturating_sub(*secs)),
84        Some(RecencySpec::LastRun) => params.last_run,
85    };
86
87    // Indices into deduped sorted by clip_ts descending; computed once and
88    // reused by both the min-newest floor and the limit step.
89    let recency_order: Vec<usize> = {
90        let mut idx: Vec<usize> = (0..deduped.len()).collect();
91        idx.sort_by_key(|&i| Reverse(clip_ts(deduped[i])));
92        idx
93    };
94
95    // Apply recency filter. Clips with an unparseable timestamp are kept;
96    // they are not given an epoch timestamp that would make them a deletion candidate.
97    let mut keep: HashSet<&str> = match threshold {
98        Some(t) => deduped
99            .iter()
100            .filter(|c| parse_timestamp(&c.created_at).is_none_or(|ts| ts > t))
101            .map(|c| c.id.as_str())
102            .collect(),
103        None => deduped.iter().map(|c| c.id.as_str()).collect(),
104    };
105
106    // Min-newest floor: when a recency threshold was active and fewer than
107    // min_newest clips passed it, pull in enough of the newest clips to meet
108    // the floor.
109    if threshold.is_some() && keep.len() < params.min_newest {
110        for &i in recency_order.iter().take(params.min_newest) {
111            keep.insert(deduped[i].id.as_str());
112        }
113    }
114
115    // Limit: keep only the N most recent. When a recency threshold is active the
116    // floor is authoritative, so the effective limit cannot drop below min_newest.
117    let effective_limit = params.limit.map(|n| {
118        if threshold.is_some() {
119            n.max(params.min_newest)
120        } else {
121            n
122        }
123    });
124    if let Some(n) = effective_limit
125        && keep.len() > n
126    {
127        keep = recency_order
128            .iter()
129            .filter(|&&i| keep.contains(deduped[i].id.as_str()))
130            .take(n)
131            .map(|&i| deduped[i].id.as_str())
132            .collect();
133    }
134
135    deduped
136        .into_iter()
137        .filter(|c| keep.contains(c.id.as_str()))
138        .collect()
139}
140
141/// Return the Unix timestamp (seconds) for a clip, or 0 if unparseable.
142fn clip_ts(clip: &Clip) -> u64 {
143    parse_timestamp(&clip.created_at).unwrap_or(0)
144}
145
146/// Parse an ISO 8601 UTC timestamp string to Unix seconds.
147///
148/// Accepts `YYYY-MM-DDTHH:MM:SS[.fff]Z`.
149fn parse_timestamp(s: &str) -> Option<u64> {
150    let s = s.strip_suffix('Z')?;
151    let (date, time) = s.split_once('T')?;
152    let time = time.split_once('.').map_or(time, |(t, _)| t);
153    let mut dp = date.split('-');
154    let year: u32 = dp.next()?.parse().ok()?;
155    let month: u32 = dp.next()?.parse().ok()?;
156    let day: u32 = dp.next()?.parse().ok()?;
157    let mut tp = time.split(':');
158    let hour: u64 = tp.next()?.parse().ok()?;
159    let minute: u64 = tp.next()?.parse().ok()?;
160    let second: u64 = tp.next()?.parse().ok()?;
161    let days = civil_to_days(year, month, day)?;
162    Some(days * 86_400 + hour * 3_600 + minute * 60 + second)
163}
164
/// Convert a Gregorian calendar date to days since the Unix epoch (1970-01-01).
///
/// Uses Howard Hinnant's civil-to-days algorithm. That algorithm is only
/// meaningful for a real calendar date, so the month must be 1–12 and the
/// day must exist in that month (leap years included).
///
/// Returns `None` for an invalid month or day, and for dates before the
/// epoch, which cannot be represented as an unsigned day count.
fn civil_to_days(y: u32, m: u32, d: u32) -> Option<u64> {
    let is_leap = y % 4 == 0 && (y % 100 != 0 || y % 400 == 0);
    let month_len = match m {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if is_leap => 29,
        2 => 28,
        _ => return None,
    };
    if d == 0 || d > month_len {
        return None;
    }

    let (y, m, d) = (i64::from(y), i64::from(m), i64::from(d));
    // Treat January and February as months 11 and 12 of the previous year so
    // the leap day falls at the very end of the shifted year.
    let ya = if m <= 2 { y - 1 } else { y };
    let era = ya.div_euclid(400);
    let yoe = ya - era * 400; // year of era, 0..=399
    let doy = (153 * (m + if m > 2 { -3 } else { 9 }) + 2) / 5 + d - 1; // day of shifted year
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; // day of era
    // 719_468 is the day count from 0000-03-01 to 1970-01-01.
    let days = era * 146_097 + doe - 719_468;
    u64::try_from(days).ok()
}
178
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a clip carrying just an ID and a creation timestamp; every other
    /// field takes its `Default` value.
    fn clip(id: &str, created_at: &str) -> Clip {
        Clip {
            id: id.into(),
            created_at: created_at.into(),
            ..Default::default()
        }
    }

    /// Unix seconds for a timestamp literal the test knows to be valid.
    fn ts(s: &str) -> u64 {
        parse_timestamp(s).unwrap()
    }

    /// IDs of the selected clips, in output order.
    fn ids<'a>(selected: &[&'a Clip]) -> Vec<&'a str> {
        selected.iter().map(|&c| c.id.as_str()).collect()
    }

    // --- parse_timestamp ---

    #[test]
    fn parse_timestamp_epoch() {
        assert_eq!(parse_timestamp("1970-01-01T00:00:00Z"), Some(0));
    }

    #[test]
    fn parse_timestamp_one_day() {
        assert_eq!(parse_timestamp("1970-01-02T00:00:00Z"), Some(86_400));
    }

    #[test]
    fn parse_timestamp_with_millis() {
        // Fractional seconds are discarded, so both forms must agree.
        let with_millis = parse_timestamp("2024-01-15T08:30:00.000Z");
        let without_millis = parse_timestamp("2024-01-15T08:30:00Z");
        assert_eq!(with_millis, without_millis);
    }

    #[test]
    fn parse_timestamp_missing_z_returns_none() {
        assert_eq!(parse_timestamp("2024-01-15T08:30:00"), None);
    }

    #[test]
    fn parse_timestamp_empty_returns_none() {
        assert_eq!(parse_timestamp(""), None);
    }

    // --- RecencySpec::parse ---

    #[test]
    fn parse_recency_days() {
        match RecencySpec::parse("7d") {
            Ok(RecencySpec::Relative(secs)) => assert_eq!(secs, 7 * 86_400),
            _ => panic!("expected Relative"),
        }
    }

    #[test]
    fn parse_recency_weeks() {
        match RecencySpec::parse("2w") {
            Ok(RecencySpec::Relative(secs)) => assert_eq!(secs, 2 * 7 * 86_400),
            _ => panic!("expected Relative"),
        }
    }

    #[test]
    fn parse_recency_last_run() {
        let parsed = RecencySpec::parse("last-run");
        assert!(matches!(parsed, Ok(RecencySpec::LastRun)));
    }

    #[test]
    fn parse_recency_invalid_unit() {
        assert!(RecencySpec::parse("3x").is_err());
    }

    #[test]
    fn parse_recency_invalid_number() {
        assert!(RecencySpec::parse("wd").is_err());
    }

    #[test]
    fn parse_recency_overflow_returns_error() {
        for unit in ["d", "w"] {
            let spec = format!("{}{unit}", u64::MAX);
            assert!(RecencySpec::parse(&spec).is_err());
        }
    }

    // --- select: deduplication ---

    #[test]
    fn dedup_keeps_first_occurrence() {
        let clips = [
            clip("a", "2024-01-01T00:00:00Z"),
            clip("b", "2024-01-02T00:00:00Z"),
            clip("a", "2024-01-03T00:00:00Z"),
        ];
        let selected = select(&clips, &SelectParams::default());
        assert_eq!(ids(&selected), ["a", "b"]);
    }

    // --- select: order preservation ---

    #[test]
    fn preserves_original_order() {
        // The clips are deliberately not in chronological order; select must
        // hand them back exactly as given.
        let clips = [
            clip("a", "2024-01-03T00:00:00Z"),
            clip("b", "2024-01-01T00:00:00Z"),
            clip("c", "2024-01-02T00:00:00Z"),
        ];
        let selected = select(&clips, &SelectParams::default());
        assert_eq!(ids(&selected), ["a", "b", "c"]);
    }

    // --- select: limit ---

    #[test]
    fn limit_keeps_n_most_recent() {
        let clips = [
            clip("a", "2024-01-01T00:00:00Z"),
            clip("b", "2024-01-03T00:00:00Z"),
            clip("c", "2024-01-02T00:00:00Z"),
        ];
        let params = SelectParams {
            limit: Some(2),
            ..Default::default()
        };
        // The oldest clip (a) is trimmed; b and c keep their input order.
        assert_eq!(ids(&select(&clips, &params)), ["b", "c"]);
    }

    #[test]
    fn limit_larger_than_set_keeps_all() {
        let clips = [
            clip("a", "2024-01-01T00:00:00Z"),
            clip("b", "2024-01-02T00:00:00Z"),
        ];
        let params = SelectParams {
            limit: Some(10),
            ..Default::default()
        };
        assert_eq!(select(&clips, &params).len(), 2);
    }

    // --- select: recency filter ---

    #[test]
    fn since_filters_old_clips() {
        // now = 2024-01-10, window = 7 days, so the threshold is 2024-01-03.
        let clips = [
            clip("old", "2024-01-01T00:00:00Z"),
            clip("new", "2024-01-05T00:00:00Z"),
        ];
        let params = SelectParams {
            since: Some(RecencySpec::Relative(7 * 86_400)),
            min_newest: 0,
            now: ts("2024-01-10T00:00:00Z"),
            ..Default::default()
        };
        assert_eq!(ids(&select(&clips, &params)), ["new"]);
    }

    #[test]
    fn since_last_run_uses_supplied_timestamp() {
        let clips = [
            clip("old", "2024-01-04T00:00:00Z"),
            clip("new", "2024-01-06T00:00:00Z"),
        ];
        let params = SelectParams {
            since: Some(RecencySpec::LastRun),
            min_newest: 0,
            last_run: Some(ts("2024-01-05T00:00:00Z")),
            ..Default::default()
        };
        assert_eq!(ids(&select(&clips, &params)), ["new"]);
    }

    // --- select: min-newest floor ---

    #[test]
    fn min_newest_floor_prevents_empty_selection() {
        let clips = [
            clip("a", "2024-01-01T00:00:00Z"),
            clip("b", "2024-01-02T00:00:00Z"),
        ];
        // Both clips are older than the 1-day window; the floor of one must
        // rescue the newest of them.
        let params = SelectParams {
            since: Some(RecencySpec::Relative(86_400)),
            min_newest: 1,
            now: ts("2024-01-10T00:00:00Z"),
            ..Default::default()
        };
        assert_eq!(ids(&select(&clips, &params)), ["b"]);
    }

    #[test]
    fn min_newest_floor_keeps_n_when_all_filtered() {
        let clips = [
            clip("a", "2024-01-01T00:00:00Z"),
            clip("b", "2024-01-02T00:00:00Z"),
            clip("c", "2024-01-03T00:00:00Z"),
        ];
        let params = SelectParams {
            since: Some(RecencySpec::Relative(86_400)),
            min_newest: 2,
            now: ts("2024-01-10T00:00:00Z"),
            ..Default::default()
        };
        // The two newest clips come back, still in input order.
        assert_eq!(ids(&select(&clips, &params)), ["b", "c"]);
    }

    #[test]
    fn min_newest_not_applied_without_recency_filter() {
        // The floor only matters when a `since` filter is active.
        let clips = [
            clip("a", "2024-01-01T00:00:00Z"),
            clip("b", "2024-01-02T00:00:00Z"),
        ];
        let params = SelectParams {
            min_newest: 5,
            ..Default::default()
        };
        assert_eq!(select(&clips, &params).len(), 2);
    }

    #[test]
    fn min_newest_does_not_reduce_passing_set() {
        // More clips pass the filter than the floor asks for; the floor must
        // not shrink the result.
        let clips = [
            clip("a", "2024-01-08T00:00:00Z"),
            clip("b", "2024-01-09T00:00:00Z"),
        ];
        let params = SelectParams {
            since: Some(RecencySpec::Relative(7 * 86_400)),
            min_newest: 1,
            now: ts("2024-01-10T00:00:00Z"),
            ..Default::default()
        };
        assert_eq!(select(&clips, &params).len(), 2);
    }

    // --- select: combined limit + recency + min-newest ---

    #[test]
    fn limit_trims_when_above_min_newest() {
        let clips = [
            clip("a", "2024-01-04T00:00:00Z"),
            clip("b", "2024-01-05T00:00:00Z"),
            clip("c", "2024-01-06T00:00:00Z"),
            clip("d", "2024-01-07T00:00:00Z"),
            clip("e", "2024-01-08T00:00:00Z"),
        ];
        // All five pass the 7-day window. With min_newest = 2 and limit = 3
        // the effective limit is max(3, 2) = 3, leaving the three newest.
        let params = SelectParams {
            since: Some(RecencySpec::Relative(7 * 86_400)),
            min_newest: 2,
            limit: Some(3),
            now: ts("2024-01-10T00:00:00Z"),
            ..Default::default()
        };
        assert_eq!(ids(&select(&clips, &params)), ["c", "d", "e"]);
    }

    #[test]
    fn limit_below_min_newest_is_clamped_to_floor() {
        let clips = [
            clip("a", "2024-01-01T00:00:00Z"),
            clip("b", "2024-01-02T00:00:00Z"),
            clip("c", "2024-01-03T00:00:00Z"),
        ];
        // Every clip fails the recency filter. The floor of three outranks
        // the limit of one, so all three are kept.
        let params = SelectParams {
            since: Some(RecencySpec::Relative(86_400)),
            min_newest: 3,
            limit: Some(1),
            now: ts("2024-01-10T00:00:00Z"),
            ..Default::default()
        };
        assert_eq!(select(&clips, &params).len(), 3);
    }

    #[test]
    fn unparseable_timestamp_is_kept_through_recency_filter() {
        let clips = [clip("good", "2024-01-09T00:00:00Z"), clip("bad_ts", "")];
        let params = SelectParams {
            since: Some(RecencySpec::Relative(7 * 86_400)),
            min_newest: 0,
            now: ts("2024-01-10T00:00:00Z"),
            ..Default::default()
        };
        assert_eq!(select(&clips, &params).len(), 2);
    }
}