scanseq/
lib.rs

1//! # ScanSeq - High-Performance File Sequence Detection
2//!
3//! Fast, Rust-powered library and Python extension for detecting numbered file sequences.
4//! Designed for VFX, animation, and media production pipelines.
5//!
6//! # Crate Structure
7//!
8//! ```text
9//! scanseq (lib.rs)
10//! ├── core/           - Pure Rust sequence detection engine
11//! │   ├── mod.rs      - Module exports (Scanner, Seq, get_seqs)
12//! │   ├── file/       - File parsing, digit groups, mask generation
13//! │   ├── seq/        - Sequence grouping algorithm
14//! │   └── scan.rs     - Parallel directory scanning
15//! └── Python bindings (feature = "python")
16//! ```
17//!
18//! # Features
19//!
20//! - **`python`**: Enables PyO3 bindings for Python integration
21//!   - Build with: `maturin develop --features python`
22//!   - Or: `cargo build --features python`
23//!
24//! # Rust API
25//!
26//! ```ignore
27//! use scanseq::core::Scanner;
28//!
29//! let scanner = Scanner::new(vec!["/renders"], true, Some("*.exr"), 2);
30//! for seq in scanner.iter() {
31//!     println!("{} [{}-{}]", seq.pattern(), seq.start, seq.end);
32//! }
33//! ```
34//!
35//! # Python API
36//!
37//! ```python
38//! import scanseq
39//!
40//! scanner = scanseq.Scanner(["/renders"], recursive=True, mask="*.exr")
41//! for seq in scanner.result.seqs:
42//!     print(f"{seq.pattern} [{seq.start}-{seq.end}]")
43//! ```
44//!
45//! # Algorithm Overview
46//!
47//! 1. **Scan**: Discover directories using jwalk (parallel)
48//! 2. **Parse**: Extract digit groups from filenames, create masks
49//! 3. **Group**: Hash by mask (e.g., `render_@.exr`), sub-group by anchors
50//! 4. **Detect**: Find frame numbers, compute padding, identify gaps
51//!
52//! See [`core`] module for detailed algorithm documentation.
53
54pub mod core;
55
56#[cfg(feature = "python")]
57use pyo3::prelude::*;
58#[cfg(feature = "python")]
59use pyo3::types::PyDict;
60#[cfg(feature = "python")]
61use std::sync::Arc;
62#[cfg(feature = "python")]
63use std::time::Instant;
64
65#[cfg(feature = "python")]
66use core::Seq as CoreSeq;
67#[cfg(feature = "python")]
68use rayon::prelude::*;
69
/// Python-visible `Seq` class: an owned snapshot of a `core::Seq`.
///
/// Every field is exposed to Python as a read-only attribute.
#[cfg(feature = "python")]
#[pyclass(name = "Seq")]
#[derive(Clone)]
pub struct PySeq {
    /// Pattern string copied from `core::Seq::pattern()`.
    /// `format_frame` substitutes the frame number for a run of `padding`
    /// `#` characters (padding >= 2) or for `@` (otherwise).
    #[pyo3(get)]
    pattern: String,
    /// Start of the frame range.
    #[pyo3(get)]
    start: i64,
    /// End of the frame range (`expand` treats it as inclusive).
    #[pyo3(get)]
    end: i64,
    /// Zero-padding width used when formatting frame numbers.
    #[pyo3(get)]
    padding: usize,
    /// Frame numbers that exist; `get_file` binary-searches this list, so it
    /// is expected to be sorted ascending.
    #[pyo3(get)]
    indices: Vec<i64>,
    /// Frame numbers reported as missing; empty means the sequence is complete.
    #[pyo3(get)]
    missed: Vec<i64>,
}
88
#[cfg(feature = "python")]
impl From<CoreSeq> for PySeq {
    /// Convert a core sequence into its Python-facing form.
    ///
    /// The pattern is rendered first because `pattern()` needs to borrow `s`.
    /// The frame vectors are then moved out of `s` instead of being cloned:
    /// `s` is owned here and would be dropped right after the conversion, so
    /// copying both vectors was pure overhead.
    fn from(s: CoreSeq) -> Self {
        let pattern = s.pattern().to_string();
        PySeq {
            pattern,
            start: s.start,
            end: s.end,
            padding: s.padding,
            indices: s.indices,
            missed: s.missed,
        }
    }
}
102
#[cfg(feature = "python")]
impl PySeq {
    /// Build the path for `frame` by substituting it into `pattern`.
    ///
    /// * `padding >= 2`: the placeholder is `padding` consecutive `#`
    ///   characters and the frame is zero-padded to that width.
    /// * otherwise: the placeholder is `@` and the frame is written as-is.
    ///
    /// Every occurrence of the placeholder in the pattern is replaced.
    fn format_frame(&self, frame: i64) -> String {
        let width = self.padding;
        if width < 2 {
            // Unpadded form: plain decimal in place of '@'.
            return self.pattern.replace('@', &frame.to_string());
        }

        let hashes = "#".repeat(width);
        let padded = format!("{:0width$}", frame, width = width);
        self.pattern.replace(&hashes, &padded)
    }
}
116
#[cfg(feature = "python")]
#[pymethods]
impl PySeq {
    /// Debug representation: pattern, range and frame count, plus the number
    /// of missing frames when there are any.
    fn __repr__(&self) -> String {
        if self.missed.is_empty() {
            format!(
                "Seq(\"{}\", start={}, end={}, frames={})",
                self.pattern, self.start, self.end, self.indices.len()
            )
        } else {
            format!(
                "Seq(\"{}\", start={}, end={}, frames={}, missed={})",
                self.pattern, self.start, self.end, self.indices.len(), self.missed.len()
            )
        }
    }

    /// Support dict(seq) by implementing Mapping protocol
    fn keys(&self) -> Vec<&str> {
        vec!["pattern", "start", "end", "padding", "indices", "missed", "count"]
    }

    /// Look up one of the names returned by `keys()`.
    ///
    /// The `py` token is taken as a parameter (as in `to_dict`) rather than
    /// re-acquired with `Python::with_gil`: the method is only ever called
    /// with the GIL already held. Values are converted from borrows, so the
    /// pattern string and frame vectors are not cloned first.
    ///
    /// Raises `KeyError` for any other key.
    fn __getitem__(&self, py: Python<'_>, key: &str) -> PyResult<Py<PyAny>> {
        let value = match key {
            "pattern" => self.pattern.as_str().into_pyobject(py)?.into_any(),
            "start" => self.start.into_pyobject(py)?.into_any(),
            "end" => self.end.into_pyobject(py)?.into_any(),
            "padding" => self.padding.into_pyobject(py)?.into_any(),
            "indices" => (&self.indices).into_pyobject(py)?.into_any(),
            "missed" => (&self.missed).into_pyobject(py)?.into_any(),
            "count" => self.indices.len().into_pyobject(py)?.into_any(),
            _ => {
                return Err(pyo3::exceptions::PyKeyError::new_err(format!(
                    "Unknown key: {}",
                    key
                )))
            }
        };
        Ok(value.unbind())
    }

    /// Same text as `__repr__`.
    fn __str__(&self) -> String {
        self.__repr__()
    }

    /// Number of files in sequence
    fn __len__(&self) -> usize {
        self.indices.len()
    }

    /// Get file path for specific frame number.
    /// Returns None if frame doesn't exist (not in indices).
    #[pyo3(signature = (frame))]
    fn get_file(&self, frame: i64) -> Option<String> {
        // Binary search relies on `indices` being sorted ascending.
        if self.indices.binary_search(&frame).is_ok() {
            Some(self.format_frame(frame))
        } else {
            None
        }
    }

    /// Check if sequence is complete (no missing frames)
    fn is_complete(&self) -> bool {
        self.missed.is_empty()
    }

    /// Expand to all frame paths in range (including missing).
    /// Limited to 1M frames to prevent OOM.
    ///
    /// Raises `ValueError` when `end - start + 1` exceeds the limit.
    fn expand(&self) -> PyResult<Vec<String>> {
        const MAX_EXPAND: i64 = 1_000_000;
        // Saturating arithmetic: an extreme range must not overflow i64.
        let count = self.end.saturating_sub(self.start).saturating_add(1);
        if count > MAX_EXPAND {
            return Err(pyo3::exceptions::PyValueError::new_err(
                format!("Range too large: {} frames (max {})", count, MAX_EXPAND)
            ));
        }
        Ok((self.start..=self.end).map(|f| self.format_frame(f)).collect())
    }

    /// Convert to dict with the same keys as `keys()`.
    fn to_dict(&self, py: Python) -> PyResult<Py<PyAny>> {
        let dict = PyDict::new(py);
        dict.set_item("pattern", &self.pattern)?;
        dict.set_item("start", self.start)?;
        dict.set_item("end", self.end)?;
        dict.set_item("padding", self.padding)?;
        dict.set_item("indices", &self.indices)?;
        dict.set_item("missed", &self.missed)?;
        dict.set_item("count", self.indices.len())?;
        Ok(dict.into_any().unbind())
    }
}
206
/// Python-visible `ScanResult` class: the outcome of one scan.
///
/// Holds the detected sequences, how long the scan took, and the error
/// messages collected along the way.
#[cfg(feature = "python")]
#[pyclass(name = "ScanResult")]
#[derive(Clone)]
pub struct PyScanResult {
    /// Detected sequences, shared through an `Arc` so iterators can hold the
    /// list without copying it. Exposed to Python via the `seqs` getter.
    seqs: Arc<Vec<PySeq>>,
    /// Wall-clock duration of the scan, in milliseconds.
    #[pyo3(get)]
    elapsed_ms: f64,
    /// Error messages collected during the scan.
    #[pyo3(get)]
    errors: Vec<String>,
}
221
#[cfg(feature = "python")]
#[pymethods]
impl PyScanResult {
    /// `seqs` property: a fresh copy of the sequence list on every access,
    /// so Python owns the list it receives.
    #[getter]
    fn seqs(&self) -> Vec<PySeq> {
        self.seqs.to_vec()
    }

    /// Summary text: sequence count, elapsed time and error count.
    fn __repr__(&self) -> String {
        let seq_count = self.seqs.len();
        let error_count = self.errors.len();
        format!(
            "ScanResult(seqs={}, elapsed={:.2}ms, errors={})",
            seq_count, self.elapsed_ms, error_count
        )
    }

    /// Number of sequences in the result.
    fn __len__(&self) -> usize {
        self.seqs.len()
    }

    /// Iterate over the sequences; the iterator shares the list through the
    /// `Arc` instead of copying it.
    fn __iter__(slf: PyRef<'_, Self>) -> PyResult<Py<SeqIter>> {
        let cursor = SeqIter {
            seqs: Arc::clone(&slf.seqs),
            index: 0,
        };
        Py::new(slf.py(), cursor)
    }
}
250
/// Stateful scanner exposed to Python (1:1 with the Rust API).
///
/// Keeps the scan configuration together with the result of the most recent
/// scan; all fields are readable from Python.
#[cfg(feature = "python")]
#[pyclass]
pub struct Scanner {
    /// Directories that are scanned.
    #[pyo3(get)]
    roots: Vec<String>,
    /// Whether subdirectories are scanned as well.
    #[pyo3(get)]
    recursive: bool,
    /// Optional file mask / glob filter (e.g. `"*.exr"`).
    #[pyo3(get)]
    mask: Option<String>,
    /// Minimum sequence length passed to the core scan.
    #[pyo3(get)]
    min_len: usize,
    /// Result of the latest scan (sequences, elapsed_ms, errors).
    #[pyo3(get)]
    result: PyScanResult,
}
271
#[cfg(feature = "python")]
#[pymethods]
impl Scanner {
    /// Create scanner and run initial scan.
    ///
    /// Args:
    ///     roots: List of directory paths to scan
    ///     recursive: Scan subdirectories (default: True)
    ///     mask: File mask/glob pattern (e.g., "*.exr")
    ///     min_len: Minimum sequence length (default: 2)
    #[new]
    #[pyo3(signature = (roots, recursive=true, mask=None, min_len=2))]
    fn new(py: Python, roots: Vec<String>, recursive: bool, mask: Option<String>, min_len: usize) -> PyResult<Self> {
        // Scan first (borrowing the config), then move the config into the struct.
        let result = Self::scan_roots(py, &roots, recursive, mask.as_deref(), min_len);
        Ok(Scanner {
            roots,
            recursive,
            mask,
            min_len,
            result,
        })
    }

    /// Scan a single path (static method).
    ///
    /// Args:
    ///     root: Directory path to scan
    ///     recursive: Scan subdirectories (default: True)
    ///     mask: File mask/glob pattern
    ///     min_len: Minimum sequence length (default: 2)
    ///
    /// Returns:
    ///     ScanResult with sequences, elapsed_ms, and errors.
    ///     A scan failure is reported as the single entry of `errors`
    ///     (without a root prefix, unlike `get_seqs`).
    #[staticmethod]
    #[pyo3(signature = (root, recursive=true, mask=None, min_len=2))]
    fn get_seq(py: Python, root: String, recursive: bool, mask: Option<String>, min_len: usize) -> PyResult<PyScanResult> {
        let start = Instant::now();

        // Release the GIL while the Rust scan runs.
        let (seqs, errors) = py.allow_threads(|| {
            match core::get_seqs(&root, recursive, mask.as_deref(), min_len) {
                Ok(s) => (s, Vec::new()),
                Err(e) => (Vec::new(), vec![e]),
            }
        });

        Ok(PyScanResult {
            seqs: Arc::new(seqs.into_iter().map(PySeq::from).collect()),
            elapsed_ms: start.elapsed().as_secs_f64() * 1000.0,
            errors,
        })
    }

    /// Scan multiple paths in parallel (static method).
    ///
    /// Args:
    ///     roots: List of directory paths to scan
    ///     recursive: Scan subdirectories (default: True)
    ///     mask: File mask/glob pattern
    ///     min_len: Minimum sequence length (default: 2)
    ///
    /// Returns:
    ///     ScanResult with sequences, elapsed_ms, and errors
    #[staticmethod]
    #[pyo3(signature = (roots, recursive=true, mask=None, min_len=2))]
    fn get_seqs(py: Python, roots: Vec<String>, recursive: bool, mask: Option<String>, min_len: usize) -> PyResult<PyScanResult> {
        Ok(Self::scan_roots(py, &roots, recursive, mask.as_deref(), min_len))
    }

    /// Find sequence containing the given file.
    /// Scans parent directory (non-recursive) to find matching files.
    ///
    /// Args:
    ///     path: Path to a file that may be part of a sequence
    ///
    /// Returns:
    ///     Seq if file is part of a sequence, None otherwise
    #[staticmethod]
    #[pyo3(signature = (path))]
    fn from_file(py: Python, path: String) -> Option<PySeq> {
        py.allow_threads(|| {
            core::Scanner::from_file(&path).map(PySeq::from)
        })
    }

    /// Re-scan all roots with current settings.
    /// Updates result with new sequences, elapsed_ms, and errors.
    fn rescan(&mut self, py: Python) -> PyResult<()> {
        self.rescan_impl(py)
    }

    /// Number of sequences found
    fn __len__(&self) -> usize {
        self.result.seqs.len()
    }

    /// Summary text: root count, sequence count and elapsed time of the last scan.
    fn __repr__(&self) -> String {
        format!(
            "Scanner(roots={}, seqs={}, elapsed={:.2}ms)",
            self.roots.len(),
            self.result.seqs.len(),
            self.result.elapsed_ms
        )
    }

    /// Iterate over sequences (convenience, same as iter(scanner.result))
    fn __iter__(slf: PyRef<'_, Self>) -> PyResult<Py<SeqIter>> {
        Py::new(slf.py(), SeqIter {
            seqs: Arc::clone(&slf.result.seqs),
            index: 0,
        })
    }
}

#[cfg(feature = "python")]
impl Scanner {
    /// Replace `self.result` with a fresh scan of the stored configuration.
    fn rescan_impl(&mut self, py: Python) -> PyResult<()> {
        self.result = Self::scan_roots(
            py,
            &self.roots,
            self.recursive,
            self.mask.as_deref(),
            self.min_len,
        );
        Ok(())
    }

    /// Scan every root in parallel and merge the outcomes into one result.
    ///
    /// Shared by `new`, `get_seqs` and `rescan_impl`. The configuration is
    /// only borrowed, so callers do not have to clone it before the GIL is
    /// released. A root that fails contributes no sequences and one
    /// `"<root>: <error>"` entry in `errors`; the other roots are unaffected.
    /// `elapsed_ms` covers the scan plus the conversion to `PySeq`.
    fn scan_roots(
        py: Python<'_>,
        roots: &[String],
        recursive: bool,
        mask: Option<&str>,
        min_len: usize,
    ) -> PyScanResult {
        let start = Instant::now();

        // Release the GIL during the parallel Rust file scanning.
        let (seqs, errors) = py.allow_threads(|| {
            let per_root: Vec<_> = roots
                .par_iter()
                .map(|root| match core::get_seqs(root, recursive, mask, min_len) {
                    Ok(found) => (found, None),
                    Err(e) => (Vec::new(), Some(format!("{}: {}", root, e))),
                })
                .collect();

            // Merge in root order (the parallel collect keeps input order).
            let mut all_seqs = Vec::new();
            let mut all_errors = Vec::new();
            for (found, err) in per_root {
                all_seqs.extend(found);
                all_errors.extend(err);
            }
            (all_seqs, all_errors)
        });

        // GIL is held again from here on.
        PyScanResult {
            seqs: Arc::new(seqs.into_iter().map(PySeq::from).collect()),
            elapsed_ms: start.elapsed().as_secs_f64() * 1000.0,
            errors,
        }
    }
}
457
/// Python iterator over a shared list of sequences.
///
/// Holds the list through an `Arc`, so creating an iterator never copies it.
#[cfg(feature = "python")]
#[pyclass]
pub struct SeqIter {
    /// The sequences being iterated.
    seqs: Arc<Vec<PySeq>>,
    /// Position of the next element to yield.
    index: usize,
}
465
#[cfg(feature = "python")]
#[pymethods]
impl SeqIter {
    /// An iterator is its own iterable.
    fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
        slf
    }

    /// Yield a clone of the next sequence, or `None` once the list is
    /// exhausted (which ends Python iteration).
    fn __next__(&mut self) -> Option<PySeq> {
        // `?` returns early without advancing when we are past the end.
        let item = self.seqs.get(self.index).cloned()?;
        self.index += 1;
        Some(item)
    }
}
483
/// Initializer for the `scanseq` Python extension module.
///
/// Registers the `Scanner`, `ScanResult` and `Seq` classes on the module.
#[cfg(feature = "python")]
#[pymodule]
fn scanseq(m: &Bound<'_, PyModule>) -> PyResult<()> {
    // Registration order is kept: Scanner, then ScanResult, then Seq.
    m.add_class::<Scanner>()?;
    m.add_class::<PyScanResult>()?;
    m.add_class::<PySeq>()?;
    Ok(())
}