Skip to main content

provenant/copyright/
mod.rs

1//! Copyright detection module.
2//!
3//! Detects copyright statements, holder names, and author information
4//! from source code files using a four-stage pipeline:
5//! 1. Text preparation (normalization)
6//! 2. Candidate line selection
7//! 3. Lexing (POS tagging) and parsing (grammar rules)
8//! 4. Refinement and junk filtering
9
10use std::time::Duration;
11
12mod candidates;
13mod credits;
14mod detector;
15mod detector_input_normalization;
16pub mod golden_utils;
17mod grammar;
18mod hints;
19mod lexer;
20mod line_tracking;
21mod parser;
22mod patterns;
23mod prepare;
24mod refiner;
25mod types;
26
27#[cfg(all(test, feature = "golden-tests"))]
28mod golden_test;
29
30pub use candidates::strip_balanced_edge_parens;
31pub use credits::{detect_credits_authors, is_credits_file};
32pub use types::{AuthorDetection, CopyrightDetection, HolderDetection};
33
/// Options controlling which result categories are returned by
/// [`detect_copyrights_with_options`] and which detector entry point is used.
#[derive(Clone, Debug)]
pub struct CopyrightDetectionOptions {
    /// When `false`, the returned copyright list is emptied before returning.
    pub include_copyrights: bool,
    /// When `false`, the returned holder list is emptied before returning.
    pub include_holders: bool,
    /// When `false`, the returned author list is emptied before returning.
    pub include_authors: bool,
    /// When `Some`, detection goes through the deadline-aware detector entry
    /// point with this duration; when `None`, the plain entry point is used.
    // NOTE(review): what the detector does once the deadline passes is not
    // visible from this module — confirm in `detector`.
    pub max_runtime: Option<Duration>,
}
41
42impl Default for CopyrightDetectionOptions {
43    fn default() -> Self {
44        Self {
45            include_copyrights: true,
46            include_holders: true,
47            include_authors: true,
48            max_runtime: None,
49        }
50    }
51}
52
53/// Detect copyrights, holders, and authors in the given text content.
54///
55/// Returns a tuple of (copyrights, holders, authors).
56pub fn detect_copyrights(
57    content: &str,
58) -> (
59    Vec<CopyrightDetection>,
60    Vec<HolderDetection>,
61    Vec<AuthorDetection>,
62) {
63    detect_copyrights_with_options(content, &CopyrightDetectionOptions::default())
64}
65
66pub fn detect_copyrights_with_options(
67    content: &str,
68    options: &CopyrightDetectionOptions,
69) -> (
70    Vec<CopyrightDetection>,
71    Vec<HolderDetection>,
72    Vec<AuthorDetection>,
73) {
74    let (mut copyrights, mut holders, mut authors) = if let Some(max_runtime) = options.max_runtime
75    {
76        detector::detect_copyrights_from_text_with_deadline(content, Some(max_runtime))
77    } else {
78        detector::detect_copyrights_from_text(content)
79    };
80
81    if !options.include_copyrights {
82        copyrights.clear();
83    }
84    if !options.include_holders {
85        holders.clear();
86    }
87    if !options.include_authors {
88        authors.clear();
89    }
90
91    (copyrights, holders, authors)
92}
93
#[cfg(test)]
mod tests {
    use super::{CopyrightDetectionOptions, detect_copyrights_with_options};

    /// With every `include_*` flag off, all three result lists must be empty
    /// even though the input contains a copyright line and an author line.
    #[test]
    fn test_options_can_disable_all_outputs() {
        let nothing_enabled = CopyrightDetectionOptions {
            include_copyrights: false,
            include_holders: false,
            include_authors: false,
            ..CopyrightDetectionOptions::default()
        };
        let sample = "Copyright (c) 2024 Acme Inc.\nWritten by John Doe";

        let (found_copyrights, found_holders, found_authors) =
            detect_copyrights_with_options(sample, &nothing_enabled);

        assert!(found_copyrights.is_empty());
        assert!(found_holders.is_empty());
        assert!(found_authors.is_empty());
    }

    /// With only `include_authors` on, authors are reported while the
    /// copyright and holder lists stay empty.
    #[test]
    fn test_options_can_keep_only_authors() {
        let authors_only = CopyrightDetectionOptions {
            include_copyrights: false,
            include_holders: false,
            include_authors: true,
            ..CopyrightDetectionOptions::default()
        };
        let sample = "Written by John Doe";

        let (found_copyrights, found_holders, found_authors) =
            detect_copyrights_with_options(sample, &authors_only);

        assert!(found_copyrights.is_empty());
        assert!(found_holders.is_empty());
        assert!(!found_authors.is_empty());
    }
}