Skip to main content

gpl_license_guard/
lib.rs

1#![forbid(unsafe_code)]
2//! # gpl-license-guard
3//!
4//! An **independent**, general-purpose license-boundary inspection gate that emits a **receipt**, not a
5//! legal opinion. It scans a repo (and, later, a distribution artifact) for the practical GPL/LGPL boundary
6//! surfaces and classifies them with typed verdicts, so any project -- a commercial release, an open
7//! copyleft derivative, anything -- can *prove* its license posture continuously instead of arguing it by
8//! hand.
9//!
10//! The claim is deliberately narrow: "at this commit, under this policy, no detected GPL/LGPL boundary
11//! violation according to declared machine-checkable rules" -- never "legally safe forever". The receipt
12//! carries an explicit non-claims list.
13//!
14//! Independence is the point: this tool depends on nothing GNU and nothing from the projects it audits --
15//! it inspects from the outside, so the auditor is not part of the audited. Apache-2.0.
16
17use serde::{Deserialize, Serialize};
18use std::path::Path;
19
20/// Overall gate verdict (typed, not a boolean).
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
22pub enum GateVerdict {
23    Pass,
24    Warn,
25    Fail,
26    ManualReviewRequired,
27    NotApplicable,
28}
29
30/// How this package relates to GnuCOBOL material (the bucket that must NOT collapse).
31#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
32pub enum GnuBoundary {
33    NoGnuMaterialDetected,
34    ExternalOracleOnly,
35    GnuLibraryLinked,
36    GnuBinaryBundled,
37    GnuSourceVendored,
38    UnknownGnuSurface,
39}
40
41/// One observed surface.
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct Finding {
44    pub path: String,
45    pub kind: String,
46    pub severity: String, // low | medium | high
47    pub detail: String,
48}
49
/// A machine-checkable license policy: which crate licenses are rejected or escalated, and which
/// GnuCOBOL surfaces are tolerated.
#[derive(Clone, Debug)]
pub struct Policy {
    /// Policy identifier; `scan` copies it into the report.
    pub name: String,
    /// Crate licenses that make the gate fail.
    pub deny_licenses: Vec<String>,
    /// Crate licenses that escalate to manual review.
    pub manual_review_licenses: Vec<String>,
    /// Whether linking GnuCOBOL (crate dependency or native link directive) is acceptable.
    pub allow_link_gnucobol: bool,
    /// Whether GnuCOBOL/libcob source inside the tree is acceptable.
    pub allow_vendored_gnucobol_source: bool,
    /// Whether a GnuCOBOL binary/archive inside the tree is acceptable.
    pub allow_bundled_gnucobol_binary: bool,
    /// Tools declared as external oracles (invoked as a process, not bundled), e.g. `cobc`.
    pub declared_external_tools: Vec<String>,
}

impl Policy {
    /// Resolve a built-in policy from its (CLI `--policy`) name.
    ///
    /// Matching is case-insensitive and substring-based: any name containing `lgpl` or `deriv`
    /// selects [`Policy::lgpl_faithful_derivative`]; every other name -- including the empty string
    /// and `commercial` -- selects [`Policy::commercial_boundary`].
    pub fn by_name(name: &str) -> Self {
        let lowered = name.to_ascii_lowercase();
        let wants_derivative = ["lgpl", "deriv"]
            .iter()
            .any(|needle| lowered.contains(*needle));
        if !wants_derivative {
            return Self::commercial_boundary();
        }
        Self::lgpl_faithful_derivative()
    }

    /// Policy for an LGPL faithful-derivative open repo (e.g. gnucobol-rs).
    ///
    /// Such a project legitimately vendors the admitted oracle and links/inherits copyleft, so those
    /// surfaces are EXPECTED rather than violations. Its real obligation (stay LGPL, never claim
    /// permissive/clean-room) is a documentation claim, not a file surface; this policy therefore
    /// permits the GNU material and lets the report record it factually.
    pub fn lgpl_faithful_derivative() -> Self {
        Self {
            name: String::from("lgpl-faithful-derivative-v1"),
            // Open copyleft project: no license is denied or escalated.
            deny_licenses: vec![],
            manual_review_licenses: vec![],
            allow_link_gnucobol: true,
            allow_vendored_gnucobol_source: true,
            allow_bundled_gnucobol_binary: true,
            declared_external_tools: vec![String::from("cobc")],
        }
    }

    /// The default commercial-boundary policy.
    ///
    /// An external oracle is fine; vendored source, a bundled binary, or linking gnucobol is not.
    /// GPL/AGPL crate licenses are denied, LGPL (and MPL-2.0) require manual review.
    pub fn commercial_boundary() -> Self {
        // Small local converter so the identifier tables below stay readable.
        fn owned(ids: &[&str]) -> Vec<String> {
            ids.iter().map(|id| (*id).to_owned()).collect()
        }

        let denied = owned(&[
            "GPL-2.0-only",
            "GPL-2.0-or-later",
            "GPL-3.0-only",
            "GPL-3.0-or-later",
            "AGPL-3.0-only",
            "AGPL-3.0-or-later",
        ]);
        let needs_review = owned(&[
            "LGPL-2.1-only",
            "LGPL-2.1-or-later",
            "LGPL-3.0-only",
            "LGPL-3.0-or-later",
            "MPL-2.0",
        ]);

        Self {
            name: String::from("commercial-boundary-v1"),
            deny_licenses: denied,
            manual_review_licenses: needs_review,
            allow_link_gnucobol: false,
            allow_vendored_gnucobol_source: false,
            allow_bundled_gnucobol_binary: false,
            declared_external_tools: owned(&["cobc"]),
        }
    }
}
119
120/// The scan report (serializes into the receipt).
121#[derive(Debug, Clone, Serialize, Deserialize)]
122pub struct ScanReport {
123    pub schema: String,
124    pub project: String,
125    pub crate_license: String,
126    pub policy: String,
127    pub verdict: GateVerdict,
128    pub gnu_boundary: GnuBoundary,
129    pub gnucobol_source_vendored: bool,
130    pub gnucobol_binary_bundled: bool,
131    pub gnucobol_library_linked: bool,
132    pub external_tools: Vec<String>,
133    pub findings: Vec<Finding>,
134    pub non_claims: Vec<String>,
135}
136
/// The fixed list of disclaimers attached to every receipt, in a stable order.
fn non_claims() -> Vec<String> {
    const DISCLAIMERS: [&str; 5] = [
        "not legal advice",
        "not a substitute for review by counsel",
        "does not determine copyright ownership",
        "does not prove the absence of all copied code",
        "covers only declared inputs and the scanned tree at this commit",
    ];

    let mut claims = Vec::with_capacity(DISCLAIMERS.len());
    for text in DISCLAIMERS.iter() {
        claims.push(String::from(*text));
    }
    claims
}
149
/// Best-effort file read: returns the file's text, or an empty string when the file cannot be read
/// as UTF-8 text for any reason (missing, unreadable, not valid UTF-8).
fn read_to_string(p: &Path) -> String {
    match std::fs::read_to_string(p) {
        Ok(contents) => contents,
        Err(_) => String::new(),
    }
}
153
/// Recursively collect every file path under `root` into `out`, skipping build/VCS directories.
///
/// Directories named `target`, `.git` or `node_modules` are not descended into. A directory that
/// cannot be listed is silently skipped (best effort).
///
/// Entries are visited in sorted path order within each directory: the operating system returns
/// directory entries in an unspecified, filesystem-dependent order, and the findings derived from
/// this walk end up in a receipt that should be reproducible for the same tree.
fn walk(root: &Path, out: &mut Vec<std::path::PathBuf>) {
    const SKIPPED_DIRS: [&str; 3] = ["target", ".git", "node_modules"];

    let mut entries: Vec<std::path::PathBuf> = match std::fs::read_dir(root) {
        // `flatten` drops individual entries that failed to read instead of aborting the walk.
        Ok(listing) => listing.flatten().map(|entry| entry.path()).collect(),
        Err(_) => return,
    };
    entries.sort();

    for path in entries {
        if path.is_dir() {
            let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if !SKIPPED_DIRS.contains(&dir_name) {
                walk(&path, out);
            }
        } else {
            out.push(path);
        }
    }
}
173
/// Facts extracted from one `Cargo.toml` by [`parse_manifest`].
#[derive(Default)]
struct ManifestFacts {
    /// `name` from `[package]` (or from the top of the file, before any table header).
    name: String,
    /// `license` from `[package]` (or from the top of the file); empty when absent or inherited.
    license: String,
    /// `license` from `[workspace.package]`, the value members inherit via `license.workspace = true`.
    workspace_license: String,
    /// Whether a `gnucobol-rs` dependency is declared.
    links_gnucobol: bool,
}

/// Extract a TOML string value (`"..."` or `'...'`), ignoring anything after the closing quote such as
/// a trailing comment. Non-string values (e.g. `{ workspace = true }`) yield an empty string.
fn toml_string(raw: &str) -> String {
    let value = raw.trim();
    for quote in ['"', '\''].iter().copied() {
        if let Some(rest) = value.strip_prefix(quote) {
            return rest.split(quote).next().unwrap_or("").to_string();
        }
    }
    String::new()
}

/// Line-oriented, dependency-free read of the few manifest facts the guard needs.
///
/// This is intentionally not a full TOML parser. It tracks the current table header so that only the
/// package's own `name`/`license` are used (a `[[bin]]` or `[lib]` `name` must not override the package
/// name), tolerates any spacing around `=`, and recognises a gnucobol-rs dependency written as a key
/// (`gnucobol-rs = ...`), as a table (`[dependencies.gnucobol-rs]`) or as a rename
/// (`package = "gnucobol-rs"`).
fn parse_manifest(text: &str) -> ManifestFacts {
    let mut facts = ManifestFacts::default();
    let mut section = String::new(); // empty until the first table header is seen
    let mut in_deps = false;

    for line in text.lines() {
        let l = line.trim();

        if l.starts_with('[') {
            in_deps = l.starts_with("[dependencies")
                || l.starts_with("[build-dependencies")
                || l.starts_with("[target");
            // Header text without brackets, e.g. `package`, `bin`, `dependencies.gnucobol-rs`.
            section = l.trim_start_matches('[').split(']').next().unwrap_or("").trim().to_string();
            if in_deps {
                // Table form of a dependency: the crate name follows the last `dependencies.`.
                if let Some((_, dep)) = section.rsplit_once("dependencies.") {
                    let dep = dep.trim().trim_matches(|c: char| c == '"' || c == '\'');
                    if dep.starts_with("gnucobol-rs") {
                        facts.links_gnucobol = true;
                    }
                }
            }
            continue;
        }

        if in_deps {
            let declares = l.starts_with("gnucobol-rs") || l.starts_with("\"gnucobol-rs\"");
            // A renamed dependency still pulls in the same crate.
            let renames = l.contains("package")
                && (l.contains("\"gnucobol-rs") || l.contains("'gnucobol-rs"));
            if declares || renames {
                facts.links_gnucobol = true;
            }
        }

        let (key, value) = match l.split_once('=') {
            Some((k, v)) => (k.trim().trim_matches('"'), v),
            None => continue,
        };
        let in_package = section.is_empty() || section == "package";
        if in_package && key == "name" {
            facts.name = toml_string(value);
        } else if in_package && key == "license" {
            facts.license = toml_string(value);
        } else if section == "workspace.package" && key == "license" {
            facts.workspace_license = toml_string(value);
        }
    }
    facts
}

/// Detect the project's name, its declared license, and whether it declares a gnucobol-rs dependency.
///
/// Returns `(name, license, links_gnucobol)`. Missing or unreadable manifests are treated as empty,
/// so every component may come back empty/`false`.
///
/// Workspace fallback: a virtual manifest has no package name, so the PRIMARY member under `crates/`
/// is read instead -- the member whose folder name matches the repo folder if there is one, otherwise
/// the first (in sorted order) member that declares a name. That member's dependency declaration is
/// honoured too, and a license inherited from `[workspace.package]` is resolved.
fn inspect_cargo(root: &Path) -> (String, String, bool) {
    let read = |dir: &Path| std::fs::read_to_string(dir.join("Cargo.toml")).unwrap_or_default();

    let top = parse_manifest(&read(root));
    let workspace_license = top.workspace_license;
    let mut name = top.name;
    let mut license = top.license;
    let mut links_gnucobol = top.links_gnucobol;

    if name.is_empty() {
        let repo_name = root.file_name().and_then(|n| n.to_str()).unwrap_or("").to_string();
        if let Ok(rd) = std::fs::read_dir(root.join("crates")) {
            let mut members: Vec<_> = rd.flatten().map(|e| e.path()).collect();
            members.sort();
            // Stable sort: the member named like the repo (key `false`) moves to the front, the rest
            // keep their alphabetical order.
            members.sort_by_key(|m| {
                m.file_name().and_then(|n| n.to_str()).map(|n| n != repo_name).unwrap_or(true)
            });
            for m in members {
                let member = parse_manifest(&read(&m));
                if !member.name.is_empty() {
                    name = member.name;
                    license = member.license;
                    links_gnucobol = links_gnucobol || member.links_gnucobol;
                    break;
                }
            }
        }
    }

    // `license.workspace = true` (or no license at all) leaves the package license empty; fall back
    // to the workspace-level declaration so an inherited license is not reported as "none".
    if license.is_empty() {
        license = workspace_license;
    }
    (name, license, links_gnucobol)
}
227
228/// Scan a repo root against a policy and produce a report.
229pub fn scan(root: impl AsRef<Path>, policy: &Policy) -> ScanReport {
230    let root = root.as_ref();
231    let (project, crate_license, links_gnucobol) = inspect_cargo(root);
232
233    let mut findings = Vec::new();
234    let mut vendored = false;
235    let mut binary = false;
236    let mut invokes_external_cobc = false;
237
238    let mut files = Vec::new();
239    walk(root, &mut files);
240    for p in &files {
241        let rel = p.strip_prefix(root).unwrap_or(p).to_string_lossy().replace('\\', "/");
242        let name = p.file_name().and_then(|n| n.to_str()).unwrap_or("");
243        let low = rel.to_ascii_lowercase();
244
245        // external-oracle invocation: a real process spawn of the cobc binary -- detected as the contiguous
246        // call literal, so a tool that merely mentions the two tokens separately is not flagged.
247        if name.ends_with(".rs") && read_to_string(p).contains("new(\"cobc") {
248            invokes_external_cobc = true;
249        }
250
251        // vendored GnuCOBOL/libcob SOURCE (a real implementation tree, not a string mention)
252        let is_src_ext = name.ends_with(".c") || name.ends_with(".h") || name.ends_with(".cob") || name.ends_with(".cpy");
253        if (low.contains("gnucobol") || low.contains("libcob")) && (is_src_ext || low.contains("gnucobol-3.")) {
254            vendored = true;
255            findings.push(Finding {
256                path: rel.clone(),
257                kind: "vendored_gnucobol_source".into(),
258                severity: "high".into(),
259                detail: "GnuCOBOL/libcob source material inside the tree".into(),
260            });
261            continue;
262        }
263        // bundled GnuCOBOL/libcob BINARY
264        if name == "cobc"
265            || name.starts_with("libcob.so")
266            || name == "libcob.a"
267            || (low.contains("gnucobol") && (name.ends_with(".tar.gz") || name.ends_with(".tar.lz") || name.ends_with(".tar.xz")))
268        {
269            binary = true;
270            findings.push(Finding {
271                path: rel.clone(),
272                kind: "bundled_gnucobol_binary_or_archive".into(),
273                severity: "high".into(),
274                detail: "GnuCOBOL binary/archive inside the tree".into(),
275            });
276            continue;
277        }
278        // LGPL/GPL license texts present in the tree (informational; e.g. a faithful-derivative crate)
279        if name == "COPYING.LESSER" || name.starts_with("LGPL-") {
280            findings.push(Finding {
281                path: rel.clone(),
282                kind: "lgpl_license_text".into(),
283                severity: "low".into(),
284                detail: "LGPL license text present".into(),
285            });
286        } else if name == "COPYING" || name.starts_with("GPL-") {
287            findings.push(Finding {
288                path: rel.clone(),
289                kind: "gpl_license_text".into(),
290                severity: "medium".into(),
291                detail: "GPL license text present".into(),
292            });
293        }
294    }
295
296    // build.rs link directives (a behavioral link signal, not a string mention)
297    let build_rs = read_to_string(&root.join("build.rs"));
298    let build_links_gnu = build_rs.contains("rustc-link-lib")
299        && (build_rs.contains("cob") || build_rs.contains("gnucobol"));
300    if build_links_gnu {
301        findings.push(Finding {
302            path: "build.rs".into(),
303            kind: "native_link_directive".into(),
304            severity: "high".into(),
305            detail: "build.rs links a cob/gnucobol native library".into(),
306        });
307    }
308    let library_linked = links_gnucobol || build_links_gnu;
309    if links_gnucobol {
310        findings.push(Finding {
311            path: "Cargo.toml".into(),
312            kind: "gnucobol_rs_crate_dependency".into(),
313            severity: "medium".into(),
314            detail: "links the LGPL gnucobol-rs crate (LGPL relink obligation applies to distributed binaries)".into(),
315        });
316    }
317
318    // classify the GNU boundary (the buckets must not collapse)
319    let gnu_boundary = if vendored {
320        GnuBoundary::GnuSourceVendored
321    } else if binary {
322        GnuBoundary::GnuBinaryBundled
323    } else if library_linked {
324        GnuBoundary::GnuLibraryLinked
325    } else if invokes_external_cobc {
326        GnuBoundary::ExternalOracleOnly
327    } else {
328        GnuBoundary::NoGnuMaterialDetected
329    };
330
331    // verdict per policy
332    let license_denied = policy.deny_licenses.iter().any(|d| d == &crate_license);
333    let license_review = policy.manual_review_licenses.iter().any(|d| d == &crate_license);
334    let verdict = if license_denied {
335        GateVerdict::Fail
336    } else if vendored && !policy.allow_vendored_gnucobol_source {
337        GateVerdict::Fail
338    } else if binary && !policy.allow_bundled_gnucobol_binary {
339        GateVerdict::Fail
340    } else if library_linked && !policy.allow_link_gnucobol {
341        GateVerdict::ManualReviewRequired
342    } else if license_review {
343        GateVerdict::ManualReviewRequired
344    } else {
345        GateVerdict::Pass
346    };
347
348    ScanReport {
349        schema: "gpl-license-guard-receipt-v1".into(),
350        project,
351        crate_license,
352        policy: policy.name.clone(),
353        verdict,
354        gnu_boundary,
355        gnucobol_source_vendored: vendored,
356        gnucobol_binary_bundled: binary,
357        gnucobol_library_linked: library_linked,
358        external_tools: if gnu_boundary == GnuBoundary::ExternalOracleOnly {
359            policy.declared_external_tools.clone()
360        } else {
361            Vec::new()
362        },
363        findings,
364        non_claims: non_claims(),
365    }
366}
367
/// Behavioural tests for the gate: each builds a tiny throw-away tree on disk, scans it, and checks
/// both the boundary bucket and the verdict.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn clean_forensic_crate_passes() {
        // a crate with no GNU material, no gnucobol-rs dep, permissive license -> Pass
        let dir = tempdir("clean");
        write(&dir, "Cargo.toml", "name = \"kobold-x\"\nlicense = \"Apache-2.0\"\n[dependencies]\nserde = \"1\"\n");
        write(&dir, "src/lib.rs", "pub fn f() {}\n");
        let r = scan(&dir, &Policy::commercial_boundary());
        assert_eq!(r.verdict, GateVerdict::Pass);
        assert_eq!(r.gnu_boundary, GnuBoundary::NoGnuMaterialDetected); // pure data tool, no cobc spawn
        assert!(!r.gnucobol_library_linked);
        cleanup(&dir);
    }

    #[test]
    fn external_cobc_invocation_is_oracle_only() {
        let dir = tempdir("oracle");
        write(&dir, "Cargo.toml", "name = \"kobold-y\"\nlicense = \"Apache-2.0\"\n");
        // NOTE: the spawn literal below must stay escaped inside this string; an unescaped copy in this
        // crate's own sources would trip `the_guard_does_not_self_flag`.
        write(&dir, "src/main.rs", "fn main(){ let _ = std::process::Command::new(\"cobc\").arg(\"-x\"); }\n");
        let r = scan(&dir, &Policy::commercial_boundary());
        assert_eq!(r.gnu_boundary, GnuBoundary::ExternalOracleOnly);
        assert_eq!(r.verdict, GateVerdict::Pass);
        assert_eq!(r.external_tools, vec!["cobc".to_string()]);
        cleanup(&dir);
    }

    #[test]
    fn the_guard_does_not_self_flag() {
        // scanning this very crate must NOT report it as invoking cobc (its detector merely mentions the
        // tokens). Run against the crate's own manifest dir.
        let root = env!("CARGO_MANIFEST_DIR");
        let r = scan(root, &Policy::commercial_boundary());
        assert_eq!(r.gnu_boundary, GnuBoundary::NoGnuMaterialDetected, "the guard must not self-flag");
        assert_eq!(r.verdict, GateVerdict::Pass);
    }

    #[test]
    fn gnucobol_rs_dependency_triggers_manual_review() {
        let dir = tempdir("linked");
        write(
            &dir,
            "Cargo.toml",
            "name = \"kobold-guard\"\nlicense = \"Apache-2.0\"\n[dependencies]\ngnucobol-rs = \"0.7\"\n",
        );
        let r = scan(&dir, &Policy::commercial_boundary());
        assert_eq!(r.gnu_boundary, GnuBoundary::GnuLibraryLinked);
        assert_eq!(r.verdict, GateVerdict::ManualReviewRequired);
        assert!(r.gnucobol_library_linked);
        cleanup(&dir);
    }

    #[test]
    fn vendored_gnucobol_source_fails() {
        let dir = tempdir("vendored");
        write(&dir, "Cargo.toml", "name = \"bad\"\nlicense = \"Apache-2.0\"\n");
        write(&dir, "vendor/gnucobol/libcob/move.c", "/* cob source */\n");
        let r = scan(&dir, &Policy::commercial_boundary());
        assert_eq!(r.gnu_boundary, GnuBoundary::GnuSourceVendored);
        assert_eq!(r.verdict, GateVerdict::Fail);
        cleanup(&dir);
    }

    #[test]
    fn vendored_source_passes_under_derivative_policy() {
        // the SAME vendored-source tree that FAILs the commercial policy PASSes the lgpl-derivative policy:
        // two correct, opposite verdicts -- that is the separation, machine-checked.
        let dir = tempdir("deriv");
        write(&dir, "Cargo.toml", "name = \"gnucobol-rs\"\nlicense = \"LGPL-3.0-or-later\"\n");
        write(&dir, "research/gnucobol-3.2/libcob/move.c", "/* cob */\n");
        assert_eq!(scan(&dir, &Policy::commercial_boundary()).verdict, GateVerdict::Fail);
        let r = scan(&dir, &Policy::lgpl_faithful_derivative());
        assert_eq!(r.gnu_boundary, GnuBoundary::GnuSourceVendored); // recorded factually
        assert_eq!(r.verdict, GateVerdict::Pass); // but permitted for an LGPL derivative
        cleanup(&dir);
    }

    #[test]
    fn gpl_crate_license_is_denied() {
        let dir = tempdir("gpl");
        write(&dir, "Cargo.toml", "name = \"x\"\nlicense = \"GPL-3.0-or-later\"\n");
        let r = scan(&dir, &Policy::commercial_boundary());
        assert_eq!(r.verdict, GateVerdict::Fail);
        cleanup(&dir);
    }

    // --- tiny test fs helpers (no external deps) ---

    /// Create an empty scratch directory for one test.
    ///
    /// The process id is part of the name so that two test runs sharing the same system temp
    /// directory (parallel CI jobs, several users on one host) cannot delete or pollute each
    /// other's trees; `tag` keeps the tests of a single run apart.
    fn tempdir(tag: &str) -> std::path::PathBuf {
        let d = std::env::temp_dir().join(format!("klg-test-{tag}-{}", std::process::id()));
        let _ = std::fs::remove_dir_all(&d);
        std::fs::create_dir_all(&d).unwrap();
        d
    }

    /// Write `content` to `dir/rel`, creating intermediate directories as needed.
    fn write(dir: &Path, rel: &str, content: &str) {
        let p = dir.join(rel);
        std::fs::create_dir_all(p.parent().unwrap()).unwrap();
        std::fs::write(p, content).unwrap();
    }

    /// Best-effort removal of a scratch directory (errors are ignored).
    fn cleanup(dir: &Path) {
        let _ = std::fs::remove_dir_all(dir);
    }
}