provenant-cli 0.0.8

Provenant is a high-performance Rust scanner for licenses, packages, and source provenance.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
//! Tests for rule/license file parsing.
//!
//! Tests for YAML frontmatter parsing edge cases including:
//! - PGP signatures in content
//! - Dashes in license text
//! - Various delimiter formats

use super::*;
use crate::license_detection::index::{loaded_license_to_license, loaded_rule_to_rule};
use crate::license_detection::models::{License, Rule};
use anyhow::Result;

/// Parses `content` as a license file named `filename` and converts it to a [`License`].
///
/// The content is written to `filename` inside the system temp directory because
/// `parse_license_to_loaded` takes a path. The file name is significant: tests in
/// this module rely on it as the fallback for a missing `key` and expect a
/// mismatching `key` to be rejected.
///
/// The temporary file is removed on a best-effort basis whether or not parsing
/// succeeds.
///
/// # Errors
///
/// Returns an error if the fixture cannot be written or if parsing fails.
fn parse_license_from_str(content: &str, filename: &str) -> Result<License> {
    let temp_path = std::env::temp_dir().join(filename);
    std::fs::write(&temp_path, content)?;
    // Keep the parse outcome unpropagated until after cleanup: using `?` here
    // would return early on a parse error and leave the fixture in the temp dir.
    let loaded = parse_license_to_loaded(&temp_path);
    let _ = std::fs::remove_file(&temp_path);
    Ok(loaded_license_to_license(loaded?))
}

/// Parses `content` as a rule file named `filename` and converts it to a [`Rule`].
///
/// The content is written to `filename` inside the system temp directory because
/// `parse_rule_to_loaded` takes a path.
///
/// The temporary file is removed on a best-effort basis whether or not parsing
/// succeeds.
///
/// # Errors
///
/// Returns an error if the fixture cannot be written or if parsing fails.
fn parse_rule_from_str(content: &str, filename: &str) -> Result<Rule> {
    let temp_path = std::env::temp_dir().join(filename);
    std::fs::write(&temp_path, content)?;
    // Keep the parse outcome unpropagated until after cleanup: using `?` here
    // would return early on a parse error and leave the fixture in the temp dir.
    let loaded = parse_rule_to_loaded(&temp_path);
    let _ = std::fs::remove_file(&temp_path);
    Ok(loaded_rule_to_rule(loaded?))
}

/// A license file with `key`, `name` and `category` frontmatter followed by body
/// text parses successfully and exposes the key, the name and the text.
#[test]
fn test_parse_license_file_basic() {
    let source = r#"---
key: test-license
name: Test License
category: Permissive
---
This is the license text.
It has multiple lines."#;

    let parsed = parse_license_from_str(source, "test-license.LICENSE");
    assert!(parsed.is_ok(), "Should parse basic license: {:?}", parsed);

    let license = parsed.unwrap();
    assert_eq!(license.key, "test-license");
    assert_eq!(license.name, "Test License");
    assert!(license.text.contains("license text"));
}

/// A rule file with a license expression and `is_license_text: yes` parses
/// successfully and exposes the expression, the text and the text flag.
#[test]
fn test_parse_rule_file_basic() {
    let source = r#"---
license_expression: mit
is_license_text: yes
---
Permission is hereby granted, free of charge."#;

    let parsed = parse_rule_from_str(source, "mit_1.RULE");
    assert!(parsed.is_ok(), "Should parse basic rule: {:?}", parsed);

    let rule = parsed.unwrap();
    assert_eq!(rule.license_expression, "mit");
    assert!(rule.text.contains("Permission"));
    assert!(rule.is_license_text());
}

/// Frontmatter that contains no fields at all is accepted as long as body text
/// follows it.
#[test]
fn test_parse_license_empty_frontmatter() {
    let source = r#"---
---
This is the license text."#;

    assert!(
        parse_license_from_str(source, "empty-fm.LICENSE").is_ok(),
        "Empty frontmatter should use defaults"
    );
}

/// A license with frontmatter but no body text, and no flag permitting that,
/// is rejected with an error mentioning "empty text content".
#[test]
fn test_parse_license_empty_text() {
    let source = r#"---
key: empty-text
name: Test License
---
"#;

    let outcome = parse_license_from_str(source, "empty-text.LICENSE");
    assert!(
        outcome.is_err(),
        "Empty text should fail for non-deprecated license"
    );

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("empty text content"));
}

/// A license marked `is_deprecated: yes` is accepted even without body text.
#[test]
fn test_parse_license_empty_text_deprecated_allowed() {
    let source = r#"---
key: deprecated
name: Test License
is_deprecated: yes
---
"#;

    assert!(
        parse_license_from_str(source, "deprecated.LICENSE").is_ok(),
        "Deprecated license can have empty text"
    );
}

/// A license marked `is_unknown: yes` is accepted even without body text.
#[test]
fn test_parse_license_empty_text_unknown_allowed() {
    let source = r#"---
key: unknown
name: Test License
is_unknown: yes
---
"#;

    assert!(
        parse_license_from_str(source, "unknown.LICENSE").is_ok(),
        "Unknown license can have empty text"
    );
}

/// A license marked `is_generic: yes` is accepted even without body text.
#[test]
fn test_parse_license_empty_text_generic_allowed() {
    let source = r#"---
key: generic
name: Test License
is_generic: yes
---
"#;

    assert!(
        parse_license_from_str(source, "generic.LICENSE").is_ok(),
        "Generic license can have empty text"
    );
}

/// A rule with frontmatter but no body text is rejected with an error
/// mentioning "empty text content".
#[test]
fn test_parse_rule_empty_text() {
    let source = r#"---
license_expression: mit
---
"#;

    let outcome = parse_rule_from_str(source, "empty-text.RULE");
    assert!(outcome.is_err(), "Rule with empty text should fail");

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("empty text content"));
}

/// PGP armor lines (which start with runs of dashes) inside the license body
/// are kept as text rather than being mistaken for a frontmatter delimiter.
#[test]
fn test_parse_license_with_pgp_signature() {
    let source = r#"---
key: pgp-license
name: Test License
---
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256

This is the license text with a PGP signature.
-----END PGP SIGNATURE-----"#;

    let parsed = parse_license_from_str(source, "pgp-license.LICENSE");
    assert!(
        parsed.is_ok(),
        "Should parse license with PGP signature: {:?}",
        parsed
    );

    let body = parsed.unwrap().text;
    assert!(
        body.contains("PGP SIGNED MESSAGE"),
        "Text should contain PGP marker"
    );
    assert!(
        body.contains("license text"),
        "Text should contain actual license"
    );
}

/// Dash runs inside the license body (a `--- ` line with a trailing space and
/// an inline `--- end ---`) are kept as part of the text.
#[test]
fn test_parse_license_with_dashes_in_content() {
    // Spelled out line by line so the trailing space after the in-text `---`
    // is visible and cannot be stripped by an editor.
    let source = concat!(
        "---\n",
        "key: dashes-content\n",
        "name: Test License\n",
        "---\n",
        "The Artistic License 1.0\n",
        "--- \n",
        "This is a separator in the license text.\n",
        "More text here.\n",
        "--- end ---",
    );

    let parsed = parse_license_from_str(source, "dashes-content.LICENSE");
    assert!(
        parsed.is_ok(),
        "Should parse license with dashes in content: {:?}",
        parsed
    );

    let body = parsed.unwrap().text;
    assert!(
        body.contains("separator"),
        "Text should contain content after dashes"
    );
    assert!(
        body.contains("--- end ---"),
        "Text should contain inline dashes"
    );
}

/// Frontmatter delimited by four dashes instead of three is accepted and the
/// body text is still extracted.
#[test]
fn test_parse_license_with_four_dash_delimiter() {
    let source = r#"----
key: four-dash
name: Test License
----
This is the license text."#;

    let parsed = parse_license_from_str(source, "four-dash.LICENSE");
    assert!(
        parsed.is_ok(),
        "Should parse license with 4-dash delimiter: {:?}",
        parsed
    );

    let body = parsed.unwrap().text;
    assert!(
        body.contains("license text"),
        "Text should be extracted correctly"
    );
}

/// PGP armor lines inside a rule body are kept as rule text.
#[test]
fn test_parse_rule_with_pgp_signature() {
    let source = r#"---
license_expression: test-rule
is_license_text: yes
---
-----BEGIN PGP SIGNED MESSAGE-----

Rule text here.
-----END PGP SIGNATURE-----"#;

    let parsed = parse_rule_from_str(source, "pgp-rule.RULE");
    assert!(
        parsed.is_ok(),
        "Should parse rule with PGP signature: {:?}",
        parsed
    );

    let body = parsed.unwrap().text;
    assert!(
        body.contains("PGP SIGNED MESSAGE"),
        "Text should contain PGP marker"
    );
}

/// `is_false_positive: yes` in the frontmatter is reflected on the parsed rule.
#[test]
fn test_parse_rule_false_positive() {
    let source = r#"---
license_expression: gpl-2.0-plus
is_false_positive: yes
---
GPL"#;

    let parsed = parse_rule_from_str(source, "false-positive.RULE");
    assert!(parsed.is_ok(), "Should parse false positive rule");
    assert!(parsed.unwrap().is_false_positive);
}

/// `minimum_coverage` from license frontmatter is carried onto the license.
#[test]
fn test_parse_license_minimum_coverage() {
    let source = r#"---
key: min-coverage
name: Test License
minimum_coverage: 80
---
License text here."#;

    let parsed = parse_license_from_str(source, "min-coverage.LICENSE");
    assert!(parsed.is_ok());
    assert_eq!(parsed.unwrap().minimum_coverage, Some(80));
}

/// `minimum_coverage` from rule frontmatter is carried onto the rule, and the
/// rule records that the value was stored explicitly.
#[test]
fn test_parse_rule_minimum_coverage() {
    let source = r#"---
license_expression: test
is_license_notice: yes
minimum_coverage: 99
---
Rule text."#;

    let parsed = parse_rule_from_str(source, "min-coverage.RULE");
    assert!(parsed.is_ok());

    let rule = parsed.unwrap();
    assert_eq!(rule.minimum_coverage, Some(99));
    assert!(rule.has_stored_minimum_coverage);
}

/// A rule whose frontmatter omits `minimum_coverage` has no coverage value and
/// is not flagged as having a stored one.
#[test]
fn test_parse_rule_without_minimum_coverage_has_no_stored_provenance() {
    let source = r#"---
license_expression: test
is_license_notice: yes
---
Rule text."#;

    let parsed = parse_rule_from_str(source, "no-min-coverage.RULE");
    assert!(parsed.is_ok());

    let rule = parsed.unwrap();
    assert_eq!(rule.minimum_coverage, None);
    assert!(!rule.has_stored_minimum_coverage);
}

/// When the frontmatter has no `key`, the license key is taken from the file
/// name (`no-key.LICENSE` yields `no-key`).
#[test]
fn test_parse_license_no_key_in_frontmatter() {
    let source = r#"---
name: Test License
---
Text."#;

    let parsed = parse_license_from_str(source, "no-key.LICENSE");
    assert!(parsed.is_ok(), "Missing key should use filename");
    assert_eq!(parsed.unwrap().key, "no-key");
}

/// A rule that switches on every boolean flag at once fails to parse.
///
/// NOTE(review): presumably rejected because the `is_license_*` kind flags are
/// mutually exclusive — confirm against the parser.
#[test]
fn test_parse_rule_with_all_boolean_flags() {
    let source = r#"---
license_expression: test
is_license_text: yes
is_license_notice: yes
is_license_reference: yes
is_license_tag: yes
is_license_intro: yes
is_license_clue: yes
is_false_positive: yes
is_continuous: yes
---
Text."#;

    let outcome = parse_rule_from_str(source, "flags.RULE");
    assert!(outcome.is_err());
}

/// A YAML block scalar (`notes: |`) in the frontmatter is parsed into the
/// license notes with its lines preserved.
#[test]
fn test_parse_license_with_multiline_yaml() {
    let source = r#"---
key: multiline
name: Test License
notes: |
    Line 1
    Line 2
    Line 3
---
Text."#;

    let parsed = parse_license_from_str(source, "multiline.LICENSE");
    assert!(parsed.is_ok());

    let license = parsed.unwrap();
    let notes = license.notes.as_ref().unwrap();
    assert!(notes.contains("Line 1"));
    assert!(notes.contains("Line 2"));
}

/// Delimiter lines followed by trailing spaces are still recognised as
/// frontmatter delimiters.
#[test]
fn test_parse_license_with_trailing_whitespace_on_delimiter() {
    let source = concat!(
        "---  \n",
        "key: ws-delimiter\n",
        "name: Test\n",
        "---  \n",
        "Text.",
    );

    let parsed = parse_license_from_str(source, "ws-delimiter.LICENSE");
    assert!(
        parsed.is_ok(),
        "Should handle trailing whitespace on delimiter"
    );
    assert!(parsed.unwrap().text.contains("Text"));
}

/// A frontmatter `key` that disagrees with the file name is rejected with an
/// error mentioning "key mismatch".
#[test]
fn test_parse_license_key_mismatch() {
    let source = r#"---
key: wrong-key
name: Test License
---
Text."#;

    let outcome = parse_license_from_str(source, "correct-key.LICENSE");
    assert!(outcome.is_err(), "Key mismatch should fail");

    let message = outcome.unwrap_err().to_string();
    assert!(
        message.contains("key mismatch"),
        "Error should mention key mismatch: {}",
        message
    );
}

/// A rule that is not a false positive must declare `license_expression`; when
/// it is missing, the error names that field.
#[test]
fn test_parse_rule_missing_license_expression() {
    let source = r#"---
is_license_text: yes
---
Some text."#;

    let outcome = parse_rule_from_str(source, "no-expr.RULE");
    assert!(
        outcome.is_err(),
        "Missing license_expression should fail for non-false-positive"
    );

    let message = outcome.unwrap_err().to_string();
    assert!(
        message.contains("license_expression"),
        "Error should mention license_expression: {}",
        message
    );
}

/// A false-positive rule may omit `license_expression`; the parsed rule then
/// reports the expression as `unknown`.
#[test]
fn test_parse_rule_false_positive_without_expression() {
    let source = r#"---
is_false_positive: yes
---
Some text."#;

    let parsed = parse_rule_from_str(source, "fp-no-expr.RULE");
    assert!(
        parsed.is_ok(),
        "False positive rule can omit license_expression"
    );

    let rule = parsed.unwrap();
    assert!(rule.is_false_positive);
    assert_eq!(rule.license_expression, "unknown");
}

/// A three-byte license file is rejected with an error mentioning "too short".
#[test]
fn test_parse_license_content_too_short() {
    let outcome = parse_license_from_str("abc", "short.LICENSE");
    assert!(outcome.is_err(), "Content too short should fail");

    let message = outcome.unwrap_err().to_string();
    assert!(
        message.contains("too short"),
        "Error should mention too short: {}",
        message
    );
}

/// A three-byte rule file is rejected.
#[test]
fn test_parse_rule_content_too_short() {
    assert!(
        parse_rule_from_str("abc", "short.RULE").is_err(),
        "Content too short should fail"
    );
}

/// A license file without any frontmatter delimiter lines is rejected with an
/// error mentioning "delimiter".
#[test]
fn test_parse_license_missing_delimiter() {
    let source = r#"key: no-delim
name: Test
This is text without delimiters."#;

    let outcome = parse_license_from_str(source, "no-delim.LICENSE");
    assert!(outcome.is_err(), "Missing delimiter should fail");

    let message = outcome.unwrap_err().to_string();
    assert!(
        message.contains("delimiter"),
        "Error should mention delimiter: {}",
        message
    );
}

/// A rule file without any frontmatter delimiter lines is rejected.
#[test]
fn test_parse_rule_missing_delimiter() {
    let source = r#"license_expression: mit
This is text without delimiters."#;

    assert!(
        parse_rule_from_str(source, "no-delim.RULE").is_err(),
        "Missing delimiter should fail"
    );
}

/// Each `ignorable_*` list present in license frontmatter ends up populated
/// (`Some`) on the parsed license.
#[test]
fn test_parse_license_with_ignorable_fields() {
    let source = r#"---
key: ignorable
name: Test License
ignorable_copyrights:
    - Copyright (c) Example
ignorable_holders:
    - Example Corp
ignorable_authors:
    - John Doe
ignorable_urls:
    - http://example.com
ignorable_emails:
    - test@example.com
---
License text."#;

    let parsed = parse_license_from_str(source, "ignorable.LICENSE");
    assert!(parsed.is_ok());

    let lic = parsed.unwrap();
    assert!(lic.ignorable_copyrights.is_some());
    assert!(lic.ignorable_holders.is_some());
    assert!(lic.ignorable_authors.is_some());
    assert!(lic.ignorable_urls.is_some());
    assert!(lic.ignorable_emails.is_some());
}

/// Each `ignorable_*` list present in rule frontmatter ends up populated
/// (`Some`) on the parsed rule.
#[test]
fn test_parse_rule_with_ignorable_fields() {
    let source = r#"---
license_expression: test
is_license_notice: yes
ignorable_urls:
    - http://example.com
ignorable_emails:
    - test@example.com
ignorable_copyrights:
    - Copyright (c) Example
ignorable_holders:
    - Example Corp
ignorable_authors:
    - John Doe
---
Rule text."#;

    let parsed = parse_rule_from_str(source, "ignorable.RULE");
    assert!(parsed.is_ok());

    let parsed_rule = parsed.unwrap();
    assert!(parsed_rule.ignorable_urls.is_some());
    assert!(parsed_rule.ignorable_emails.is_some());
    assert!(parsed_rule.ignorable_copyrights.is_some());
    assert!(parsed_rule.ignorable_holders.is_some());
    assert!(parsed_rule.ignorable_authors.is_some());
}

/// The `referenced_filenames` list from rule frontmatter is parsed with all of
/// its entries.
#[test]
fn test_parse_rule_with_referenced_filenames() {
    let source = r#"---
license_expression: mit
is_license_reference: yes
referenced_filenames:
    - MIT.txt
    - LICENSE
---
MIT License"#;

    let parsed = parse_rule_from_str(source, "ref-files.RULE");
    assert!(parsed.is_ok());

    let referenced = parsed.unwrap().referenced_filenames;
    assert!(referenced.is_some());

    let filenames = referenced.unwrap();
    assert_eq!(filenames.len(), 2);
    assert!(filenames.contains(&String::from("MIT.txt")));
}

/// An explicit `relevance` value in rule frontmatter is carried onto the rule.
#[test]
fn test_parse_rule_relevance_field() {
    let source = r#"---
license_expression: mit
is_license_notice: yes
relevance: 85
---
MIT License"#;

    let parsed = parse_rule_from_str(source, "relevance.RULE");
    assert!(parsed.is_ok());
    assert_eq!(parsed.unwrap().relevance, 85);
}

/// A rule whose frontmatter omits `relevance` ends up with a relevance of 100.
#[test]
fn test_parse_rule_relevance_default() {
    let source = r#"---
license_expression: mit
is_license_notice: yes
---
MIT License"#;

    let parsed = parse_rule_from_str(source, "relevance-default.RULE");
    assert!(parsed.is_ok());
    assert_eq!(parsed.unwrap().relevance, 100);
}

/// URL-related and descriptive frontmatter fields are carried onto the parsed
/// license. The fixture sets no `language`, yet the license reports `en`.
#[test]
fn test_parse_license_with_urls() {
    let source = r#"---
key: url-test
short_name: URL Test
name: Test License
owner: Example Org
homepage_url: http://example.com
osi_license_key: URL-TEST
text_urls:
    - http://text.example.com
osi_url: http://osi.example.com
faq_url: http://faq.example.com
other_urls:
    - http://other.example.com
standard_notice: Include this notice.
---
License text."#;

    let parsed = parse_license_from_str(source, "url-test.LICENSE");
    assert!(parsed.is_ok());

    let lic = parsed.unwrap();
    assert!(!lic.reference_urls.is_empty());

    // Scalar fields.
    assert_eq!(lic.short_name.as_deref(), Some("URL Test"));
    assert_eq!(lic.language.as_deref(), Some("en"));
    assert_eq!(lic.owner.as_deref(), Some("Example Org"));
    assert_eq!(lic.homepage_url.as_deref(), Some("http://example.com"));
    assert_eq!(lic.osi_license_key.as_deref(), Some("URL-TEST"));

    // URL fields, single-valued and list-valued.
    assert_eq!(
        lic.text_urls,
        vec![String::from("http://text.example.com")]
    );
    assert_eq!(lic.osi_url.as_deref(), Some("http://osi.example.com"));
    assert_eq!(lic.faq_url.as_deref(), Some("http://faq.example.com"));
    assert_eq!(
        lic.other_urls,
        vec![String::from("http://other.example.com")]
    );

    assert_eq!(
        lic.standard_notice.as_deref(),
        Some("Include this notice.")
    );
}

/// A deprecated license keeps its deprecation flag and both `replaced_by`
/// entries.
#[test]
fn test_parse_license_replaced_by() {
    let source = r#"---
key: deprecated-replaced
name: Deprecated License
is_deprecated: yes
replaced_by:
    - mit
    - apache-2.0
---
"#;

    let parsed = parse_license_from_str(source, "deprecated-replaced.LICENSE");
    assert!(parsed.is_ok());

    let lic = parsed.unwrap();
    assert!(lic.is_deprecated);
    assert_eq!(lic.replaced_by.len(), 2);
}

/// A single-line `notes` value in rule frontmatter is carried onto the rule.
#[test]
fn test_parse_rule_notes_field() {
    let source = r#"---
license_expression: test
is_license_notice: yes
notes: This is a test note.
---
Rule text."#;

    let parsed = parse_rule_from_str(source, "notes.RULE");
    assert!(parsed.is_ok());

    let notes = parsed.unwrap().notes;
    assert!(notes.is_some());
    assert!(notes.unwrap().contains("test note"));
}

/// The `language` value from rule frontmatter is carried onto the rule.
#[test]
fn test_parse_rule_language_field() {
    let source = r#"---
license_expression: test
is_license_notice: yes
language: en
---
Rule text."#;

    let parsed = parse_rule_from_str(source, "language.RULE");
    assert!(parsed.is_ok());
    assert_eq!(parsed.unwrap().language, Some(String::from("en")));
}

/// When the frontmatter has `short_name` but no `name`, the license name is
/// the short name.
#[test]
fn test_parse_license_uses_short_name_as_name_fallback() {
    let source = r#"---
key: short-name-test
short_name: Short Name
---
License text."#;

    let parsed = parse_license_from_str(source, "short-name-test.LICENSE");
    assert!(parsed.is_ok());
    assert_eq!(parsed.unwrap().name, "Short Name");
}

/// When the frontmatter has neither `name` nor `short_name`, the license name
/// is the key.
#[test]
fn test_parse_license_name_fallback_to_key() {
    let source = r#"---
key: key-as-name
---
License text."#;

    let parsed = parse_license_from_str(source, "key-as-name.LICENSE");
    assert!(parsed.is_ok());
    assert_eq!(parsed.unwrap().name, "key-as-name");
}

/// Boolean flags accept several spellings: the quoted string `"1"` reads as
/// true, while `"no"`, `"0"` and a bare `false` all read as false.
#[test]
fn test_parse_bool_variants() {
    let source = r#"---
license_expression: test
is_license_reference: "1"
is_license_notice: "no"
is_license_text: false
is_license_tag: false
is_license_intro: "no"
is_license_clue: "0"
---
Text."#;

    let parsed = parse_rule_from_str(source, "bool-variants.RULE");
    assert!(parsed.is_ok());

    let parsed_rule = parsed.unwrap();
    // Only the reference flag was given a truthy spelling.
    assert!(parsed_rule.is_license_reference());
    assert!(!parsed_rule.is_license_notice());
    assert!(!parsed_rule.is_license_text());
    assert!(!parsed_rule.is_license_tag());
    assert!(!parsed_rule.is_license_intro());
    assert!(!parsed_rule.is_license_clue());
}

/// The embedded index contains `ibmpl-1.0_1.RULE` with the expected
/// expression, reference flag, relevance and text.
///
/// The test returns early (after a note on stderr) when the embedded engine
/// cannot be constructed.
#[test]
fn test_ibmpl_rule_loaded() {
    let Ok(engine) = crate::license_detection::LicenseDetectionEngine::from_embedded() else {
        eprintln!("Skipping test: embedded engine not available");
        return;
    };

    let index = engine.index();
    let rule = index
        .rules_by_rid
        .iter()
        .find(|candidate| candidate.identifier == "ibmpl-1.0_1.RULE")
        .expect("ibmpl-1.0_1.RULE should be loaded");

    assert_eq!(rule.license_expression, "ibmpl-1.0");
    assert!(rule.is_license_reference());
    assert_eq!(rule.relevance, 100);
    assert_eq!(
        rule.text, "distributed under the IBM Public License (IPL).",
        "Rule text should match"
    );
}

/// The tokens of the IBM Public License reference phrase occur as one
/// contiguous run inside the tokens of a query in which the phrase is split
/// across a line break.
#[test]
fn test_ibmpl_rule_tokens() {
    use crate::license_detection::tokenize::tokenize;

    let rule_text = "distributed under the IBM Public License (IPL).";
    let query_text = "Version 0.7.0 and above will be distributed under the IBM Public\nLicense (IPL). The IPL is an approved open source license";

    let rule_tokens = tokenize(rule_text);
    let query_tokens = tokenize(query_text);

    eprintln!("Rule tokens: {:?}", rule_tokens);
    eprintln!("Query tokens: {:?}", query_tokens);

    // An empty rule would trivially "match" at position 0 and hide a broken
    // tokenizer (and `windows(0)` panics), so require at least one token.
    assert!(!rule_tokens.is_empty(), "Rule text should produce tokens");

    // `windows` yields nothing when the query is shorter than the rule, so
    // this cannot slice out of bounds the way a manual index loop can.
    let rule_len = rule_tokens.len();
    let match_position = query_tokens
        .windows(rule_len)
        .position(|window| window == &rule_tokens[..]);

    if let Some(start) = match_position {
        eprintln!("MATCH FOUND at position {}", start);
        eprintln!(
            "Query segment: {:?}",
            &query_tokens[start..start + rule_len]
        );
    }

    assert!(
        match_position.is_some(),
        "Rule tokens should appear in query tokens"
    );
}

/// Runs detection on the IBM Public License reference phrase, first verbatim
/// and then embedded in a sentence that splits it across a line break.
///
/// Only the split-text run is asserted on: some match must mention
/// `ibmpl-1.0`, and no detection may carry a license expression. The verbatim
/// run is printed to stderr for diagnosis only.
///
/// The test returns early (after a note on stderr) when the embedded engine
/// cannot be constructed.
#[test]
fn test_ibmpl_detection() {
    use crate::license_detection::LicenseDetectionEngine;

    let Ok(engine) = LicenseDetectionEngine::from_embedded() else {
        eprintln!("Skipping test: embedded engine not available");
        return;
    };

    // Pass 1: the rule text verbatim (diagnostic output only).
    let verbatim = "distributed under the IBM Public License (IPL).";
    let verbatim_detections = engine
        .detect_with_kind(verbatim, false, false)
        .expect("Detection failed");

    eprintln!("Exact text match:");
    for detection in &verbatim_detections {
        let coverage = detection
            .matches
            .first()
            .map_or(0.0, |m| m.match_coverage);
        eprintln!(
            "  {} (coverage: {:.1}%)",
            detection.license_expression.as_deref().unwrap_or("none"),
            coverage
        );
    }

    // Pass 2: the phrase as it appears in the test file, split across lines.
    let split = "Version 0.7.0 and above will be distributed under the IBM Public\nLicense (IPL). The IPL is an approved open source license";
    let split_detections = engine
        .detect_with_kind(split, false, false)
        .expect("Detection failed");

    eprintln!("\nTest file text match:");
    for detection in &split_detections {
        let coverage = detection
            .matches
            .first()
            .map_or(0.0, |m| m.match_coverage);
        eprintln!(
            "  {} (coverage: {:.1}%)",
            detection.license_expression.as_deref().unwrap_or("none"),
            coverage
        );
    }

    let mentions_ibmpl = split_detections
        .iter()
        .flat_map(|detection| detection.matches.iter())
        .any(|m| m.license_expression.contains("ibmpl-1.0"));
    assert!(
        mentions_ibmpl,
        "Should preserve the ibmpl-1.0 match in split text"
    );

    let any_expression = split_detections
        .iter()
        .any(|detection| detection.license_expression.is_some());
    assert!(
        !any_expression,
        "Single low-quality fragment should remain expressionless"
    );
}