grit-genomics 0.1.1

GRIT: Genomic Range Interval Toolkit - high-performance genomic interval operations
Documentation

# GRIT: Genomic Range Interval Toolkit

A high-performance genomic interval toolkit written in Rust. Drop-in replacement for bedtools with **3-15x faster** performance.

[![CI](https://github.com/manish59/grit/actions/workflows/ci.yml/badge.svg)](https://github.com/manish59/grit/actions/workflows/ci.yml)
[![codecov](https://codecov.io/gh/manish59/grit/branch/main/graph/badge.svg)](https://codecov.io/gh/manish59/grit)
[![Crates.io](https://img.shields.io/crates/v/grit-genomics.svg)](https://crates.io/crates/grit-genomics)
[![docs.rs](https://docs.rs/grit-genomics/badge.svg)](https://docs.rs/grit-genomics)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Rust](https://img.shields.io/badge/rust-1.85%2B-blue.svg)](https://www.rust-lang.org)
[![Documentation](https://img.shields.io/badge/docs-GitHub%20Pages-blue)](https://manish59.github.io/grit/)

---

## Table of Contents

- [Why GRIT?](#why-grit)
- [Installation](#installation)
- [Documentation](https://manish59.github.io/grit/)
- [Quick Start](#quick-start)
- [Migrating from bedtools](#migrating-from-bedtools)
- [Global Options](#global-options)
- [Commands](#commands)
  - [intersect](#intersect) - Find overlapping intervals
  - [subtract](#subtract) - Remove overlapping regions
  - [merge](#merge) - Combine overlapping intervals
  - [sort](#sort) - Sort BED files
  - [closest](#closest) - Find nearest intervals
  - [window](#window) - Find intervals within a window
  - [coverage](#coverage) - Calculate interval coverage
  - [slop](#slop) - Extend intervals
  - [complement](#complement) - Find gaps between intervals
  - [genomecov](#genomecov) - Genome-wide coverage
  - [jaccard](#jaccard) - Similarity coefficient
  - [multiinter](#multiinter) - Multi-file intersection
- [Utilities](#utilities)
  - [generate](#generate) - Generate synthetic datasets
- [Input Validation](#input-validation)
  - [Sort Order Validation](#sort-order-validation)
  - [Genome Order Validation](#genome-order-validation)
  - [stdin Validation](#stdin-validation)
- [Streaming Mode](#streaming-mode)
- [Performance](#performance)
- [Testing](#testing)
- [Contributing](#contributing)
- [License](#license)

---

## Why GRIT?

| Feature | bedtools | GRIT |
|---------|----------|------|
| Speed | Baseline | **3-15x faster** |
| Memory (streaming) | N/A | **O(k)**, where k = max concurrent overlaps |
| Parallelization | Single-threaded | Multi-core |
| Large file support | Limited by RAM | Process 50GB+ on 4GB RAM |

GRIT is designed for:
- **High-throughput genomics** - Process millions of intervals efficiently
- **Memory-constrained environments** - Streaming mode uses minimal RAM
- **Drop-in replacement** - Same CLI syntax as bedtools
- **Reproducibility** - Deterministic output regardless of thread count

---

## Installation

### From crates.io (Recommended)

```bash
cargo install grit-genomics
```

### From Homebrew (macOS/Linux)

```bash
brew install manish59/grit/grit
```

### From Source

```bash
git clone https://github.com/manish59/grit
cd grit
cargo build --release
cargo install --path .
```

### Verify Installation

```bash
grit --version
grit --help
```

### Documentation

Full command documentation with examples: **[https://manish59.github.io/grit/](https://manish59.github.io/grit/)**

---

## Quick Start

```bash
# Find overlapping intervals between two BED files
grit intersect -a regions.bed -b features.bed > overlaps.bed

# Merge overlapping intervals
grit merge -i intervals.bed > merged.bed

# Sort a BED file
grit sort -i unsorted.bed > sorted.bed

# Use streaming mode for large files (minimal memory)
grit intersect -a large_a.bed -b large_b.bed --streaming > result.bed
```

---

## Migrating from bedtools

GRIT is designed as a drop-in replacement for bedtools. Here's how common bedtools commands map to GRIT:

### Command Comparison Table

| bedtools | GRIT (basic) | GRIT (optimized) |
|----------|--------------|------------------|
| `bedtools intersect -a A.bed -b B.bed` | `grit intersect -a A.bed -b B.bed` | `grit intersect -a A.bed -b B.bed --streaming --assume-sorted` |
| `bedtools intersect -a A.bed -b B.bed -sorted` | `grit intersect -a A.bed -b B.bed` | `grit intersect -a A.bed -b B.bed --streaming --assume-sorted` |
| `bedtools subtract -a A.bed -b B.bed` | `grit subtract -a A.bed -b B.bed` | `grit subtract -a A.bed -b B.bed --streaming --assume-sorted` |
| `bedtools merge -i A.bed` | `grit merge -i A.bed` | `grit merge -i A.bed --assume-sorted` |
| `bedtools closest -a A.bed -b B.bed` | `grit closest -a A.bed -b B.bed` | `grit closest -a A.bed -b B.bed --streaming --assume-sorted` |
| `bedtools coverage -a A.bed -b B.bed -sorted` | `grit coverage -a A.bed -b B.bed` | `grit coverage -a A.bed -b B.bed --assume-sorted` |
| `bedtools window -a A.bed -b B.bed -w 1000` | `grit window -a A.bed -b B.bed -w 1000` | `grit window -a A.bed -b B.bed -w 1000 --assume-sorted` |
| `bedtools sort -i A.bed` | `grit sort -i A.bed` | `grit sort -i A.bed` |
| `bedtools slop -i A.bed -g genome.txt -b 100` | `grit slop -i A.bed -g genome.txt -b 100` | Same |
| `bedtools complement -i A.bed -g genome.txt` | `grit complement -i A.bed -g genome.txt` | `grit complement -i A.bed -g genome.txt --assume-sorted` |
| `bedtools jaccard -a A.bed -b B.bed` | `grit jaccard -a A.bed -b B.bed` | Same |

### Key GRIT Flags

| Flag | Description | When to Use |
|------|-------------|-------------|
| `--streaming` | O(k) memory mode | Large files (>1GB), memory-constrained systems |
| `--assume-sorted` | Skip sort validation | Pre-sorted files for faster startup |
| `--allow-unsorted` | Auto-sort in memory | Unsorted input (uses more memory) |
| `-g, --genome` | Validate chromosome order | Ensure genome-specific ordering |
| `--bedtools-compatible` | Match bedtools behavior | Zero-length interval handling |

### Performance Modes

```bash
# Basic (validates input, loads into memory)
grit intersect -a A.bed -b B.bed

# Streaming (constant memory, requires sorted input)
grit intersect -a A.bed -b B.bed --streaming

# Maximum performance (skip validation, streaming)
grit intersect -a A.bed -b B.bed --streaming --assume-sorted

# Handle unsorted input (auto-sorts in memory)
grit intersect -a unsorted.bed -b B.bed --allow-unsorted
```

### Common Workflow: bedtools to GRIT

```bash
# bedtools workflow
bedtools sort -i raw.bed > sorted.bed
bedtools merge -i sorted.bed > merged.bed
bedtools intersect -a merged.bed -b features.bed -sorted > result.bed

# GRIT equivalent (faster)
grit sort -i raw.bed > sorted.bed
grit merge -i sorted.bed --assume-sorted > merged.bed
grit intersect -a merged.bed -b features.bed --streaming --assume-sorted > result.bed

# GRIT pipeline (even faster - no intermediate files)
grit sort -i raw.bed | grit merge -i - --assume-sorted | grit intersect -a - -b features.bed --streaming --assume-sorted > result.bed
```

---

## Global Options

All commands support these options:

| Option | Description |
|--------|-------------|
| `-t, --threads <N>` | Number of threads (default: all CPUs) |
| `--bedtools-compatible` | Normalize zero-length intervals to 1bp for bedtools parity |
| `-h, --help` | Show help for any command |
| `-V, --version` | Show version |

```bash
# Run with 4 threads
grit -t 4 intersect -a file1.bed -b file2.bed

# Enable bedtools-compatible mode for zero-length intervals
grit --bedtools-compatible intersect -a snps.bed -b features.bed

# Get help for a specific command
grit intersect --help
```

---

## Commands

---

### intersect

Find overlapping intervals between two BED files.

#### When to Use

- Identify genomic regions that overlap between datasets (e.g., peaks vs. promoters)
- Filter intervals based on overlap with a reference set
- Find regions with NO overlap (exclusion analysis)
- Count how many times each region is covered

#### Why Use GRIT

- **4.4x faster** than bedtools intersect
- **O(k) memory** in streaming mode (k = max concurrent overlaps)
- **19x less memory** than bedtools

#### How to Use

```
grit intersect -a <FILE_A> -b <FILE_B> [OPTIONS]
```

**Required:**
- `-a, --file-a <FILE>` - Query intervals (file A)
- `-b, --file-b <FILE>` - Reference intervals (file B)

**Output Modes:**

| Option | Output |
|--------|--------|
| *(default)* | Overlap region only |
| `--wa` | Original A entry |
| `--wb` | Overlap region + B entry |
| `--wa --wb` | Both A and B entries |
| `-c, --count` | A entry + overlap count |
| `-u, --unique` | A entry once if ANY overlap |
| `-v, --no-overlap` | A entries with NO overlap |

**Filtering:**

| Option | Description |
|--------|-------------|
| `-f, --fraction <FLOAT>` | Minimum overlap as fraction of A (0.0-1.0) |
| `-r, --reciprocal` | Require reciprocal fraction overlap |

**Performance & Validation:**

| Option | Description |
|--------|-------------|
| `--streaming` | O(k) memory mode (requires sorted input) |
| `--assume-sorted` | Skip sort validation (faster for pre-sorted files) |
| `--allow-unsorted` | Allow unsorted input (loads and re-sorts in memory) |
| `-g, --genome <FILE>` | Validate chromosome order against genome file |
| `--stats` | Print statistics to stderr |

#### Examples

```bash
# Basic: find overlap regions
grit intersect -a peaks.bed -b promoters.bed > overlaps.bed

# Get original entries from both files
grit intersect -a a.bed -b b.bed --wa --wb > both.bed

# Find peaks NOT in blacklist regions
grit intersect -a peaks.bed -b blacklist.bed -v > filtered_peaks.bed

# Require 50% overlap of query interval
grit intersect -a a.bed -b b.bed -f 0.5 > overlap_50pct.bed

# Require 50% reciprocal overlap (both directions)
grit intersect -a a.bed -b b.bed -f 0.5 -r > reciprocal.bed

# Count overlaps per interval
grit intersect -a genes.bed -b variants.bed -c > gene_variant_counts.bed

# Report each query interval once (if it has any overlap)
grit intersect -a a.bed -b b.bed -u > has_overlap.bed

# Large files with minimal memory
grit intersect -a huge_a.bed -b huge_b.bed --streaming > result.bed
```

---

### subtract

Remove portions of A that overlap with B.

#### When to Use

- Remove blacklist regions from your intervals
- Exclude known features from analysis regions
- Clean up interval sets by removing specific regions

#### Why Use GRIT

- **6.5x faster** than bedtools subtract
- **19x less memory** in streaming mode
- Precise interval arithmetic

#### How to Use

```
grit subtract -a <FILE_A> -b <FILE_B> [OPTIONS]
```

**Required:**
- `-a, --file-a <FILE>` - Intervals to modify
- `-b, --file-b <FILE>` - Intervals to remove

**Options:**

| Option | Description |
|--------|-------------|
| `-A, --remove-entire` | Remove entire A interval if ANY overlap |
| `-f, --fraction <FLOAT>` | Minimum overlap fraction required |
| `-r, --reciprocal` | Require reciprocal fraction |
| `--streaming` | O(k) memory mode (requires sorted input) |
| `--assume-sorted` | Skip sort validation (faster for pre-sorted files) |
| `--allow-unsorted` | Allow unsorted input (loads and re-sorts in memory) |
| `-g, --genome <FILE>` | Validate chromosome order against genome file |
| `--stats` | Print statistics to stderr |

#### Examples

```bash
# Remove blacklist regions (keeps non-overlapping portions)
grit subtract -a peaks.bed -b blacklist.bed > clean_peaks.bed

# Remove entire interval if ANY overlap with blacklist
grit subtract -a peaks.bed -b blacklist.bed -A > strict_clean.bed

# Only subtract if >50% overlap
grit subtract -a a.bed -b b.bed -f 0.5 > result.bed

# Large files with streaming
grit subtract -a large_a.bed -b large_b.bed --streaming > result.bed
```

---

### merge

Combine overlapping and adjacent intervals into single intervals.

#### When to Use

- Collapse redundant overlapping intervals
- Create non-overlapping interval sets
- Simplify interval data before downstream analysis
- Combine intervals within a certain distance

#### Why Use GRIT

- **10.8x faster** than bedtools merge
- **~3 MB memory** regardless of file size
- Streaming by default (no `--streaming` flag needed)

#### How to Use

```
grit merge -i <INPUT> [OPTIONS]
```

**Required:**
- `-i, --input <FILE>` - Input BED file (use `-` for stdin)

**Options:**

| Option | Description |
|--------|-------------|
| `-d, --distance <INT>` | Merge intervals within this distance (default: 0) |
| `-s, --strand` | Only merge intervals on same strand |
| `-c, --count` | Report count of merged intervals |
| `--in-memory` | Load all records (for unsorted input) |
| `--assume-sorted` | Skip sort validation (faster for pre-sorted files) |
| `-g, --genome <FILE>` | Validate chromosome order against genome file |
| `--stats` | Print statistics to stderr |

#### Examples

```bash
# Basic merge (overlapping and adjacent)
grit merge -i intervals.bed > merged.bed

# Merge intervals within 100bp of each other
grit merge -i intervals.bed -d 100 > merged_100bp.bed

# Strand-specific merging
grit merge -i stranded.bed -s > merged_stranded.bed

# Count how many intervals were merged
grit merge -i intervals.bed -c > merged_counts.bed

# Read from stdin (piping)
cat intervals.bed | grit merge -i - > merged.bed

# Handle unsorted input
grit merge -i unsorted.bed --in-memory > merged.bed
```

---

### sort

Sort BED files by chromosome and position.

#### When to Use

- Prepare files for streaming operations
- Ensure consistent ordering for reproducibility
- Sort by interval size for analysis
- Use custom chromosome ordering (genome file)

#### Why Use GRIT

- **O(n) radix sort** vs O(n log n) comparison sort
- Memory-mapped I/O for large files
- Stable sort preserves input order for ties

#### How to Use

```
grit sort -i <INPUT> [OPTIONS]
```

**Required:**
- `-i, --input <FILE>` - Input BED file (use `-` for stdin)

**Options:**

| Option | Description |
|--------|-------------|
| `-g, --genome <FILE>` | Custom chromosome order from genome file |
| `--sizeA` | Sort by interval size (ascending) |
| `--sizeD` | Sort by interval size (descending) |
| `-r, --reverse` | Reverse final sort order |
| `--chrThenSizeA` | Sort by chromosome name only |
| `--stats` | Print statistics to stderr |

#### Examples

```bash
# Default sort (chromosome lexicographic, then start position)
grit sort -i unsorted.bed > sorted.bed

# Custom chromosome order from genome file
grit sort -i input.bed -g genome.txt > sorted.bed

# Sort by interval size (smallest first)
grit sort -i input.bed --sizeA > by_size.bed

# Sort by interval size (largest first)
grit sort -i input.bed --sizeD > by_size_desc.bed

# Reverse sort order
grit sort -i input.bed -r > reversed.bed

# Read from stdin
cat input.bed | grit sort -i - > sorted.bed
```

**Genome File Format:**
```
chr1    248956422
chr2    242193529
chr3    198295559
```

---

### closest

Find the nearest interval in B for each interval in A.

#### When to Use

- Find nearest gene for each variant
- Identify closest regulatory element to each peak
- Distance-to-feature analysis
- Nearest neighbor genomic analysis

#### Why Use GRIT

- Efficient O(n log m) binary search algorithm
- Flexible tie-breaking options
- Direction-aware searching (upstream/downstream)

#### How to Use

```
grit closest -a <FILE_A> -b <FILE_B> [OPTIONS]
```

**Required:**
- `-a, --file-a <FILE>` - Query intervals
- `-b, --file-b <FILE>` - Reference intervals to search

**Options:**

| Option | Description |
|--------|-------------|
| `-d, --distance` | Report distance in output |
| `-t, --tie <MODE>` | Handle ties: `all`, `first`, `last` |
| `--io` | Ignore overlapping intervals |
| `--iu` | Ignore upstream intervals |
| `--id` | Ignore downstream intervals |
| `-D, --max-distance <INT>` | Maximum search distance |
| `--streaming` | O(k) memory mode (requires sorted input) |
| `--assume-sorted` | Skip sort validation (faster for pre-sorted files) |
| `--allow-unsorted` | Allow unsorted input (loads and re-sorts in memory) |
| `-g, --genome <FILE>` | Validate chromosome order against genome file |

#### Examples

```bash
# Find closest gene for each variant
grit closest -a variants.bed -b genes.bed > nearest_genes.bed

# Include distance in output
grit closest -a a.bed -b b.bed -d > closest_with_distance.bed

# Only report first tie
grit closest -a a.bed -b b.bed -t first > closest_first.bed

# Find nearest non-overlapping interval
grit closest -a a.bed -b b.bed --io > nearest_nonoverlap.bed

# Only look downstream
grit closest -a a.bed -b b.bed --iu > downstream_only.bed

# Only look upstream
grit closest -a a.bed -b b.bed --id > upstream_only.bed

# Limit search to 10kb
grit closest -a a.bed -b b.bed -D 10000 > closest_10kb.bed
```

---

### window

Find intervals in B within a window around intervals in A.

#### When to Use

- Find features within a distance of query regions
- Identify nearby regulatory elements
- Proximity-based feature association
- Asymmetric distance searches (different upstream/downstream)

#### Why Use GRIT

- Flexible symmetric and asymmetric windows
- Count or report modes
- Efficient interval tree queries

#### How to Use

```
grit window -a <FILE_A> -b <FILE_B> [OPTIONS]
```

**Required:**
- `-a, --file-a <FILE>` - Query intervals
- `-b, --file-b <FILE>` - Reference intervals

**Options:**

| Option | Description |
|--------|-------------|
| `-w, --window <INT>` | Window size both sides (default: 1000) |
| `-l, --left <INT>` | Left/upstream window size |
| `-r, --right <INT>` | Right/downstream window size |
| `-c, --count` | Report count of matches |
| `-v, --no-overlap` | Report A intervals with NO matches |
| `--assume-sorted` | Skip sort validation (faster for pre-sorted files) |
| `-g, --genome <FILE>` | Validate chromosome order against genome file |

#### Examples

```bash
# Find features within 1kb of query regions
grit window -a genes.bed -b enhancers.bed -w 1000 > nearby.bed

# Asymmetric window: 5kb upstream, 1kb downstream
grit window -a tss.bed -b enhancers.bed -l 5000 -r 1000 > nearby.bed

# Count features in window
grit window -a genes.bed -b variants.bed -w 5000 -c > counts.bed

# Find regions with no features nearby
grit window -a genes.bed -b enhancers.bed -v > isolated.bed
```

---

### coverage

Calculate coverage depth of B intervals over A intervals.

#### When to Use

- Count reads overlapping genomic regions
- Calculate what fraction of each region is covered
- Generate coverage statistics for intervals
- Quality control of sequencing data

#### Why Use GRIT

- **9x faster** than bedtools coverage
- **134x less memory** than bedtools
- Multiple output formats (counts, histogram, per-base)

#### How to Use

```
grit coverage -a <FILE_A> -b <FILE_B> [OPTIONS]
```

**Required:**
- `-a, --file-a <FILE>` - Target regions
- `-b, --file-b <FILE>` - Features to count (reads, etc.)

**Options:**

| Option | Description |
|--------|-------------|
| `--hist` | Report histogram of coverage depths |
| `-d, --per-base` | Report depth at each position |
| `--mean` | Report mean depth per region |
| `--assume-sorted` | Skip sort validation (faster for pre-sorted files) |
| `-g, --genome <FILE>` | Validate chromosome order against genome file |

**Output Format (default):**
```
chrom  start  end  name  score  strand  count  bases_covered  length  fraction
```

#### Examples

```bash
# Basic coverage (count, covered bases, length, fraction)
grit coverage -a regions.bed -b reads.bed > coverage.bed

# Mean depth per region
grit coverage -a regions.bed -b reads.bed --mean > mean_depth.bed

# Per-base depth
grit coverage -a regions.bed -b reads.bed -d > per_base.bed

# Histogram of coverage depths
grit coverage -a regions.bed -b reads.bed --hist > histogram.txt

# Streaming mode for large files
grit coverage -a regions.bed -b reads.bed --streaming > coverage.bed
```

---

### slop

Extend intervals by a specified number of bases.

#### When to Use

- Expand peaks to include flanking regions
- Create promoter regions from TSS coordinates
- Add padding around features
- Strand-aware extension (upstream/downstream)

#### Why Use GRIT

- Respects chromosome boundaries
- Strand-aware extension
- Percentage-based extension option

#### How to Use

```
grit slop -i <INPUT> -g <GENOME> [OPTIONS]
```

**Required:**
- `-i, --input <FILE>` - Input BED file
- `-g, --genome <FILE>` - Chromosome sizes file

**Options:**

| Option | Description |
|--------|-------------|
| `-b, --both <INT>` | Extend both sides by N bases |
| `-l, --left <INT>` | Extend left/upstream |
| `-r, --right <INT>` | Extend right/downstream |
| `-s, --strand` | Use strand for upstream/downstream |
| `--pct` | Values are fractions of interval size |

#### Examples

```bash
# Extend 100bp on both sides
grit slop -i peaks.bed -g genome.txt -b 100 > extended.bed

# Create 500bp upstream + 100bp downstream regions
grit slop -i tss.bed -g genome.txt -l 500 -r 100 > promoters.bed

# Strand-aware extension (upstream/downstream relative to strand)
grit slop -i genes.bed -g genome.txt -l 1000 -r 0 -s > upstream_1kb.bed

# Extend by 10% of interval size on each side
grit slop -i peaks.bed -g genome.txt -b 0.1 --pct > extended_10pct.bed
```

**Genome File Format:**
```
chr1    248956422
chr2    242193529
chr3    198295559
```

---

### complement

Find genomic regions NOT covered by input intervals.

#### When to Use

- Find gaps between features
- Identify intergenic regions
- Create inverse of an interval set
- Find uncovered portions of chromosomes

#### Why Use GRIT

- O(n) single-pass streaming algorithm
- Memory efficient
- Simple, focused operation

#### How to Use

```
grit complement -i <INPUT> -g <GENOME>
```

**Required:**
- `-i, --input <FILE>` - Input BED file
- `-g, --genome <FILE>` - Chromosome sizes file

#### Examples

```bash
# Find gaps between intervals
grit complement -i genes.bed -g genome.txt > intergenic.bed

# Find uncovered regions
grit complement -i covered.bed -g genome.txt > gaps.bed
```

---

### genomecov

Compute genome-wide coverage statistics.

#### When to Use

- Generate coverage tracks for visualization
- Compute depth distribution across genome
- Create BedGraph files for genome browsers
- Normalize coverage (scaling)

#### Why Use GRIT

- Multiple output formats (histogram, BedGraph)
- Coverage scaling for normalization
- Efficient whole-genome processing

#### How to Use

```
grit genomecov -i <INPUT> -g <GENOME> [OPTIONS]
```

**Required:**
- `-i, --input <FILE>` - Input BED file
- `-g, --genome <FILE>` - Chromosome sizes file

**Options:**

| Option | Description |
|--------|-------------|
| `-d, --per-base` | Report depth at each position (1-based) |
| `--bg` | BedGraph format (non-zero regions only) |
| `--bga` | BedGraph format (including zero coverage) |
| `--scale <FLOAT>` | Scale depth by factor (default: 1.0) |

#### Examples

```bash
# Default histogram output
grit genomecov -i reads.bed -g genome.txt > histogram.txt

# BedGraph for visualization (non-zero only)
grit genomecov -i reads.bed -g genome.txt --bg > coverage.bedgraph

# BedGraph including zero coverage regions
grit genomecov -i reads.bed -g genome.txt --bga > coverage_all.bedgraph

# Per-base depth (large output)
grit genomecov -i reads.bed -g genome.txt -d > per_base.txt

# Scale coverage (e.g., RPM normalization)
grit genomecov -i reads.bed -g genome.txt --bg --scale 0.5 > scaled.bedgraph
```

**Histogram Output Format:**
```
chrom  depth  bases_at_depth  chrom_size  fraction
```

---

### jaccard

Calculate Jaccard similarity coefficient between two interval sets.

#### When to Use

- Compare similarity of two interval sets
- Measure overlap between experiments
- Quality control: compare replicates
- Quantify agreement between methods

#### Why Use GRIT

- O(n + m) efficient sweep-line algorithm
- Single-pass computation
- Standard Jaccard metric

#### How to Use

```
grit jaccard -a <FILE_A> -b <FILE_B>
```

**Required:**
- `-a, --file-a <FILE>` - First BED file
- `-b, --file-b <FILE>` - Second BED file

**Output Format:**
```
intersection    union    jaccard    n_intersections
15000           45000    0.333333   150
```

#### Examples

```bash
# Compare two peak sets
grit jaccard -a peaks_rep1.bed -b peaks_rep2.bed

# Compare methods
grit jaccard -a method1.bed -b method2.bed
```

---

### multiinter

Identify intervals and which files contain them across multiple BED files.

#### When to Use

- Find common intervals across multiple samples
- Identify sample-specific intervals
- Multi-way intersection analysis
- Consensus peak calling

#### Why Use GRIT

- Handles arbitrary number of files
- Reports which files contain each interval
- Cluster mode for strict consensus

#### How to Use

```
grit multiinter -i <FILE1> <FILE2> [FILE3...] [OPTIONS]
```

**Required:**
- `-i, --input <FILES>` - Two or more input BED files

**Options:**

| Option | Description |
|--------|-------------|
| `--cluster` | Only output intervals in ALL files |

#### Examples

```bash
# Find intervals across 3 files (reports which files contain each)
grit multiinter -i rep1.bed rep2.bed rep3.bed > multi.bed

# Find intervals present in ALL files (consensus)
grit multiinter -i rep1.bed rep2.bed rep3.bed --cluster > consensus.bed
```

---

## Utilities

---

### generate

Generate synthetic BED datasets for benchmarking and testing.

#### When to Use

- Create reproducible test data for benchmarking
- Generate datasets with specific characteristics (uniform, clustered)
- Test GRIT commands with controlled data sizes
- Compare performance across different data distributions

#### How to Use

```
grit generate [OPTIONS]
```

**Options:**

| Option | Description |
|--------|-------------|
| `-o, --output <DIR>` | Output directory (default: `./grit_bench_data`) |
| `--sizes <SIZES>` | Comma-separated sizes: `100K`, `1M`, `10M` |
| `--mode <MODE>` | Distribution: `balanced`, `clustered`, `identical`, `skewed-a-gt-b`, `skewed-b-gt-a`, `all` |
| `--seed <INT>` | Random seed for reproducibility (default: 42) |
| `--a <SIZE>` | Custom A file size |
| `--b <SIZE>` | Custom B file size |
| `--sorted <yes\|no\|auto>` | Output sorting (default: `auto`) |
| `--hotspot-frac <FLOAT>` | Genome fraction for hotspots (default: 0.05) |
| `--hotspot-weight <FLOAT>` | Interval fraction in hotspots (default: 0.80) |
| `--force` | Overwrite existing files |

**Size Notation:**

| Format | Example | Value |
|--------|---------|-------|
| Number | `1000` | 1,000 intervals |
| K suffix | `100K` | 100,000 intervals |
| M suffix | `10M` | 10,000,000 intervals |

**Generation Modes:**

| Mode | Description |
|------|-------------|
| `balanced` | Equal-sized A and B with uniform distribution |
| `clustered` | Intervals concentrated in hotspot regions |
| `identical` | A and B contain identical intervals |
| `skewed-a-gt-b` | A file 10x larger than B |
| `skewed-b-gt-a` | B file 10x larger than A |
| `all` | Generate all modes |

#### Examples

```bash
# Quick test data (100K intervals)
grit generate --sizes 100K --mode balanced --force

# Benchmark suite (multiple sizes)
grit generate --sizes 1M,5M,10M --mode all --seed 42

# Custom asymmetric sizes
grit generate --a 10M --b 1M --mode balanced

# Clustered data (simulates ChIP-seq peaks)
grit generate --mode clustered --hotspot-frac 0.1 --hotspot-weight 0.9

# Unsorted output for testing sort validation
grit generate --sizes 1M --sorted no
```

**Output Structure:**

```
grit_bench_data/
├── balanced/
│   └── 1M/
│       ├── A.bed
│       └── B.bed
├── clustered/
│   └── ...
└── ...
```

---

## Input Validation

GRIT validates input files to prevent silent failures from incorrectly sorted data. This section explains the validation behavior and how to control it.

### Sort Order Validation

By default, GRIT validates that input files are sorted before processing. Most commands require sorted input (by chromosome, then by start position).

**If files are unsorted, you'll see a helpful error:**

```
Error: File A is not sorted: position 100 at line 5 comes after 200 on chr1

Fix: Run 'grit sort -i a.bed > sorted_a.bed' first.
Or use '--allow-unsorted' to load and re-sort in memory (uses O(n) memory).
```

**How to sort files:**

```bash
# Sort with GRIT (recommended)
grit sort -i unsorted.bed > sorted.bed

# Or use standard Unix sort
sort -k1,1 -k2,2n unsorted.bed > sorted.bed
```

### Validation Flags

| Flag | Description | Memory Impact |
|------|-------------|---------------|
| `--assume-sorted` | Skip validation entirely | No change |
| `--allow-unsorted` | Load and re-sort in memory | O(n) |
| `-g, --genome <FILE>` | Validate genome chromosome order | No change |

#### `--assume-sorted`

Skip validation when you know files are pre-sorted:

```bash
# Skip validation for faster startup
grit intersect --streaming --assume-sorted -a sorted_a.bed -b sorted_b.bed

# Useful in pipelines where files are guaranteed sorted
grit merge -i - --assume-sorted < sorted.bed
```

**Warning:** Using `--assume-sorted` with unsorted files produces incorrect results silently.

#### `--allow-unsorted`

For non-streaming commands (`intersect`, `subtract`, `closest`), explicitly allow unsorted input:

```bash
# Load and re-sort in memory (uses O(n) memory)
grit intersect --allow-unsorted -a unsorted_a.bed -b unsorted_b.bed

# Without this flag, unsorted input fails with a clear error
```

This flag is not available for streaming commands, which require pre-sorted input.

### Genome Order Validation

Use `-g, --genome` to validate that chromosomes appear in a specific order (e.g., hg38, mm10):

```bash
# Validate chromosome order against genome file
grit intersect --streaming -a a.bed -b b.bed -g hg38.genome

# Merge with genome order validation
grit merge -i input.bed -g hg38.genome

# Sort files to match genome order
grit sort -i input.bed -g hg38.genome > sorted.bed
```

**Genome file format** (tab-separated: chromosome name and size):

```
chr1    248956422
chr2    242193529
chr3    198295559
chrX    156040895
chrY    57227415
```

**When `-g` is provided:**
- Chromosomes must appear in the genome file order
- Chromosomes not in the genome file cause an error
- Error messages suggest how to fix: `grit sort -i file.bed -g genome.txt`

**Without `-g`:** Any contiguous chromosome order is accepted (lexicographic, natural, etc.)

### stdin Validation

When reading from stdin, GRIT buffers the input to validate sort order:

```bash
# stdin is validated by default (buffers entire input)
cat sorted.bed | grit merge -i - > merged.bed

# Skip stdin validation with --assume-sorted (no buffering)
cat sorted.bed | grit merge -i - --assume-sorted > merged.bed
```

**Note:** stdin validation uses O(n) memory to buffer input. For large piped inputs where data is guaranteed sorted, use `--assume-sorted` to skip buffering.

### Validation Summary by Command

| Command | Requires Sorted | `--allow-unsorted` | `-g, --genome` |
|---------|-----------------|--------------------|----------------|
| `intersect` | Yes (streaming) / Validates (default) | Yes | Yes |
| `subtract` | Yes (streaming) / Validates (default) | Yes | Yes |
| `closest` | Yes (streaming) / Validates (default) | Yes | Yes |
| `merge` | Yes | No (use `--in-memory`) | Yes |
| `window` | Yes | No | Yes |
| `coverage` | Yes | No | Yes |
| `sort` | No | N/A | Yes (for ordering) |
| `slop` | No | N/A | No |
| `complement` | Yes | No | No |

---

## Streaming Mode

For very large files, streaming mode processes data in O(k) memory, independent of file size, where k is the maximum number of intervals overlapping any single position (typically < 100).

### When to Use Streaming

- Files larger than available RAM
- Processing 50GB+ files on laptops
- Memory-constrained environments
- When files are already sorted

### Streaming Commands

Commands that support `--streaming` mode:

```bash
# Intersect
grit intersect -a a.bed -b b.bed --streaming > result.bed
grit intersect -a a.bed -b b.bed --streaming --assume-sorted > result.bed

# Subtract
grit subtract -a a.bed -b b.bed --streaming > result.bed
grit subtract -a a.bed -b b.bed --streaming --assume-sorted > result.bed

# Closest
grit closest -a a.bed -b b.bed --streaming > result.bed
grit closest -a a.bed -b b.bed --streaming --assume-sorted > result.bed

# Window (always uses streaming internally)
grit window -a a.bed -b b.bed > result.bed
grit window -a a.bed -b b.bed --assume-sorted > result.bed

# Coverage (always uses streaming internally)
grit coverage -a a.bed -b b.bed > result.bed
grit coverage -a a.bed -b b.bed --assume-sorted > result.bed

# Merge (streaming by default)
grit merge -i sorted.bed > result.bed
grit merge -i sorted.bed --assume-sorted > result.bed
```

### Memory Comparison

| Mode | Memory Usage | Best For |
|------|--------------|----------|
| Default (parallel) | O(n + m) | Maximum speed |
| Streaming | O(k) ≈ 2 MB | Large files, low RAM |

---

## Zero-Length Interval Semantics

GRIT uses strict half-open interval semantics by default, which differs from bedtools in handling zero-length intervals.

### What Are Zero-Length Intervals?

Zero-length intervals have `start == end`, such as:
```
chr1    100    100
```

These represent point positions (e.g., SNP locations from VCF-to-BED conversion) rather than regions.

### Default Behavior (Strict Mode)

In strict half-open semantics, a zero-length interval `[100, 100)` contains no bases:
- It does **not** overlap with itself
- It does **not** overlap with adjacent intervals like `[100, 101)`

This follows the mathematical definition of half-open intervals.

### Bedtools Behavior

Bedtools treats zero-length intervals as if they were 1bp intervals:
- `[100, 100)` overlaps with `[100, 101)`
- Self-intersection of zero-length intervals produces output

### Enabling Bedtools Compatibility

Use `--bedtools-compatible` to match bedtools behavior:

```bash
# Default: strict semantics (zero-length intervals don't overlap)
grit intersect -a snps.bed -b features.bed

# Bedtools-compatible: zero-length intervals normalized to 1bp
grit --bedtools-compatible intersect -a snps.bed -b features.bed
```

When enabled, zero-length intervals are normalized to 1bp during parsing:
```
chr1    100    100  →  chr1    100    101
```

### When to Use Each Mode

| Mode | Use Case |
|------|----------|
| **Strict (default)** | Mathematical correctness, new projects |
| **Bedtools-compatible** | Reproducing bedtools results, dbSNP data |

### Performance Impact

The `--bedtools-compatible` flag has **negligible performance impact** (<1%). Normalization occurs once during parsing, not in inner loops.

---

## Performance

### Benchmarks

Tested on 10M × 5M intervals (uniform distribution):

| Command | bedtools | GRIT | Speedup | Memory Reduction |
|---------|----------|------|---------|------------------|
| window | 32.18s | 2.10s | **15.3x** | 137x less |
| merge | 3.68s | 0.34s | **10.8x** | ~same |
| coverage | 16.53s | 1.84s | **9.0x** | 134x less |
| subtract | 9.49s | 1.47s | **6.5x** | 19x less |
| closest | 9.70s | 1.95s | **5.0x** | 59x less |
| intersect | 6.77s | 1.54s | **4.4x** | 19x less |
| jaccard | 4.98s | 1.59s | **3.1x** | 1230x less |

See [full benchmark methodology](https://manish59.github.io/grit/benchmarks.html) for details.

### Performance Tips

1. **Use streaming for large files** - Constant memory, often faster
2. **Pre-sort your files** - Use `--assume-sorted` to skip validation
3. **Adjust thread count** - Default uses all CPUs, tune with `-t`
4. **Use merge first** - Reduce interval count before expensive operations

---

## Testing

### Quick Start

```bash
# Run all tests
cargo test

# Build release for testing
cargo build --release
```

### Unit Tests

```bash
# All unit tests (290+ tests)
cargo test

# Specific module tests
cargo test streaming       # Streaming infrastructure tests
cargo test intersect       # Intersect command tests
cargo test merge           # Merge command tests
cargo test sort            # Sort command tests
cargo test coverage        # Coverage command tests
cargo test closest         # Closest command tests

# With verbose output
cargo test -- --nocapture
```

### Integration Tests

```bash
# Fast sort integration tests (requires bedtools installed)
cargo test --release --test fast_sort_integration
```

### Test Sorted Input Validation

GRIT validates that input files are sorted for streaming operations:

```bash
# Create test files
cat > sorted.bed << 'EOF'
chr1	100	200
chr1	300	400
chr2	100	200
EOF

cat > unsorted.bed << 'EOF'
chr2	100	200
chr1	300	400
EOF

# This should succeed (sorted input)
grit intersect --streaming -a sorted.bed -b sorted.bed

# This should fail with error (unsorted input)
grit intersect --streaming -a unsorted.bed -b sorted.bed
# Error: File A is not sorted...

# Skip validation with --assume-sorted (faster for pre-sorted files)
grit intersect --streaming --assume-sorted -a sorted.bed -b sorted.bed
```

### Test Commands Individually

```bash
# Intersect
grit intersect -a a.bed -b b.bed > result.bed
grit intersect --streaming -a a.bed -b b.bed > result.bed
grit intersect --streaming --assume-sorted -a a.bed -b b.bed > result.bed

# Subtract
grit subtract --streaming -a a.bed -b b.bed > result.bed
grit subtract --streaming --assume-sorted -a a.bed -b b.bed > result.bed

# Merge
grit merge -i sorted.bed > merged.bed
grit merge --assume-sorted -i sorted.bed > merged.bed

# Closest
grit closest --streaming -a a.bed -b b.bed > result.bed
grit closest --streaming --assume-sorted -a a.bed -b b.bed > result.bed

# Window
grit window -a a.bed -b b.bed -w 1000 > result.bed
grit window --assume-sorted -a a.bed -b b.bed -w 1000 > result.bed

# Coverage
grit coverage -a a.bed -b b.bed > result.bed
grit coverage --assume-sorted -a a.bed -b b.bed > result.bed
```

### Verify Against bedtools

```bash
# Compare intersect output
diff <(bedtools intersect -a a.bed -b b.bed | sort) \
     <(grit intersect -a a.bed -b b.bed | sort)

# Compare sort output
diff <(bedtools sort -i input.bed) <(grit sort -i input.bed)

# Compare merge output
diff <(bedtools merge -i sorted.bed) <(grit merge -i sorted.bed)

# Compare subtract output
diff <(bedtools subtract -a a.bed -b b.bed | sort) \
     <(grit subtract --streaming -a a.bed -b b.bed | sort)

# SHA256 parity check (for large files)
sha256sum <(bedtools intersect -a a.bed -b b.bed | sort) \
          <(grit intersect -a a.bed -b b.bed | sort)
```

### Run Benchmarks

```bash
# Run all benchmarks
cargo bench

# Specific benchmarks
cargo bench intersect
cargo bench sort
cargo bench merge

# Run benchmark script (if available)
./benchmarks/bench.sh run 1M 500K coverage subtract closest merge intersect
./benchmarks/grit-only.sh 10M 5M
```

### Performance Testing

```bash
# Generate large test files
for i in $(seq 1 1000000); do
  echo -e "chr$((RANDOM % 22 + 1))\t$((RANDOM * 100))\t$((RANDOM * 100 + 1000))"
done > large_a.bed

# Sort the test file
grit sort -i large_a.bed > large_a_sorted.bed

# Time streaming vs parallel mode
time grit intersect -a large_a_sorted.bed -b large_a_sorted.bed --streaming > /dev/null
time grit intersect -a large_a_sorted.bed -b large_a_sorted.bed > /dev/null

# Memory usage (on Linux)
/usr/bin/time -v grit intersect --streaming -a large_a_sorted.bed -b large_a_sorted.bed > /dev/null
```

### Test Coverage

```bash
# Install cargo-tarpaulin for coverage
cargo install cargo-tarpaulin

# Run coverage report
cargo tarpaulin --out Html
```

---

## Contributing

Contributions welcome! Please:

1. Fork the repository
2. Create a feature branch (`git checkout -b feature/new-feature`)
3. Commit changes (`git commit -m 'feat: add new feature'`)
4. Push to branch (`git push origin feature/new-feature`)
5. Open a Pull Request

---

## License

MIT License - see [LICENSE](LICENSE) for details.

---

## Acknowledgments

- [bedtools](https://bedtools.readthedocs.io/) by Aaron Quinlan - the inspiration for this project
- [Rayon](https://github.com/rayon-rs/rayon) for parallel processing