1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
[package]
name = "difflib-fast"
version = "0.3.5"
description = "Fast, byte-for-byte exact difflib Ratcliff–Obershelp (gestalt) similarity ratio + single-linkage clustering (suffix automaton), plus an exact all-pairs weighted-cosine similarity join (L2AP, CPU+GPU)."
keywords = ["difflib", "similarity", "ratcliff-obershelp", "suffix-automaton", "fuzzy"]
categories = ["algorithms", "text-processing"]
edition = "2021"
license = "MIT"
repository = "https://github.com/prostomarkeloff/difflib-fast"
authors = ["prostomarkeloff"]
# The benchmark suite + corpora (large, derived) are not part of the published crate / Python sdist.
exclude = ["/benchmarks"]
[lib]
name = "difflib_fast"
# rlib for the bench bin + downstream crates; cdylib for the (optional) Python extension via maturin.
crate-type = ["cdylib", "rlib"]
[dependencies]
# NOTE(review): the crate name for this dependency is missing, so this line does not parse as TOML.
# Only the version requirement ("1") is visible here; restore the name from the original manifest.
= "1"
# Optional Python bindings (the `python` feature, built by maturin). `abi3-py39` → one wheel per
# platform works on CPython 3.9+. The pure-Rust crate has zero Python dependency by default.
pyo3 = { version = "0.28", optional = true, features = ["extension-module", "abi3-py39"] }
# Used ONLY as the `bench` binary's global allocator (libraries must not set one) — gated behind the
# `bench` feature so the published library never pulls it in. macOS's default malloc madvise churn
# cost ~25% once parallel, hence the override for benchmarking.
mimalloc = { version = "0.1", default-features = false, optional = true }
# Apple Metal compute bindings — used by the `gpu` feature to run the SAM matching_stats walk on
# the M3 GPU in parallel with the CPU SAM walker (heterogeneous CPU+GPU exact RO). Apple-only;
# the feature does not compile on non-Apple targets, and `cluster_canonicals` falls back to the
# CPU-only path automatically when the GPU device cannot be acquired at runtime.
# NOTE(review): the table name inside the brackets below is missing, so the header does not parse.
# Presumably a target-specific table of the form `[target.'cfg(...)'.dependencies]` restricted to
# Apple platforms — the exact cfg expression is not visible here; confirm against the original.
[]
metal = { version = "0.33", optional = true }
[features]
default = []
python = ["dep:pyo3"]
bench = ["dep:mimalloc"] # enables the `bench` binary's mimalloc global allocator
# Heterogeneous CPU+GPU exact RO — Metal compute shader port of the SAM matching_stats walker.
# macOS-only; the feature wires up automatically on Apple Silicon and is a no-op on other targets.
# The kernel reads the corpus SAMs out of unified shared memory (Apple Silicon's UMA = zero copy
# between CPU and GPU), so there is no host↔device transfer cost. `Rationer` degrades to CPU when
# this is off or no Metal device can be acquired at runtime.
gpu = ["dep:metal"]
# `instrument` adds atomic counters inside `gestalt::longest_in` and its endpos range queries plus
# the RO recursion, plus a `gestalt::instrument::dump()` API that prints histograms of chain-walk
# depth, recursion depth, fmatch distribution, the linear-vs-segtree split, and per-call scan
# lengths. Costs ~10-20% wall when active (relaxed-ordered atomics on the hot path); the
# `cfg(feature)` gate compiles to a no-op in default builds so production stays untouched.
instrument = []
# `profiling` marks `simjoin`'s hot phases `#[inline(never)]` so the sampler (samply) attributes
# self-time per phase (candidate-gen vs verify vs index-suffix) instead of one inlined `cosine_join`
# blob. Pure observability — compiles to identical codegen as default when off. Never ship it.
profiling = []
# `objc`'s `sel!`/`msg_send!` macros (pulled in transitively by the `metal` crate under the `gpu`
# feature) expand to `cfg(feature = "cargo-clippy")` checks — whitelist that cfg so the lint stays
# on for genuinely-unexpected cfgs without flooding gpu builds with dependency-macro noise.
[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(feature, values("cargo-clippy"))'] }
# Strict clippy: all + pedantic denied (priority -1 so crate-local `#[allow]`s on hot paths override).
[lints.clippy]
all = { level = "deny", priority = -1 }
pedantic = { level = "deny", priority = -1 }
# Gold CPU baseline bench (raw ratio / threshold qualify / cluster). Gated behind `bench` so the
# published library never builds it. The GPU / PMC / cross-impl harness lives outside the tracked
# tree (see perf-local/).
[[bin]]
name = "bench"
required-features = ["bench"]
# GPU-vs-CPU throughput experiment for the simjoin verify step (Apple Metal). Needs the `gpu` feature.
# NOTE(review): target kind presumed to be `bin`, matching the `bench` target — confirm it is not
# an `example` or `bench` target in the original manifest.
[[bin]]
name = "simjoin_gpu_bench"
required-features = ["gpu"]
# Keep symbols + line tables in release so an external sampler (samply) can resolve frames.
[profile.release]
debug = "line-tables-only"
strip = false