# Crate metadata published to the registry.
[package]
name = "txtfp"
version = "0.2.2"
authors = ["bravo1goingdark <kumarashutosh34169@gmail.com>"]
# Target auto-discovery is switched off for every target kind; the library
# target is declared explicitly in `[lib]`.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script is associated with this package.
build = false
categories = ["text-processing", "algorithms"]
documentation = "https://docs.rs/txtfp"
edition = "2024"
# Paths matching these patterns are left out of the packaged crate.
exclude = [
    ".github/**",
    "benches/**",
    "examples/**",
    "tests/**",
    "fuzz/**",
    "*.sh",
]
keywords = ["fingerprint", "minhash", "simhash", "lsh", "deduplication"]
license = "MIT"
readme = "README.md"
# `homepage` is deliberately not set: it would only repeat `repository`, and
# Cargo's manifest reference asks that it not duplicate that value.
repository = "https://github.com/themankindproject/txtfp"
rust-version = "1.88"
description = "Text fingerprinting: MinHash + LSH, SimHash, and ONNX semantic embeddings"
# docs.rs build settings: document the crate with every feature enabled and
# pass `--cfg docsrs` to rustdoc.
[package.metadata.docs.rs]
rustdoc-args = ["--cfg", "docsrs"]
all-features = true
# Cargo features. A `dep:<name>` entry switches on the optional dependency of
# that name; a `<crate>/<feature>` entry turns on a feature of a dependency.
[features]
# Requires `std` and pulls in `jieba-rs`.
cjk = ["std", "dep:jieba-rs"]
# Extends `cjk` with `lindera` and its `embed-ipadic` feature.
cjk-japanese = ["cjk", "dep:lindera", "lindera/embed-ipadic"]
# Extends `cjk` with `lindera` and its `embed-ko-dic` feature.
cjk-korean = ["cjk", "dep:lindera", "lindera/embed-ko-dic"]
# Builds on `semantic` and adds `reqwest`, `serde_json` and `tokio`.
cohere = ["semantic", "dep:reqwest", "dep:serde_json", "dep:tokio"]
# Active unless the consumer opts out with `default-features = false`.
default = ["std", "minhash", "simhash", "lsh"]
# Requires `minhash` and pulls in `hashbrown` and `smallvec`.
lsh = ["minhash", "dep:hashbrown", "dep:smallvec"]
# Requires `std` and pulls in `html2text` and `pulldown-cmark`.
markup = ["std", "dep:html2text", "dep:pulldown-cmark"]
# Pure switch: enables no dependency or other feature by itself.
minhash = []
# Same dependency set as `cohere`.
openai = ["semantic", "dep:reqwest", "dep:serde_json", "dep:tokio"]
# Requires `std` and pulls in `rayon`.
parallel = ["std", "dep:rayon"]
# Requires `std` and pulls in `pdf-extract`.
pdf = ["std", "dep:pdf-extract"]
# Pulls in `unicode-security`.
security = ["dep:unicode-security"]
# Requires `std` and pulls in `ort`, `tokenizers`, `hf-hub` and `ndarray`.
semantic = ["std", "dep:ort", "dep:tokenizers", "dep:hf-hub", "dep:ndarray"]
# Pulls in `serde`.
serde = ["dep:serde"]
# Pure switch: enables no dependency or other feature by itself.
simhash = []
# Forwards `std` to the dependencies listed here.
std = ["thiserror/std", "unicode-normalization/std", "unicode-bidi/std"]
# Pulls in `tlsh2`.
tlsh = ["dep:tlsh2"]
# Same dependency set as `cohere`.
voyage = ["semantic", "dep:reqwest", "dep:serde_json", "dep:tokio"]
# The library target, spelled out explicitly because `autolib = false` in
# `[package]` disables its auto-discovery.
[lib]
path = "src/lib.rs"
name = "txtfp"
# Runtime dependencies. Entries carrying `optional = true` are compiled only
# when a feature in `[features]` enables them through `dep:<name>`.
[dependencies]
ahash = { version = "0.8.12", default-features = false }
blake3 = { version = "1.8.5", default-features = false }
bytemuck = { version = "1.25.0", features = ["derive"] }
caseless = { version = "0.2.2", default-features = false }
hashbrown = { version = "0.17.0", default-features = false, features = ["default-hasher", "inline-more"], optional = true }
hf-hub = { version = "0.5.0", default-features = false, features = ["ureq", "rustls-tls"], optional = true }
html2text = { version = "0.17.1", optional = true }
jieba-rs = { version = "0.9", optional = true }
lindera = { version = "3.0", default-features = false, optional = true }
ndarray = { version = "0.16", optional = true }
# `=` pins this exact pre-release version.
# NOTE(review): presumably pinned because release candidates may change API
# between builds — confirm before relaxing the requirement.
ort = { version = "=2.0.0-rc.10", default-features = false, features = ["std", "download-binaries", "ndarray"], optional = true }
pdf-extract = { version = "0.10.0", optional = true }
pulldown-cmark = { version = "0.13.3", default-features = false, optional = true }
rayon = { version = "1.12.0", optional = true }
# reqwest's TLS backends sit behind Cargo features, so turning the defaults off
# also removes HTTPS support. `default-tls` restores it without re-enabling the
# other default features.
# NOTE(review): assumes the `cohere`/`openai`/`voyage` code paths talk to
# `https://` endpoints — confirm against the provider modules.
reqwest = { version = "0.13.2", default-features = false, features = ["json", "blocking", "default-tls"], optional = true }
serde = { version = "1.0.228", default-features = false, features = ["derive", "alloc"], optional = true }
serde_json = { version = "1.0.149", optional = true }
smallvec = { version = "1.15.1", optional = true }
thiserror = { version = "2.0.18", default-features = false }
tlsh2 = { version = "1.1.0", features = ["diff"], optional = true }
tokenizers = { version = "0.22.2", default-features = false, features = ["onig"], optional = true }
tokio = { version = "1.52.1", features = ["rt", "macros"], optional = true }
unicode-bidi = { version = "0.3.18", default-features = false }
unicode-normalization = { version = "0.1.25", default-features = false }
unicode-security = { version = "0.1.2", optional = true }
unicode-segmentation = "1.13.2"
wide = { version = "0.7", default-features = false }
xxhash-rust = { version = "0.8.15", default-features = false, features = ["xxh3", "const_xxh3"] }
# Dependencies compiled only for tests, examples and benchmarks.
[dev-dependencies]
criterion = { version = "0.5", default-features = false, features = ["html_reports"] }
hex = "0.4.3"
mimalloc = { version = "0.1.50", default-features = false }
proptest = "1.11.0"
serde_json = "1.0.149"
tempfile = "3.27.0"
# Benchmark builds: full optimization, whole-program ("fat") LTO, a single
# codegen unit, and no debug info.
[profile.bench]
codegen-units = 1
debug = 0
lto = "fat"
opt-level = 3
# Release builds: a single codegen unit combined with "thin" LTO.
[profile.release]
codegen-units = 1
lto = "thin"