[package]
name = "text-splitter"
version = "0.30.1"
authors = ["Ben Brandt <benjamin.j.brandt@gmail.com>"]
# Target auto-discovery is switched off for every target kind; the [lib],
# [[test]] and [[bench]] tables in this manifest declare the targets explicitly.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script is used.
build = false
categories = ["text-processing"]
edition = "2021"
# Paths that are left out of the published package.
exclude = [
    ".github/**",
    ".vscode/**",
    "/bindings/**",
    "/benches/output.txt",
    "/docs/**",
    "/tests/snapshots/**",
    # NOTE(review): the snapshot test target declared in this manifest points
    # at tests/snapshots.rs, not this file name - confirm the pattern is
    # still needed.
    "/tests/text_splitter_snapshots.rs",
    "/tests/inputs/**",
    "/tests/tokenizers/**",
    "*.yml",
    "*.yaml",
]
keywords = ["text", "split", "tokenizer", "nlp", "ai"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/benbrandt/text-splitter"
# Minimum supported Rust toolchain.
rust-version = "1.86.0"
description = "Split text into semantic chunks, up to a desired chunk size. Supports calculating length by characters and tokens, and is callable from Rust and Python."
# docs.rs build settings: document every feature and pass `--cfg docsrs`
# to rustdoc.
[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]
# Opt-in integrations. There is no `default` entry, so none of these are
# enabled unless a consumer asks for them. Each one activates the optional
# dependency named after `dep:`.
[features]
code = ["dep:tree-sitter"]
markdown = ["dep:pulldown-cmark"]
tiktoken-rs = ["dep:tiktoken-rs"]
# Additionally switches on the `onig` feature of the tokenizers crate, which
# is otherwise pulled in with its default features disabled.
tokenizers = ["dep:tokenizers", "tokenizers/onig"]
# Library target. Declared explicitly because `autolib = false` in [package]
# disables automatic detection of src/lib.rs.
[lib]
name = "text_splitter"
path = "src/lib.rs"
# Integration test targets. `autotests = false` in [package] turns off
# discovery of files under tests/, so each test binary is listed here with
# its name and source path.
[[test]]
name = "code"
path = "tests/code.rs"
[[test]]
name = "markdown"
path = "tests/markdown.rs"
[[test]]
name = "snapshots"
path = "tests/snapshots.rs"
[[test]]
name = "text_splitter"
path = "tests/text_splitter.rs"
# Benchmark target. `harness = false` opts out of the built-in libtest
# harness, so benches/chunk_size.rs has to provide its own entry point
# (presumably through the `divan` dev-dependency - confirm).
[[bench]]
name = "chunk_size"
path = "benches/chunk_size.rs"
harness = false
# Runtime dependencies, sorted by name. Entries marked `optional = true` are
# only compiled when the matching entry in [features] is enabled.
[dependencies]
ahash = "0.8.12"
auto_enums = "0.8"
either = "1.15"
icu_provider = { version = "2", features = ["sync"] }
icu_segmenter = "2"
itertools = "0.14"
memchr = "2.8.0"
pulldown-cmark = { version = "0.13", optional = true, default-features = false }
strum = { version = "0.28", features = ["derive"] }
thiserror = "2.0.18"
tiktoken-rs = { version = "0.11", optional = true }
tokenizers = { version = "0.22", optional = true, default-features = false }
tree-sitter = { version = "0.26", optional = true }
# Dependencies used only by tests and benchmarks, sorted by name.
[dev-dependencies]
dirs = "6.0.0"
divan = "0.1.21"
fake = "5"
insta = { version = "1.47", features = ["glob", "yaml"] }
more-asserts = "0.3"
rayon = "1.11"
# Same crate and version as the optional runtime dependency, but always
# present for tests and with the `onig` and `http` features turned on.
tokenizers = { version = "0.22", features = ["onig", "http"], default-features = false }
tree-sitter-rust = "0.24"
# Clippy lint groups reported as warnings.
[lints.clippy]
cargo = "warn"
pedantic = "warn"

[lints.rust]
# rustc lint groups. They carry priority -1, below the default priority 0 of
# the individual lints further down, so an individual lint setting takes
# precedence over the group that contains it.
future_incompatible = { level = "warn", priority = -1 }
nonstandard_style = { level = "warn", priority = -1 }
rust_2018_compatibility = { level = "warn", priority = -1 }
rust_2018_idioms = { level = "warn", priority = -1 }
rust_2021_compatibility = { level = "warn", priority = -1 }
rust_2024_compatibility = { level = "warn", priority = -1 }
unused = { level = "warn", priority = -1 }
# Individual rustc lints (default priority 0).
missing_debug_implementations = "warn"
missing_docs = "warn"
# Dev-profile override for the "*" package spec, which matches every
# non-workspace-member package (i.e. the dependencies): build them at
# opt-level 3 while this crate's own code keeps the regular dev settings.
[profile.dev.package."*"]
opt-level = 3