# text-splitter 0.30.0
#
# Split text into semantic chunks, up to a desired chunk size. Supports calculating length by characters and tokens, and is callable from Rust and Python.
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

# Crate identity and publishing metadata. All `auto*` target discovery is
# switched off, so the library, test, and bench targets are declared
# explicitly in their own tables in this manifest.
[package]
name = "text-splitter"
version = "0.30.0"
authors = ["Ben Brandt <benjamin.j.brandt@gmail.com>"]
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
build = false
categories = ["text-processing"]
edition = "2021"
# Paths left out of the packaged crate: CI and editor config, bindings, docs,
# test fixtures and snapshots, and all YAML files.
exclude = [
    ".github/**",
    ".vscode/**",
    "/bindings/**",
    "/benches/output.txt",
    "/docs/**",
    "/tests/snapshots/**",
    "/tests/text_splitter_snapshots.rs",
    "/tests/inputs/**",
    "/tests/tokenizers/**",
    "*.yml",
    "*.yaml",
]
keywords = ["text", "split", "tokenizer", "nlp", "ai"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/benbrandt/text-splitter"
rust-version = "1.86.0"
description = "Split text into semantic chunks, up to a desired chunk size. Supports calculating length by characters and tokens, and is callable from Rust and Python."

# docs.rs build settings: document every feature and pass `--cfg docsrs`
# to rustdoc.
[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]

# Opt-in integrations. Each feature enables one optional dependency through
# the `dep:` syntax; no `default` feature set is declared here.
[features]
code = ["dep:tree-sitter"]
markdown = ["dep:pulldown-cmark"]
tiktoken-rs = ["dep:tiktoken-rs"]
# Also switches on the `onig` feature of `tokenizers`, which is pulled in
# with its default features disabled.
tokenizers = ["dep:tokenizers", "tokenizers/onig"]

# Library target, declared explicitly since `autolib = false` in [package].
# The crate is imported as `text_splitter` (underscore), while the package
# name uses a hyphen.
[lib]
name = "text_splitter"
path = "src/lib.rs"

# Integration test targets. They are listed one by one because
# `autotests = false` in [package] disables automatic discovery of tests/*.rs.
[[test]]
name = "code"
path = "tests/code.rs"

[[test]]
name = "markdown"
path = "tests/markdown.rs"

[[test]]
name = "snapshots"
path = "tests/snapshots.rs"

[[test]]
name = "text_splitter"
path = "tests/text_splitter.rs"

# Benchmark target. `harness = false` disables the built-in libtest bench
# harness, so the bench file must provide its own entry point.
# NOTE(review): presumably driven by the `divan` dev-dependency — confirm in
# benches/chunk_size.rs.
[[bench]]
name = "chunk_size"
path = "benches/chunk_size.rs"
harness = false

# Runtime dependencies, one inline table per crate. Entries with
# `optional = true` are only built when a feature in [features] enables them.
[dependencies]
ahash = { version = "0.8.12" }
auto_enums = { version = "0.8" }
either = { version = "1.15" }
icu_provider = { version = "2", features = ["sync"] }
icu_segmenter = { version = "2" }
itertools = { version = "0.14" }
memchr = { version = "2.8.0" }
pulldown-cmark = { version = "0.13", optional = true, default-features = false }
strum = { version = "0.28", features = ["derive"] }
thiserror = { version = "2.0.18" }
tiktoken-rs = { version = "0.11", optional = true }
tokenizers = { version = "0.22", optional = true, default-features = false }
tree-sitter = { version = "0.26", optional = true }

# Dependencies used only by tests and benchmarks, one inline table per crate.
[dev-dependencies]
dirs = { version = "6.0.0" }
divan = { version = "0.1.21" }
fake = { version = "5" }
insta = { version = "1.47", features = ["glob", "yaml"] }
more-asserts = { version = "0.3" }
rayon = { version = "1.11" }
# Tests add the `http` feature on top of `onig`; defaults remain disabled.
tokenizers = { version = "0.22", features = ["onig", "http"], default-features = false }
tree-sitter-rust = { version = "0.24" }

# Clippy configuration: report the `cargo` and `pedantic` lint groups as
# warnings.
[lints.clippy]
cargo = "warn"
pedantic = "warn"

# rustc lint configuration; everything is reported as a warning.
# The lint groups carry `priority = -1`, so they are applied before the
# individually listed lints (default priority 0), which then take precedence.
[lints.rust]
missing_debug_implementations = "warn"
missing_docs = "warn"
future_incompatible = { level = "warn", priority = -1 }
nonstandard_style = { level = "warn", priority = -1 }
rust_2018_compatibility = { level = "warn", priority = -1 }
rust_2018_idioms = { level = "warn", priority = -1 }
rust_2021_compatibility = { level = "warn", priority = -1 }
rust_2024_compatibility = { level = "warn", priority = -1 }
unused = { level = "warn", priority = -1 }

# Dev-profile override for the `"*"` package spec: matching packages are
# built at opt-level 3 even in dev builds. Per the Cargo profile docs, `"*"`
# covers all dependencies but not workspace members.
[profile.dev.package."*"]
opt-level = 3