ah_ah_ah/lib.rs
1//! ah-ah-ah
2//!
3//! Offline token counting with pluggable backends and boundary-aware
4//! decomposition.
5//!
6//! Two backends are available:
7//!
//! - **Claude** (default): uses ctoc's 38,360 API-verified Claude 3+ tokens
//!   with greedy longest-match via Aho-Corasick. Counts run ~4% higher than
//!   the real Claude tokenizer, so estimates err on the high (safe) side for
//!   budget enforcement.
11//! - **OpenAI**: Uses `bpe-openai` for exact o200k_base BPE encoding.
12//!
13//! Structured content (markdown tables, etc.) can cause greedy tokenizers to
14//! match tokens spanning structural boundaries. The [`Decomposer`] trait lets
15//! you plug in boundary-aware counting. A [`MarkdownDecomposer`] is included.
16//!
17//! # Quick start
18//!
19//! ```
20//! use ah_ah_ah::{count_tokens, Backend, MarkdownDecomposer};
21//!
22//! // Raw counting (no decomposition).
23//! let report = count_tokens("Hello, world!", None, Backend::Claude, None);
24//! assert!(report.count > 0);
25//!
26//! // With markdown-aware decomposition.
27//! let md = MarkdownDecomposer;
28//! let report = count_tokens("| A | B |\n|---|---|\n| 1 | 2 |", None, Backend::Claude, Some(&md));
29//! assert!(report.count > 0);
30//! ```
31#![deny(unsafe_code)]
32
// Module layout: the public API surface is `backend`, `decompose`, `error`,
// and `tokens`. The per-backend tokenizer implementations (`claude`,
// `openai`) are private — presumably selected at runtime via `Backend`;
// confirm against `tokens::count_tokens`.
pub mod backend;
mod claude; // Claude token-table counting (private implementation detail).
pub mod decompose;
pub mod error;
mod openai; // o200k_base BPE counting (private implementation detail).
pub mod tokens;

// Flattened re-exports so callers can write
// `use ah_ah_ah::{count_tokens, Backend, MarkdownDecomposer}` exactly as
// shown in the crate-level docs above.
pub use backend::Backend;
pub use decompose::{Decomposer, MarkdownDecomposer};
pub use error::{Error, Result};
pub use tokens::{TokenReport, count_tokens};