use flate2::Compression;
use flate2::read::{ZlibDecoder, ZlibEncoder};
use hmac::{Hmac, Mac};
use sha2::{Digest, Sha256};
use std::io::Cursor;
use std::io::Read;
use uuid::Uuid;
#[cfg(feature = "wasm")]
use wasm_bindgen::prelude::*;
#[cfg(not(target_arch = "wasm32"))]
mod native_signed_url;
#[cfg(not(target_arch = "wasm32"))]
pub use native_signed_url::{
SignedUrlBuildRequest, SignedUrlParams, VerifyError, generate_signed_url, verify_signed_url,
};
mod diff;
mod diff_multi;
pub use diff::{
DiffResult, MultiDiffLine, MultiDiffResult, MultiDiffStats, MultiDiffVariant,
StructuredDiffLine, StructuredDiffResult, compute_diff, multi_way_diff, multi_way_diff_native,
structured_diff, structured_diff_native,
};
mod patch;
pub use patch::{
apply_patch, batch_diff_versions, compute_sections_modified, compute_sections_modified_native,
create_patch, diff_versions, diff_versions_native, reconstruct_version,
reconstruct_version_native, squash_patches, squash_patches_native,
};
mod cherry_pick;
pub use cherry_pick::cherry_pick_merge;
mod three_way_merge;
pub use three_way_merge::{
Conflict, MergeStats, ThreeWayMergeResult, three_way_merge, three_way_merge_native,
};
#[cfg(feature = "wasm")]
pub mod wasm_bindings;
mod lifecycle;
pub use lifecycle::{
DocumentState, is_editable, is_editable_str, is_terminal, is_terminal_str, is_valid_transition,
is_valid_transition_str, validate_transition,
};
mod consensus;
pub use consensus::{
ApprovalPolicy, ApprovalResult, Review, evaluate_approvals, evaluate_approvals_native,
mark_stale_reviews, mark_stale_reviews_native,
};
pub mod crypto;
pub use crypto::{constant_time_eq_hex, sign_webhook_payload};
pub mod normalize;
pub use normalize::{l2_normalize, l2_normalize_wasm};
pub mod rbac;
pub use rbac::{DocumentRole, OrgRole, Permission, role_has_permission, role_permissions};
pub mod slugify;
pub use slugify::slugify;
mod semantic;
pub use semantic::{
EmbeddedReview, EmbeddedSection, ReviewCluster, SectionAlignment, SectionSimilarity,
SemanticChange, SemanticConsensusResult, SemanticDiffResult, cosine_similarity,
cosine_similarity_wasm, semantic_consensus, semantic_consensus_native, semantic_diff,
semantic_diff_native,
};
pub mod validation;
pub use validation::{
DEFAULT_MAX_CONTENT_BYTES, DEFAULT_MAX_LINE_BYTES, contains_binary_content,
default_max_content_bytes, default_max_line_bytes, detect_format, find_overlong_line,
};
pub mod graph;
pub use graph::{
GraphEdge, GraphNode, GraphStats, KnowledgeGraph, MessageInput, MessageMetadata, TopAgent,
TopTopic, build_graph_native, build_graph_wasm, extract_directives, extract_directives_wasm,
extract_mentions, extract_mentions_wasm, extract_tags, extract_tags_wasm, top_agents_native,
top_agents_wasm, top_topics_native, top_topics_wasm,
};
pub mod similarity;
pub use similarity::{
SimilarityResult, content_similarity, content_similarity_wasm, extract_ngrams,
extract_ngrams_wasm, extract_word_shingles, extract_word_shingles_wasm, fingerprint_similarity,
jaccard_similarity, jaccard_similarity_wasm, min_hash_fingerprint, min_hash_fingerprint_wasm,
rank_by_similarity, rank_by_similarity_wasm, simple_hash, text_similarity_jaccard,
};
pub mod disclosure;
pub use disclosure::code::parse_code_sections;
pub use disclosure::json::{extract_json_keys, parse_json_sections};
pub use disclosure::markdown::{extract_markdown_toc, parse_markdown_sections};
pub use disclosure::search::search_content;
pub use disclosure::text::parse_text_sections;
pub use disclosure::{
DocumentOverview, JsonKey, LineRangeResult, SearchResult, Section, TocEntry,
detect_document_format, generate_overview, get_line_range, get_section, query_json_path,
};
pub mod classify; pub mod tfidf;
pub use tfidf::{fnv1a_hash, tfidf_embed};
pub mod identity;
pub use identity::{body_hash, canonical_payload, keygen, sign_submission, verify_submission};
pub mod bft;
pub use bft::{
ChainedEvent, bft_check, bft_max_faults, bft_quorum, hash_chain_extend, verify_chain,
};
pub mod a2a;
pub use a2a::A2AMessage;
#[cfg(feature = "crdt")]
pub mod crdt;
pub mod canonical;
pub use canonical::{FrontmatterMeta, canonical_frontmatter, canonical_frontmatter_wasm};
pub mod blob;
pub use blob::{BlobNameError, blob_name_validate, hash_blob};
pub mod merkle;
pub use merkle::{
AuditEntry, hash_audit_entry, merkle_root, sign_merkle_root, verify_audit_chain,
verify_merkle_proof, verify_merkle_root_signature,
};
pub mod billing;
pub use billing::{
TierDecision, TierKind, TierLimits, UsageSnapshot, evaluate_tier_limits,
evaluate_tier_limits_wasm, get_tier_limits_wasm, tier_limits,
};
pub mod export_archive;
pub use export_archive::{
ARCHIVE_VERSION, ExportApiKey, ExportArchive, ExportAuditEntry, ExportDocument, ExportVersion,
ExportWebhook, RetentionPolicy, deserialize_export_archive, deserialize_retention_policy,
serialize_export_archive, serialize_retention_policy,
};
pub mod retention;
pub use retention::{
EvictionSet, LawfulBasis, RetentionAction, RetentionRow, RetentionTier, apply_retention,
canonical_policies,
};
/// HMAC keyed with SHA-256; the MAC type used by the signature helpers in this file.
type HmacSha256 = Hmac<Sha256>;
/// Base62 digit alphabet: `0-9`, then `A-Z`, then `a-z`; a digit's value is its index.
const BASE62: &[u8] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

/// Encodes `num` as a base62 string, most significant digit first.
///
/// Zero is rendered as `"0"`; every other value is rendered without leading
/// zeros.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn encode_base62(num: u64) -> String {
    // 62^11 > 2^64, so eleven digits are always enough for a u64.
    let mut digits = [b'0'; 11];
    let mut start = digits.len();
    let mut remaining = num;
    // Fill the buffer from the back so the digits end up in reading order.
    loop {
        start -= 1;
        digits[start] = BASE62[(remaining % 62) as usize];
        remaining /= 62;
        if remaining == 0 {
            break;
        }
    }
    digits[start..].iter().map(|&d| char::from(d)).collect()
}
/// Decodes a base62 string (alphabet `0-9`, `A-Z`, `a-z`) into a `u64`.
///
/// Digits are read most significant first. Any byte outside the alphabet is
/// treated as the digit `0`, and an empty string decodes to `0`.
///
/// Inputs that encode a value larger than `u64::MAX` wrap around modulo 2^64.
/// The arithmetic is explicitly wrapping so the result is the same in every
/// build profile; plain `*`/`+` would panic on such input when overflow
/// checks are enabled and silently wrap otherwise.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn decode_base62(s: &str) -> u64 {
    s.bytes().fold(0u64, |acc, byte| {
        let digit = match byte {
            b'0'..=b'9' => byte - b'0',
            b'A'..=b'Z' => byte - b'A' + 10,
            b'a'..=b'z' => byte - b'a' + 36,
            // Unknown bytes contribute a zero digit (kept for compatibility).
            _ => 0,
        };
        acc.wrapping_mul(62).wrapping_add(u64::from(digit))
    })
}
/// Leading four bytes used to recognise a zstd payload when choosing a decompressor.
const ZSTD_MAGIC: [u8; 4] = [0x28, 0xB5, 0x2F, 0xFD];
/// Compresses a UTF-8 string by handing its bytes to [`zstd_compress`].
///
/// # Errors
/// Returns whatever error message [`zstd_compress`] produces.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn compress(data: &str) -> Result<Vec<u8>, String> {
    let raw = data.as_bytes();
    zstd_compress(raw)
}
/// Decompresses `data` with [`decompress_bytes`] and interprets the result as UTF-8 text.
///
/// # Errors
/// Returns the error from [`decompress_bytes`], or an `invalid UTF-8: …`
/// message when the decompressed bytes are not valid UTF-8.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn decompress(data: &[u8]) -> Result<String, String> {
    decompress_bytes(data)
        .and_then(|raw| String::from_utf8(raw).map_err(|e| format!("invalid UTF-8: {e}")))
}
/// Compresses `data` with zstd at compression level 3.
///
/// # Errors
/// Returns `zstd compression failed: …` when the encoder reports an error.
pub fn zstd_compress(data: &[u8]) -> Result<Vec<u8>, String> {
    const LEVEL: i32 = 3;
    match zstd::encode_all(Cursor::new(data), LEVEL) {
        Ok(compressed) => Ok(compressed),
        Err(e) => Err(format!("zstd compression failed: {e}")),
    }
}
/// Decompresses a zstd payload into its raw bytes.
///
/// # Errors
/// Returns `zstd decompression failed: …` when the decoder reports an error.
pub fn zstd_decompress(data: &[u8]) -> Result<Vec<u8>, String> {
    match zstd::decode_all(Cursor::new(data)) {
        Ok(raw) => Ok(raw),
        Err(e) => Err(format!("zstd decompression failed: {e}")),
    }
}
/// Decompresses `data`, choosing the codec from its leading bytes.
///
/// * Payloads that start with [`ZSTD_MAGIC`] are passed to [`zstd_decompress`].
/// * Payloads that start with a zlib header whose first byte is `0x78` are
///   inflated with `ZlibDecoder` (the legacy format).
///
/// The zlib branch also verifies the header checksum from RFC 1950
/// (`(CMF << 8 | FLG) % 31 == 0`). Looking at the first byte alone would
/// treat any payload that happens to begin with ASCII `x` as zlib and report
/// a misleading "zlib decompression failed" error; a real zlib stream always
/// satisfies the checksum, so valid data is unaffected.
///
/// # Errors
/// Returns the codec's failure message when decoding fails, or
/// `unknown compression codec (first bytes: …)` when neither header matches.
pub fn decompress_bytes(data: &[u8]) -> Result<Vec<u8>, String> {
    if data.starts_with(&ZSTD_MAGIC) {
        return zstd_decompress(data);
    }

    let looks_like_zlib =
        matches!(data, [0x78, flg, ..] if (0x7800_u16 | u16::from(*flg)) % 31 == 0);
    if looks_like_zlib {
        let mut inflated = Vec::new();
        ZlibDecoder::new(data)
            .read_to_end(&mut inflated)
            .map_err(|e| format!("zlib decompression failed: {e}"))?;
        return Ok(inflated);
    }

    // Show at most the first four bytes to help diagnose the unknown format.
    Err(format!(
        "unknown compression codec (first bytes: {:02x?})",
        &data[..data.len().min(4)]
    ))
}
/// Compresses `data` into a zlib stream using flate2's default compression level.
///
/// # Errors
/// Returns `zlib compression failed: …` when reading from the encoder fails.
pub fn zlib_compress(data: &[u8]) -> Result<Vec<u8>, String> {
    let mut compressed = Vec::new();
    let mut source = ZlibEncoder::new(data, Compression::default());
    match source.read_to_end(&mut compressed) {
        Ok(_) => Ok(compressed),
        Err(e) => Err(format!("zlib compression failed: {e}")),
    }
}
/// Generates a random 8-character base62 identifier.
///
/// The first 16 hex digits of a fresh v4 UUID are parsed as a `u64`, encoded
/// with [`encode_base62`], and reduced to exactly eight characters
/// (left-padded with `'0'` when the encoding is shorter).
///
/// The *last* eight base62 digits are kept rather than the first eight. A
/// `u64` encodes to at most eleven digits and the leading digit of an
/// eleven-digit value can only be `1`..`L`, so keeping the prefix would bias
/// the first character and reduce the number of distinct IDs. The low-order
/// digits are close to uniformly distributed.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn generate_id() -> String {
    let hex = Uuid::new_v4().simple().to_string();
    // `simple()` yields 32 ASCII hex digits, so slicing the first 16 is safe.
    let num = u64::from_str_radix(&hex[..16], 16).unwrap_or(0);
    let base62 = encode_base62(num);
    let tail = &base62[base62.len().saturating_sub(8)..];
    format!("{tail:0>8}")
}
/// Returns the SHA-256 digest of `data`'s UTF-8 bytes as a lowercase hex string.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn hash_content(data: &str) -> String {
    let digest = Sha256::digest(data.as_bytes());
    hex::encode(digest)
}
/// Estimates a token count as one token per four bytes of `text`, rounded up.
///
/// The length is measured in UTF-8 bytes, not characters. The result
/// saturates at `u32::MAX` for inputs too large to represent.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn calculate_tokens(text: &str) -> u32 {
    let bytes = text.len();
    // Ceiling division without going through floating point.
    let tokens = bytes / 4 + usize::from(bytes % 4 != 0);
    u32::try_from(tokens).unwrap_or(u32::MAX)
}
/// Returns `original_size / compressed_size`, rounded to two decimal places.
///
/// A `compressed_size` of zero yields `1.0` instead of dividing by zero.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn calculate_compression_ratio(original_size: u32, compressed_size: u32) -> f64 {
    match compressed_size {
        0 => 1.0,
        size => {
            let hundredths = (f64::from(original_size) / f64::from(size) * 100.0).round();
            hundredths / 100.0
        }
    }
}
/// Computes a signature for the given access parameters with the default
/// length of 16 hex characters.
///
/// Delegates to [`compute_signature_with_length`].
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn compute_signature(
    slug: &str,
    agent_id: &str,
    conversation_id: &str,
    expires_at: f64,
    secret: &str,
) -> String {
    const DEFAULT_SIG_LENGTH: usize = 16;
    compute_signature_with_length(
        slug,
        agent_id,
        conversation_id,
        expires_at,
        secret,
        DEFAULT_SIG_LENGTH,
    )
}
/// Computes an HMAC-SHA256 signature over `slug:agent_id:conversation_id:expires_at`
/// and returns its first `sig_length` hex characters.
///
/// `expires_at` is converted to an integer with an `as u64` cast before it is
/// formatted into the payload. `sig_length` is capped at 64, the length of
/// the full hex digest. An empty string is returned if the MAC cannot be
/// constructed from `secret`.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn compute_signature_with_length(
    slug: &str,
    agent_id: &str,
    conversation_id: &str,
    expires_at: f64,
    secret: &str,
    sig_length: usize,
) -> String {
    let mut mac = match HmacSha256::new_from_slice(secret.as_bytes()) {
        Ok(mac) => mac,
        Err(_) => return String::new(),
    };

    let expiry = expires_at as u64;
    let message = format!("{slug}:{agent_id}:{conversation_id}:{expiry}");
    mac.update(message.as_bytes());

    let mut signature = hex::encode(mac.finalize().into_bytes());
    // The digest is 64 ASCII hex characters, so truncating never splits a character.
    signature.truncate(sig_length.min(64));
    signature
}
/// Computes an organization-scoped signature with the default length of
/// 32 hex characters.
///
/// Delegates to [`compute_org_signature_with_length`].
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn compute_org_signature(
    slug: &str,
    agent_id: &str,
    conversation_id: &str,
    org_id: &str,
    expires_at: f64,
    secret: &str,
) -> String {
    const DEFAULT_ORG_SIG_LENGTH: usize = 32;
    compute_org_signature_with_length(
        slug,
        agent_id,
        conversation_id,
        org_id,
        expires_at,
        secret,
        DEFAULT_ORG_SIG_LENGTH,
    )
}
/// Computes an HMAC-SHA256 signature over
/// `slug:agent_id:conversation_id:org_id:expires_at` and returns its first
/// `sig_length` hex characters.
///
/// `expires_at` is converted to an integer with an `as u64` cast before it is
/// formatted into the payload. `sig_length` is capped at 64, the length of
/// the full hex digest. An empty string is returned if the MAC cannot be
/// constructed from `secret`.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn compute_org_signature_with_length(
    slug: &str,
    agent_id: &str,
    conversation_id: &str,
    org_id: &str,
    expires_at: f64,
    secret: &str,
    sig_length: usize,
) -> String {
    let mut mac = match HmacSha256::new_from_slice(secret.as_bytes()) {
        Ok(mac) => mac,
        Err(_) => return String::new(),
    };

    let expiry = expires_at as u64;
    let message = format!("{slug}:{agent_id}:{conversation_id}:{org_id}:{expiry}");
    mac.update(message.as_bytes());

    let mut signature = hex::encode(mac.finalize().into_bytes());
    // The digest is 64 ASCII hex characters, so truncating never splits a character.
    signature.truncate(sig_length.min(64));
    signature
}
/// Derives a signing key from `api_key`: the hex-encoded HMAC-SHA256 of the
/// fixed label `llmtxt-signing`, keyed with the API key's bytes.
///
/// Returns an empty string if the MAC cannot be constructed from `api_key`.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn derive_signing_key(api_key: &str) -> String {
    match HmacSha256::new_from_slice(api_key.as_bytes()) {
        Ok(mut mac) => {
            mac.update(b"llmtxt-signing");
            hex::encode(mac.finalize().into_bytes())
        }
        Err(_) => String::new(),
    }
}
/// Reports whether `expires_at_ms` (milliseconds since the Unix epoch) lies in the past.
///
/// A value of `0.0` means "never expires" and always yields `false`; the
/// clock is not consulted in that case.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn is_expired(expires_at_ms: f64) -> bool {
    expires_at_ms != 0.0 && current_time_ms() > expires_at_ms
}
/// Current time in milliseconds as reported by `js_sys::Date::now()`.
/// Compiled only when targeting `wasm32`.
#[cfg(target_arch = "wasm32")]
fn current_time_ms() -> f64 {
js_sys::Date::now()
}
/// Current wall-clock time in whole milliseconds since the Unix epoch.
/// Compiled for every target except `wasm32`.
///
/// Falls back to `0.0` if the system clock reports a time before the epoch.
#[cfg(not(target_arch = "wasm32"))]
fn current_time_ms() -> f64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis() as f64,
        Err(_) => 0.0,
    }
}
// Unit tests for the helpers defined in this file, plus a few checks of the
// signed-URL functions re-exported from `native_signed_url`.
// NOTE(review): `generate_signed_url` / `verify_signed_url` are only
// re-exported when `target_arch` is not `wasm32`, so the signed-URL tests
// below presumably cannot build for wasm32 — confirm.
#[cfg(test)]
mod tests {
use super::*;
// Known-answer vectors for `encode_base62`, including the zero case.
#[test]
fn test_base62_encode() {
assert_eq!(encode_base62(0), "0");
assert_eq!(encode_base62(1), "1");
assert_eq!(encode_base62(61), "z");
assert_eq!(encode_base62(62), "10");
assert_eq!(encode_base62(3844), "100");
}
// Known-answer vectors for `decode_base62`.
#[test]
fn test_base62_decode() {
assert_eq!(decode_base62("0"), 0);
assert_eq!(decode_base62("z"), 61);
assert_eq!(decode_base62("10"), 62);
assert_eq!(decode_base62("100"), 3844);
}
// Encoding followed by decoding must return the original number.
#[test]
fn test_base62_roundtrip() {
for n in [0, 1, 42, 61, 62, 100, 3844, 999_999, u64::MAX / 2] {
assert_eq!(
decode_base62(&encode_base62(n)),
n,
"roundtrip failed for {n}"
);
}
}
// SHA-256 hex digests of "hello" and of the empty string.
#[test]
fn test_hash_content() {
assert_eq!(
hash_content("hello"),
"2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
);
assert_eq!(
hash_content(""),
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
);
}
// Token estimate is the byte length divided by four, rounded up.
#[test]
fn test_calculate_tokens() {
assert_eq!(calculate_tokens("Hello, world!"), 4);
assert_eq!(calculate_tokens(""), 0);
assert_eq!(calculate_tokens("a"), 1);
assert_eq!(calculate_tokens("1234"), 1);
assert_eq!(calculate_tokens("12345"), 2);
}
// Ratio is original/compressed; a zero compressed size yields 1.0.
#[test]
fn test_compression_ratio() {
assert_eq!(calculate_compression_ratio(1000, 400), 2.5);
assert_eq!(calculate_compression_ratio(100, 100), 1.0);
assert_eq!(calculate_compression_ratio(100, 0), 1.0);
assert_eq!(calculate_compression_ratio(500, 200), 2.5);
}
// `compress` must emit a payload starting with the zstd magic and round-trip through `decompress`.
#[test]
fn test_compress_decompress_roundtrip() {
let input = "Hello, world! This is a test of the llmtxt compression.";
let compressed = compress(input).expect("compress should succeed");
assert_eq!(
&compressed[..4],
&ZSTD_MAGIC,
"compress() should produce zstd output"
);
let decompressed = decompress(&compressed).expect("decompress should succeed");
assert_eq!(decompressed, input);
}
// The empty string must survive a compress/decompress round trip.
#[test]
fn test_compress_empty() {
let compressed = compress("").expect("compress empty should succeed");
let decompressed = decompress(&compressed).expect("decompress should succeed");
assert_eq!(decompressed, "");
}
// Byte-level zstd round trip through `zstd_compress` / `zstd_decompress`.
#[test]
fn test_zstd_roundtrip_bytes() {
let data = b"zstd binary roundtrip test data 12345";
let compressed = zstd_compress(data).expect("zstd_compress should succeed");
assert_eq!(&compressed[..4], &ZSTD_MAGIC);
let decompressed = zstd_decompress(&compressed).expect("zstd_decompress should succeed");
assert_eq!(decompressed, data);
}
// Despite the name, this only requires zstd output to be no more than 50 bytes
// larger than zlib output on highly repetitive text.
#[test]
fn test_zstd_better_ratio_than_zlib_on_repetitive_text() {
let input = "the quick brown fox jumps over the lazy dog. ".repeat(200);
let zstd_out = zstd_compress(input.as_bytes()).expect("zstd compress");
let zlib_out = zlib_compress(input.as_bytes()).expect("zlib compress");
assert!(
zstd_out.len() <= zlib_out.len() + 50, "zstd ({}) should be at most marginally larger than zlib ({}) on repetitive text",
zstd_out.len(),
zlib_out.len()
);
}
// Payloads written with zlib must still be readable through `decompress`.
#[test]
fn test_backward_compat_zlib_still_decompresses() {
let input = "legacy document stored with zlib compression";
let zlib_bytes = zlib_compress(input.as_bytes()).expect("zlib compress legacy");
assert_eq!(zlib_bytes[0], 0x78, "zlib output should start with 0x78");
let result = decompress(&zlib_bytes).expect("decompress should handle legacy zlib");
assert_eq!(result, input);
}
// `decompress_bytes` takes the zstd branch for zstd input.
#[test]
fn test_decompress_bytes_zstd() {
let input = b"test payload for decompress_bytes zstd path";
let compressed = zstd_compress(input).expect("zstd compress");
let out = decompress_bytes(&compressed).expect("decompress_bytes zstd");
assert_eq!(out, input);
}
// `decompress_bytes` takes the zlib branch for zlib input.
#[test]
fn test_decompress_bytes_zlib() {
let input = b"test payload for decompress_bytes zlib legacy path";
let compressed = zlib_compress(input).expect("zlib compress");
let out = decompress_bytes(&compressed).expect("decompress_bytes zlib");
assert_eq!(out, input);
}
// Input matching neither header must produce an error.
#[test]
fn test_decompress_bytes_unknown_codec_errors() {
let garbage = b"\x00\x01\x02\x03invalid";
let result = decompress_bytes(garbage);
assert!(result.is_err(), "unknown codec should return Err");
}
// Known-answer vector for the default 16-character signature.
#[test]
fn test_compute_signature() {
let sig = compute_signature(
"xK9mP2nQ",
"test-agent",
"conv_123",
1_700_000_000_000.0,
"test-secret",
);
assert_eq!(sig, "650eb9dd6c396a45");
}
// A longer signature must extend the shorter one (same digest, longer prefix).
#[test]
fn test_compute_signature_with_length() {
let sig16 = compute_signature_with_length(
"xK9mP2nQ",
"test-agent",
"conv_123",
1_700_000_000_000.0,
"test-secret",
16,
);
let sig32 = compute_signature_with_length(
"xK9mP2nQ",
"test-agent",
"conv_123",
1_700_000_000_000.0,
"test-secret",
32,
);
assert_eq!(sig16, "650eb9dd6c396a45");
assert_eq!(sig16.len(), 16);
assert_eq!(sig32.len(), 32);
assert!(sig32.starts_with(&sig16));
}
// The generated URL must place the path prefix before the slug and carry a `sig=` parameter.
#[test]
fn test_generate_signed_url_with_path_prefix() {
let url = generate_signed_url(&SignedUrlBuildRequest {
base_url: "https://api.example.com",
path_prefix: "attachments",
slug: "xK9mP2nQ",
agent_id: "test-agent",
conversation_id: "conv_123",
expires_at: 1_700_000_000_000,
secret: "test-secret",
sig_length: 32,
})
.expect("signed URL should build");
assert!(url.starts_with("https://api.example.com/attachments/xK9mP2nQ?"));
assert!(url.contains("sig="));
}
// Known-answer vector for `derive_signing_key`.
#[test]
fn test_derive_signing_key() {
let key = derive_signing_key("sk_live_abc123");
assert_eq!(
key,
"fb5f79640e9ed141d4949ccb36110c7aaf829c56d9870942dd77219a57575372"
);
}
// IDs are exactly eight ASCII alphanumeric characters.
#[test]
fn test_generate_id_format() {
let id = generate_id();
assert_eq!(id.len(), 8);
assert!(id.chars().all(|c| c.is_ascii_alphanumeric()));
}
// One hundred generated IDs must all differ.
#[test]
fn test_generate_id_uniqueness() {
let ids: Vec<String> = (0..100).map(|_| generate_id()).collect();
let unique: std::collections::HashSet<&String> = ids.iter().collect();
assert_eq!(unique.len(), 100, "generated IDs should be unique");
}
// The org signature is 32 characters and differs from the non-org signature
// computed over the same remaining inputs.
#[test]
fn test_compute_org_signature() {
let sig = compute_org_signature(
"xK9mP2nQ",
"test-agent",
"conv_123",
"org_456",
1_700_000_000_000.0,
"test-secret",
);
assert_eq!(sig.len(), 32);
let non_org_sig = compute_signature_with_length(
"xK9mP2nQ",
"test-agent",
"conv_123",
1_700_000_000_000.0,
"test-secret",
32,
);
assert_ne!(sig, non_org_sig);
}
// A longer org signature must extend the shorter one.
#[test]
fn test_compute_org_signature_with_length() {
let sig16 = compute_org_signature_with_length(
"xK9mP2nQ",
"test-agent",
"conv_123",
"org_456",
1_700_000_000_000.0,
"test-secret",
16,
);
let sig32 = compute_org_signature_with_length(
"xK9mP2nQ",
"test-agent",
"conv_123",
"org_456",
1_700_000_000_000.0,
"test-secret",
32,
);
assert_eq!(sig16.len(), 16);
assert_eq!(sig32.len(), 32);
assert!(sig32.starts_with(&sig16));
}
// Zero means "never expires"; 1 ms after the epoch is in the past; f64::MAX is in the future.
#[test]
fn test_is_expired() {
assert!(!is_expired(0.0));
assert!(is_expired(1.0));
assert!(!is_expired(f64::MAX));
}
// A URL built with a 32-character signature and a path prefix must verify
// and yield the original parameters.
#[test]
fn test_verify_signed_url_accepts_32_char_signature_and_path_prefix() {
let url = generate_signed_url(&SignedUrlBuildRequest {
base_url: "https://api.example.com",
path_prefix: "attachments",
slug: "xK9mP2nQ",
agent_id: "test-agent",
conversation_id: "conv_123",
expires_at: u64::MAX / 2,
secret: "test-secret",
sig_length: 32,
})
.expect("signed URL should build");
let params = verify_signed_url(&url, "test-secret").expect("signed URL should verify");
assert_eq!(params.slug, "xK9mP2nQ");
assert_eq!(params.agent_id, "test-agent");
assert_eq!(params.conversation_id, "conv_123");
}
// A URL built with `expires_at: 0` must still verify (zero is treated as no expiry).
#[test]
fn test_verify_signed_url_exp_zero_never_expires() {
let url = generate_signed_url(&SignedUrlBuildRequest {
base_url: "https://api.example.com",
path_prefix: "attachments",
slug: "xK9mP2nQ",
agent_id: "test-agent",
conversation_id: "conv_123",
expires_at: 0,
secret: "test-secret",
sig_length: 32,
})
.expect("signed URL should build");
let params = verify_signed_url(&url, "test-secret").expect("exp=0 should never expire");
assert_eq!(params.slug, "xK9mP2nQ");
assert_eq!(params.expires_at, 0);
}
}