taco_format/lib.rs
1/*!
2# TACO (Trajectory and Compressed Observables) Format
3
4TACO is a custom binary format tailored for molecular dynamics (MD) data that:
5
6* Uses delta encoding for positions, velocities, and forces
7* Leverages temporal correlation for inter-frame compression
8* Stores data in tensors with optional lossy or lossless compression
9* Embeds metadata and simulation parameters using an internal schema
10* Supports random frame access and chunked reading
11
12## File Structure
13
14```text
15[Header]
16- Format version
17- Simulation parameters (time step, temperature, etc.)
18- Atom metadata (masses, names, etc.)
19- Compression settings
20
21[Frame Index Table]
22- Byte offsets to each frame for random access
23
24[Data Blocks]
25- Chunked frames:
26 - ΔPosition tensors (Nx3)
27 - ΔVelocity tensors (Nx3)
28 - ΔForce tensors (Nx3)
29 - Box dimensions (if needed)
30```
31
32## Key Features
33
34### Delta Encoding
35TACO stores differences between consecutive frames to reduce data size.
36
37### Tensor Storage
38All data blocks are stored as tensors, enabling SIMD-friendly operations and direct
39use in GPU/ML pipelines.
40
41### Hybrid Compression
42TACO supports both lossless compression for forces and energies, and lossy
43compression with configurable precision for positions and velocities.
44
45### Smart Chunking
46Each chunk contains a configurable number of frames with a mini index and
47compressed blocks of positions, velocities, and forces.
48*/
49
50//! Core library for the TACO format, providing APIs to read, write, and manipulate
51//! molecular dynamics trajectory data.
52
53pub mod cli;
54mod compression;
55mod error;
56mod frame;
57mod header;
58mod io;
59mod metadata;
60mod tensor;
61mod utils;
62
63#[cfg(feature = "python")]
64#[cfg(not(doctest))]
65pub mod python;
66
67pub use crate::compression::{
68 CompressionSettings, PrecisionMode, compress_tensor, decompress_tensor,
69};
70pub use crate::error::{Error, Result};
71pub use crate::frame::{ExtraArray, Frame, FrameData};
72pub use crate::header::Header;
73pub use crate::io::{Reader, Writer};
74pub use crate::metadata::{AtomMetadata, SimulationMetadata};
75
76#[cfg(feature = "async")]
77pub use crate::io::r#async::{AsyncReader, AsyncWriter};
78
/// Version string of this TACO format implementation.
pub const VERSION: &str = "0.1.1";

/// Four-byte signature identifying TACO format files: the ASCII bytes `TACO`.
pub const MAGIC: [u8; 4] = *b"TACO";

/// Default number of frames stored in each chunk.
pub const DEFAULT_CHUNK_SIZE: u32 = 100;