protocrap/
lib.rs

1//! # Protocrap
2//!
3//! A small, efficient, and flexible protobuf implementation for Rust.
4//!
5//! ## Overview
6//!
7//! Protocrap takes a different approach than other protobuf libraries. Instead of
8//! generating parsing and serialization code for each message type, it uses a single
9//! table-driven implementation. Code generation produces only struct definitions with
10//! accessors and static lookup tables.
11//!
12//! This design yields:
13//! - **Small binaries**: No code duplication across message types
14//! - **Fast compilation**: No macro expansion or monomorphization explosion
15//! - **Flexible memory**: Arena allocation with custom allocator support
16//! - **Universal streaming**: Push-based API works with sync and async
17//!
18//! ## Quick Start
19//!
20//! ### Code Generation
21//!
22//! Generate Rust code from `.proto` files using `protocrap-codegen`:
23//!
24//! ```bash
25//! # Create descriptor set with protoc
26//! protoc --include_imports --descriptor_set_out=types.bin my_types.proto
27//!
28//! # Generate Rust code
29//! protocrap-codegen types.bin src/types.pc.rs
30//! ```
31//!
32//! Include the generated code in your crate:
33//!
34//! ```ignore
35//! use protocrap;
36//! include!("types.pc.rs");
37//! ```
38//!
39//! ### Encoding Messages
40//!
41//! ```
42//! use protocrap::{ProtobufRef, ProtobufMut, arena::Arena};
43//! use protocrap::google::protobuf::FileDescriptorProto;
44//! use allocator_api2::alloc::Global;
45//!
46//! let mut arena = Arena::new(&Global);
47//! let mut msg = FileDescriptorProto::ProtoType::default();
48//! msg.set_name("example.proto", &mut arena);
49//! msg.set_package("my.package", &mut arena);
50//!
51//! // Encode to a Vec<u8>
52//! let bytes = msg.encode_vec::<32>().unwrap();
53//!
54//! // Or encode to a fixed buffer
55//! let mut buffer = [0u8; 1024];
56//! let encoded = msg.encode_flat::<32>(&mut buffer).unwrap();
57//! ```
58//!
59//! The const generic (`::<32>`) specifies the maximum message nesting depth.
60//!
61//! ### Decoding Messages
62//!
63//! ```
64//! use protocrap::{ProtobufRef, ProtobufMut, arena::Arena};
65//! use protocrap::google::protobuf::FileDescriptorProto;
66//! use allocator_api2::alloc::Global;
67//!
68//! // First encode a message to get some bytes
69//! let mut arena = Arena::new(&Global);
70//! let mut original = FileDescriptorProto::ProtoType::default();
71//! original.set_name("example.proto", &mut arena);
72//! let bytes = original.encode_vec::<32>().unwrap();
73//!
74//! // Decode from a byte slice
75//! let mut decoded = FileDescriptorProto::ProtoType::default();
//! assert!(decoded.decode_flat::<32>(&mut arena, &bytes));
77//! assert_eq!(decoded.name(), "example.proto");
78//! ```
79//!
80//! ### Runtime Reflection
81//!
82//! Inspect messages dynamically without compile-time knowledge of the schema:
83//!
84//! ```
85//! use protocrap::{ProtobufRef, ProtobufMut, arena::Arena};
86//! use protocrap::google::protobuf::{FileDescriptorProto, DescriptorProto};
87//! use protocrap::descriptor_pool::DescriptorPool;
88//! use allocator_api2::alloc::Global;
89//!
90//! // Build descriptor pool from the library's own file descriptor
91//! let mut pool = DescriptorPool::new(&Global);
92//! let file_desc = FileDescriptorProto::ProtoType::file_descriptor();
93//! pool.add_file(file_desc);
94//!
95//! // Encode a real DescriptorProto (the descriptor for DescriptorProto itself)
96//! let descriptor = DescriptorProto::ProtoType::descriptor_proto();
97//! let bytes = descriptor.encode_vec::<32>().unwrap();
98//!
99//! // Decode dynamically using the pool
100//! let mut arena = Arena::new(&Global);
101//! let mut msg = pool.create_message(
102//!     "google.protobuf.DescriptorProto",
103//!     &mut arena,
104//! ).unwrap();
//! assert!(msg.decode_flat::<32>(&mut arena, &bytes));
106//!
107//! // Access fields dynamically
108//! for field in msg.descriptor().field() {
109//!     if let Some(value) = msg.get_field(field.as_ref()) {
110//!         println!("{}: {:?}", field.name(), value);
111//!     }
112//! }
113//! ```
114//!
115//! ## Architecture
116//!
117//! ### Arena Allocation
118//!
119//! All variable-sized data (strings, bytes, repeated fields, sub-messages) is allocated
120//! in an [`arena::Arena`]. This provides:
121//!
122//! - **Speed**: Allocation is a pointer bump in the common case
123//! - **Bulk deallocation**: Drop the arena to free all messages at once
124//! - **Custom allocators**: Pass any `&dyn Allocator` to control memory placement
125//!
126//! ```
127//! use protocrap::arena::Arena;
128//! use allocator_api2::alloc::Global;
129//!
130//! let mut arena = Arena::new(&Global);
131//! // All allocations during decode/set operations use this arena
132//! // When arena drops, all memory is freed
133//! ```
134//!
135//! ### Push-Based Streaming
136//!
137//! The parser uses a push model: you provide data chunks, it returns updated state.
138//! This signature `(state, buffer) -> updated_state` enables:
139//!
140//! - Single implementation for sync and async
141//! - No callback traits or complex lifetime requirements
142//! - Works in embedded, WASM, and any async runtime
143//!
144//! ## Generated Code Structure
145//!
146//! For each protobuf message, the codegen produces a module containing:
147//!
148//! - `ProtoType`: The message struct with `#[repr(C)]` layout
149//! - Accessor methods following protobuf conventions
150//!
151//! Field accessors follow this pattern:
152//!
153//! | Proto Type | Getter | Setter | Other |
154//! |------------|--------|--------|-------|
155//! | Scalar | `field() -> T` | `set_field(T)` | `has_field()`, `clear_field()` |
156//! | String/Bytes | `field() -> &str`/`&[u8]` | `set_field(&str, &mut Arena)` | `has_field()`, `clear_field()` |
157//! | Message | `field() -> Option<&M>` | `field_mut() -> &mut M` | `has_field()`, `clear_field()` |
158//! | Repeated | `field() -> &[T]` | `field_mut() -> &mut RepeatedField<T>` | `add_field(...)` |
159//!
160//! ## Modules
161//!
162//! - [`arena`]: Arena allocator for message data
163//! - [`containers`]: Collection types ([`containers::RepeatedField`], [`containers::String`], [`containers::Bytes`])
164//! - [`reflection`]: Runtime message inspection and dynamic decoding
165//! - [`TypedMessage`]: Wrapper for repeated message elements
166//!
167//! ## Feature Flags
168//!
169//! - `std` (default): Enables `std::io` integration, `Vec`-based encoding
170//! - `serde_support` (default): Enables serde serialization via reflection
171//! - `nightly`: Use nightly Rust features for slightly better codegen (branch hints)
172//!
173//! For `no_std` environments, disable default features:
174//!
175//! ```toml
176//! [dependencies]
177//! protocrap = { version = "0.1", default-features = false }
178//! ```
179//!
180//! ## Restrictions
181//!
182//! Protocrap is designed for "sane" schemas:
183//!
184//! - Up to 256 optional fields per message
185//! - Struct sizes up to 64KB
186//! - Field numbers 1-2047 (1 or 2 byte wire tags)
187//! - Field numbers should be mostly consecutive
188//!
189//! The following are intentionally unsupported:
190//!
191//! - **Unknown fields**: Discarded during decoding (no round-trip preservation)
192//! - **Extensions**: Proto2 extensions are silently dropped
193//! - **Maps**: Decoded as repeated key-value pairs
194//! - **Proto3 zero-value omission**: All set fields are serialized
195
196#![cfg_attr(feature = "nightly", feature(likely_unlikely, allocator_api))]
197#![cfg_attr(not(feature = "std"), no_std)]
198
199pub mod arena;
200pub(crate) mod base;
201pub mod containers;
202pub mod reflection;
203
204// Re-export user-facing types at crate root
205pub use base::TypedMessage;
206#[cfg(feature = "std")]
207pub mod descriptor_pool;
208#[doc(hidden)]
209#[cfg(feature = "std")]
210pub mod test_utils;
211
212// Re-export Allocator trait - use core on nightly, polyfill on stable
213#[cfg(not(feature = "nightly"))]
214pub use allocator_api2::alloc::Allocator;
215#[cfg(feature = "nightly")]
216pub use core::alloc::Allocator;
217
218// Internal modules - only accessible within the crate
219// Types needed by generated code are re-exported via generated_code_only
220pub(crate) mod decoding;
221pub(crate) mod encoding;
222pub(crate) mod tables;
223pub(crate) mod utils;
224pub(crate) mod wire;
225
226/// Internal types for generated code. **Do not use directly.**
227#[doc(hidden)]
228pub mod generated_code_only;
229
230use crate as protocrap;
231include!("descriptor.pc.rs");
232
233#[cfg(feature = "serde_support")]
234pub mod serde;
235
236#[cfg(feature = "serde_support")]
237pub mod proto_json;
238
239#[cfg(feature = "codegen")]
240pub mod codegen;
241
/// Errors that can occur during protobuf encoding/decoding operations.
///
/// The type parameter `E` is the payload carried by [`Error::Io`]; it defaults
/// to `()` for operations that have no underlying I/O error type.
#[derive(Debug)]
pub enum Error<E = ()> {
    /// The encoder gave up on the message tree (reported by the encode
    /// methods when the resumable encoder returns no result).
    TreeTooDeep,
    /// The caller-supplied buffer could not hold the complete encoding.
    BufferTooSmall,
    /// The decoder rejected the input bytes.
    InvalidData,
    /// A message type could not be found.
    // NOTE(review): not produced anywhere in this file - presumably used by
    // the descriptor pool lookup; confirm against its callers.
    MessageNotFound,
    /// An error reported by the underlying data provider or reader.
    Io(E),
}

impl<E: core::fmt::Debug> core::fmt::Display for Error<E> {
    /// Writes a short lowercase description of the error. The [`Error::Io`]
    /// payload is rendered with its `Debug` representation, since `E` is not
    /// required to implement `Display`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Error::TreeTooDeep => f.write_str("message tree too deep"),
            Error::BufferTooSmall => f.write_str("buffer too small"),
            Error::InvalidData => f.write_str("invalid protobuf data"),
            Error::MessageNotFound => f.write_str("message type not found"),
            Error::Io(e) => write!(f, "{:?}", e),
        }
    }
}

impl<E: core::fmt::Debug> core::error::Error for Error<E> {}

/// Wraps any value in [`Error::Io`], so `?` can lift a provider/reader error
/// of type `E` into `Error<E>`.
impl<E> From<E> for Error<E> {
    fn from(e: E) -> Self {
        Error::Io(e)
    }
}
271
272/// Read-only protobuf operations (encode, serialize, inspect).
273pub trait ProtobufRef<'pool> {
274    /// Get a dynamic view of this message for reflection.
275    fn as_dyn<'msg>(&'msg self) -> reflection::DynamicMessageRef<'pool, 'msg>;
276
277    /// Get the message's descriptor (schema metadata).
278    fn descriptor(&self) -> &'pool crate::google::protobuf::DescriptorProto::ProtoType {
279        self.as_dyn().descriptor()
280    }
281
282    /// Encode to a fixed buffer. Returns the encoded slice or an error.
283    fn encode_flat<'a, const STACK_DEPTH: usize>(
284        &self,
285        buffer: &'a mut [u8],
286    ) -> Result<&'a [u8], Error> {
287        let mut resumeable_encode = encoding::ResumeableEncode::<STACK_DEPTH>::new(self.as_dyn());
288        let encoding::ResumeResult::Done(buf) = resumeable_encode
289            .resume_encode(buffer)
290            .ok_or(Error::TreeTooDeep)?
291        else {
292            return Err(Error::BufferTooSmall);
293        };
294        Ok(buf)
295    }
296
297    /// Encode to a new Vec, growing as needed.
298    #[cfg(feature = "std")]
299    fn encode_vec<const STACK_DEPTH: usize>(&self) -> Result<Vec<u8>, Error> {
300        let mut buffer = vec![0u8; 1024];
301        let mut stack = Vec::new();
302        let mut resumeable_encode = encoding::ResumeableEncode::<STACK_DEPTH>::new(self.as_dyn());
303        loop {
304            match resumeable_encode
305                .resume_encode(&mut buffer)
306                .ok_or(Error::TreeTooDeep)?
307            {
308                encoding::ResumeResult::Done(buf) => {
309                    let len = buf.len();
310                    let end = buffer.len();
311                    let start = end - len;
312                    buffer.copy_within(start..end, 0);
313                    buffer.truncate(len);
314                    break;
315                }
316                encoding::ResumeResult::NeedsMoreBuffer => {
317                    let len = buffer.len().min(1024 * 1024);
318                    stack.push(core::mem::take(&mut buffer));
319                    buffer = vec![0u8; len * 2];
320                }
321            };
322        }
323        while let Some(old_buffer) = stack.pop() {
324            buffer.extend_from_slice(&old_buffer);
325        }
326        Ok(buffer)
327    }
328}
329
330/// Mutable protobuf operations (decode, deserialize).
331pub trait ProtobufMut<'pool>: ProtobufRef<'pool> {
332    /// Get a mutable dynamic view of this message.
333    fn as_dyn_mut<'msg>(&'msg mut self) -> reflection::DynamicMessage<'pool, 'msg>;
334
335    /// Decode from a byte slice. Returns true on success.
336    #[must_use]
337    fn decode_flat<const STACK_DEPTH: usize>(
338        &mut self,
339        arena: &mut crate::arena::Arena,
340        buf: &[u8],
341    ) -> bool {
342        let mut decoder =
343            decoding::ResumeableDecode::<STACK_DEPTH>::new(self.as_dyn_mut(), isize::MAX);
344        if !decoder.resume(buf, arena) {
345            self.as_dyn_mut().clear();
346            return false;
347        }
348        if !decoder.finish(arena) {
349            self.as_dyn_mut().clear();
350            return false;
351        }
352        true
353    }
354
355    fn decode<'a, E>(
356        &mut self,
357        arena: &mut crate::arena::Arena,
358        provider: &'a mut impl FnMut() -> Result<Option<&'a [u8]>, E>,
359    ) -> Result<(), Error<E>> {
360        let mut decoder = decoding::ResumeableDecode::<32>::new(self.as_dyn_mut(), isize::MAX);
361        loop {
362            let Some(buffer) = provider().map_err(Error::Io)? else {
363                break;
364            };
365            if !decoder.resume(buffer, arena) {
366                self.as_dyn_mut().clear();
367                return Err(Error::InvalidData);
368            }
369        }
370        if !decoder.finish(arena) {
371            self.as_dyn_mut().clear();
372            return Err(Error::InvalidData);
373        }
374        Ok(())
375    }
376
377    fn async_decode<'a, E, F>(
378        &'a mut self,
379        arena: &mut crate::arena::Arena,
380        provider: &'a mut impl FnMut() -> F,
381    ) -> impl core::future::Future<Output = Result<(), Error<E>>>
382    where
383        F: core::future::Future<Output = Result<Option<&'a [u8]>, E>> + 'a,
384    {
385        async move {
386            let mut decoder = decoding::ResumeableDecode::<32>::new(self.as_dyn_mut(), isize::MAX);
387            loop {
388                let Some(buffer) = provider().await.map_err(Error::Io)? else {
389                    break;
390                };
391                if !decoder.resume(buffer, arena) {
392                    self.as_dyn_mut().clear();
393                    return Err(Error::InvalidData);
394                }
395            }
396            if !decoder.finish(arena) {
397                self.as_dyn_mut().clear();
398                return Err(Error::InvalidData);
399            }
400            Ok(())
401        }
402    }
403
404    #[cfg(feature = "std")]
405    fn decode_from_bufread<const STACK_DEPTH: usize>(
406        &mut self,
407        arena: &mut crate::arena::Arena,
408        reader: &mut impl std::io::BufRead,
409    ) -> Result<(), Error<std::io::Error>> {
410        let mut decoder =
411            decoding::ResumeableDecode::<STACK_DEPTH>::new(self.as_dyn_mut(), isize::MAX);
412        loop {
413            let buffer = reader.fill_buf().map_err(Error::Io)?;
414            let len = buffer.len();
415            if len == 0 {
416                break;
417            }
418            if !decoder.resume(buffer, arena) {
419                self.as_dyn_mut().clear();
420                return Err(Error::InvalidData);
421            }
422            reader.consume(len);
423        }
424        if !decoder.finish(arena) {
425            self.as_dyn_mut().clear();
426            return Err(Error::InvalidData);
427        }
428        Ok(())
429    }
430
431    #[cfg(feature = "std")]
432    fn decode_from_read<const STACK_DEPTH: usize>(
433        &mut self,
434        arena: &mut crate::arena::Arena,
435        reader: &mut impl std::io::Read,
436    ) -> Result<(), Error<std::io::Error>> {
437        let mut buf_reader = std::io::BufReader::new(reader);
438        self.decode_from_bufread::<STACK_DEPTH>(arena, &mut buf_reader)
439    }
440
441    #[cfg(feature = "std")]
442    fn decode_from_async_bufread<'a, const STACK_DEPTH: usize>(
443        &'a mut self,
444        arena: &'a mut crate::arena::Arena<'a>,
445        reader: &mut (impl futures::io::AsyncBufRead + Unpin),
446    ) -> impl core::future::Future<Output = Result<(), Error<futures::io::Error>>> {
447        use futures::io::AsyncBufReadExt;
448
449        async move {
450            let mut decoder =
451                decoding::ResumeableDecode::<STACK_DEPTH>::new(self.as_dyn_mut(), isize::MAX);
452            loop {
453                let buffer = reader.fill_buf().await.map_err(Error::Io)?;
454                let len = buffer.len();
455                if len == 0 {
456                    break;
457                }
458                if !decoder.resume(buffer, arena) {
459                    self.as_dyn_mut().clear();
460                    return Err(Error::InvalidData);
461                }
462                reader.consume_unpin(len);
463            }
464            if !decoder.finish(arena) {
465                self.as_dyn_mut().clear();
466                return Err(Error::InvalidData);
467            }
468            Ok(())
469        }
470    }
471
472    #[cfg(feature = "std")]
473    fn decode_from_async_read<'a, const STACK_DEPTH: usize>(
474        &'a mut self,
475        arena: &'a mut crate::arena::Arena<'a>,
476        reader: &mut (impl futures::io::AsyncRead + Unpin),
477    ) -> impl core::future::Future<Output = Result<(), Error<futures::io::Error>>> {
478        async move {
479            let mut buf_reader = futures::io::BufReader::new(reader);
480            self.decode_from_async_bufread::<STACK_DEPTH>(arena, &mut buf_reader)
481                .await
482        }
483    }
484
485    #[cfg(feature = "serde_support")]
486    fn serde_deserialize<'arena, 'alloc, 'de, D>(
487        &'de mut self,
488        arena: &'arena mut crate::arena::Arena<'alloc>,
489        deserializer: D,
490    ) -> Result<(), D::Error>
491    where
492        D: ::serde::Deserializer<'de>,
493    {
494        serde::serde_deserialize_struct(self.as_dyn_mut(), arena, deserializer)
495    }
496}
497
498// Blanket impl for static protobuf types
499impl<T: generated_code_only::Protobuf> ProtobufRef<'static> for T {
500    fn as_dyn<'msg>(&'msg self) -> reflection::DynamicMessageRef<'static, 'msg> {
501        reflection::DynamicMessageRef {
502            object: crate::generated_code_only::as_object(self),
503            table: T::table(),
504        }
505    }
506}
507
508impl<T: generated_code_only::Protobuf> ProtobufMut<'static> for T {
509    fn as_dyn_mut<'msg>(&'msg mut self) -> reflection::DynamicMessage<'static, 'msg> {
510        reflection::DynamicMessage {
511            object: crate::generated_code_only::as_object_mut(self),
512            table: T::table(),
513        }
514    }
515}
516
#[cfg(all(test, feature = "std"))]
mod tests {
    use crate::ProtobufMut;
    use crate::ProtobufRef;

    #[cfg(not(feature = "nightly"))]
    use allocator_api2::alloc::Global;
    #[cfg(feature = "nightly")]
    use std::alloc::Global;

    /// The embedded descriptors expose the expected file and message names.
    #[test]
    fn descriptor_accessors() {
        let file_descriptor =
            crate::google::protobuf::FileDescriptorProto::ProtoType::file_descriptor();
        let message_descriptor =
            crate::google::protobuf::DescriptorProto::ProtoType::descriptor_proto();
        let nested_descriptor =
            crate::google::protobuf::DescriptorProto::ExtensionRange::ProtoType::descriptor_proto();

        // Test we built descriptor.proto using the canonical path
        assert_eq!(file_descriptor.name(), "google/protobuf/descriptor.proto");
        assert_eq!(message_descriptor.name(), "DescriptorProto");
        assert_eq!(nested_descriptor.name(), "ExtensionRange");
    }

    /// The library's own file descriptor passes the shared round-trip check.
    #[test]
    fn file_descriptor_roundtrip() {
        crate::test_utils::assert_roundtrip(
            crate::google::protobuf::FileDescriptorProto::ProtoType::file_descriptor(),
        );
    }

    /// `encode_flat` (single large buffer) and `encode_vec` (chunked) must
    /// produce byte-identical output.
    #[test]
    fn compare_encode_flat_vs_vec() {
        let file_descriptor =
            crate::google::protobuf::FileDescriptorProto::ProtoType::file_descriptor();

        // encode_flat with large buffer
        let mut flat_buffer = vec![0u8; 100_000];
        let flat_result = file_descriptor
            .encode_flat::<32>(&mut flat_buffer)
            .expect("encode_flat should work");
        let flat_bytes = flat_result.to_vec();

        // encode_vec with chunked encoding
        let vec_bytes = file_descriptor
            .encode_vec::<32>()
            .expect("encode_vec should work");

        // Dump to files for comparison. Use the platform's temp directory
        // instead of a hard-coded `/tmp`, which does not exist everywhere.
        let dump_dir = std::env::temp_dir();
        std::fs::write(dump_dir.join("encode_flat.bin"), &flat_bytes).expect("write flat");
        std::fs::write(dump_dir.join("encode_vec.bin"), &vec_bytes).expect("write vec");

        println!(
            "encode_flat: {} bytes, encode_vec: {} bytes",
            flat_bytes.len(),
            vec_bytes.len()
        );

        if flat_bytes != vec_bytes {
            // Find first difference
            for (i, (a, b)) in flat_bytes.iter().zip(vec_bytes.iter()).enumerate() {
                if a != b {
                    println!(
                        "First difference at byte {}: flat={:02x}, vec={:02x}",
                        i, a, b
                    );
                    break;
                }
            }
            if flat_bytes.len() != vec_bytes.len() {
                println!(
                    "Length mismatch: flat={}, vec={}",
                    flat_bytes.len(),
                    vec_bytes.len()
                );
            }
        }

        assert_eq!(
            flat_bytes, vec_bytes,
            "encode_flat and encode_vec should produce identical output"
        );
    }

    /// Decoding through a pool-created dynamic message and re-encoding yields
    /// the original bytes.
    #[test]
    fn dynamic_file_descriptor_roundtrip() {
        let mut pool = crate::descriptor_pool::DescriptorPool::new(&Global);
        let file_descriptor =
            crate::google::protobuf::FileDescriptorProto::ProtoType::file_descriptor();
        pool.add_file(&file_descriptor);

        let bytes = file_descriptor.encode_vec::<32>().expect("should encode");
        let mut arena = crate::arena::Arena::new(&Global);

        let mut dynamic_file_descriptor = pool
            .create_message("google.protobuf.FileDescriptorProto", &mut arena)
            .expect("Should create");
        assert!(dynamic_file_descriptor.decode_flat::<32>(&mut arena, &bytes));

        let roundtrip = dynamic_file_descriptor
            .encode_vec::<32>()
            .expect("should encode");

        assert_eq!(bytes, roundtrip);
    }

    /// A string field containing invalid UTF-8 must make decoding fail.
    #[test]
    fn invalid_utf8_string_rejected() {
        // FileDescriptorProto field 1 is "name" (string type)
        // Wire format: tag (field 1, wire type 2) = 0x0a, then length, then bytes
        // 0xFF is invalid UTF-8
        let invalid_utf8_name: &[u8] = &[0x0a, 0x03, 0x61, 0xFF, 0x62]; // "a<invalid>b"

        let mut arena = crate::arena::Arena::new(&Global);
        let mut msg = crate::google::protobuf::FileDescriptorProto::ProtoType::default();
        let result = msg.decode_flat::<32>(&mut arena, invalid_utf8_name);

        assert!(
            !result,
            "decoding invalid UTF-8 in string field should fail"
        );
    }
}