protocrap/
lib.rs

1//! # Protocrap
2//!
3//! A small, efficient, and flexible protobuf implementation for Rust.
4//!
5//! ## Overview
6//!
7//! Protocrap takes a different approach than other protobuf libraries. Instead of
8//! generating parsing and serialization code for each message type, it uses a single
9//! table-driven implementation. Code generation produces only struct definitions with
10//! accessors and static lookup tables.
11//!
12//! This design yields:
13//! - **Small binaries**: No code duplication across message types
14//! - **Fast compilation**: No macro expansion or monomorphization explosion
15//! - **Flexible memory**: Arena allocation with custom allocator support
16//! - **Universal streaming**: Push-based API works with sync and async
17//!
18//! ## Quick Start
19//!
20//! ### Code Generation
21//!
22//! Generate Rust code from `.proto` files using `protocrap-codegen`:
23//!
24//! ```bash
25//! # Create descriptor set with protoc
26//! protoc --include_imports --descriptor_set_out=types.bin my_types.proto
27//!
28//! # Generate Rust code
29//! protocrap-codegen types.bin src/types.pc.rs
30//! ```
31//!
32//! Include the generated code in your crate:
33//!
34//! ```ignore
35//! use protocrap;
36//! include!("types.pc.rs");
37//! ```
38//!
39//! ### Encoding Messages
40//!
41//! ```
42//! use protocrap::{ProtobufRef, ProtobufMut, arena::Arena};
43//! use protocrap::google::protobuf::FileDescriptorProto;
44//! use allocator_api2::alloc::Global;
45//!
46//! let mut arena = Arena::new(&Global);
47//! let mut msg = FileDescriptorProto::ProtoType::default();
48//! // Arena allocations return Result - handle errors appropriately
49//! msg.set_name("example.proto", &mut arena).unwrap();
50//! msg.set_package("my.package", &mut arena).unwrap();
51//!
52//! // Encode to a Vec<u8>
53//! let bytes = msg.encode_vec::<32>().unwrap();
54//!
55//! // Or encode to a fixed buffer
56//! let mut buffer = [0u8; 1024];
57//! let encoded = msg.encode_flat::<32>(&mut buffer).unwrap();
58//! ```
59//!
60//! The const generic (`::<32>`) specifies the maximum message nesting depth.
61//!
62//! ### Decoding Messages
63//!
64//! ```
65//! use protocrap::{ProtobufRef, ProtobufMut, arena::Arena};
66//! use protocrap::google::protobuf::FileDescriptorProto;
67//! use allocator_api2::alloc::Global;
68//!
69//! // First encode a message to get some bytes
70//! let mut arena = Arena::new(&Global);
71//! let mut original = FileDescriptorProto::ProtoType::default();
72//! original.set_name("example.proto", &mut arena).unwrap();
73//! let bytes = original.encode_vec::<32>().unwrap();
74//!
75//! // Decode from a byte slice
76//! let mut decoded = FileDescriptorProto::ProtoType::default();
//! assert!(decoded.decode_flat::<32>(&mut arena, &bytes));
78//! assert_eq!(decoded.name(), "example.proto");
79//! ```
80//!
81//! ### Runtime Reflection
82//!
83//! Inspect messages dynamically without compile-time knowledge of the schema:
84//!
85//! ```
86//! use protocrap::{ProtobufRef, ProtobufMut, arena::Arena};
87//! use protocrap::google::protobuf::{FileDescriptorProto, DescriptorProto};
88//! use protocrap::descriptor_pool::DescriptorPool;
89//! use allocator_api2::alloc::Global;
90//!
91//! // Build descriptor pool from the library's own file descriptor
92//! let mut pool = DescriptorPool::new(&Global);
93//! let file_desc = FileDescriptorProto::ProtoType::file_descriptor();
//! pool.add_file(file_desc).unwrap();
95//!
96//! // Encode a real DescriptorProto (the descriptor for DescriptorProto itself)
97//! let descriptor = DescriptorProto::ProtoType::descriptor_proto();
98//! let bytes = descriptor.encode_vec::<32>().unwrap();
99//!
100//! // Decode dynamically using the pool
101//! let mut arena = Arena::new(&Global);
102//! let mut msg = pool.create_message(
103//!     "google.protobuf.DescriptorProto",
104//!     &mut arena,
105//! ).unwrap();
//! assert!(msg.decode_flat::<32>(&mut arena, &bytes));
107//!
108//! // Access fields dynamically
109//! for field in msg.descriptor().field() {
110//!     if let Some(value) = msg.get_field(field.as_ref()) {
111//!         println!("{}: {:?}", field.name(), value);
112//!     }
113//! }
114//! ```
115//!
116//! ## Architecture
117//!
118//! ### Arena Allocation
119//!
120//! All variable-sized data (strings, bytes, repeated fields, sub-messages) is allocated
121//! in an [`arena::Arena`]. This provides:
122//!
123//! - **Speed**: Allocation is a pointer bump in the common case
124//! - **Bulk deallocation**: Drop the arena to free all messages at once
125//! - **Custom allocators**: Pass any `&dyn Allocator` to control memory placement
126//! - **Fallible allocation**: All allocations return `Result`, enabling graceful OOM handling
127//!
128//! ```
129//! use protocrap::arena::Arena;
130//! use allocator_api2::alloc::Global;
131//!
132//! let mut arena = Arena::new(&Global);
133//! // All allocations during decode/set operations use this arena
134//! // Operations return Result to handle allocation failures
135//! // When arena drops, all memory is freed
136//! ```
137//!
138//! ### Push-Based Streaming
139//!
140//! The parser uses a push model: you provide data chunks, it returns updated state.
141//! This signature `(state, buffer) -> updated_state` enables:
142//!
143//! - Single implementation for sync and async
144//! - No callback traits or complex lifetime requirements
145//! - Works in embedded, WASM, and any async runtime
146//!
147//! ## Generated Code Structure
148//!
149//! For each protobuf message, the codegen produces a module containing:
150//!
151//! - `ProtoType`: The message struct with `#[repr(C)]` layout
152//! - Accessor methods following protobuf conventions
153//!
154//! Field accessors follow this pattern:
155//!
156//! | Proto Type | Getter | Setter | Other |
157//! |------------|--------|--------|-------|
158//! | Scalar | `field() -> T` | `set_field(T)` | `has_field()`, `clear_field()` |
159//! | String/Bytes | `field() -> &str`/`&[u8]` | `set_field(&str, &mut Arena) -> Result` | `has_field()`, `clear_field()` |
160//! | Message | `field() -> Option<&M>` | `field_mut(&mut Arena) -> Result<&mut M>` | `has_field()`, `clear_field()` |
161//! | Repeated | `field() -> &[T]` | `field_mut() -> &mut RepeatedField<T>` | `add_field(...) -> Result` |
162//!
163//! **Note**: Operations that allocate from the arena return `Result` to handle allocation failures.
164//!
165//! ## Modules
166//!
167//! - [`arena`]: Arena allocator for message data
168//! - [`containers`]: Collection types ([`containers::RepeatedField`], [`containers::String`], [`containers::Bytes`])
169//! - [`reflection`]: Runtime message inspection and dynamic decoding
170//! - [`TypedMessage`]: Wrapper for repeated message elements
171//!
172//! ## Feature Flags
173//!
174//! - `std` (default): Enables `std::io` integration, `Vec`-based encoding
175//! - `serde_support` (default): Enables serde serialization via reflection
176//! - `nightly`: Use nightly Rust features for slightly better codegen (branch hints)
177//!
178//! For `no_std` environments, disable default features:
179//!
180//! ```toml
181//! [dependencies]
182//! protocrap = { version = "0.1", default-features = false }
183//! ```
184//!
185//! ## Restrictions
186//!
187//! Protocrap is designed for "sane" schemas:
188//!
189//! - Up to 256 optional fields per message
190//! - Struct sizes up to 64KB
191//! - Field numbers 1-2047 (1 or 2 byte wire tags)
192//! - Field numbers should be mostly consecutive
193//!
194//! The following are intentionally unsupported:
195//!
196//! - **Unknown fields**: Discarded during decoding (no round-trip preservation)
197//! - **Extensions**: Proto2 extensions are silently dropped
198//! - **Maps**: Decoded as repeated key-value pairs
199//! - **Proto3 zero-value omission**: All set fields are serialized
200
201#![cfg_attr(feature = "nightly", feature(likely_unlikely, allocator_api))]
202#![cfg_attr(not(feature = "std"), no_std)]
203
204pub mod arena;
205pub(crate) mod base;
206pub mod containers;
207pub mod reflection;
208
209// Re-export user-facing types at crate root
210pub use base::TypedMessage;
211#[cfg(feature = "std")]
212pub mod descriptor_pool;
213#[doc(hidden)]
214#[cfg(feature = "std")]
215pub mod test_utils;
216
217// Re-export Allocator trait - use core on nightly, polyfill on stable
218#[cfg(not(feature = "nightly"))]
219pub use allocator_api2::alloc::Allocator;
220#[cfg(feature = "nightly")]
221pub use core::alloc::Allocator;
222
223// Internal modules - only accessible within the crate
224// Types needed by generated code are re-exported via generated_code_only
225pub(crate) mod decoding;
226pub(crate) mod encoding;
227pub(crate) mod tables;
228pub(crate) mod utils;
229pub(crate) mod wire;
230
231/// Internal types for generated code. **Do not use directly.**
232#[doc(hidden)]
233pub mod generated_code_only;
234
235use crate as protocrap;
236include!("descriptor.pc.rs");
237
238#[cfg(feature = "serde_support")]
239pub mod serde;
240
241#[cfg(feature = "serde_support")]
242pub mod proto_json;
243
244#[cfg(feature = "codegen")]
245pub mod codegen;
246
/// Errors that can occur during protobuf encoding/decoding operations.
///
/// The type parameter `E` is the error type of the underlying data source
/// (for example `std::io::Error` for the reader-based decoders). It defaults
/// to `()` for operations that perform no I/O.
#[derive(Debug)]
pub enum Error<E = ()> {
    /// The encoder gave up before finishing. The encode methods return this
    /// when the resumable encoder yields no result; the name indicates that
    /// message nesting exceeded the `STACK_DEPTH` const generic.
    MessageTreeTooDeep,
    /// The fixed output buffer passed to a flat encode could not hold the
    /// whole encoded message.
    BufferTooSmall,
    /// The decoder rejected the input bytes.
    InvalidProtobufData,
    /// NOTE(review): not produced in this file; presumably a message type
    /// lookup by name failed - confirm against the callers.
    MessageNotFound,
    /// NOTE(review): not produced in this file; presumably the arena's
    /// backing allocator failed - confirm against the callers.
    ArenaAllocationFailed,
    /// An error that fits none of the other variants.
    UnknownError,
    /// An error reported by the underlying data source.
    Io(E),
}

/// Human-readable messages. `E` is only required to be `Debug` (not
/// `Display`) so that the default `Error<()>` keeps implementing `Display`.
impl<E: core::fmt::Debug> core::fmt::Display for Error<E> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Error::MessageTreeTooDeep => f.write_str("message tree too deep"),
            Error::BufferTooSmall => f.write_str("buffer too small"),
            Error::InvalidProtobufData => f.write_str("invalid protobuf data"),
            Error::MessageNotFound => f.write_str("message not found"),
            Error::ArenaAllocationFailed => f.write_str("arena allocation failed"),
            Error::UnknownError => f.write_str("unknown error"),
            Error::Io(source) => write!(f, "I/O error: {:?}", source),
        }
    }
}

impl<E: core::fmt::Debug> core::error::Error for Error<E> {}

/// Wraps a data-source error in [`Error::Io`], so `?` can be used directly on
/// results whose error type is `E`.
impl<E> From<E> for Error<E> {
    fn from(e: E) -> Self {
        Error::Io(e)
    }
}
272
273/// Read-only protobuf operations (encode, serialize, inspect).
274pub trait ProtobufRef<'pool> {
275    /// Get a dynamic view of this message for reflection.
276    fn as_dyn<'msg>(&'msg self) -> reflection::DynamicMessageRef<'pool, 'msg>;
277
278    /// Get the message's descriptor (schema metadata).
279    fn descriptor(&self) -> &'pool crate::google::protobuf::DescriptorProto::ProtoType {
280        self.as_dyn().descriptor()
281    }
282
283    /// Encode to a fixed buffer. Returns the encoded slice or an error.
284    fn encode_flat<'a, const STACK_DEPTH: usize>(
285        &self,
286        buffer: &'a mut [u8],
287    ) -> Result<&'a [u8], Error> {
288        let mut resumeable_encode = encoding::ResumeableEncode::<STACK_DEPTH>::new(self.as_dyn());
289        let encoding::ResumeResult::Done(buf) = resumeable_encode
290            .resume_encode(buffer)
291            .ok_or(Error::MessageTreeTooDeep)?
292        else {
293            return Err(Error::BufferTooSmall);
294        };
295        Ok(buf)
296    }
297
298    /// Encode to a new Vec, growing as needed.
299    #[cfg(feature = "std")]
300    fn encode_vec<const STACK_DEPTH: usize>(&self) -> Result<Vec<u8>, Error> {
301        let mut buffer = vec![0u8; 1024];
302        let mut stack = Vec::new();
303        let mut resumeable_encode = encoding::ResumeableEncode::<STACK_DEPTH>::new(self.as_dyn());
304        loop {
305            match resumeable_encode
306                .resume_encode(&mut buffer)
307                .ok_or(Error::MessageTreeTooDeep)?
308            {
309                encoding::ResumeResult::Done(buf) => {
310                    let len = buf.len();
311                    let end = buffer.len();
312                    let start = end - len;
313                    buffer.copy_within(start..end, 0);
314                    buffer.truncate(len);
315                    break;
316                }
317                encoding::ResumeResult::NeedsMoreBuffer => {
318                    let len = buffer.len().min(1024 * 1024);
319                    stack.push(core::mem::take(&mut buffer));
320                    buffer = vec![0u8; len * 2];
321                }
322            };
323        }
324        while let Some(old_buffer) = stack.pop() {
325            buffer.extend_from_slice(&old_buffer);
326        }
327        Ok(buffer)
328    }
329}
330
331/// Mutable protobuf operations (decode, deserialize).
332pub trait ProtobufMut<'pool>: ProtobufRef<'pool> {
333    /// Get a mutable dynamic view of this message.
334    fn as_dyn_mut<'msg>(&'msg mut self) -> reflection::DynamicMessage<'pool, 'msg>;
335
336    /// Decode from a byte slice. Returns true on success.
337    #[must_use]
338    fn decode_flat<const STACK_DEPTH: usize>(
339        &mut self,
340        arena: &mut crate::arena::Arena,
341        buf: &[u8],
342    ) -> bool {
343        let mut decoder =
344            decoding::ResumeableDecode::<STACK_DEPTH>::new(self.as_dyn_mut(), isize::MAX);
345        if !decoder.resume(buf, arena) {
346            self.as_dyn_mut().clear();
347            return false;
348        }
349        if !decoder.finish(arena) {
350            self.as_dyn_mut().clear();
351            return false;
352        }
353        true
354    }
355
356    fn decode<'a, E>(
357        &mut self,
358        arena: &mut crate::arena::Arena,
359        provider: &'a mut impl FnMut() -> Result<Option<&'a [u8]>, E>,
360    ) -> Result<(), Error<E>> {
361        let mut decoder = decoding::ResumeableDecode::<32>::new(self.as_dyn_mut(), isize::MAX);
362        loop {
363            let Some(buffer) = provider().map_err(Error::Io)? else {
364                break;
365            };
366            if !decoder.resume(buffer, arena) {
367                self.as_dyn_mut().clear();
368                return Err(Error::InvalidProtobufData);
369            }
370        }
371        if !decoder.finish(arena) {
372            self.as_dyn_mut().clear();
373            return Err(Error::InvalidProtobufData);
374        }
375        Ok(())
376    }
377
378    fn async_decode<'a, E, F>(
379        &'a mut self,
380        arena: &mut crate::arena::Arena,
381        provider: &'a mut impl FnMut() -> F,
382    ) -> impl core::future::Future<Output = Result<(), Error<E>>>
383    where
384        F: core::future::Future<Output = Result<Option<&'a [u8]>, E>> + 'a,
385    {
386        async move {
387            let mut decoder = decoding::ResumeableDecode::<32>::new(self.as_dyn_mut(), isize::MAX);
388            loop {
389                let Some(buffer) = provider().await.map_err(Error::Io)? else {
390                    break;
391                };
392                if !decoder.resume(buffer, arena) {
393                    self.as_dyn_mut().clear();
394                    return Err(Error::InvalidProtobufData);
395                }
396            }
397            if !decoder.finish(arena) {
398                self.as_dyn_mut().clear();
399                return Err(Error::InvalidProtobufData);
400            }
401            Ok(())
402        }
403    }
404
405    #[cfg(feature = "std")]
406    fn decode_from_bufread<const STACK_DEPTH: usize>(
407        &mut self,
408        arena: &mut crate::arena::Arena,
409        reader: &mut impl std::io::BufRead,
410    ) -> Result<(), Error<std::io::Error>> {
411        let mut decoder =
412            decoding::ResumeableDecode::<STACK_DEPTH>::new(self.as_dyn_mut(), isize::MAX);
413        loop {
414            let buffer = reader.fill_buf().map_err(Error::Io)?;
415            let len = buffer.len();
416            if len == 0 {
417                break;
418            }
419            if !decoder.resume(buffer, arena) {
420                self.as_dyn_mut().clear();
421                return Err(Error::InvalidProtobufData);
422            }
423            reader.consume(len);
424        }
425        if !decoder.finish(arena) {
426            self.as_dyn_mut().clear();
427            return Err(Error::InvalidProtobufData);
428        }
429        Ok(())
430    }
431
432    #[cfg(feature = "std")]
433    fn decode_from_read<const STACK_DEPTH: usize>(
434        &mut self,
435        arena: &mut crate::arena::Arena,
436        reader: &mut impl std::io::Read,
437    ) -> Result<(), Error<std::io::Error>> {
438        let mut buf_reader = std::io::BufReader::new(reader);
439        self.decode_from_bufread::<STACK_DEPTH>(arena, &mut buf_reader)
440    }
441
442    #[cfg(feature = "std")]
443    fn decode_from_async_bufread<'a, const STACK_DEPTH: usize>(
444        &'a mut self,
445        arena: &'a mut crate::arena::Arena<'a>,
446        reader: &mut (impl futures::io::AsyncBufRead + Unpin),
447    ) -> impl core::future::Future<Output = Result<(), Error<futures::io::Error>>> {
448        use futures::io::AsyncBufReadExt;
449
450        async move {
451            let mut decoder =
452                decoding::ResumeableDecode::<STACK_DEPTH>::new(self.as_dyn_mut(), isize::MAX);
453            loop {
454                let buffer = reader.fill_buf().await.map_err(Error::Io)?;
455                let len = buffer.len();
456                if len == 0 {
457                    break;
458                }
459                if !decoder.resume(buffer, arena) {
460                    self.as_dyn_mut().clear();
461                    return Err(Error::InvalidProtobufData);
462                }
463                reader.consume_unpin(len);
464            }
465            if !decoder.finish(arena) {
466                self.as_dyn_mut().clear();
467                return Err(Error::InvalidProtobufData);
468            }
469            Ok(())
470        }
471    }
472
473    #[cfg(feature = "std")]
474    fn decode_from_async_read<'a, const STACK_DEPTH: usize>(
475        &'a mut self,
476        arena: &'a mut crate::arena::Arena<'a>,
477        reader: &mut (impl futures::io::AsyncRead + Unpin),
478    ) -> impl core::future::Future<Output = Result<(), Error<futures::io::Error>>> {
479        async move {
480            let mut buf_reader = futures::io::BufReader::new(reader);
481            self.decode_from_async_bufread::<STACK_DEPTH>(arena, &mut buf_reader)
482                .await
483        }
484    }
485
486    #[cfg(feature = "serde_support")]
487    fn serde_deserialize<'arena, 'alloc, 'de, D>(
488        &'de mut self,
489        arena: &'arena mut crate::arena::Arena<'alloc>,
490        deserializer: D,
491    ) -> Result<(), D::Error>
492    where
493        D: ::serde::Deserializer<'de>,
494    {
495        serde::serde_deserialize_struct(self.as_dyn_mut(), arena, deserializer)
496    }
497}
498
499// Blanket impl for static protobuf types
500impl<T: generated_code_only::Protobuf> ProtobufRef<'static> for T {
501    fn as_dyn<'msg>(&'msg self) -> reflection::DynamicMessageRef<'static, 'msg> {
502        reflection::DynamicMessageRef {
503            object: crate::generated_code_only::as_object(self),
504            table: T::table(),
505        }
506    }
507}
508
509impl<T: generated_code_only::Protobuf> ProtobufMut<'static> for T {
510    fn as_dyn_mut<'msg>(&'msg mut self) -> reflection::DynamicMessage<'static, 'msg> {
511        reflection::DynamicMessage {
512            object: crate::generated_code_only::as_object_mut(self),
513            table: T::table(),
514        }
515    }
516}
517
#[cfg(all(test, feature = "std"))]
mod tests {
    use crate::ProtobufMut;
    use crate::ProtobufRef;

    #[cfg(not(feature = "nightly"))]
    use allocator_api2::alloc::Global;
    #[cfg(feature = "nightly")]
    use std::alloc::Global;

    /// The embedded descriptors expose the expected names.
    #[test]
    fn descriptor_accessors() {
        let file_descriptor =
            crate::google::protobuf::FileDescriptorProto::ProtoType::file_descriptor();
        let message_descriptor =
            crate::google::protobuf::DescriptorProto::ProtoType::descriptor_proto();
        let nested_descriptor =
            crate::google::protobuf::DescriptorProto::ExtensionRange::ProtoType::descriptor_proto();

        // Test we built descriptor.proto using the canonical path
        assert_eq!(file_descriptor.name(), "google/protobuf/descriptor.proto");
        assert_eq!(message_descriptor.name(), "DescriptorProto");
        assert_eq!(nested_descriptor.name(), "ExtensionRange");
    }

    /// The library's own file descriptor survives the shared round-trip check.
    #[test]
    fn file_descriptor_roundtrip() {
        crate::test_utils::assert_roundtrip(
            crate::google::protobuf::FileDescriptorProto::ProtoType::file_descriptor(),
        );
    }

    /// Single-buffer and chunked encoding must yield the same bytes.
    #[test]
    fn compare_encode_flat_vs_vec() {
        let file_descriptor =
            crate::google::protobuf::FileDescriptorProto::ProtoType::file_descriptor();

        // encode_flat with large buffer
        let mut flat_buffer = vec![0u8; 100_000];
        let flat_bytes = file_descriptor
            .encode_flat::<32>(&mut flat_buffer)
            .expect("encode_flat should work")
            .to_vec();

        // encode_vec with chunked encoding
        let vec_bytes = file_descriptor
            .encode_vec::<32>()
            .expect("encode_vec should work");

        println!(
            "encode_flat: {} bytes, encode_vec: {} bytes",
            flat_bytes.len(),
            vec_bytes.len()
        );

        if flat_bytes != vec_bytes {
            // Find first difference
            let first_diff = flat_bytes
                .iter()
                .zip(vec_bytes.iter())
                .position(|(a, b)| a != b);
            if let Some(i) = first_diff {
                println!(
                    "First difference at byte {}: flat={:02x}, vec={:02x}",
                    i, flat_bytes[i], vec_bytes[i]
                );
            }
            if flat_bytes.len() != vec_bytes.len() {
                println!(
                    "Length mismatch: flat={}, vec={}",
                    flat_bytes.len(),
                    vec_bytes.len()
                );
            }

            // Dump both encodings for offline comparison. This is a debugging
            // aid only: it uses the platform's temp directory and a failed
            // write must not mask the assertion below.
            let dump_dir = std::env::temp_dir();
            for (file_name, bytes) in [
                ("encode_flat.bin", &flat_bytes),
                ("encode_vec.bin", &vec_bytes),
            ] {
                let path = dump_dir.join(file_name);
                match std::fs::write(&path, bytes) {
                    Ok(()) => println!("Dumped {}", path.display()),
                    Err(e) => println!("Could not dump {}: {}", path.display(), e),
                }
            }
        }

        assert_eq!(
            flat_bytes, vec_bytes,
            "encode_flat and encode_vec should produce identical output"
        );
    }

    /// Decoding into a pool-created dynamic message and re-encoding it
    /// reproduces the original bytes.
    #[test]
    fn dynamic_file_descriptor_roundtrip() {
        let mut pool = crate::descriptor_pool::DescriptorPool::new(&Global);
        let file_descriptor =
            crate::google::protobuf::FileDescriptorProto::ProtoType::file_descriptor();
        pool.add_file(&file_descriptor).unwrap();

        let bytes = file_descriptor.encode_vec::<32>().expect("should encode");
        let mut arena = crate::arena::Arena::new(&Global);

        let mut dynamic_file_descriptor = pool
            .create_message("google.protobuf.FileDescriptorProto", &mut arena)
            .expect("Should create");
        assert!(dynamic_file_descriptor.decode_flat::<32>(&mut arena, &bytes));

        let roundtrip = dynamic_file_descriptor
            .encode_vec::<32>()
            .expect("should encode");

        assert_eq!(bytes, roundtrip);
    }

    /// A string field carrying invalid UTF-8 must make decoding fail.
    #[test]
    fn invalid_utf8_string_rejected() {
        // FileDescriptorProto field 1 is "name" (string type)
        // Wire format: tag (field 1, wire type 2) = 0x0a, then length, then bytes
        // 0xFF is invalid UTF-8
        let invalid_utf8_name: &[u8] = &[0x0a, 0x03, 0x61, 0xFF, 0x62]; // "a<invalid>b"

        let mut arena = crate::arena::Arena::new(&Global);
        let mut msg = crate::google::protobuf::FileDescriptorProto::ProtoType::default();
        let result = msg.decode_flat::<32>(&mut arena, invalid_utf8_name);

        assert!(
            !result,
            "decoding invalid UTF-8 in string field should fail"
        );
    }
}