protocrap/lib.rs
1//! # Protocrap
2//!
3//! A small, efficient, and flexible protobuf implementation for Rust.
4//!
5//! ## Overview
6//!
7//! Protocrap takes a different approach than other protobuf libraries. Instead of
8//! generating parsing and serialization code for each message type, it uses a single
9//! table-driven implementation. Code generation produces only struct definitions with
10//! accessors and static lookup tables.
11//!
12//! This design yields:
13//! - **Small binaries**: No code duplication across message types
14//! - **Fast compilation**: No macro expansion or monomorphization explosion
15//! - **Flexible memory**: Arena allocation with custom allocator support
16//! - **Universal streaming**: Push-based API works with sync and async
17//!
18//! ## Quick Start
19//!
20//! ### Code Generation
21//!
22//! Generate Rust code from `.proto` files using `protocrap-codegen`:
23//!
24//! ```bash
25//! # Create descriptor set with protoc
26//! protoc --include_imports --descriptor_set_out=types.bin my_types.proto
27//!
28//! # Generate Rust code
29//! protocrap-codegen types.bin src/types.pc.rs
30//! ```
31//!
32//! Include the generated code in your crate:
33//!
34//! ```ignore
35//! use protocrap;
36//! include!("types.pc.rs");
37//! ```
38//!
39//! ### Encoding Messages
40//!
41//! ```
42//! use protocrap::{ProtobufRef, ProtobufMut, arena::Arena};
43//! use protocrap::google::protobuf::FileDescriptorProto;
44//! use allocator_api2::alloc::Global;
45//!
46//! let mut arena = Arena::new(&Global);
47//! let mut msg = FileDescriptorProto::ProtoType::default();
48//! // Arena allocations return Result - handle errors appropriately
49//! msg.set_name("example.proto", &mut arena).unwrap();
50//! msg.set_package("my.package", &mut arena).unwrap();
51//!
52//! // Encode to a Vec<u8>
53//! let bytes = msg.encode_vec::<32>().unwrap();
54//!
55//! // Or encode to a fixed buffer
56//! let mut buffer = [0u8; 1024];
57//! let encoded = msg.encode_flat::<32>(&mut buffer).unwrap();
58//! ```
59//!
60//! The const generic (`::<32>`) specifies the maximum message nesting depth.
61//!
62//! ### Decoding Messages
63//!
64//! ```
65//! use protocrap::{ProtobufRef, ProtobufMut, arena::Arena};
66//! use protocrap::google::protobuf::FileDescriptorProto;
67//! use allocator_api2::alloc::Global;
68//!
69//! // First encode a message to get some bytes
70//! let mut arena = Arena::new(&Global);
71//! let mut original = FileDescriptorProto::ProtoType::default();
72//! original.set_name("example.proto", &mut arena).unwrap();
73//! let bytes = original.encode_vec::<32>().unwrap();
74//!
75//! // Decode from a byte slice
76//! let mut decoded = FileDescriptorProto::ProtoType::default();
//! assert!(decoded.decode_flat::<32>(&mut arena, &bytes));
78//! assert_eq!(decoded.name(), "example.proto");
79//! ```
80//!
81//! ### Runtime Reflection
82//!
83//! Inspect messages dynamically without compile-time knowledge of the schema:
84//!
85//! ```
86//! use protocrap::{ProtobufRef, ProtobufMut, arena::Arena};
87//! use protocrap::google::protobuf::{FileDescriptorProto, DescriptorProto};
88//! use protocrap::descriptor_pool::DescriptorPool;
89//! use allocator_api2::alloc::Global;
90//!
91//! // Build descriptor pool from the library's own file descriptor
92//! let mut pool = DescriptorPool::new(&Global);
93//! let file_desc = FileDescriptorProto::ProtoType::file_descriptor();
//! pool.add_file(file_desc).unwrap();
95//!
96//! // Encode a real DescriptorProto (the descriptor for DescriptorProto itself)
97//! let descriptor = DescriptorProto::ProtoType::descriptor_proto();
98//! let bytes = descriptor.encode_vec::<32>().unwrap();
99//!
100//! // Decode dynamically using the pool
101//! let mut arena = Arena::new(&Global);
102//! let mut msg = pool.create_message(
103//! "google.protobuf.DescriptorProto",
104//! &mut arena,
105//! ).unwrap();
//! assert!(msg.decode_flat::<32>(&mut arena, &bytes));
107//!
108//! // Access fields dynamically
109//! for field in msg.descriptor().field() {
110//! if let Some(value) = msg.get_field(field.as_ref()) {
111//! println!("{}: {:?}", field.name(), value);
112//! }
113//! }
114//! ```
115//!
116//! ## Architecture
117//!
118//! ### Arena Allocation
119//!
120//! All variable-sized data (strings, bytes, repeated fields, sub-messages) is allocated
121//! in an [`arena::Arena`]. This provides:
122//!
123//! - **Speed**: Allocation is a pointer bump in the common case
124//! - **Bulk deallocation**: Drop the arena to free all messages at once
125//! - **Custom allocators**: Pass any `&dyn Allocator` to control memory placement
126//! - **Fallible allocation**: All allocations return `Result`, enabling graceful OOM handling
127//!
128//! ```
129//! use protocrap::arena::Arena;
130//! use allocator_api2::alloc::Global;
131//!
132//! let mut arena = Arena::new(&Global);
133//! // All allocations during decode/set operations use this arena
134//! // Operations return Result to handle allocation failures
135//! // When arena drops, all memory is freed
136//! ```
137//!
138//! ### Push-Based Streaming
139//!
140//! The parser uses a push model: you provide data chunks, it returns updated state.
141//! This signature `(state, buffer) -> updated_state` enables:
142//!
143//! - Single implementation for sync and async
144//! - No callback traits or complex lifetime requirements
145//! - Works in embedded, WASM, and any async runtime
146//!
147//! ## Generated Code Structure
148//!
149//! For each protobuf message, the codegen produces a module containing:
150//!
151//! - `ProtoType`: The message struct with `#[repr(C)]` layout
152//! - Accessor methods following protobuf conventions
153//!
154//! Field accessors follow this pattern:
155//!
156//! | Proto Type | Getter | Setter | Other |
157//! |------------|--------|--------|-------|
158//! | Scalar | `field() -> T` | `set_field(T)` | `has_field()`, `clear_field()` |
159//! | String/Bytes | `field() -> &str`/`&[u8]` | `set_field(&str, &mut Arena) -> Result` | `has_field()`, `clear_field()` |
160//! | Message | `field() -> Option<&M>` | `field_mut(&mut Arena) -> Result<&mut M>` | `has_field()`, `clear_field()` |
161//! | Repeated | `field() -> &[T]` | `field_mut() -> &mut RepeatedField<T>` | `add_field(...) -> Result` |
162//!
163//! **Note**: Operations that allocate from the arena return `Result` to handle allocation failures.
164//!
165//! ## Modules
166//!
167//! - [`arena`]: Arena allocator for message data
168//! - [`containers`]: Collection types ([`containers::RepeatedField`], [`containers::String`], [`containers::Bytes`])
169//! - [`reflection`]: Runtime message inspection and dynamic decoding
170//! - [`TypedMessage`]: Wrapper for repeated message elements
171//!
172//! ## Feature Flags
173//!
174//! - `std` (default): Enables `std::io` integration, `Vec`-based encoding
175//! - `serde_support` (default): Enables serde serialization via reflection
176//! - `nightly`: Use nightly Rust features for slightly better codegen (branch hints)
177//!
178//! For `no_std` environments, disable default features:
179//!
180//! ```toml
181//! [dependencies]
182//! protocrap = { version = "0.1", default-features = false }
183//! ```
184//!
185//! ## Restrictions
186//!
187//! Protocrap is designed for "sane" schemas:
188//!
189//! - Up to 256 optional fields per message
190//! - Struct sizes up to 64KB
191//! - Field numbers 1-2047 (1 or 2 byte wire tags)
192//! - Field numbers should be mostly consecutive
193//!
194//! The following are intentionally unsupported:
195//!
196//! - **Unknown fields**: Discarded during decoding (no round-trip preservation)
197//! - **Extensions**: Proto2 extensions are silently dropped
198//! - **Maps**: Decoded as repeated key-value pairs
199//! - **Proto3 zero-value omission**: All set fields are serialized
200
201#![cfg_attr(feature = "nightly", feature(likely_unlikely, allocator_api))]
202#![cfg_attr(not(feature = "std"), no_std)]
203
204pub mod arena;
205pub(crate) mod base;
206pub mod containers;
207pub mod reflection;
208
209// Re-export user-facing types at crate root
210pub use base::TypedMessage;
211#[cfg(feature = "std")]
212pub mod descriptor_pool;
213#[doc(hidden)]
214#[cfg(feature = "std")]
215pub mod test_utils;
216
217// Re-export Allocator trait - use core on nightly, polyfill on stable
218#[cfg(not(feature = "nightly"))]
219pub use allocator_api2::alloc::Allocator;
220#[cfg(feature = "nightly")]
221pub use core::alloc::Allocator;
222
223// Internal modules - only accessible within the crate
224// Types needed by generated code are re-exported via generated_code_only
225pub(crate) mod decoding;
226pub(crate) mod encoding;
227pub(crate) mod tables;
228pub(crate) mod utils;
229pub(crate) mod wire;
230
231/// Internal types for generated code. **Do not use directly.**
232#[doc(hidden)]
233pub mod generated_code_only;
234
235use crate as protocrap;
236include!("descriptor.pc.rs");
237
238#[cfg(feature = "serde_support")]
239pub mod serde;
240
241#[cfg(feature = "serde_support")]
242pub mod proto_json;
243
244#[cfg(feature = "codegen")]
245pub mod codegen;
246
/// Errors that can occur during protobuf encoding/decoding operations.
///
/// The type parameter `E` is the error type of the underlying data source; it is
/// carried by [`Error::Io`] and defaults to `()` for operations that perform no I/O.
#[derive(Debug)]
pub enum Error<E = ()> {
    /// The encoder gave up on the message tree. Returned by `encode_flat` and
    /// `encode_vec` when the resumable encoder yields no result for the
    /// requested `STACK_DEPTH`.
    MessageTreeTooDeep,
    /// The buffer handed to `encode_flat` could not hold the whole encoded message.
    BufferTooSmall,
    /// The decoder rejected the input while consuming or finishing it.
    InvalidProtobufData,
    /// A message could not be found.
    // NOTE(review): not produced in this file; presumably raised by name lookups
    // elsewhere in the crate - confirm.
    MessageNotFound,
    /// An arena allocation failed.
    // NOTE(review): not produced in this file; presumably raised by arena-backed
    // setters/containers - confirm.
    ArenaAllocationFailed,
    /// An error that fits none of the other variants.
    // NOTE(review): not produced in this file - confirm where it originates.
    UnknownError,
    /// An error reported by the data source (chunk provider or reader) feeding a decode.
    Io(E),
}
258
259impl<E: core::fmt::Debug> core::fmt::Display for Error<E> {
260 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
261 core::fmt::Debug::fmt(self, f)
262 }
263}
264
// Marks `Error<E>` as a standard error type. No methods are overridden, so the
// trait's defaults apply; the `E: Debug` bound matches the `Display` impl's bound.
impl<E: core::fmt::Debug> core::error::Error for Error<E> {}
266
267impl<E> From<E> for Error<E> {
268 fn from(e: E) -> Self {
269 Error::Io(e)
270 }
271}
272
273/// Read-only protobuf operations (encode, serialize, inspect).
274pub trait ProtobufRef<'pool> {
275 /// Get a dynamic view of this message for reflection.
276 fn as_dyn<'msg>(&'msg self) -> reflection::DynamicMessageRef<'pool, 'msg>;
277
278 /// Get the message's descriptor (schema metadata).
279 fn descriptor(&self) -> &'pool crate::google::protobuf::DescriptorProto::ProtoType {
280 self.as_dyn().descriptor()
281 }
282
283 /// Encode to a fixed buffer. Returns the encoded slice or an error.
284 fn encode_flat<'a, const STACK_DEPTH: usize>(
285 &self,
286 buffer: &'a mut [u8],
287 ) -> Result<&'a [u8], Error> {
288 let mut resumeable_encode = encoding::ResumeableEncode::<STACK_DEPTH>::new(self.as_dyn());
289 let encoding::ResumeResult::Done(buf) = resumeable_encode
290 .resume_encode(buffer)
291 .ok_or(Error::MessageTreeTooDeep)?
292 else {
293 return Err(Error::BufferTooSmall);
294 };
295 Ok(buf)
296 }
297
298 /// Encode to a new Vec, growing as needed.
299 #[cfg(feature = "std")]
300 fn encode_vec<const STACK_DEPTH: usize>(&self) -> Result<Vec<u8>, Error> {
301 let mut buffer = vec![0u8; 1024];
302 let mut stack = Vec::new();
303 let mut resumeable_encode = encoding::ResumeableEncode::<STACK_DEPTH>::new(self.as_dyn());
304 loop {
305 match resumeable_encode
306 .resume_encode(&mut buffer)
307 .ok_or(Error::MessageTreeTooDeep)?
308 {
309 encoding::ResumeResult::Done(buf) => {
310 let len = buf.len();
311 let end = buffer.len();
312 let start = end - len;
313 buffer.copy_within(start..end, 0);
314 buffer.truncate(len);
315 break;
316 }
317 encoding::ResumeResult::NeedsMoreBuffer => {
318 let len = buffer.len().min(1024 * 1024);
319 stack.push(core::mem::take(&mut buffer));
320 buffer = vec![0u8; len * 2];
321 }
322 };
323 }
324 while let Some(old_buffer) = stack.pop() {
325 buffer.extend_from_slice(&old_buffer);
326 }
327 Ok(buffer)
328 }
329}
330
331/// Mutable protobuf operations (decode, deserialize).
332pub trait ProtobufMut<'pool>: ProtobufRef<'pool> {
333 /// Get a mutable dynamic view of this message.
334 fn as_dyn_mut<'msg>(&'msg mut self) -> reflection::DynamicMessage<'pool, 'msg>;
335
336 /// Decode from a byte slice. Returns true on success.
337 #[must_use]
338 fn decode_flat<const STACK_DEPTH: usize>(
339 &mut self,
340 arena: &mut crate::arena::Arena,
341 buf: &[u8],
342 ) -> bool {
343 let mut decoder =
344 decoding::ResumeableDecode::<STACK_DEPTH>::new(self.as_dyn_mut(), isize::MAX);
345 if !decoder.resume(buf, arena) {
346 self.as_dyn_mut().clear();
347 return false;
348 }
349 if !decoder.finish(arena) {
350 self.as_dyn_mut().clear();
351 return false;
352 }
353 true
354 }
355
356 fn decode<'a, E>(
357 &mut self,
358 arena: &mut crate::arena::Arena,
359 provider: &'a mut impl FnMut() -> Result<Option<&'a [u8]>, E>,
360 ) -> Result<(), Error<E>> {
361 let mut decoder = decoding::ResumeableDecode::<32>::new(self.as_dyn_mut(), isize::MAX);
362 loop {
363 let Some(buffer) = provider().map_err(Error::Io)? else {
364 break;
365 };
366 if !decoder.resume(buffer, arena) {
367 self.as_dyn_mut().clear();
368 return Err(Error::InvalidProtobufData);
369 }
370 }
371 if !decoder.finish(arena) {
372 self.as_dyn_mut().clear();
373 return Err(Error::InvalidProtobufData);
374 }
375 Ok(())
376 }
377
378 fn async_decode<'a, E, F>(
379 &'a mut self,
380 arena: &mut crate::arena::Arena,
381 provider: &'a mut impl FnMut() -> F,
382 ) -> impl core::future::Future<Output = Result<(), Error<E>>>
383 where
384 F: core::future::Future<Output = Result<Option<&'a [u8]>, E>> + 'a,
385 {
386 async move {
387 let mut decoder = decoding::ResumeableDecode::<32>::new(self.as_dyn_mut(), isize::MAX);
388 loop {
389 let Some(buffer) = provider().await.map_err(Error::Io)? else {
390 break;
391 };
392 if !decoder.resume(buffer, arena) {
393 self.as_dyn_mut().clear();
394 return Err(Error::InvalidProtobufData);
395 }
396 }
397 if !decoder.finish(arena) {
398 self.as_dyn_mut().clear();
399 return Err(Error::InvalidProtobufData);
400 }
401 Ok(())
402 }
403 }
404
405 #[cfg(feature = "std")]
406 fn decode_from_bufread<const STACK_DEPTH: usize>(
407 &mut self,
408 arena: &mut crate::arena::Arena,
409 reader: &mut impl std::io::BufRead,
410 ) -> Result<(), Error<std::io::Error>> {
411 let mut decoder =
412 decoding::ResumeableDecode::<STACK_DEPTH>::new(self.as_dyn_mut(), isize::MAX);
413 loop {
414 let buffer = reader.fill_buf().map_err(Error::Io)?;
415 let len = buffer.len();
416 if len == 0 {
417 break;
418 }
419 if !decoder.resume(buffer, arena) {
420 self.as_dyn_mut().clear();
421 return Err(Error::InvalidProtobufData);
422 }
423 reader.consume(len);
424 }
425 if !decoder.finish(arena) {
426 self.as_dyn_mut().clear();
427 return Err(Error::InvalidProtobufData);
428 }
429 Ok(())
430 }
431
432 #[cfg(feature = "std")]
433 fn decode_from_read<const STACK_DEPTH: usize>(
434 &mut self,
435 arena: &mut crate::arena::Arena,
436 reader: &mut impl std::io::Read,
437 ) -> Result<(), Error<std::io::Error>> {
438 let mut buf_reader = std::io::BufReader::new(reader);
439 self.decode_from_bufread::<STACK_DEPTH>(arena, &mut buf_reader)
440 }
441
442 #[cfg(feature = "std")]
443 fn decode_from_async_bufread<'a, const STACK_DEPTH: usize>(
444 &'a mut self,
445 arena: &'a mut crate::arena::Arena<'a>,
446 reader: &mut (impl futures::io::AsyncBufRead + Unpin),
447 ) -> impl core::future::Future<Output = Result<(), Error<futures::io::Error>>> {
448 use futures::io::AsyncBufReadExt;
449
450 async move {
451 let mut decoder =
452 decoding::ResumeableDecode::<STACK_DEPTH>::new(self.as_dyn_mut(), isize::MAX);
453 loop {
454 let buffer = reader.fill_buf().await.map_err(Error::Io)?;
455 let len = buffer.len();
456 if len == 0 {
457 break;
458 }
459 if !decoder.resume(buffer, arena) {
460 self.as_dyn_mut().clear();
461 return Err(Error::InvalidProtobufData);
462 }
463 reader.consume_unpin(len);
464 }
465 if !decoder.finish(arena) {
466 self.as_dyn_mut().clear();
467 return Err(Error::InvalidProtobufData);
468 }
469 Ok(())
470 }
471 }
472
473 #[cfg(feature = "std")]
474 fn decode_from_async_read<'a, const STACK_DEPTH: usize>(
475 &'a mut self,
476 arena: &'a mut crate::arena::Arena<'a>,
477 reader: &mut (impl futures::io::AsyncRead + Unpin),
478 ) -> impl core::future::Future<Output = Result<(), Error<futures::io::Error>>> {
479 async move {
480 let mut buf_reader = futures::io::BufReader::new(reader);
481 self.decode_from_async_bufread::<STACK_DEPTH>(arena, &mut buf_reader)
482 .await
483 }
484 }
485
486 #[cfg(feature = "serde_support")]
487 fn serde_deserialize<'arena, 'alloc, 'de, D>(
488 &'de mut self,
489 arena: &'arena mut crate::arena::Arena<'alloc>,
490 deserializer: D,
491 ) -> Result<(), D::Error>
492 where
493 D: ::serde::Deserializer<'de>,
494 {
495 serde::serde_deserialize_struct(self.as_dyn_mut(), arena, deserializer)
496 }
497}
498
499// Blanket impl for static protobuf types
500impl<T: generated_code_only::Protobuf> ProtobufRef<'static> for T {
501 fn as_dyn<'msg>(&'msg self) -> reflection::DynamicMessageRef<'static, 'msg> {
502 reflection::DynamicMessageRef {
503 object: crate::generated_code_only::as_object(self),
504 table: T::table(),
505 }
506 }
507}
508
509impl<T: generated_code_only::Protobuf> ProtobufMut<'static> for T {
510 fn as_dyn_mut<'msg>(&'msg mut self) -> reflection::DynamicMessage<'static, 'msg> {
511 reflection::DynamicMessage {
512 object: crate::generated_code_only::as_object_mut(self),
513 table: T::table(),
514 }
515 }
516}
517
#[cfg(all(test, feature = "std"))]
mod tests {
    use crate::ProtobufMut;
    use crate::ProtobufRef;

    #[cfg(not(feature = "nightly"))]
    use allocator_api2::alloc::Global;
    #[cfg(feature = "nightly")]
    use std::alloc::Global;

    /// The embedded descriptors report the expected file, message and nested
    /// message names.
    #[test]
    fn descriptor_accessors() {
        let file_descriptor =
            crate::google::protobuf::FileDescriptorProto::ProtoType::file_descriptor();
        let message_descriptor =
            crate::google::protobuf::DescriptorProto::ProtoType::descriptor_proto();
        let nested_descriptor =
            crate::google::protobuf::DescriptorProto::ExtensionRange::ProtoType::descriptor_proto();

        // Test we built descriptor.proto using the canonical path
        assert_eq!(file_descriptor.name(), "google/protobuf/descriptor.proto");
        assert_eq!(message_descriptor.name(), "DescriptorProto");
        assert_eq!(nested_descriptor.name(), "ExtensionRange");
    }

    /// The library's own file descriptor passes the shared round-trip check.
    #[test]
    fn file_descriptor_roundtrip() {
        crate::test_utils::assert_roundtrip(
            crate::google::protobuf::FileDescriptorProto::ProtoType::file_descriptor(),
        );
    }

    /// `encode_flat` (one large buffer) and `encode_vec` (chunked) must produce
    /// byte-identical output.
    #[test]
    fn compare_encode_flat_vs_vec() {
        let file_descriptor =
            crate::google::protobuf::FileDescriptorProto::ProtoType::file_descriptor();

        // encode_flat with large buffer
        let mut flat_buffer = vec![0u8; 100_000];
        let flat_result = file_descriptor
            .encode_flat::<32>(&mut flat_buffer)
            .expect("encode_flat should work");
        let flat_bytes = flat_result.to_vec();

        // encode_vec with chunked encoding
        let vec_bytes = file_descriptor
            .encode_vec::<32>()
            .expect("encode_vec should work");

        println!(
            "encode_flat: {} bytes, encode_vec: {} bytes",
            flat_bytes.len(),
            vec_bytes.len()
        );

        if flat_bytes != vec_bytes {
            // Keep both encodings around for offline comparison. This is a debugging
            // aid only: it uses the platform temp dir rather than a hard-coded `/tmp`,
            // and a failed write must not mask the real assertion failure below.
            let dump_dir = std::env::temp_dir();
            let _ = std::fs::write(dump_dir.join("encode_flat.bin"), &flat_bytes);
            let _ = std::fs::write(dump_dir.join("encode_vec.bin"), &vec_bytes);
            println!("Dumped both encodings to {}", dump_dir.display());

            // Find first difference
            for (i, (a, b)) in flat_bytes.iter().zip(vec_bytes.iter()).enumerate() {
                if a != b {
                    println!(
                        "First difference at byte {}: flat={:02x}, vec={:02x}",
                        i, a, b
                    );
                    break;
                }
            }
            if flat_bytes.len() != vec_bytes.len() {
                println!(
                    "Length mismatch: flat={}, vec={}",
                    flat_bytes.len(),
                    vec_bytes.len()
                );
            }
        }

        assert_eq!(
            flat_bytes, vec_bytes,
            "encode_flat and encode_vec should produce identical output"
        );
    }

    /// A message created dynamically from the descriptor pool decodes the encoded
    /// file descriptor and re-encodes it to the same bytes.
    #[test]
    fn dynamic_file_descriptor_roundtrip() {
        let mut pool = crate::descriptor_pool::DescriptorPool::new(&Global);
        let file_descriptor =
            crate::google::protobuf::FileDescriptorProto::ProtoType::file_descriptor();
        pool.add_file(&file_descriptor).unwrap();

        let bytes = file_descriptor.encode_vec::<32>().expect("should encode");
        let mut arena = crate::arena::Arena::new(&Global);

        let mut dynamic_file_descriptor = pool
            .create_message("google.protobuf.FileDescriptorProto", &mut arena)
            .expect("Should create");
        assert!(dynamic_file_descriptor.decode_flat::<32>(&mut arena, &bytes));

        let roundtrip = dynamic_file_descriptor
            .encode_vec::<32>()
            .expect("should encode");

        assert_eq!(bytes, roundtrip);
    }

    /// Decoding must fail when a `string` field carries bytes that are not valid UTF-8.
    #[test]
    fn invalid_utf8_string_rejected() {
        // FileDescriptorProto field 1 is "name" (string type)
        // Wire format: tag (field 1, wire type 2) = 0x0a, then length, then bytes
        // 0xFF is invalid UTF-8
        let invalid_utf8_name: &[u8] = &[0x0a, 0x03, 0x61, 0xFF, 0x62]; // "a<invalid>b"

        let mut arena = crate::arena::Arena::new(&Global);
        let mut msg = crate::google::protobuf::FileDescriptorProto::ProtoType::default();
        let result = msg.decode_flat::<32>(&mut arena, invalid_utf8_name);

        assert!(
            !result,
            "decoding invalid UTF-8 in string field should fail"
        );
    }
}