protobuf_native/
compiler.rs

1// Copyright Materialize, Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License in the LICENSE file at the
6// root of this repository, or online at
7//
8//     http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16//! Implementation of the Protocol Buffer compiler.
17//!
18//! This module contains code for parsing .proto files and generating code based
19//! on them. It is particularly useful when you need to deal with arbitrary
20//! Protobuf messages at runtime.
21
22use std::collections::HashSet;
23use std::error::Error;
24use std::fmt;
25use std::fs;
26use std::marker::PhantomData;
27use std::marker::PhantomPinned;
28use std::mem;
29use std::path::{Path, PathBuf};
30use std::pin::Pin;
31use std::sync::OnceLock;
32
33use cxx::let_cxx_string;
34
35use crate::internal::{unsafe_ffi_conversions, CInt, ProtobufPath};
36use crate::io::DynZeroCopyInputStream;
37use crate::{DescriptorDatabase, FileDescriptorProto, FileDescriptorSet, OperationFailedError};
38
// Pulls in a file generated into `OUT_DIR` at build time. The rest of this
// module reads `well_known_types::WELL_KNOWN_TYPES` from it and iterates it
// as `(path, contents)` pairs.
mod well_known_types {
    include!(concat!(env!("OUT_DIR"), "/well_known_types.rs"));
}
42
43/// Returns the path to a directory containing the well-known proto files.
44///
45/// The files are extracted from embedded data on first call and cached
46/// in a temporary directory for the lifetime of the process.
47fn well_known_types_dir() -> &'static Path {
48    static DIR: OnceLock<PathBuf> = OnceLock::new();
49    DIR.get_or_init(|| {
50        // Use a persistent temp directory that won't be cleaned up during the process
51        let dir = std::env::temp_dir().join(format!(
52            "protobuf-native-well-known-types-{}",
53            env!("CARGO_PKG_VERSION")
54        ));
55
56        // Write all embedded proto files to the directory
57        for (path, contents) in well_known_types::WELL_KNOWN_TYPES {
58            let full_path = dir.join(path);
59            if let Some(parent) = full_path.parent() {
60                let _ = fs::create_dir_all(parent);
61            }
62            let _ = fs::write(&full_path, contents);
63        }
64
65        dir
66    })
67}
68
// `cxx` bridge declaring the C++ items this module calls into.
//
// Items without their own `#[namespace]` attribute use the bridge's default
// namespace, `protobuf_native::compiler`.
#[cxx::bridge(namespace = "protobuf_native::compiler")]
pub(crate) mod ffi {
    // A single diagnostic shared between Rust and C++. `warning` selects the
    // severity; the Rust-side conversion in this file treats a negative
    // `line` as "no location" and adds one to `line` and `column`.
    #[derive(Debug)]
    struct FileLoadError {
        filename: String,
        line: i64,
        column: i64,
        message: String,
        warning: bool,
    }

    unsafe extern "C++" {
        include!("protobuf-native/src/compiler.h");
        include!("protobuf-native/src/internal.h");

        // Types that are declared elsewhere in the crate and re-used here.
        #[namespace = "protobuf_native::internal"]
        type CInt = crate::internal::CInt;

        #[namespace = "absl"]
        type string_view<'a> = crate::internal::StringView<'a>;

        #[namespace = "google::protobuf"]
        type FileDescriptorProto = crate::ffi::FileDescriptorProto;

        #[namespace = "google::protobuf::io"]
        type ZeroCopyInputStream = crate::io::ffi::ZeroCopyInputStream;

        // Error collector whose recorded diagnostics are exposed to Rust as a
        // vector of `FileLoadError` via `Errors`. Created and destroyed
        // through the explicit New/Delete functions below.
        type SimpleErrorCollector;
        fn NewSimpleErrorCollector() -> *mut SimpleErrorCollector;
        unsafe fn DeleteSimpleErrorCollector(collector: *mut SimpleErrorCollector);
        fn Errors(self: Pin<&mut SimpleErrorCollector>) -> Pin<&mut CxxVector<FileLoadError>>;

        // Error collector interface. The Rust wrapper documents `line` and
        // `column` as zero-based, with a line of -1 meaning "entire file".
        #[namespace = "google::protobuf::compiler"]
        type MultiFileErrorCollector;
        fn RecordError(
            self: Pin<&mut MultiFileErrorCollector>,
            filename: string_view,
            line: CInt,
            column: CInt,
            message: string_view,
        );
        fn RecordWarning(
            self: Pin<&mut MultiFileErrorCollector>,
            filename: string_view,
            line: CInt,
            column: CInt,
            message: string_view,
        );

        // Source tree interface. `Open` returns a raw stream pointer; the
        // Rust wrapper treats a null pointer as failure and then fetches the
        // message with `SourceTreeGetLastErrorMessage`.
        #[namespace = "google::protobuf::compiler"]
        type SourceTree;
        fn Open(self: Pin<&mut SourceTree>, filename: string_view) -> *mut ZeroCopyInputStream;
        fn SourceTreeGetLastErrorMessage(source_tree: Pin<&mut SourceTree>) -> String;

        // Descriptor database backed by a source tree. The raw pointers
        // passed to the constructor and to `RecordErrorsTo` are not tracked
        // by `cxx`; the Rust wrapper ties them to a lifetime instead.
        #[namespace = "google::protobuf::compiler"]
        type SourceTreeDescriptorDatabase;
        unsafe fn NewSourceTreeDescriptorDatabase(
            source_tree: *mut SourceTree,
        ) -> *mut SourceTreeDescriptorDatabase;
        unsafe fn DeleteSourceTreeDescriptorDatabase(
            source_tree: *mut SourceTreeDescriptorDatabase,
        );
        // Returns `true` on success; the Rust wrapper maps `false` to
        // `OperationFailedError`.
        unsafe fn FindFileByName(
            self: Pin<&mut SourceTreeDescriptorDatabase>,
            filename: &CxxString,
            output: *mut FileDescriptorProto,
        ) -> bool;
        unsafe fn RecordErrorsTo(
            self: Pin<&mut SourceTreeDescriptorDatabase>,
            error_collector: *mut MultiFileErrorCollector,
        );

        // Source tree to which named files with in-memory contents are added.
        type VirtualSourceTree;
        fn NewVirtualSourceTree() -> *mut VirtualSourceTree;
        unsafe fn DeleteVirtualSourceTree(tree: *mut VirtualSourceTree);
        fn AddFile(self: Pin<&mut VirtualSourceTree>, filename: string_view, contents: Vec<u8>);

        // Source tree that maps virtual paths to paths on disk.
        #[namespace = "google::protobuf::compiler"]
        type DiskSourceTree;
        fn NewDiskSourceTree() -> *mut DiskSourceTree;
        unsafe fn DeleteDiskSourceTree(tree: *mut DiskSourceTree);
        fn MapPath(
            self: Pin<&mut DiskSourceTree>,
            virtual_path: string_view,
            disk_path: string_view,
        );
    }
}
157
158/// If the importer encounters problems while trying to import the proto files,
159/// it reports them to a `MultiFileErrorCollector`.
160pub trait MultiFileErrorCollector: multi_file_error_collector::Sealed {
161    /// Adds an error message to the error collector at the specified position.
162    ///
163    /// Line and column numbers are zero-based. A line number of -1 indicates
164    /// an error with the entire file (e.g., "not found").
165    fn add_error(self: Pin<&mut Self>, filename: &str, line: i32, column: i32, message: &str) {
166        self.upcast_mut().RecordError(
167            filename.into(),
168            CInt::expect_from(line),
169            CInt::expect_from(column),
170            message.into(),
171        )
172    }
173
174    /// Adds a warning to the error collector at the specified position.
175    ///
176    /// See the documentation for [`add_error`] for details on the meaning of
177    /// the `line` and `column` parameters.
178    ///
179    /// [`add_error`]: MultiFileErrorCollector::add_error
180    fn add_warning(self: Pin<&mut Self>, filename: &str, line: i32, column: i32, message: &str) {
181        self.upcast_mut().RecordWarning(
182            filename.into(),
183            CInt::expect_from(line),
184            CInt::expect_from(column),
185            message.into(),
186        )
187    }
188}
189
190mod multi_file_error_collector {
191    use std::pin::Pin;
192
193    use super::ffi;
194
195    pub trait Sealed {
196        fn upcast(&self) -> &ffi::MultiFileErrorCollector;
197        fn upcast_mut(self: Pin<&mut Self>) -> Pin<&mut ffi::MultiFileErrorCollector>;
198        unsafe fn upcast_mut_ptr(self: Pin<&mut Self>) -> *mut ffi::MultiFileErrorCollector {
199            self.upcast_mut().get_unchecked_mut() as *mut _
200        }
201    }
202}
203
/// A simple implementation of [`MultiFileErrorCollector`] that records errors
/// in memory for later retrieval.
///
/// Values are only handed out as `Pin<Box<Self>>` (see
/// [`SimpleErrorCollector::new`]); recorded errors are retrieved by iterating
/// over a `Pin<&mut SimpleErrorCollector>`.
pub struct SimpleErrorCollector {
    // Marks the type as `!Unpin`.
    _opaque: PhantomPinned,
}
209
210impl Drop for SimpleErrorCollector {
211    fn drop(&mut self) {
212        unsafe { ffi::DeleteSimpleErrorCollector(self.as_ffi_mut_ptr_unpinned()) }
213    }
214}
215
216impl SimpleErrorCollector {
217    /// Creates a new simple error collector.
218    pub fn new() -> Pin<Box<SimpleErrorCollector>> {
219        let collector = ffi::NewSimpleErrorCollector();
220        unsafe { Self::from_ffi_owned(collector) }
221    }
222
223    unsafe_ffi_conversions!(ffi::SimpleErrorCollector);
224}
225
226impl<'a> Iterator for Pin<&'a mut SimpleErrorCollector> {
227    type Item = FileLoadError;
228
229    fn next(&mut self) -> Option<FileLoadError> {
230        self.as_mut().as_ffi_mut().Errors().pop().map(Into::into)
231    }
232}
233
// `add_error` and `add_warning` come from the trait's default methods, which
// forward through the `Sealed` upcasts implemented for this type.
impl MultiFileErrorCollector for SimpleErrorCollector {}
235
236impl multi_file_error_collector::Sealed for SimpleErrorCollector {
237    fn upcast(&self) -> &ffi::MultiFileErrorCollector {
238        unsafe { mem::transmute(self) }
239    }
240
241    fn upcast_mut(self: Pin<&mut Self>) -> Pin<&mut ffi::MultiFileErrorCollector> {
242        unsafe { mem::transmute(self) }
243    }
244}
245
/// An implementation of `DescriptorDatabase` which loads files from a
/// `SourceTree` and parses them.
///
/// Note: This class does not implement `FindFileContainingSymbol` or
/// `FindFileContainingExtension`; these will always return false.
///
/// The lifetime `'a` is that of the source tree (and, if one is registered,
/// the error collector) borrowed by this database.
pub struct SourceTreeDescriptorDatabase<'a> {
    // Marks the type as `!Unpin`.
    _opaque: PhantomPinned,
    // Carries `'a` without storing a reference on the Rust side.
    _lifetime: PhantomData<&'a ()>,
}
255
256impl<'a> Drop for SourceTreeDescriptorDatabase<'a> {
257    fn drop(&mut self) {
258        unsafe { ffi::DeleteSourceTreeDescriptorDatabase(self.as_ffi_mut_ptr_unpinned()) }
259    }
260}
261
262impl<'a> SourceTreeDescriptorDatabase<'a> {
263    /// Constructs a new descriptor database for the provided source tree.
264    pub fn new(
265        source_tree: Pin<&'a mut dyn SourceTree>,
266    ) -> Pin<Box<SourceTreeDescriptorDatabase<'a>>> {
267        let db = unsafe { ffi::NewSourceTreeDescriptorDatabase(source_tree.upcast_mut_ptr()) };
268        unsafe { Self::from_ffi_owned(db) }
269    }
270
271    /// Instructs the source tree descriptor database to report any parse errors
272    /// to the given [`MultiFileErrorCollector`].
273    ///
274    /// This should b ecalled before parsing.
275    pub fn record_errors_to(
276        self: Pin<&mut Self>,
277        error_collector: Pin<&'a mut dyn MultiFileErrorCollector>,
278    ) {
279        unsafe {
280            self.as_ffi_mut()
281                .RecordErrorsTo(error_collector.upcast_mut_ptr())
282        }
283    }
284
285    /// Builds a file descriptor set containing all file descriptor protos
286    /// reachable from the specified roots.
287    pub fn build_file_descriptor_set<P>(
288        mut self: Pin<&mut Self>,
289        roots: &[P],
290    ) -> Result<Pin<Box<FileDescriptorSet>>, OperationFailedError>
291    where
292        P: AsRef<Path>,
293    {
294        let mut out = FileDescriptorSet::new();
295        let mut seen = HashSet::new();
296        let mut stack = vec![];
297        for root in roots {
298            let root = root.as_ref();
299            stack.push(self.as_mut().find_file_by_name(root)?);
300            seen.insert(ProtobufPath::from(root).as_bytes().to_vec());
301        }
302        while let Some(file) = stack.pop() {
303            out.as_mut().add_file().copy_from(&file);
304            for i in 0..file.dependency_size() {
305                let dep_path = ProtobufPath::from(file.dependency(i));
306                if !seen.contains(dep_path.as_bytes()) {
307                    let dep = self
308                        .as_mut()
309                        .find_file_by_name(dep_path.as_path().as_ref())?;
310                    stack.push(dep);
311                    seen.insert(dep_path.as_bytes().to_vec());
312                }
313            }
314        }
315        Ok(out)
316    }
317
318    unsafe_ffi_conversions!(ffi::SourceTreeDescriptorDatabase);
319}
320
321impl<'a> DescriptorDatabase for SourceTreeDescriptorDatabase<'a> {
322    fn find_file_by_name(
323        self: Pin<&mut Self>,
324        filename: &Path,
325    ) -> Result<Pin<Box<FileDescriptorProto>>, OperationFailedError> {
326        let mut fd = FileDescriptorProto::new();
327        let_cxx_string!(filename = ProtobufPath::from(filename).as_bytes());
328        if unsafe {
329            self.as_ffi_mut()
330                .FindFileByName(&filename, fd.as_mut().as_ffi_mut_ptr())
331        } {
332            Ok(fd)
333        } else {
334            Err(OperationFailedError)
335        }
336    }
337}
338
339/// Abstract interface which represents a directory tree containing .proto
340/// files.
341///
342/// Used by the default implementation of `Importer` to resolve import
343/// statements. Most users will probably want to use the `DiskSourceTree`
344/// implementation.
345///
346/// This trait is sealed and cannot be implemented outside of this crate.
347pub trait SourceTree: source_tree::Sealed {
348    /// Opens the given file and return a stream that reads it.
349    ///
350    /// The filename must be a path relative to the root of the source tree and
351    /// must not contain "." or ".." components.
352    fn open<'a>(
353        self: Pin<&'a mut Self>,
354        filename: &Path,
355    ) -> Result<Pin<Box<DynZeroCopyInputStream<'a>>>, FileOpenError> {
356        let filename = ProtobufPath::from(filename);
357        let mut source_tree = self.upcast_mut();
358        let stream = source_tree.as_mut().Open(filename.into());
359        if stream.is_null() {
360            Err(FileOpenError(ffi::SourceTreeGetLastErrorMessage(
361                source_tree,
362            )))
363        } else {
364            Ok(unsafe { DynZeroCopyInputStream::from_ffi_owned(stream) })
365        }
366    }
367}
368
369mod source_tree {
370    use std::pin::Pin;
371
372    use super::ffi;
373
374    pub trait Sealed {
375        fn upcast(&self) -> &ffi::SourceTree;
376        fn upcast_mut(self: Pin<&mut Self>) -> Pin<&mut ffi::SourceTree>;
377        unsafe fn upcast_mut_ptr(self: Pin<&mut Self>) -> *mut ffi::SourceTree {
378            self.upcast_mut().get_unchecked_mut() as *mut _
379        }
380    }
381}
382
/// An implementation of `SourceTree` which stores files in memory.
///
/// Values are only handed out as `Pin<Box<Self>>` (see
/// [`VirtualSourceTree::new`]).
pub struct VirtualSourceTree {
    // Marks the type as `!Unpin`.
    _opaque: PhantomPinned,
}
387
388impl Drop for VirtualSourceTree {
389    fn drop(&mut self) {
390        unsafe { ffi::DeleteVirtualSourceTree(self.as_ffi_mut_ptr_unpinned()) }
391    }
392}
393
394impl VirtualSourceTree {
395    /// Creates a new virtual source tree.
396    pub fn new() -> Pin<Box<VirtualSourceTree>> {
397        let tree = ffi::NewVirtualSourceTree();
398        unsafe { Self::from_ffi_owned(tree) }
399    }
400
401    /// Adds a file to the source tree with the specified name and contents.
402    pub fn add_file(self: Pin<&mut Self>, filename: &Path, contents: Vec<u8>) {
403        let filename = ProtobufPath::from(filename);
404        self.as_ffi_mut().AddFile(filename.into(), contents)
405    }
406
407    /// Maps the well-known protobuf types to the source tree.
408    ///
409    /// This method adds all well-known type .proto files (like
410    /// `google/protobuf/any.proto`, `google/protobuf/timestamp.proto`, etc.)
411    /// to the virtual source tree, making them available for import.
412    ///
413    /// The proto files are embedded at compile time, so this method works
414    /// even if the protobuf include directory is not available at runtime.
415    ///
416    /// # Example
417    ///
418    /// ```
419    /// use std::path::Path;
420    /// use protobuf_native::compiler::VirtualSourceTree;
421    ///
422    /// let mut source_tree = VirtualSourceTree::new();
423    /// source_tree.as_mut().map_well_known_types();
424    /// source_tree.as_mut().add_file(
425    ///     Path::new("my.proto"),
426    ///     b"syntax = \"proto3\";\nimport \"google/protobuf/timestamp.proto\";\n".to_vec(),
427    /// );
428    /// // Now my.proto can import well-known types
429    /// ```
430    pub fn map_well_known_types(mut self: Pin<&mut Self>) {
431        for (path, contents) in well_known_types::WELL_KNOWN_TYPES {
432            self.as_mut().add_file(Path::new(path), contents.to_vec());
433        }
434    }
435
436    unsafe_ffi_conversions!(ffi::VirtualSourceTree);
437}
438
// `open` comes from the trait's default method, which forwards through the
// `Sealed` upcasts implemented for this type.
impl SourceTree for VirtualSourceTree {}
440
441impl source_tree::Sealed for VirtualSourceTree {
442    fn upcast(&self) -> &ffi::SourceTree {
443        unsafe { mem::transmute(self) }
444    }
445
446    fn upcast_mut(self: Pin<&mut Self>) -> Pin<&mut ffi::SourceTree> {
447        unsafe { mem::transmute(self) }
448    }
449}
450
/// An implementation of `SourceTree` which loads files from locations on disk.
///
/// Multiple mappings can be set up to map locations in the `DiskSourceTree` to
/// locations in the physical filesystem.
///
/// Values are only handed out as `Pin<Box<Self>>` (see
/// [`DiskSourceTree::new`]).
pub struct DiskSourceTree {
    // Marks the type as `!Unpin`.
    _opaque: PhantomPinned,
}
458
459impl Drop for DiskSourceTree {
460    fn drop(&mut self) {
461        unsafe { ffi::DeleteDiskSourceTree(self.as_ffi_mut_ptr_unpinned()) }
462    }
463}
464
465impl DiskSourceTree {
466    /// Creates a new disk source tree.
467    pub fn new() -> Pin<Box<DiskSourceTree>> {
468        let tree = ffi::NewDiskSourceTree();
469        unsafe { Self::from_ffi_owned(tree) }
470    }
471
472    /// Maps a path on disk to a location in the source tree.
473    ///
474    /// The path may be either a file or a directory. If it is a directory, the
475    /// entire tree under it will be mapped to the given virtual location. To
476    /// map a directory to the root of the source tree, pass an empty string for
477    /// `virtual_path`.
478    ///
479    /// If multiple mapped paths apply when opening a file, they will be
480    /// searched in order. For example, if you do:
481    ///
482    /// ```
483    /// use std::path::Path;
484    /// use protobuf_native::compiler::DiskSourceTree;
485    ///
486    /// let mut source_tree = DiskSourceTree::new();
487    /// source_tree.as_mut().map_path(Path::new("bar"), Path::new("foo/bar"));
488    /// source_tree.as_mut().map_path(Path::new(""), Path::new("baz"));
489    /// ```
490    ///
491    /// and then you do:
492    ///
493    /// ```
494    /// # use std::path::Path;
495    /// # use std::pin::Pin;
496    /// # use protobuf_native::compiler::{SourceTree, DiskSourceTree};
497    /// # fn f(mut source_tree: Pin<&mut DiskSourceTree>) {
498    /// source_tree.open(Path::new("bar/qux"));
499    /// # }
500    /// ```
501    ///
502    /// the `DiskSourceTree` will first try to open foo/bar/qux, then
503    /// baz/bar/qux, returning the first one that opens successfully.
504    ///
505    /// `disk_path` may be an absolute path or relative to the current directory,
506    /// just like a path you'd pass to [`File::open`].
507    ///
508    /// [`File::open`]: std::fs::File::open
509    pub fn map_path(self: Pin<&mut Self>, virtual_path: &Path, disk_path: &Path) {
510        let virtual_path = ProtobufPath::from(virtual_path);
511        let disk_path = ProtobufPath::from(disk_path);
512        self.as_ffi_mut()
513            .MapPath(virtual_path.into(), disk_path.into())
514    }
515
516    /// Maps the well-known protobuf types to the source tree.
517    ///
518    /// This method makes the well-known types (like `google/protobuf/any.proto`,
519    /// `google/protobuf/timestamp.proto`, etc.) available for import.
520    ///
521    /// The proto files are embedded at compile time, so this method works
522    /// even if the protobuf include directory is not available at runtime.
523    ///
524    /// # Note
525    ///
526    /// This method writes the embedded proto files to a temporary directory
527    /// on disk (under `$TMPDIR/protobuf-native-well-known-types-{version}/`)
528    /// on first invocation. The directory persists for the lifetime of the
529    /// process and across invocations. If you need to avoid disk writes,
530    /// consider using [`VirtualSourceTree::map_well_known_types`] instead.
531    ///
532    /// # Example
533    ///
534    /// ```
535    /// use std::path::Path;
536    /// use protobuf_native::compiler::DiskSourceTree;
537    ///
538    /// let mut source_tree = DiskSourceTree::new();
539    /// source_tree.as_mut().map_well_known_types();
540    /// source_tree.as_mut().map_path(Path::new(""), Path::new("my/protos"));
541    /// // Now you can import well-known types in your .proto files:
542    /// // import "google/protobuf/timestamp.proto";
543    /// ```
544    pub fn map_well_known_types(self: Pin<&mut Self>) {
545        self.map_path(Path::new(""), well_known_types_dir())
546    }
547
548    unsafe_ffi_conversions!(ffi::DiskSourceTree);
549}
550
// `open` comes from the trait's default method, which forwards through the
// `Sealed` upcasts implemented for this type.
impl SourceTree for DiskSourceTree {}
552
553impl source_tree::Sealed for DiskSourceTree {
554    fn upcast(&self) -> &ffi::SourceTree {
555        unsafe { mem::transmute(self) }
556    }
557
558    fn upcast_mut(self: Pin<&mut Self>) -> Pin<&mut ffi::SourceTree> {
559        unsafe { mem::transmute(self) }
560    }
561}
562
/// The error returned when a file in a source tree cannot be opened.
///
/// Wraps the message reported by the source tree.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct FileOpenError(String);

impl fmt::Display for FileOpenError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // The wrapped message is emitted verbatim: it is already descriptive
        // enough that no extra context is added around it.
        let FileOpenError(message) = self;
        f.write_str(message)
    }
}

impl Error for FileOpenError {}
576
/// How serious a [`FileLoadError`] is.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Severity {
    /// A true error.
    Error,
    /// An informational warning.
    Warning,
}

impl fmt::Display for Severity {
    /// Writes the lowercase name of the severity.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match *self {
            Severity::Error => "error",
            Severity::Warning => "warning",
        };
        f.write_str(label)
    }
}
594
/// Describes the location at which a [`FileLoadError`] occurred.
///
/// Both fields are one-based: the conversion from the FFI error adds one to
/// the line and column it receives.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Location {
    /// The 1-based line number.
    pub line: i64,
    /// The 1-based column number.
    pub column: i64,
}
603
/// An error occurred while loading a file.
///
/// Displayed as `filename:line:column: severity: message`; the
/// `line:column:` part is left out when there is no location.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]

pub struct FileLoadError {
    /// The name of the file which failed to load.
    pub filename: String,
    /// A message describing the cause of the error.
    pub message: String,
    /// The severity of the error.
    pub severity: Severity,
    /// The specific location at which the error occurred, if applicable.
    pub location: Option<Location>,
}
617
618impl From<ffi::FileLoadError> for FileLoadError {
619    fn from(ffi: ffi::FileLoadError) -> FileLoadError {
620        let location = (ffi.line >= 0).then(|| Location {
621            line: ffi.line + 1,
622            column: ffi.column + 1,
623        });
624        FileLoadError {
625            filename: ffi.filename,
626            message: ffi.message,
627            severity: if ffi.warning {
628                Severity::Warning
629            } else {
630                Severity::Error
631            },
632            location,
633        }
634    }
635}
636
637impl fmt::Display for FileLoadError {
638    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
639        write!(f, "{}:", self.filename)?;
640        if let Some(location) = &self.location {
641            write!(f, "{}:{}:", location.line, location.column)?;
642        }
643        write!(f, " {}: {}", self.severity, self.message)
644    }
645}