midl_parser/
lib.rs

1//! Parser for MIDL files
2//!
3//! The parser in this crate is based on a protobuf parser
4//! github.com/stepancheg/rust-protobuf
5//!
6#![deny(missing_docs)]
7#![deny(broken_intra_doc_links)]
8
9use std::{
10    error::Error,
11    fmt, fs, io,
12    path::{Path, PathBuf, StripPrefixError},
13};
14
15use amend_io_error::amend_io_error;
16use linked_hash_map::LinkedHashMap;
17
18mod amend_io_error;
19#[doc(hidden)]
20pub mod linked_hash_map;
21mod model;
22mod parser;
23
24use crate::model::FileDescriptor;
25pub use parser::parse_string;
26
27//#[cfg(test)]
28//mod test_against_protobuf_protos;
// Used by the text format parser and by the pure-Rust codegen parser,
// which is why this is described as a public but hidden module
// (NOTE: it is currently declared `pub(crate)`).
// https://github.com/rust-lang/rust/issues/44663
32#[doc(hidden)]
33pub(crate) mod lexer;
34
35/// Current version of midl parser crate
36pub const MIDL_PARSER_VERSION: &str = env!("CARGO_PKG_VERSION");
37
38#[derive(Debug)]
39struct WithFileError {
40    file: String,
41    error: CodegenError,
42}
43
44#[derive(Debug)]
45enum CodegenError {
46    ParserErrorWithLocation(parser::ParserErrorWithLocation),
47    //ConvertError(convert::ConvertError),
48}
49
50impl fmt::Display for CodegenError {
51    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
52        match self {
53            CodegenError::ParserErrorWithLocation(e) => write!(f, "{}", e),
54            //CodegenError::ConvertError(e) => write!(f, "{}", e),
55        }
56    }
57}
58
59impl From<parser::ParserErrorWithLocation> for CodegenError {
60    fn from(e: parser::ParserErrorWithLocation) -> Self {
61        CodegenError::ParserErrorWithLocation(e)
62    }
63}
64
65/*
66impl From<convert::ConvertError> for CodegenError {
67    fn from(e: convert::ConvertError) -> Self {
68        CodegenError::ConvertError(e)
69    }
70}
71 */
72
73impl fmt::Display for WithFileError {
74    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
75        write!(f, "error in {}: {}", self.file, self.error)
76    }
77}
78
79impl Error for WithFileError {}
80
81struct Run<'a> {
82    parsed_files: LinkedHashMap<PathBuf, FileDescriptor>,
83    includes: &'a [PathBuf],
84}
85
86impl<'a> Run<'a> {
87    fn get_file_and_all_deps_already_parsed(
88        &self,
89        protobuf_path: &Path,
90        result: &mut LinkedHashMap<PathBuf, FileDescriptor>,
91    ) {
92        if result.get(protobuf_path).is_some() {
93            return;
94        }
95
96        let parsed = self
97            .parsed_files
98            .get(protobuf_path)
99            .expect("must be already parsed");
100        result.insert(protobuf_path.to_owned(), parsed.clone());
101
102        self.get_all_deps_already_parsed(parsed, result);
103    }
104
105    fn get_all_deps_already_parsed(
106        &self,
107        parsed: &model::FileDescriptor,
108        result: &mut LinkedHashMap<PathBuf, FileDescriptor>,
109    ) {
110        for import in &parsed.imports {
111            self.get_file_and_all_deps_already_parsed(Path::new(&import.path), result);
112        }
113    }
114
115    fn add_file(&mut self, protobuf_path: &Path, fs_path: &Path) -> io::Result<()> {
116        if self.parsed_files.get(protobuf_path).is_some() {
117            return Ok(());
118        }
119
120        let content = fs::read_to_string(fs_path)
121            .map_err(|e| amend_io_error(e, format!("failed to read {:?}", fs_path)))?;
122
123        self.add_file_content(protobuf_path, fs_path, &content)
124    }
125
126    fn add_file_content(
127        &mut self,
128        protobuf_path: &Path,
129        fs_path: &Path,
130        content: &str,
131    ) -> io::Result<()> {
132        let parsed = model::FileDescriptor::parse(content).map_err(|e| {
133            io::Error::new(
134                io::ErrorKind::Other,
135                WithFileError {
136                    file: format!("{}", fs_path.display()),
137                    error: e.into(),
138                },
139            )
140        })?;
141
142        for import in &parsed.imports {
143            self.add_imported_file(Path::new(&import.path))?;
144        }
145
146        let mut this_file_deps = LinkedHashMap::new();
147        self.get_all_deps_already_parsed(&parsed, &mut this_file_deps);
148
149        self.parsed_files.insert(protobuf_path.to_owned(), parsed);
150
151        Ok(())
152    }
153
154    fn add_imported_file(&mut self, protobuf_path: &Path) -> io::Result<()> {
155        for include_dir in self.includes {
156            let fs_path = include_dir.join(protobuf_path);
157            if fs_path.exists() {
158                return self.add_file(protobuf_path, &fs_path);
159            }
160        }
161        Ok(())
162    }
163
164    fn strip_prefix<'b>(path: &'b Path, prefix: &Path) -> Result<&'b Path, StripPrefixError> {
165        // special handling of `.` to allow successful `strip_prefix("foo.proto", ".")
166        if prefix == Path::new(".") {
167            Ok(path)
168        } else {
169            path.strip_prefix(prefix)
170        }
171    }
172
173    fn add_fs_file(&mut self, fs_path: &Path) -> io::Result<PathBuf> {
174        let relative_path = self
175            .includes
176            .iter()
177            .filter_map(|include_dir| Self::strip_prefix(fs_path, include_dir).ok())
178            .next();
179
180        match relative_path {
181            Some(relative_path) => {
182                assert!(relative_path.is_relative());
183                self.add_file(relative_path, fs_path)?;
184                Ok(relative_path.to_owned())
185            }
186            None => Err(io::Error::new(
187                io::ErrorKind::Other,
188                format!(
189                    "file {:?} must reside in include path {:?}",
190                    fs_path, self.includes
191                ),
192            )),
193        }
194    }
195}
196
197/// Validated model from parser
198pub struct ParsedAndTypechecked {
199    /// Paths loaded
200    pub relative_paths: Vec<PathBuf>,
201    /// Schemas read
202    pub parsed_files: LinkedHashMap<PathBuf, FileDescriptor>,
203}
204
205/// Parse and validate input, and generate model schema
206pub fn parse_and_typecheck(
207    includes: &[PathBuf],
208    input: &[PathBuf],
209) -> io::Result<ParsedAndTypechecked> {
210    let mut run = Run {
211        parsed_files: LinkedHashMap::new(),
212        includes,
213    };
214
215    let mut relative_paths = Vec::new();
216
217    for input in input {
218        println!("adding input file {}", input.display());
219        relative_paths.push(run.add_fs_file(input)?);
220    }
221
222    Ok(ParsedAndTypechecked {
223        relative_paths,
224        parsed_files: run.parsed_files,
225    })
226}
227
/// A field occurrence: how many times a field may appear.
///
/// Moved here from the model, since it is now only needed while parsing.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Occurrence {
    /// Zero or one: a well-formed message has at most one of this field.
    Optional,
    /// Any number, including zero: the field may be repeated in a well-formed
    /// message, and the order of the repeated values is preserved.
    Repeated,
    /// Exactly one: a well-formed message must contain this field once.
    Required,
}
240
#[cfg(test)]
mod tests {
    use crate::{model::FileDescriptor, parser::Parser};
    use frodobuf_schema::model::{Constant, Field, FieldType, HasAttributes, Message};

    /// Parses `input` as one complete file and checks that the whole input
    /// was consumed. Panics, reporting the tokenizer location, on failure.
    fn parse(input: &str) -> FileDescriptor {
        let mut parser = Parser::new(input);
        // The failure message is built lazily, only when parsing fails.
        let descriptor = parser
            .next_proto()
            .unwrap_or_else(|e| panic!("parse failed at {}: {:?}", parser.tokenizer.loc(), e));
        let at_eof = parser
            .tokenizer
            .syntax_eof()
            .unwrap_or_else(|e| panic!("check eof failed at {}: {:?}", parser.tokenizer.loc(), e));
        assert!(at_eof, "{}", parser.tokenizer.loc());
        descriptor
    }

    /// A message with a single field parses to exactly one message.
    #[test]
    fn simple_message() -> Result<(), Box<dyn std::error::Error>> {
        let proto = r#"package t;
        message A {
                string b = 1;
        }
        "#;
        let parsed = parse(proto);
        assert_eq!(parsed.schema.messages.len(), 1);
        Ok(())
    }

    /// A message nested in another still yields one top-level message.
    #[test]
    fn nested_message() -> Result<(), Box<dyn std::error::Error>> {
        let proto = r#"package t;
        message A
        {
            message B {
                repeated int32 a = 1;
                optional string b = 2;
            }
            optional string b = 1;
        }"#;
        let parsed = parse(proto);
        assert_eq!(parsed.schema.messages.len(), 1);
        Ok(())
    }

    /// Returns the `n`th field of `message` (zero-based); panics if out of range.
    fn get_field(message: &Message, n: usize) -> &Field {
        &message.fields[n]
    }

    /// Every supported field type keyword maps to the expected `FieldType`.
    #[test]
    fn data_types() -> Result<(), Box<dyn std::error::Error>> {
        let proto = r#"package t;
        message A {
            int32 a = 1;
            uint32 b = 2;
            int64 c = 3;
            uint64 d = 4;
            int8 e = 5;
            uint8 f = 6;
            float32 g = 7;
            float64 h = 8;
            float i = 9; // alias for float32
            double j = 10; // alias for float64
            bool k = 11;
            string x = 12;
            bytes bb = 13;
            map<uint32,bytes> y = 14;
            [bool] ff = 15;
        }
        "#;
        let parsed = parse(proto);
        let message = &parsed.schema.messages[0];

        // Expected types of the scalar fields, in declaration order.
        let expected_scalars = [
            FieldType::Int32,
            FieldType::Uint32,
            FieldType::Int64,
            FieldType::Uint64,
            FieldType::Int8,
            FieldType::Uint8,
            FieldType::Float32,
            FieldType::Float64,
            FieldType::Float32, // `float`
            FieldType::Float64, // `double`
            FieldType::Bool,
            FieldType::String,
            FieldType::Bytes,
        ];
        for (index, expected) in expected_scalars.iter().enumerate() {
            assert_eq!(&message.fields[index].typ, expected, "field #{}", index);
        }

        match &get_field(message, 13).typ {
            FieldType::Map(kv) => {
                assert_eq!(kv.as_ref(), &(FieldType::Uint32, FieldType::Bytes))
            }
            other => panic!("not a map: {:?}", other),
        }

        match &get_field(message, 14).typ {
            FieldType::Array(item) => assert_eq!(item.as_ref(), &FieldType::Bool),
            other => panic!("not an array: {:?}", other),
        }

        Ok(())
    }

    /// A file-level `option` becomes a schema attribute.
    #[test]
    fn proto_options() -> Result<(), Box<dyn std::error::Error>> {
        let proto = r#"package t;
        option proto_foo = 99;
        message A {
                string b = 1;
        }
        "#;
        let parsed = parse(proto);
        assert_eq!(
            parsed.schema.attributes.len(),
            2usize, // there's one more for the _source
            "proto options count"
        );
        let opt = parsed.schema.attributes.get(0).unwrap();
        assert_eq!(opt.values.len(), 1usize, "proto options kv count");
        let kv = &opt.values[0];
        assert_eq!(kv.0, "proto_foo");
        assert_eq!(kv.1, Constant::U64(99));
        Ok(())
    }

    /// A field keeps its type, name and number.
    #[test]
    fn message_field() -> Result<(), Box<dyn std::error::Error>> {
        let proto = r#"package t;
        message A {
                string b = 1;
        }
        "#;
        let parsed = parse(proto);

        let message = &parsed.schema.messages[0];
        let field = &message.fields[0];
        assert_eq!(
            (&field.typ, field.name.as_str(), field.number),
            (&FieldType::String, "b", 1)
        );

        Ok(())
    }

    /// An attribute with a `key = value` pair attaches to the following field.
    #[test]
    fn field_attribute() -> Result<(), Box<dyn std::error::Error>> {
        let proto = r#"package t;
        message A {
                @msg_foo(value = 99);
                string b = 1;
        }
        "#;
        let parsed = parse(proto);

        let message = &parsed.schema.messages[0];
        let field = &message.fields[0];

        let attr = field.get_attribute("msg_foo").unwrap();
        assert_eq!(attr.values.len(), 1usize, "one value");
        assert_eq!(attr.values[0], ("value".to_string(), Constant::U64(99)));

        Ok(())
    }

    /// A bare `@name` attribute on a field has no values.
    #[test]
    fn field_attr_ident() -> Result<(), Box<dyn std::error::Error>> {
        let proto = r#"package t;
        message A {
                @a1
                string b = 1;
        }
        "#;
        let parsed = parse(proto);
        let msg = &parsed.schema.messages[0];
        let field = &msg.fields[0];

        let attr = field.get_attribute("a1").unwrap();
        assert!(attr.values.is_empty());
        Ok(())
    }

    /// A bare `@name` attribute on a message has no values.
    #[test]
    fn message_attr_ident() -> Result<(), Box<dyn std::error::Error>> {
        let proto = r#"package t;
        @a1
        message A {
                string b = 1;
        }
        "#;
        let parsed = parse(proto);
        let msg = &parsed.schema.messages[0];

        let attr = msg.get_attribute("a1").unwrap();
        assert!(attr.values.is_empty());
        Ok(())
    }

    /// In an attribute value list, a bare key means `true` and `k = v` keeps `v`.
    #[test]
    fn message_attr_values() -> Result<(), Box<dyn std::error::Error>> {
        let proto = r#"package t;
        @a1(k1,k2=100)
        message A {
                string b = 1;
        }
        "#;
        let parsed = parse(proto);

        let msg = &parsed.schema.messages[0];

        let attr = msg.get_attribute("a1").unwrap();
        assert_eq!(attr.values.len(), 2usize);
        assert_eq!(attr.values[0], ("k1".to_string(), Constant::Bool(true)));
        assert_eq!(attr.values[1], ("k2".to_string(), Constant::U64(100)));

        Ok(())
    }

    /// `name?` and `optional` mark a field optional; plain and `required` do not.
    #[test]
    fn optional_field() -> Result<(), Box<dyn std::error::Error>> {
        let proto = r#"package t;
        message A {
                string b? = 1;
                optional string y = 2;
                string x = 3;
                required string z = 4;
        }
        "#;
        let parsed = parse(proto);
        let msg = &parsed.schema.messages[0];
        // optional
        for i in 0..=1 {
            assert!(msg.fields[i].optional, "field #{} should be optional", i);
        }
        // required
        for i in 2..=3 {
            assert!(!msg.fields[i].optional, "field #{} should be required", i);
        }
        Ok(())
    }
}