Skip to main content

provenant/parsers/
meson.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use crate::parser_warn as warn;
5use packageurl::PackageUrl;
6use serde_json::Value as JsonValue;
7
8use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
9
10use super::PackageParser;
11use super::license_normalization::normalize_spdx_declared_license;
12use super::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
13
14pub struct MesonParser;
15
16impl PackageParser for MesonParser {
17    const PACKAGE_TYPE: PackageType = PackageType::Meson;
18
19    fn is_match(path: &Path) -> bool {
20        path.file_name().is_some_and(|name| name == "meson.build")
21    }
22
23    fn extract_packages(path: &Path) -> Vec<PackageData> {
24        let content = match read_file_to_string(path, None) {
25            Ok(content) => content,
26            Err(error) => {
27                warn!("Failed to read meson.build at {:?}: {}", path, error);
28                return vec![default_package_data()];
29            }
30        };
31
32        match parse_meson_build(&content) {
33            Ok(package) => vec![package],
34            Err(_) => vec![default_package_data()],
35        }
36    }
37}
38
39fn parse_meson_build(content: &str) -> Result<PackageData, String> {
40    let sanitized = strip_comments(content)?;
41    let statements = split_statements(&sanitized);
42
43    let mut package = default_package_data();
44    let mut extra_data = HashMap::new();
45    let mut dependencies = Vec::new();
46    let mut control_flow_depth = 0usize;
47
48    for statement in statements.into_iter().take(MAX_ITERATION_COUNT) {
49        let trimmed = statement.trim();
50        if trimmed.is_empty() {
51            continue;
52        }
53
54        if is_block_closer(trimmed) {
55            control_flow_depth = control_flow_depth.saturating_sub(1);
56            continue;
57        }
58
59        if control_flow_depth > 0 {
60            if is_block_opener(trimmed) {
61                control_flow_depth += 1;
62            }
63            continue;
64        }
65
66        if is_block_opener(trimmed) {
67            control_flow_depth += 1;
68            continue;
69        }
70
71        let Ok(parsed) = parse_statement(trimmed) else {
72            continue;
73        };
74        match parsed {
75            Statement::Expr(expr) | Statement::Assignment(expr) => {
76                handle_top_level_expr(&expr, &mut package, &mut extra_data, &mut dependencies)
77            }
78        }
79    }
80
81    package.dependencies = dependencies;
82    package.extra_data = (!extra_data.is_empty()).then_some(extra_data);
83    package.purl = package
84        .name
85        .as_deref()
86        .and_then(|name| build_project_purl(name, package.version.as_deref()));
87
88    Ok(package)
89}
90
91fn handle_top_level_expr(
92    expr: &Expr,
93    package: &mut PackageData,
94    extra_data: &mut HashMap<String, JsonValue>,
95    dependencies: &mut Vec<Dependency>,
96) {
97    let Expr::Call(call) = expr else {
98        return;
99    };
100
101    match call.name.as_str() {
102        "project" if package.name.is_none() => apply_project_call(call, package, extra_data),
103        "dependency" => dependencies.extend(extract_dependencies_from_call(call)),
104        _ => {}
105    }
106}
107
108fn apply_project_call(
109    call: &CallExpr,
110    package: &mut PackageData,
111    extra_data: &mut HashMap<String, JsonValue>,
112) {
113    let Some(name) = call.positional.first().and_then(expr_as_string) else {
114        return;
115    };
116
117    package.package_type = Some(PackageType::Meson);
118    package.datasource_id = Some(DatasourceId::MesonBuild);
119    package.name = Some(truncate_field(name.to_string()));
120
121    let languages = call
122        .positional
123        .iter()
124        .skip(1)
125        .flat_map(extract_string_values)
126        .collect::<Vec<_>>();
127    if let Some(primary_language) = languages.first() {
128        package.primary_language = Some(truncate_field(primary_language.clone()));
129    }
130    if !languages.is_empty() {
131        extra_data.insert(
132            "languages".to_string(),
133            JsonValue::Array(languages.iter().cloned().map(JsonValue::String).collect()),
134        );
135    }
136
137    if let Some(version) = call.keyword.get("version").and_then(expr_as_string) {
138        package.version = Some(truncate_field(version.to_string()));
139    }
140
141    let licenses = call
142        .keyword
143        .get("license")
144        .map(extract_string_values)
145        .unwrap_or_default();
146    if !licenses.is_empty() {
147        package.extracted_license_statement = Some(truncate_field(licenses.join("\n")));
148        if licenses.len() == 1 {
149            let (declared_license_expression, declared_license_expression_spdx, license_detections) =
150                normalize_spdx_declared_license(licenses.first().map(String::as_str));
151            package.declared_license_expression = declared_license_expression;
152            package.declared_license_expression_spdx = declared_license_expression_spdx;
153            package.license_detections = license_detections;
154        }
155    }
156
157    let license_files = call
158        .keyword
159        .get("license_files")
160        .map(extract_string_values)
161        .unwrap_or_default();
162    if !license_files.is_empty() {
163        extra_data.insert(
164            "license_files".to_string(),
165            JsonValue::Array(license_files.into_iter().map(JsonValue::String).collect()),
166        );
167    }
168
169    if let Some(meson_version) = call.keyword.get("meson_version").and_then(expr_as_string) {
170        extra_data.insert(
171            "meson_version".to_string(),
172            JsonValue::String(meson_version.to_string()),
173        );
174    }
175}
176
177fn extract_dependencies_from_call(call: &CallExpr) -> Vec<Dependency> {
178    let dependency_names = call
179        .positional
180        .iter()
181        .filter_map(expr_as_string)
182        .map(ToOwned::to_owned)
183        .collect::<Vec<_>>();
184
185    if dependency_names.is_empty() {
186        return Vec::new();
187    }
188
189    let extracted_requirement = call.keyword.get("version").map(|expr| {
190        extract_string_values(expr)
191            .into_iter()
192            .filter(|value| !value.is_empty())
193            .collect::<Vec<_>>()
194            .join(", ")
195    });
196    let required = call.keyword.get("required").and_then(expr_as_bool);
197    let native = call.keyword.get("native").and_then(expr_as_bool);
198
199    dependency_names
200        .into_iter()
201        .take(MAX_ITERATION_COUNT)
202        .map(|name| {
203            let mut extra_data = HashMap::new();
204
205            if let Some(requirement) = extracted_requirement
206                .as_ref()
207                .filter(|value| !value.is_empty())
208            {
209                extra_data.insert(
210                    "version".to_string(),
211                    JsonValue::String(requirement.clone()),
212                );
213            }
214            if let Some(required) = required {
215                extra_data.insert("required".to_string(), JsonValue::Bool(required));
216            }
217            if let Some(method) = call.keyword.get("method").and_then(expr_as_string) {
218                extra_data.insert("method".to_string(), JsonValue::String(method.to_string()));
219            }
220            if let Some(native) = native {
221                extra_data.insert("native".to_string(), JsonValue::Bool(native));
222            }
223
224            let modules = call
225                .keyword
226                .get("modules")
227                .map(extract_string_values)
228                .unwrap_or_default();
229            if !modules.is_empty() {
230                extra_data.insert(
231                    "modules".to_string(),
232                    JsonValue::Array(modules.into_iter().map(JsonValue::String).collect()),
233                );
234            }
235
236            let fallback = call
237                .keyword
238                .get("fallback")
239                .map(extract_string_values)
240                .unwrap_or_default();
241            if !fallback.is_empty() {
242                extra_data.insert(
243                    "fallback".to_string(),
244                    JsonValue::Array(fallback.into_iter().map(JsonValue::String).collect()),
245                );
246            }
247
248            Dependency {
249                purl: build_dependency_purl(&name),
250                extracted_requirement: extracted_requirement
251                    .clone()
252                    .filter(|value| !value.is_empty())
253                    .map(truncate_field),
254                scope: Some("dependencies".to_string()),
255                is_runtime: Some(native != Some(true)),
256                is_optional: Some(required == Some(false)),
257                is_pinned: Some(false),
258                is_direct: Some(true),
259                resolved_package: None,
260                extra_data: (!extra_data.is_empty()).then_some(extra_data),
261            }
262        })
263        .collect()
264}
265
266fn build_project_purl(name: &str, version: Option<&str>) -> Option<String> {
267    let mut purl = PackageUrl::new(PackageType::Meson.as_str(), name).ok()?;
268    if let Some(version) = version {
269        purl.with_version(version).ok()?;
270    }
271    Some(truncate_field(purl.to_string()))
272}
273
274fn build_dependency_purl(name: &str) -> Option<String> {
275    let mut purl = PackageUrl::new("generic", name).ok()?;
276    purl.with_namespace("meson").ok()?;
277    Some(truncate_field(purl.to_string()))
278}
279
280fn default_package_data() -> PackageData {
281    PackageData {
282        package_type: Some(PackageType::Meson),
283        datasource_id: Some(DatasourceId::MesonBuild),
284        ..Default::default()
285    }
286}
287
/// Returns `true` when `statement` begins an `if` or `foreach` block.
///
/// The leading keyword is the longest run of identifier characters
/// (ASCII letters, digits, `_`) at the start of the statement, so the keyword
/// is recognised whether it is followed by whitespace or directly by
/// punctuation (`if(cond)`), while identifiers that merely start with the
/// keyword (`if_enabled`, `iffy`) are not mistaken for it.
fn is_block_opener(statement: &str) -> bool {
    let statement = statement.trim_start();
    let keyword_len = statement
        .find(|ch: char| !(ch.is_ascii_alphanumeric() || ch == '_'))
        .unwrap_or(statement.len());
    matches!(&statement[..keyword_len], "if" | "foreach")
}
294
/// Returns `true` when `statement`, ignoring surrounding whitespace, is exactly
/// `endif` or `endforeach`.
fn is_block_closer(statement: &str) -> bool {
    let keyword = statement.trim();
    keyword == "endif" || keyword == "endforeach"
}
298
299fn strip_comments(input: &str) -> Result<String, String> {
300    let chars: Vec<char> = input.chars().collect();
301    let mut output = String::with_capacity(input.len());
302    let mut index = 0usize;
303    let mut in_string = false;
304    let mut string_delimiter = '\0';
305    let mut escaped = false;
306    let mut chars_processed = 0usize;
307
308    while index < chars.len() {
309        chars_processed += 1;
310        if chars_processed > MAX_ITERATION_COUNT {
311            break;
312        }
313        let ch = chars[index];
314
315        if in_string {
316            output.push(ch);
317            if escaped {
318                escaped = false;
319            } else if ch == '\\' {
320                escaped = true;
321            } else if ch == string_delimiter {
322                in_string = false;
323            }
324            index += 1;
325            continue;
326        }
327
328        if matches!(ch, '\'' | '"') {
329            in_string = true;
330            string_delimiter = ch;
331            output.push(ch);
332            index += 1;
333            continue;
334        }
335
336        if ch == '#' {
337            index += 1;
338            while index < chars.len() && chars[index] != '\n' {
339                index += 1;
340            }
341            continue;
342        }
343
344        output.push(ch);
345        index += 1;
346    }
347
348    if in_string {
349        return Err("unterminated string literal".to_string());
350    }
351
352    Ok(output)
353}
354
355fn split_statements(input: &str) -> Vec<String> {
356    let mut statements = Vec::new();
357    let mut current = String::new();
358    let mut paren_depth = 0usize;
359    let mut bracket_depth = 0usize;
360    let mut in_string = false;
361    let mut string_delimiter = '\0';
362    let mut escaped = false;
363    let mut chars_processed = 0usize;
364
365    for ch in input.chars() {
366        chars_processed += 1;
367        if chars_processed > MAX_ITERATION_COUNT {
368            break;
369        }
370        current.push(ch);
371
372        if in_string {
373            if escaped {
374                escaped = false;
375            } else if ch == '\\' {
376                escaped = true;
377            } else if ch == string_delimiter {
378                in_string = false;
379            }
380            continue;
381        }
382
383        match ch {
384            '\'' | '"' => {
385                in_string = true;
386                string_delimiter = ch;
387            }
388            '(' => paren_depth += 1,
389            ')' => paren_depth = paren_depth.saturating_sub(1),
390            '[' => bracket_depth += 1,
391            ']' => bracket_depth = bracket_depth.saturating_sub(1),
392            '\n' if paren_depth == 0 && bracket_depth == 0 => {
393                let trimmed = current.trim();
394                if !trimmed.is_empty() {
395                    statements.push(trimmed.to_string());
396                }
397                current.clear();
398            }
399            _ => {}
400        }
401    }
402
403    let trimmed = current.trim();
404    if !trimmed.is_empty() {
405        statements.push(trimmed.to_string());
406    }
407
408    statements
409}
410
411#[derive(Debug, Clone)]
412enum Statement {
413    Expr(Expr),
414    Assignment(Expr),
415}
416
417#[derive(Debug, Clone)]
418enum Expr {
419    String(String),
420    Bool(bool),
421    Array(Vec<Expr>),
422    Identifier,
423    Call(CallExpr),
424}
425
426#[derive(Debug, Clone)]
427struct CallExpr {
428    name: String,
429    positional: Vec<Expr>,
430    keyword: HashMap<String, Expr>,
431}
432
/// Lexical tokens produced by `tokenize`.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Token {
    /// An identifier other than `true`/`false`.
    Ident(String),
    /// A string literal (contents without the surrounding quotes).
    Str(String),
    /// `true` or `false`.
    Bool(bool),
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `=`
    Equal,
}
446
447fn parse_statement(statement: &str) -> Result<Statement, String> {
448    let tokens = tokenize(statement)?;
449    if tokens.is_empty() {
450        return Err("empty statement".to_string());
451    }
452
453    if let [Token::Ident(name), Token::Equal, rest @ ..] = tokens.as_slice() {
454        let mut parser = Parser::new(rest);
455        parser.depth += 1;
456        let expr = parser.parse_expr()?;
457        parser.depth -= 1;
458        parser.expect_end()?;
459        let _ = name;
460        return Ok(Statement::Assignment(expr));
461    }
462
463    let mut parser = Parser::new(&tokens);
464    parser.depth += 1;
465    let expr = parser.parse_expr()?;
466    parser.depth -= 1;
467    parser.expect_end()?;
468    Ok(Statement::Expr(expr))
469}
470
471fn tokenize(input: &str) -> Result<Vec<Token>, String> {
472    let chars: Vec<char> = input.chars().collect();
473    let mut tokens = Vec::new();
474    let mut index = 0usize;
475
476    while index < chars.len() {
477        if tokens.len() >= MAX_ITERATION_COUNT {
478            break;
479        }
480        let ch = chars[index];
481        if ch.is_whitespace() {
482            index += 1;
483            continue;
484        }
485
486        match ch {
487            '(' => {
488                tokens.push(Token::LParen);
489                index += 1;
490            }
491            ')' => {
492                tokens.push(Token::RParen);
493                index += 1;
494            }
495            '[' => {
496                tokens.push(Token::LBracket);
497                index += 1;
498            }
499            ']' => {
500                tokens.push(Token::RBracket);
501                index += 1;
502            }
503            ':' => {
504                tokens.push(Token::Colon);
505                index += 1;
506            }
507            ',' => {
508                tokens.push(Token::Comma);
509                index += 1;
510            }
511            '=' => {
512                tokens.push(Token::Equal);
513                index += 1;
514            }
515            '\'' | '"' => {
516                let delimiter = ch;
517                index += 1;
518                let start = index;
519                let mut escaped = false;
520                while index < chars.len() {
521                    let current = chars[index];
522                    if escaped {
523                        escaped = false;
524                    } else if current == '\\' {
525                        escaped = true;
526                    } else if current == delimiter {
527                        break;
528                    }
529                    index += 1;
530                }
531
532                if index >= chars.len() {
533                    return Err("unterminated string token".to_string());
534                }
535
536                let value: String = chars[start..index].iter().collect();
537                tokens.push(Token::Str(value));
538                index += 1;
539            }
540            _ if is_ident_start(ch) => {
541                let start = index;
542                index += 1;
543                while index < chars.len() && is_ident_continue(chars[index]) {
544                    index += 1;
545                }
546                let ident: String = chars[start..index].iter().collect();
547                match ident.as_str() {
548                    "true" => tokens.push(Token::Bool(true)),
549                    "false" => tokens.push(Token::Bool(false)),
550                    _ => tokens.push(Token::Ident(ident)),
551                }
552            }
553            _ => {
554                return Err(format!("unsupported token '{}'", ch));
555            }
556        }
557    }
558
559    Ok(tokens)
560}
561
/// Returns `true` for characters that may begin an identifier: ASCII letters
/// and `_`.
fn is_ident_start(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '_')
}
565
/// Returns `true` for characters that may continue an identifier: ASCII
/// letters, ASCII digits and `_`.
fn is_ident_continue(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
569
570const MAX_RECURSION_DEPTH: usize = 50;
571
572struct Parser<'a> {
573    tokens: &'a [Token],
574    index: usize,
575    depth: usize,
576}
577
578impl<'a> Parser<'a> {
579    fn new(tokens: &'a [Token]) -> Self {
580        Self {
581            tokens,
582            index: 0,
583            depth: 0,
584        }
585    }
586
587    fn parse_expr(&mut self) -> Result<Expr, String> {
588        if self.depth > MAX_RECURSION_DEPTH {
589            return Err("recursion depth exceeded".to_string());
590        }
591        match self.peek() {
592            Some(Token::Str(value)) => {
593                self.index += 1;
594                Ok(Expr::String(value.clone()))
595            }
596            Some(Token::Bool(value)) => {
597                self.index += 1;
598                Ok(Expr::Bool(*value))
599            }
600            Some(Token::LBracket) => self.parse_array(),
601            Some(Token::Ident(_)) => self.parse_identifier_or_call(),
602            Some(token) => Err(format!("unexpected token {:?}", token)),
603            None => Err("unexpected end of input".to_string()),
604        }
605    }
606
607    fn parse_array(&mut self) -> Result<Expr, String> {
608        self.expect(Token::LBracket)?;
609        let mut values = Vec::new();
610        let mut element_count = 0usize;
611        while !matches!(self.peek(), Some(Token::RBracket)) {
612            element_count += 1;
613            if element_count > MAX_ITERATION_COUNT {
614                break;
615            }
616            self.depth += 1;
617            let expr = self.parse_expr()?;
618            self.depth -= 1;
619            values.push(expr);
620            if matches!(self.peek(), Some(Token::Comma)) {
621                self.index += 1;
622            } else if !matches!(self.peek(), Some(Token::RBracket)) {
623                return Err("expected ',' or ']' in array".to_string());
624            }
625        }
626        self.expect(Token::RBracket)?;
627        Ok(Expr::Array(values))
628    }
629
630    fn parse_identifier_or_call(&mut self) -> Result<Expr, String> {
631        let Token::Ident(name) = self
632            .next()
633            .cloned()
634            .ok_or_else(|| "expected identifier".to_string())?
635        else {
636            return Err("expected identifier".to_string());
637        };
638
639        if !matches!(self.peek(), Some(Token::LParen)) {
640            let _ = name;
641            return Ok(Expr::Identifier);
642        }
643
644        self.expect(Token::LParen)?;
645        let mut positional = Vec::new();
646        let mut keyword = HashMap::new();
647        let mut arg_count = 0usize;
648
649        while !matches!(self.peek(), Some(Token::RParen)) {
650            arg_count += 1;
651            if arg_count > MAX_ITERATION_COUNT {
652                break;
653            }
654            if let (Some(Token::Ident(arg_name)), Some(Token::Colon)) =
655                (self.peek(), self.peek_n(1))
656            {
657                let arg_name = arg_name.clone();
658                self.index += 2;
659                self.depth += 1;
660                let value = self.parse_expr()?;
661                self.depth -= 1;
662                keyword.insert(arg_name, value);
663            } else {
664                self.depth += 1;
665                let expr = self.parse_expr()?;
666                self.depth -= 1;
667                positional.push(expr);
668            }
669
670            if matches!(self.peek(), Some(Token::Comma)) {
671                self.index += 1;
672            } else if !matches!(self.peek(), Some(Token::RParen)) {
673                return Err("expected ',' or ')' in call".to_string());
674            }
675        }
676
677        self.expect(Token::RParen)?;
678        Ok(Expr::Call(CallExpr {
679            name,
680            positional,
681            keyword,
682        }))
683    }
684
685    fn expect(&mut self, expected: Token) -> Result<(), String> {
686        match self.next() {
687            Some(token) if *token == expected => Ok(()),
688            Some(token) => Err(format!("expected {:?}, found {:?}", expected, token)),
689            None => Err(format!("expected {:?}, found end of input", expected)),
690        }
691    }
692
693    fn expect_end(&self) -> Result<(), String> {
694        if self.index == self.tokens.len() {
695            Ok(())
696        } else {
697            Err(format!(
698                "unexpected trailing tokens: {:?}",
699                &self.tokens[self.index..]
700            ))
701        }
702    }
703
704    fn peek(&self) -> Option<&'a Token> {
705        self.tokens.get(self.index)
706    }
707
708    fn peek_n(&self, offset: usize) -> Option<&'a Token> {
709        self.tokens.get(self.index + offset)
710    }
711
712    fn next(&mut self) -> Option<&'a Token> {
713        let token = self.tokens.get(self.index);
714        if token.is_some() {
715            self.index += 1;
716        }
717        token
718    }
719}
720
721fn expr_as_string(expr: &Expr) -> Option<&str> {
722    match expr {
723        Expr::String(value) => Some(value.as_str()),
724        _ => None,
725    }
726}
727
728fn expr_as_bool(expr: &Expr) -> Option<bool> {
729    match expr {
730        Expr::Bool(value) => Some(*value),
731        _ => None,
732    }
733}
734
735fn extract_string_values(expr: &Expr) -> Vec<String> {
736    match expr {
737        Expr::String(value) => vec![value.clone()],
738        Expr::Array(values) => values
739            .iter()
740            .filter_map(expr_as_string)
741            .map(ToOwned::to_owned)
742            .collect(),
743        _ => Vec::new(),
744    }
745}
746
// Registers this parser via the crate's `register_parser!` macro.
// NOTE(review): the macro is defined elsewhere; the arguments look like a
// description, path glob patterns, a package type name, an (empty) primary
// language and a documentation URL. Confirm against the macro definition.
crate::register_parser!(
    "Meson meson.build manifest",
    &["**/meson.build"],
    "meson",
    "",
    Some("https://mesonbuild.com/Syntax.html"),
);