// Source: provenant/parsers/meson.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashMap;
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use packageurl::PackageUrl;
9use serde_json::Value as JsonValue;
10
11use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
12
13use super::PackageParser;
14use super::license_normalization::normalize_spdx_declared_license;
15use super::metadata::ParserMetadata;
16use super::utils::{MAX_ITERATION_COUNT, RecursionGuard, read_file_to_string, truncate_field};
17
18pub struct MesonParser;
19
20impl PackageParser for MesonParser {
21    const PACKAGE_TYPE: PackageType = PackageType::Meson;
22
23    fn is_match(path: &Path) -> bool {
24        path.file_name().is_some_and(|name| name == "meson.build")
25    }
26
27    fn metadata() -> Vec<ParserMetadata> {
28        vec![ParserMetadata {
29            description: "Meson meson.build manifest",
30            file_patterns: &["**/meson.build"],
31            package_type: "meson",
32            primary_language: "",
33            documentation_url: Some("https://mesonbuild.com/Syntax.html"),
34        }]
35    }
36
37    fn extract_packages(path: &Path) -> Vec<PackageData> {
38        let content = match read_file_to_string(path, None) {
39            Ok(content) => content,
40            Err(error) => {
41                warn!("Failed to read meson.build at {:?}: {}", path, error);
42                return vec![default_package_data()];
43            }
44        };
45
46        match parse_meson_build(&content) {
47            Ok(package) => vec![package],
48            Err(_) => vec![default_package_data()],
49        }
50    }
51}
52
53fn parse_meson_build(content: &str) -> Result<PackageData, String> {
54    let sanitized = strip_comments(content)?;
55    let statements = split_statements(&sanitized);
56
57    let mut package = default_package_data();
58    let mut extra_data = HashMap::new();
59    let mut dependencies = Vec::new();
60    let mut control_flow_depth = 0usize;
61
62    for statement in statements.into_iter().take(MAX_ITERATION_COUNT) {
63        let trimmed = statement.trim();
64        if trimmed.is_empty() {
65            continue;
66        }
67
68        if is_block_closer(trimmed) {
69            control_flow_depth = control_flow_depth.saturating_sub(1);
70            continue;
71        }
72
73        if control_flow_depth > 0 {
74            if is_block_opener(trimmed) {
75                control_flow_depth += 1;
76            }
77            continue;
78        }
79
80        if is_block_opener(trimmed) {
81            control_flow_depth += 1;
82            continue;
83        }
84
85        let Ok(parsed) = parse_statement(trimmed) else {
86            continue;
87        };
88        match parsed {
89            Statement::Expr(expr) | Statement::Assignment(expr) => {
90                handle_top_level_expr(&expr, &mut package, &mut extra_data, &mut dependencies)
91            }
92        }
93    }
94
95    package.dependencies = dependencies;
96    package.extra_data = (!extra_data.is_empty()).then_some(extra_data);
97    package.purl = package
98        .name
99        .as_deref()
100        .and_then(|name| build_project_purl(name, package.version.as_deref()));
101
102    Ok(package)
103}
104
105fn handle_top_level_expr(
106    expr: &Expr,
107    package: &mut PackageData,
108    extra_data: &mut HashMap<String, JsonValue>,
109    dependencies: &mut Vec<Dependency>,
110) {
111    let Expr::Call(call) = expr else {
112        return;
113    };
114
115    match call.name.as_str() {
116        "project" if package.name.is_none() => apply_project_call(call, package, extra_data),
117        "dependency" => dependencies.extend(extract_dependencies_from_call(call)),
118        _ => {}
119    }
120}
121
122fn apply_project_call(
123    call: &CallExpr,
124    package: &mut PackageData,
125    extra_data: &mut HashMap<String, JsonValue>,
126) {
127    let Some(name) = call.positional.first().and_then(expr_as_string) else {
128        return;
129    };
130
131    package.package_type = Some(PackageType::Meson);
132    package.datasource_id = Some(DatasourceId::MesonBuild);
133    package.name = Some(truncate_field(name.to_string()));
134
135    let languages = call
136        .positional
137        .iter()
138        .skip(1)
139        .flat_map(extract_string_values)
140        .collect::<Vec<_>>();
141    if let Some(primary_language) = languages.first() {
142        package.primary_language = Some(truncate_field(primary_language.clone()));
143    }
144    if !languages.is_empty() {
145        extra_data.insert(
146            "languages".to_string(),
147            JsonValue::Array(languages.iter().cloned().map(JsonValue::String).collect()),
148        );
149    }
150
151    if let Some(version) = call.keyword.get("version").and_then(expr_as_string) {
152        package.version = Some(truncate_field(version.to_string()));
153    }
154
155    let licenses = call
156        .keyword
157        .get("license")
158        .map(extract_string_values)
159        .unwrap_or_default();
160    if !licenses.is_empty() {
161        package.extracted_license_statement = Some(truncate_field(licenses.join("\n")));
162        if licenses.len() == 1 {
163            let (declared_license_expression, declared_license_expression_spdx, license_detections) =
164                normalize_spdx_declared_license(licenses.first().map(String::as_str));
165            package.declared_license_expression = declared_license_expression;
166            package.declared_license_expression_spdx = declared_license_expression_spdx;
167            package.license_detections = license_detections;
168        }
169    }
170
171    let license_files = call
172        .keyword
173        .get("license_files")
174        .map(extract_string_values)
175        .unwrap_or_default();
176    if !license_files.is_empty() {
177        extra_data.insert(
178            "license_files".to_string(),
179            JsonValue::Array(license_files.into_iter().map(JsonValue::String).collect()),
180        );
181    }
182
183    if let Some(meson_version) = call.keyword.get("meson_version").and_then(expr_as_string) {
184        extra_data.insert(
185            "meson_version".to_string(),
186            JsonValue::String(meson_version.to_string()),
187        );
188    }
189}
190
191fn extract_dependencies_from_call(call: &CallExpr) -> Vec<Dependency> {
192    let dependency_names = call
193        .positional
194        .iter()
195        .filter_map(expr_as_string)
196        .map(ToOwned::to_owned)
197        .collect::<Vec<_>>();
198
199    if dependency_names.is_empty() {
200        return Vec::new();
201    }
202
203    let extracted_requirement = call.keyword.get("version").map(|expr| {
204        extract_string_values(expr)
205            .into_iter()
206            .filter(|value| !value.is_empty())
207            .collect::<Vec<_>>()
208            .join(", ")
209    });
210    let required = call.keyword.get("required").and_then(expr_as_bool);
211    let native = call.keyword.get("native").and_then(expr_as_bool);
212
213    dependency_names
214        .into_iter()
215        .take(MAX_ITERATION_COUNT)
216        .map(|name| {
217            let mut extra_data = HashMap::new();
218
219            if let Some(requirement) = extracted_requirement
220                .as_ref()
221                .filter(|value| !value.is_empty())
222            {
223                extra_data.insert(
224                    "version".to_string(),
225                    JsonValue::String(requirement.clone()),
226                );
227            }
228            if let Some(required) = required {
229                extra_data.insert("required".to_string(), JsonValue::Bool(required));
230            }
231            if let Some(method) = call.keyword.get("method").and_then(expr_as_string) {
232                extra_data.insert("method".to_string(), JsonValue::String(method.to_string()));
233            }
234            if let Some(native) = native {
235                extra_data.insert("native".to_string(), JsonValue::Bool(native));
236            }
237
238            let modules = call
239                .keyword
240                .get("modules")
241                .map(extract_string_values)
242                .unwrap_or_default();
243            if !modules.is_empty() {
244                extra_data.insert(
245                    "modules".to_string(),
246                    JsonValue::Array(modules.into_iter().map(JsonValue::String).collect()),
247                );
248            }
249
250            let fallback = call
251                .keyword
252                .get("fallback")
253                .map(extract_string_values)
254                .unwrap_or_default();
255            if !fallback.is_empty() {
256                extra_data.insert(
257                    "fallback".to_string(),
258                    JsonValue::Array(fallback.into_iter().map(JsonValue::String).collect()),
259                );
260            }
261
262            Dependency {
263                purl: build_dependency_purl(&name),
264                extracted_requirement: extracted_requirement
265                    .clone()
266                    .filter(|value| !value.is_empty())
267                    .map(truncate_field),
268                scope: Some("dependencies".to_string()),
269                is_runtime: Some(native != Some(true)),
270                is_optional: Some(required == Some(false)),
271                is_pinned: Some(false),
272                is_direct: Some(true),
273                resolved_package: None,
274                extra_data: (!extra_data.is_empty()).then_some(extra_data),
275            }
276        })
277        .collect()
278}
279
280fn build_project_purl(name: &str, version: Option<&str>) -> Option<String> {
281    let mut purl = PackageUrl::new(PackageType::Meson.as_str(), name).ok()?;
282    if let Some(version) = version {
283        purl.with_version(version).ok()?;
284    }
285    Some(truncate_field(purl.to_string()))
286}
287
288fn build_dependency_purl(name: &str) -> Option<String> {
289    let mut purl = PackageUrl::new("generic", name).ok()?;
290    purl.with_namespace("meson").ok()?;
291    Some(truncate_field(purl.to_string()))
292}
293
294fn default_package_data() -> PackageData {
295    PackageData {
296        package_type: Some(PackageType::Meson),
297        datasource_id: Some(DatasourceId::MesonBuild),
298        ..Default::default()
299    }
300}
301
/// Returns `true` when `statement` starts an `if` or `foreach` block.
///
/// The keyword must be the first thing in the statement and must not be
/// followed by an identifier character. This accepts `if(cond)` (no space
/// after the keyword) and still rejects identifiers such as `if_found` or
/// `iffy`.
fn is_block_opener(statement: &str) -> bool {
    let statement = statement.trim_start();
    ["if", "foreach"].into_iter().any(|keyword| {
        statement.strip_prefix(keyword).is_some_and(|rest| {
            !rest.starts_with(|next: char| next.is_ascii_alphanumeric() || next == '_')
        })
    })
}
308
/// Returns `true` when `statement`, ignoring surrounding whitespace, is exactly
/// `endif` or `endforeach`.
fn is_block_closer(statement: &str) -> bool {
    let keyword = statement.trim();
    keyword == "endif" || keyword == "endforeach"
}
312
313fn strip_comments(input: &str) -> Result<String, String> {
314    let chars: Vec<char> = input.chars().collect();
315    let mut output = String::with_capacity(input.len());
316    let mut index = 0usize;
317    let mut in_string = false;
318    let mut string_delimiter = '\0';
319    let mut escaped = false;
320    let mut chars_processed = 0usize;
321
322    while index < chars.len() {
323        chars_processed += 1;
324        if chars_processed > MAX_ITERATION_COUNT {
325            break;
326        }
327        let ch = chars[index];
328
329        if in_string {
330            output.push(ch);
331            if escaped {
332                escaped = false;
333            } else if ch == '\\' {
334                escaped = true;
335            } else if ch == string_delimiter {
336                in_string = false;
337            }
338            index += 1;
339            continue;
340        }
341
342        if matches!(ch, '\'' | '"') {
343            in_string = true;
344            string_delimiter = ch;
345            output.push(ch);
346            index += 1;
347            continue;
348        }
349
350        if ch == '#' {
351            index += 1;
352            while index < chars.len() && chars[index] != '\n' {
353                index += 1;
354            }
355            continue;
356        }
357
358        output.push(ch);
359        index += 1;
360    }
361
362    if in_string {
363        return Err("unterminated string literal".to_string());
364    }
365
366    Ok(output)
367}
368
369fn split_statements(input: &str) -> Vec<String> {
370    let mut statements = Vec::new();
371    let mut current = String::new();
372    let mut paren_depth = 0usize;
373    let mut bracket_depth = 0usize;
374    let mut in_string = false;
375    let mut string_delimiter = '\0';
376    let mut escaped = false;
377    let mut chars_processed = 0usize;
378
379    for ch in input.chars() {
380        chars_processed += 1;
381        if chars_processed > MAX_ITERATION_COUNT {
382            break;
383        }
384        current.push(ch);
385
386        if in_string {
387            if escaped {
388                escaped = false;
389            } else if ch == '\\' {
390                escaped = true;
391            } else if ch == string_delimiter {
392                in_string = false;
393            }
394            continue;
395        }
396
397        match ch {
398            '\'' | '"' => {
399                in_string = true;
400                string_delimiter = ch;
401            }
402            '(' => paren_depth += 1,
403            ')' => paren_depth = paren_depth.saturating_sub(1),
404            '[' => bracket_depth += 1,
405            ']' => bracket_depth = bracket_depth.saturating_sub(1),
406            '\n' if paren_depth == 0 && bracket_depth == 0 => {
407                let trimmed = current.trim();
408                if !trimmed.is_empty() {
409                    statements.push(trimmed.to_string());
410                }
411                current.clear();
412            }
413            _ => {}
414        }
415    }
416
417    let trimmed = current.trim();
418    if !trimmed.is_empty() {
419        statements.push(trimmed.to_string());
420    }
421
422    statements
423}
424
425#[derive(Debug, Clone)]
426enum Statement {
427    Expr(Expr),
428    Assignment(Expr),
429}
430
431#[derive(Debug, Clone)]
432enum Expr {
433    String(String),
434    Bool(bool),
435    Array(Vec<Expr>),
436    Identifier,
437    Call(CallExpr),
438}
439
440#[derive(Debug, Clone)]
441struct CallExpr {
442    name: String,
443    positional: Vec<Expr>,
444    keyword: HashMap<String, Expr>,
445}
446
/// A lexical token produced by `tokenize`.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Token {
    /// An identifier other than `true`/`false`.
    Ident(String),
    /// A string literal, holding the raw text between the quotes.
    Str(String),
    /// The keyword `true` or `false`.
    Bool(bool),
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `=`
    Equal,
}
460
461fn parse_statement(statement: &str) -> Result<Statement, String> {
462    let tokens = tokenize(statement)?;
463    if tokens.is_empty() {
464        return Err("empty statement".to_string());
465    }
466
467    if let [Token::Ident(name), Token::Equal, rest @ ..] = tokens.as_slice() {
468        let mut parser = Parser::new(rest);
469        let expr = parser.parse_expr()?;
470        parser.expect_end()?;
471        let _ = name;
472        return Ok(Statement::Assignment(expr));
473    }
474
475    let mut parser = Parser::new(&tokens);
476    let expr = parser.parse_expr()?;
477    parser.expect_end()?;
478    Ok(Statement::Expr(expr))
479}
480
481fn tokenize(input: &str) -> Result<Vec<Token>, String> {
482    let chars: Vec<char> = input.chars().collect();
483    let mut tokens = Vec::new();
484    let mut index = 0usize;
485
486    while index < chars.len() {
487        if tokens.len() >= MAX_ITERATION_COUNT {
488            break;
489        }
490        let ch = chars[index];
491        if ch.is_whitespace() {
492            index += 1;
493            continue;
494        }
495
496        match ch {
497            '(' => {
498                tokens.push(Token::LParen);
499                index += 1;
500            }
501            ')' => {
502                tokens.push(Token::RParen);
503                index += 1;
504            }
505            '[' => {
506                tokens.push(Token::LBracket);
507                index += 1;
508            }
509            ']' => {
510                tokens.push(Token::RBracket);
511                index += 1;
512            }
513            ':' => {
514                tokens.push(Token::Colon);
515                index += 1;
516            }
517            ',' => {
518                tokens.push(Token::Comma);
519                index += 1;
520            }
521            '=' => {
522                tokens.push(Token::Equal);
523                index += 1;
524            }
525            '\'' | '"' => {
526                let delimiter = ch;
527                index += 1;
528                let start = index;
529                let mut escaped = false;
530                while index < chars.len() {
531                    let current = chars[index];
532                    if escaped {
533                        escaped = false;
534                    } else if current == '\\' {
535                        escaped = true;
536                    } else if current == delimiter {
537                        break;
538                    }
539                    index += 1;
540                }
541
542                if index >= chars.len() {
543                    return Err("unterminated string token".to_string());
544                }
545
546                let value: String = chars[start..index].iter().collect();
547                tokens.push(Token::Str(value));
548                index += 1;
549            }
550            _ if is_ident_start(ch) => {
551                let start = index;
552                index += 1;
553                while index < chars.len() && is_ident_continue(chars[index]) {
554                    index += 1;
555                }
556                let ident: String = chars[start..index].iter().collect();
557                match ident.as_str() {
558                    "true" => tokens.push(Token::Bool(true)),
559                    "false" => tokens.push(Token::Bool(false)),
560                    _ => tokens.push(Token::Ident(ident)),
561                }
562            }
563            _ => {
564                return Err(format!("unsupported token '{}'", ch));
565            }
566        }
567    }
568
569    Ok(tokens)
570}
571
/// Returns `true` when `ch` may begin an identifier: an ASCII letter or `_`.
fn is_ident_start(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '_')
}
575
/// Returns `true` when `ch` may appear after the first character of an
/// identifier: an ASCII letter, an ASCII digit, or `_`.
fn is_ident_continue(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
579
580struct Parser<'a> {
581    tokens: &'a [Token],
582    index: usize,
583    guard: RecursionGuard<()>,
584}
585
586impl<'a> Parser<'a> {
587    fn new(tokens: &'a [Token]) -> Self {
588        Self {
589            tokens,
590            index: 0,
591            guard: RecursionGuard::depth_only(),
592        }
593    }
594
595    fn parse_expr(&mut self) -> Result<Expr, String> {
596        if self.guard.descend() {
597            return Err("recursion depth exceeded".to_string());
598        }
599        let result = match self.peek() {
600            Some(Token::Str(value)) => {
601                self.index += 1;
602                Ok(Expr::String(value.clone()))
603            }
604            Some(Token::Bool(value)) => {
605                self.index += 1;
606                Ok(Expr::Bool(*value))
607            }
608            Some(Token::LBracket) => self.parse_array(),
609            Some(Token::Ident(_)) => self.parse_identifier_or_call(),
610            Some(token) => Err(format!("unexpected token {:?}", token)),
611            None => Err("unexpected end of input".to_string()),
612        };
613        self.guard.ascend();
614        result
615    }
616
617    fn parse_array(&mut self) -> Result<Expr, String> {
618        self.expect(Token::LBracket)?;
619        let mut values = Vec::new();
620        let mut element_count = 0usize;
621        while !matches!(self.peek(), Some(Token::RBracket)) {
622            element_count += 1;
623            if element_count > MAX_ITERATION_COUNT {
624                break;
625            }
626            let expr = self.parse_expr()?;
627            values.push(expr);
628            if matches!(self.peek(), Some(Token::Comma)) {
629                self.index += 1;
630            } else if !matches!(self.peek(), Some(Token::RBracket)) {
631                return Err("expected ',' or ']' in array".to_string());
632            }
633        }
634        self.expect(Token::RBracket)?;
635        Ok(Expr::Array(values))
636    }
637
638    fn parse_identifier_or_call(&mut self) -> Result<Expr, String> {
639        let Token::Ident(name) = self
640            .next()
641            .cloned()
642            .ok_or_else(|| "expected identifier".to_string())?
643        else {
644            return Err("expected identifier".to_string());
645        };
646
647        if !matches!(self.peek(), Some(Token::LParen)) {
648            let _ = name;
649            return Ok(Expr::Identifier);
650        }
651
652        self.expect(Token::LParen)?;
653        let mut positional = Vec::new();
654        let mut keyword = HashMap::new();
655        let mut arg_count = 0usize;
656
657        while !matches!(self.peek(), Some(Token::RParen)) {
658            arg_count += 1;
659            if arg_count > MAX_ITERATION_COUNT {
660                break;
661            }
662            if let (Some(Token::Ident(arg_name)), Some(Token::Colon)) =
663                (self.peek(), self.peek_n(1))
664            {
665                let arg_name = arg_name.clone();
666                self.index += 2;
667                let value = self.parse_expr()?;
668                keyword.insert(arg_name, value);
669            } else {
670                let expr = self.parse_expr()?;
671                positional.push(expr);
672            }
673
674            if matches!(self.peek(), Some(Token::Comma)) {
675                self.index += 1;
676            } else if !matches!(self.peek(), Some(Token::RParen)) {
677                return Err("expected ',' or ')' in call".to_string());
678            }
679        }
680
681        self.expect(Token::RParen)?;
682        Ok(Expr::Call(CallExpr {
683            name,
684            positional,
685            keyword,
686        }))
687    }
688
689    fn expect(&mut self, expected: Token) -> Result<(), String> {
690        match self.next() {
691            Some(token) if *token == expected => Ok(()),
692            Some(token) => Err(format!("expected {:?}, found {:?}", expected, token)),
693            None => Err(format!("expected {:?}, found end of input", expected)),
694        }
695    }
696
697    fn expect_end(&self) -> Result<(), String> {
698        if self.index == self.tokens.len() {
699            Ok(())
700        } else {
701            Err(format!(
702                "unexpected trailing tokens: {:?}",
703                &self.tokens[self.index..]
704            ))
705        }
706    }
707
708    fn peek(&self) -> Option<&'a Token> {
709        self.tokens.get(self.index)
710    }
711
712    fn peek_n(&self, offset: usize) -> Option<&'a Token> {
713        self.tokens.get(self.index + offset)
714    }
715
716    fn next(&mut self) -> Option<&'a Token> {
717        let token = self.tokens.get(self.index);
718        if token.is_some() {
719            self.index += 1;
720        }
721        token
722    }
723}
724
725fn expr_as_string(expr: &Expr) -> Option<&str> {
726    match expr {
727        Expr::String(value) => Some(value.as_str()),
728        _ => None,
729    }
730}
731
732fn expr_as_bool(expr: &Expr) -> Option<bool> {
733    match expr {
734        Expr::Bool(value) => Some(*value),
735        _ => None,
736    }
737}
738
739fn extract_string_values(expr: &Expr) -> Vec<String> {
740    match expr {
741        Expr::String(value) => vec![value.clone()],
742        Expr::Array(values) => values
743            .iter()
744            .filter_map(expr_as_string)
745            .map(ToOwned::to_owned)
746            .collect(),
747        _ => Vec::new(),
748    }
749}