Skip to main content

provenant/parsers/
meson.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashMap;
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use packageurl::PackageUrl;
9use serde_json::Value as JsonValue;
10
11use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
12
13use super::PackageParser;
14use super::license_normalization::normalize_spdx_declared_license;
15use super::utils::{MAX_ITERATION_COUNT, RecursionGuard, read_file_to_string, truncate_field};
16
/// Parser for Meson `meson.build` manifests.
///
/// Carries no state; all behaviour lives in its `PackageParser` implementation.
pub struct MesonParser;
18
19impl PackageParser for MesonParser {
20    const PACKAGE_TYPE: PackageType = PackageType::Meson;
21
22    fn is_match(path: &Path) -> bool {
23        path.file_name().is_some_and(|name| name == "meson.build")
24    }
25
26    fn extract_packages(path: &Path) -> Vec<PackageData> {
27        let content = match read_file_to_string(path, None) {
28            Ok(content) => content,
29            Err(error) => {
30                warn!("Failed to read meson.build at {:?}: {}", path, error);
31                return vec![default_package_data()];
32            }
33        };
34
35        match parse_meson_build(&content) {
36            Ok(package) => vec![package],
37            Err(_) => vec![default_package_data()],
38        }
39    }
40}
41
42fn parse_meson_build(content: &str) -> Result<PackageData, String> {
43    let sanitized = strip_comments(content)?;
44    let statements = split_statements(&sanitized);
45
46    let mut package = default_package_data();
47    let mut extra_data = HashMap::new();
48    let mut dependencies = Vec::new();
49    let mut control_flow_depth = 0usize;
50
51    for statement in statements.into_iter().take(MAX_ITERATION_COUNT) {
52        let trimmed = statement.trim();
53        if trimmed.is_empty() {
54            continue;
55        }
56
57        if is_block_closer(trimmed) {
58            control_flow_depth = control_flow_depth.saturating_sub(1);
59            continue;
60        }
61
62        if control_flow_depth > 0 {
63            if is_block_opener(trimmed) {
64                control_flow_depth += 1;
65            }
66            continue;
67        }
68
69        if is_block_opener(trimmed) {
70            control_flow_depth += 1;
71            continue;
72        }
73
74        let Ok(parsed) = parse_statement(trimmed) else {
75            continue;
76        };
77        match parsed {
78            Statement::Expr(expr) | Statement::Assignment(expr) => {
79                handle_top_level_expr(&expr, &mut package, &mut extra_data, &mut dependencies)
80            }
81        }
82    }
83
84    package.dependencies = dependencies;
85    package.extra_data = (!extra_data.is_empty()).then_some(extra_data);
86    package.purl = package
87        .name
88        .as_deref()
89        .and_then(|name| build_project_purl(name, package.version.as_deref()));
90
91    Ok(package)
92}
93
94fn handle_top_level_expr(
95    expr: &Expr,
96    package: &mut PackageData,
97    extra_data: &mut HashMap<String, JsonValue>,
98    dependencies: &mut Vec<Dependency>,
99) {
100    let Expr::Call(call) = expr else {
101        return;
102    };
103
104    match call.name.as_str() {
105        "project" if package.name.is_none() => apply_project_call(call, package, extra_data),
106        "dependency" => dependencies.extend(extract_dependencies_from_call(call)),
107        _ => {}
108    }
109}
110
111fn apply_project_call(
112    call: &CallExpr,
113    package: &mut PackageData,
114    extra_data: &mut HashMap<String, JsonValue>,
115) {
116    let Some(name) = call.positional.first().and_then(expr_as_string) else {
117        return;
118    };
119
120    package.package_type = Some(PackageType::Meson);
121    package.datasource_id = Some(DatasourceId::MesonBuild);
122    package.name = Some(truncate_field(name.to_string()));
123
124    let languages = call
125        .positional
126        .iter()
127        .skip(1)
128        .flat_map(extract_string_values)
129        .collect::<Vec<_>>();
130    if let Some(primary_language) = languages.first() {
131        package.primary_language = Some(truncate_field(primary_language.clone()));
132    }
133    if !languages.is_empty() {
134        extra_data.insert(
135            "languages".to_string(),
136            JsonValue::Array(languages.iter().cloned().map(JsonValue::String).collect()),
137        );
138    }
139
140    if let Some(version) = call.keyword.get("version").and_then(expr_as_string) {
141        package.version = Some(truncate_field(version.to_string()));
142    }
143
144    let licenses = call
145        .keyword
146        .get("license")
147        .map(extract_string_values)
148        .unwrap_or_default();
149    if !licenses.is_empty() {
150        package.extracted_license_statement = Some(truncate_field(licenses.join("\n")));
151        if licenses.len() == 1 {
152            let (declared_license_expression, declared_license_expression_spdx, license_detections) =
153                normalize_spdx_declared_license(licenses.first().map(String::as_str));
154            package.declared_license_expression = declared_license_expression;
155            package.declared_license_expression_spdx = declared_license_expression_spdx;
156            package.license_detections = license_detections;
157        }
158    }
159
160    let license_files = call
161        .keyword
162        .get("license_files")
163        .map(extract_string_values)
164        .unwrap_or_default();
165    if !license_files.is_empty() {
166        extra_data.insert(
167            "license_files".to_string(),
168            JsonValue::Array(license_files.into_iter().map(JsonValue::String).collect()),
169        );
170    }
171
172    if let Some(meson_version) = call.keyword.get("meson_version").and_then(expr_as_string) {
173        extra_data.insert(
174            "meson_version".to_string(),
175            JsonValue::String(meson_version.to_string()),
176        );
177    }
178}
179
180fn extract_dependencies_from_call(call: &CallExpr) -> Vec<Dependency> {
181    let dependency_names = call
182        .positional
183        .iter()
184        .filter_map(expr_as_string)
185        .map(ToOwned::to_owned)
186        .collect::<Vec<_>>();
187
188    if dependency_names.is_empty() {
189        return Vec::new();
190    }
191
192    let extracted_requirement = call.keyword.get("version").map(|expr| {
193        extract_string_values(expr)
194            .into_iter()
195            .filter(|value| !value.is_empty())
196            .collect::<Vec<_>>()
197            .join(", ")
198    });
199    let required = call.keyword.get("required").and_then(expr_as_bool);
200    let native = call.keyword.get("native").and_then(expr_as_bool);
201
202    dependency_names
203        .into_iter()
204        .take(MAX_ITERATION_COUNT)
205        .map(|name| {
206            let mut extra_data = HashMap::new();
207
208            if let Some(requirement) = extracted_requirement
209                .as_ref()
210                .filter(|value| !value.is_empty())
211            {
212                extra_data.insert(
213                    "version".to_string(),
214                    JsonValue::String(requirement.clone()),
215                );
216            }
217            if let Some(required) = required {
218                extra_data.insert("required".to_string(), JsonValue::Bool(required));
219            }
220            if let Some(method) = call.keyword.get("method").and_then(expr_as_string) {
221                extra_data.insert("method".to_string(), JsonValue::String(method.to_string()));
222            }
223            if let Some(native) = native {
224                extra_data.insert("native".to_string(), JsonValue::Bool(native));
225            }
226
227            let modules = call
228                .keyword
229                .get("modules")
230                .map(extract_string_values)
231                .unwrap_or_default();
232            if !modules.is_empty() {
233                extra_data.insert(
234                    "modules".to_string(),
235                    JsonValue::Array(modules.into_iter().map(JsonValue::String).collect()),
236                );
237            }
238
239            let fallback = call
240                .keyword
241                .get("fallback")
242                .map(extract_string_values)
243                .unwrap_or_default();
244            if !fallback.is_empty() {
245                extra_data.insert(
246                    "fallback".to_string(),
247                    JsonValue::Array(fallback.into_iter().map(JsonValue::String).collect()),
248                );
249            }
250
251            Dependency {
252                purl: build_dependency_purl(&name),
253                extracted_requirement: extracted_requirement
254                    .clone()
255                    .filter(|value| !value.is_empty())
256                    .map(truncate_field),
257                scope: Some("dependencies".to_string()),
258                is_runtime: Some(native != Some(true)),
259                is_optional: Some(required == Some(false)),
260                is_pinned: Some(false),
261                is_direct: Some(true),
262                resolved_package: None,
263                extra_data: (!extra_data.is_empty()).then_some(extra_data),
264            }
265        })
266        .collect()
267}
268
269fn build_project_purl(name: &str, version: Option<&str>) -> Option<String> {
270    let mut purl = PackageUrl::new(PackageType::Meson.as_str(), name).ok()?;
271    if let Some(version) = version {
272        purl.with_version(version).ok()?;
273    }
274    Some(truncate_field(purl.to_string()))
275}
276
277fn build_dependency_purl(name: &str) -> Option<String> {
278    let mut purl = PackageUrl::new("generic", name).ok()?;
279    purl.with_namespace("meson").ok()?;
280    Some(truncate_field(purl.to_string()))
281}
282
283fn default_package_data() -> PackageData {
284    PackageData {
285        package_type: Some(PackageType::Meson),
286        datasource_id: Some(DatasourceId::MesonBuild),
287        ..Default::default()
288    }
289}
290
/// Returns `true` when `statement` starts an `if` or `foreach` block.
///
/// The keyword must be the first token of the statement, but it does not have
/// to be followed by whitespace: `if(cond)` opens a block just like
/// `if cond`. Identifiers that merely begin with a keyword (for example
/// `if_enabled` or `foreach_item`) are not block openers.
fn is_block_opener(statement: &str) -> bool {
    let statement = statement.trim_start();
    ["if", "foreach"].iter().any(|keyword| {
        statement.strip_prefix(keyword).is_some_and(|rest| {
            // The keyword ends where the identifier characters end.
            !rest.starts_with(|ch: char| ch.is_ascii_alphanumeric() || ch == '_')
        })
    })
}
297
/// Returns `true` when `statement`, ignoring surrounding whitespace, is
/// exactly `endif` or `endforeach`.
fn is_block_closer(statement: &str) -> bool {
    let keyword = statement.trim();
    keyword == "endif" || keyword == "endforeach"
}
301
302fn strip_comments(input: &str) -> Result<String, String> {
303    let chars: Vec<char> = input.chars().collect();
304    let mut output = String::with_capacity(input.len());
305    let mut index = 0usize;
306    let mut in_string = false;
307    let mut string_delimiter = '\0';
308    let mut escaped = false;
309    let mut chars_processed = 0usize;
310
311    while index < chars.len() {
312        chars_processed += 1;
313        if chars_processed > MAX_ITERATION_COUNT {
314            break;
315        }
316        let ch = chars[index];
317
318        if in_string {
319            output.push(ch);
320            if escaped {
321                escaped = false;
322            } else if ch == '\\' {
323                escaped = true;
324            } else if ch == string_delimiter {
325                in_string = false;
326            }
327            index += 1;
328            continue;
329        }
330
331        if matches!(ch, '\'' | '"') {
332            in_string = true;
333            string_delimiter = ch;
334            output.push(ch);
335            index += 1;
336            continue;
337        }
338
339        if ch == '#' {
340            index += 1;
341            while index < chars.len() && chars[index] != '\n' {
342                index += 1;
343            }
344            continue;
345        }
346
347        output.push(ch);
348        index += 1;
349    }
350
351    if in_string {
352        return Err("unterminated string literal".to_string());
353    }
354
355    Ok(output)
356}
357
358fn split_statements(input: &str) -> Vec<String> {
359    let mut statements = Vec::new();
360    let mut current = String::new();
361    let mut paren_depth = 0usize;
362    let mut bracket_depth = 0usize;
363    let mut in_string = false;
364    let mut string_delimiter = '\0';
365    let mut escaped = false;
366    let mut chars_processed = 0usize;
367
368    for ch in input.chars() {
369        chars_processed += 1;
370        if chars_processed > MAX_ITERATION_COUNT {
371            break;
372        }
373        current.push(ch);
374
375        if in_string {
376            if escaped {
377                escaped = false;
378            } else if ch == '\\' {
379                escaped = true;
380            } else if ch == string_delimiter {
381                in_string = false;
382            }
383            continue;
384        }
385
386        match ch {
387            '\'' | '"' => {
388                in_string = true;
389                string_delimiter = ch;
390            }
391            '(' => paren_depth += 1,
392            ')' => paren_depth = paren_depth.saturating_sub(1),
393            '[' => bracket_depth += 1,
394            ']' => bracket_depth = bracket_depth.saturating_sub(1),
395            '\n' if paren_depth == 0 && bracket_depth == 0 => {
396                let trimmed = current.trim();
397                if !trimmed.is_empty() {
398                    statements.push(trimmed.to_string());
399                }
400                current.clear();
401            }
402            _ => {}
403        }
404    }
405
406    let trimmed = current.trim();
407    if !trimmed.is_empty() {
408        statements.push(trimmed.to_string());
409    }
410
411    statements
412}
413
/// A single parsed `meson.build` statement.
#[derive(Debug, Clone)]
enum Statement {
    /// A bare expression, such as a function call.
    Expr(Expr),
    /// The right-hand side of an `identifier = expression` assignment; the
    /// assigned name is not kept.
    Assignment(Expr),
}
419
/// The subset of Meson expressions this parser understands.
#[derive(Debug, Clone)]
enum Expr {
    /// A string literal, stored without its quotes; escape sequences are kept
    /// exactly as written in the source.
    String(String),
    /// The literal `true` or `false`.
    Bool(bool),
    /// An array literal `[a, b, ...]`.
    Array(Vec<Expr>),
    /// A bare identifier; its name is not retained.
    Identifier,
    /// A function call `name(args...)`.
    Call(CallExpr),
}
428
/// A parsed function call.
#[derive(Debug, Clone)]
struct CallExpr {
    /// Name of the called function, e.g. `project` or `dependency`.
    name: String,
    /// Positional arguments in source order.
    positional: Vec<Expr>,
    /// Keyword arguments (`key: value`), keyed by argument name.
    keyword: HashMap<String, Expr>,
}
435
/// Lexical tokens produced by `tokenize`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
    /// An identifier other than `true`/`false`.
    Ident(String),
    /// A quoted string, without its delimiters.
    Str(String),
    /// The keyword `true` or `false`.
    Bool(bool),
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `=`
    Equal,
}
449
450fn parse_statement(statement: &str) -> Result<Statement, String> {
451    let tokens = tokenize(statement)?;
452    if tokens.is_empty() {
453        return Err("empty statement".to_string());
454    }
455
456    if let [Token::Ident(name), Token::Equal, rest @ ..] = tokens.as_slice() {
457        let mut parser = Parser::new(rest);
458        let expr = parser.parse_expr()?;
459        parser.expect_end()?;
460        let _ = name;
461        return Ok(Statement::Assignment(expr));
462    }
463
464    let mut parser = Parser::new(&tokens);
465    let expr = parser.parse_expr()?;
466    parser.expect_end()?;
467    Ok(Statement::Expr(expr))
468}
469
470fn tokenize(input: &str) -> Result<Vec<Token>, String> {
471    let chars: Vec<char> = input.chars().collect();
472    let mut tokens = Vec::new();
473    let mut index = 0usize;
474
475    while index < chars.len() {
476        if tokens.len() >= MAX_ITERATION_COUNT {
477            break;
478        }
479        let ch = chars[index];
480        if ch.is_whitespace() {
481            index += 1;
482            continue;
483        }
484
485        match ch {
486            '(' => {
487                tokens.push(Token::LParen);
488                index += 1;
489            }
490            ')' => {
491                tokens.push(Token::RParen);
492                index += 1;
493            }
494            '[' => {
495                tokens.push(Token::LBracket);
496                index += 1;
497            }
498            ']' => {
499                tokens.push(Token::RBracket);
500                index += 1;
501            }
502            ':' => {
503                tokens.push(Token::Colon);
504                index += 1;
505            }
506            ',' => {
507                tokens.push(Token::Comma);
508                index += 1;
509            }
510            '=' => {
511                tokens.push(Token::Equal);
512                index += 1;
513            }
514            '\'' | '"' => {
515                let delimiter = ch;
516                index += 1;
517                let start = index;
518                let mut escaped = false;
519                while index < chars.len() {
520                    let current = chars[index];
521                    if escaped {
522                        escaped = false;
523                    } else if current == '\\' {
524                        escaped = true;
525                    } else if current == delimiter {
526                        break;
527                    }
528                    index += 1;
529                }
530
531                if index >= chars.len() {
532                    return Err("unterminated string token".to_string());
533                }
534
535                let value: String = chars[start..index].iter().collect();
536                tokens.push(Token::Str(value));
537                index += 1;
538            }
539            _ if is_ident_start(ch) => {
540                let start = index;
541                index += 1;
542                while index < chars.len() && is_ident_continue(chars[index]) {
543                    index += 1;
544                }
545                let ident: String = chars[start..index].iter().collect();
546                match ident.as_str() {
547                    "true" => tokens.push(Token::Bool(true)),
548                    "false" => tokens.push(Token::Bool(false)),
549                    _ => tokens.push(Token::Ident(ident)),
550                }
551            }
552            _ => {
553                return Err(format!("unsupported token '{}'", ch));
554            }
555        }
556    }
557
558    Ok(tokens)
559}
560
/// Returns `true` when `ch` may begin an identifier: an ASCII letter or `_`.
fn is_ident_start(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '_')
}
564
/// Returns `true` when `ch` may appear after the first character of an
/// identifier: an ASCII letter, an ASCII digit, or `_`.
fn is_ident_continue(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
568
/// Recursive-descent parser over a borrowed slice of tokens.
struct Parser<'a> {
    /// The tokens being parsed.
    tokens: &'a [Token],
    /// Position of the next token to consume.
    index: usize,
    /// Tracks nesting of `parse_expr` calls so deeply nested input is rejected.
    guard: RecursionGuard<()>,
}
574
575impl<'a> Parser<'a> {
576    fn new(tokens: &'a [Token]) -> Self {
577        Self {
578            tokens,
579            index: 0,
580            guard: RecursionGuard::depth_only(),
581        }
582    }
583
584    fn parse_expr(&mut self) -> Result<Expr, String> {
585        if self.guard.descend() {
586            return Err("recursion depth exceeded".to_string());
587        }
588        let result = match self.peek() {
589            Some(Token::Str(value)) => {
590                self.index += 1;
591                Ok(Expr::String(value.clone()))
592            }
593            Some(Token::Bool(value)) => {
594                self.index += 1;
595                Ok(Expr::Bool(*value))
596            }
597            Some(Token::LBracket) => self.parse_array(),
598            Some(Token::Ident(_)) => self.parse_identifier_or_call(),
599            Some(token) => Err(format!("unexpected token {:?}", token)),
600            None => Err("unexpected end of input".to_string()),
601        };
602        self.guard.ascend();
603        result
604    }
605
606    fn parse_array(&mut self) -> Result<Expr, String> {
607        self.expect(Token::LBracket)?;
608        let mut values = Vec::new();
609        let mut element_count = 0usize;
610        while !matches!(self.peek(), Some(Token::RBracket)) {
611            element_count += 1;
612            if element_count > MAX_ITERATION_COUNT {
613                break;
614            }
615            let expr = self.parse_expr()?;
616            values.push(expr);
617            if matches!(self.peek(), Some(Token::Comma)) {
618                self.index += 1;
619            } else if !matches!(self.peek(), Some(Token::RBracket)) {
620                return Err("expected ',' or ']' in array".to_string());
621            }
622        }
623        self.expect(Token::RBracket)?;
624        Ok(Expr::Array(values))
625    }
626
627    fn parse_identifier_or_call(&mut self) -> Result<Expr, String> {
628        let Token::Ident(name) = self
629            .next()
630            .cloned()
631            .ok_or_else(|| "expected identifier".to_string())?
632        else {
633            return Err("expected identifier".to_string());
634        };
635
636        if !matches!(self.peek(), Some(Token::LParen)) {
637            let _ = name;
638            return Ok(Expr::Identifier);
639        }
640
641        self.expect(Token::LParen)?;
642        let mut positional = Vec::new();
643        let mut keyword = HashMap::new();
644        let mut arg_count = 0usize;
645
646        while !matches!(self.peek(), Some(Token::RParen)) {
647            arg_count += 1;
648            if arg_count > MAX_ITERATION_COUNT {
649                break;
650            }
651            if let (Some(Token::Ident(arg_name)), Some(Token::Colon)) =
652                (self.peek(), self.peek_n(1))
653            {
654                let arg_name = arg_name.clone();
655                self.index += 2;
656                let value = self.parse_expr()?;
657                keyword.insert(arg_name, value);
658            } else {
659                let expr = self.parse_expr()?;
660                positional.push(expr);
661            }
662
663            if matches!(self.peek(), Some(Token::Comma)) {
664                self.index += 1;
665            } else if !matches!(self.peek(), Some(Token::RParen)) {
666                return Err("expected ',' or ')' in call".to_string());
667            }
668        }
669
670        self.expect(Token::RParen)?;
671        Ok(Expr::Call(CallExpr {
672            name,
673            positional,
674            keyword,
675        }))
676    }
677
678    fn expect(&mut self, expected: Token) -> Result<(), String> {
679        match self.next() {
680            Some(token) if *token == expected => Ok(()),
681            Some(token) => Err(format!("expected {:?}, found {:?}", expected, token)),
682            None => Err(format!("expected {:?}, found end of input", expected)),
683        }
684    }
685
686    fn expect_end(&self) -> Result<(), String> {
687        if self.index == self.tokens.len() {
688            Ok(())
689        } else {
690            Err(format!(
691                "unexpected trailing tokens: {:?}",
692                &self.tokens[self.index..]
693            ))
694        }
695    }
696
697    fn peek(&self) -> Option<&'a Token> {
698        self.tokens.get(self.index)
699    }
700
701    fn peek_n(&self, offset: usize) -> Option<&'a Token> {
702        self.tokens.get(self.index + offset)
703    }
704
705    fn next(&mut self) -> Option<&'a Token> {
706        let token = self.tokens.get(self.index);
707        if token.is_some() {
708            self.index += 1;
709        }
710        token
711    }
712}
713
714fn expr_as_string(expr: &Expr) -> Option<&str> {
715    match expr {
716        Expr::String(value) => Some(value.as_str()),
717        _ => None,
718    }
719}
720
721fn expr_as_bool(expr: &Expr) -> Option<bool> {
722    match expr {
723        Expr::Bool(value) => Some(*value),
724        _ => None,
725    }
726}
727
728fn extract_string_values(expr: &Expr) -> Vec<String> {
729    match expr {
730        Expr::String(value) => vec![value.clone()],
731        Expr::Array(values) => values
732            .iter()
733            .filter_map(expr_as_string)
734            .map(ToOwned::to_owned)
735            .collect(),
736        _ => Vec::new(),
737    }
738}
739
// Registers metadata for the Meson parser through the crate-level
// `register_parser!` macro.
// NOTE(review): the arguments look like a description, the matched path
// globs, a package-type name, an (empty) primary language and a
// documentation URL — confirm against the macro definition.
crate::register_parser!(
    "Meson meson.build manifest",
    &["**/meson.build"],
    "meson",
    "",
    Some("https://mesonbuild.com/Syntax.html"),
);