Skip to main content

provenant/parsers/
meson.rs

1use std::collections::HashMap;
2use std::fs;
3use std::path::Path;
4
5use crate::parser_warn as warn;
6use packageurl::PackageUrl;
7use serde_json::Value as JsonValue;
8
9use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
10
11use super::PackageParser;
12use super::license_normalization::normalize_spdx_declared_license;
13
/// Parser for Meson `meson.build` manifests; the behavior lives in its
/// `PackageParser` implementation.
pub struct MesonParser;
15
16impl PackageParser for MesonParser {
17    const PACKAGE_TYPE: PackageType = PackageType::Meson;
18
19    fn is_match(path: &Path) -> bool {
20        path.file_name().is_some_and(|name| name == "meson.build")
21    }
22
23    fn extract_packages(path: &Path) -> Vec<PackageData> {
24        let content = match fs::read_to_string(path) {
25            Ok(content) => content,
26            Err(error) => {
27                warn!("Failed to read meson.build at {:?}: {}", path, error);
28                return vec![default_package_data()];
29            }
30        };
31
32        match parse_meson_build(&content) {
33            Ok(package) => vec![package],
34            Err(_) => vec![default_package_data()],
35        }
36    }
37}
38
39fn parse_meson_build(content: &str) -> Result<PackageData, String> {
40    let sanitized = strip_comments(content)?;
41    let statements = split_statements(&sanitized);
42
43    let mut package = default_package_data();
44    let mut extra_data = HashMap::new();
45    let mut dependencies = Vec::new();
46    let mut control_flow_depth = 0usize;
47
48    for statement in statements {
49        let trimmed = statement.trim();
50        if trimmed.is_empty() {
51            continue;
52        }
53
54        if is_block_closer(trimmed) {
55            control_flow_depth = control_flow_depth.saturating_sub(1);
56            continue;
57        }
58
59        if control_flow_depth > 0 {
60            if is_block_opener(trimmed) {
61                control_flow_depth += 1;
62            }
63            continue;
64        }
65
66        if is_block_opener(trimmed) {
67            control_flow_depth += 1;
68            continue;
69        }
70
71        let Ok(parsed) = parse_statement(trimmed) else {
72            continue;
73        };
74        match parsed {
75            Statement::Expr(expr) | Statement::Assignment(expr) => {
76                handle_top_level_expr(&expr, &mut package, &mut extra_data, &mut dependencies)
77            }
78        }
79    }
80
81    package.dependencies = dependencies;
82    package.extra_data = (!extra_data.is_empty()).then_some(extra_data);
83    package.purl = package
84        .name
85        .as_deref()
86        .and_then(|name| build_project_purl(name, package.version.as_deref()));
87
88    Ok(package)
89}
90
91fn handle_top_level_expr(
92    expr: &Expr,
93    package: &mut PackageData,
94    extra_data: &mut HashMap<String, JsonValue>,
95    dependencies: &mut Vec<Dependency>,
96) {
97    let Expr::Call(call) = expr else {
98        return;
99    };
100
101    match call.name.as_str() {
102        "project" if package.name.is_none() => apply_project_call(call, package, extra_data),
103        "dependency" => dependencies.extend(extract_dependencies_from_call(call)),
104        _ => {}
105    }
106}
107
108fn apply_project_call(
109    call: &CallExpr,
110    package: &mut PackageData,
111    extra_data: &mut HashMap<String, JsonValue>,
112) {
113    let Some(name) = call.positional.first().and_then(expr_as_string) else {
114        return;
115    };
116
117    package.package_type = Some(PackageType::Meson);
118    package.datasource_id = Some(DatasourceId::MesonBuild);
119    package.name = Some(name.to_string());
120
121    let languages = call
122        .positional
123        .iter()
124        .skip(1)
125        .flat_map(extract_string_values)
126        .collect::<Vec<_>>();
127    if let Some(primary_language) = languages.first() {
128        package.primary_language = Some(primary_language.clone());
129    }
130    if !languages.is_empty() {
131        extra_data.insert(
132            "languages".to_string(),
133            JsonValue::Array(languages.iter().cloned().map(JsonValue::String).collect()),
134        );
135    }
136
137    if let Some(version) = call.keyword.get("version").and_then(expr_as_string) {
138        package.version = Some(version.to_string());
139    }
140
141    let licenses = call
142        .keyword
143        .get("license")
144        .map(extract_string_values)
145        .unwrap_or_default();
146    if !licenses.is_empty() {
147        package.extracted_license_statement = Some(licenses.join("\n"));
148        if licenses.len() == 1 {
149            let (declared_license_expression, declared_license_expression_spdx, license_detections) =
150                normalize_spdx_declared_license(licenses.first().map(String::as_str));
151            package.declared_license_expression = declared_license_expression;
152            package.declared_license_expression_spdx = declared_license_expression_spdx;
153            package.license_detections = license_detections;
154        }
155    }
156
157    let license_files = call
158        .keyword
159        .get("license_files")
160        .map(extract_string_values)
161        .unwrap_or_default();
162    if !license_files.is_empty() {
163        extra_data.insert(
164            "license_files".to_string(),
165            JsonValue::Array(license_files.into_iter().map(JsonValue::String).collect()),
166        );
167    }
168
169    if let Some(meson_version) = call.keyword.get("meson_version").and_then(expr_as_string) {
170        extra_data.insert(
171            "meson_version".to_string(),
172            JsonValue::String(meson_version.to_string()),
173        );
174    }
175}
176
177fn extract_dependencies_from_call(call: &CallExpr) -> Vec<Dependency> {
178    let dependency_names = call
179        .positional
180        .iter()
181        .filter_map(expr_as_string)
182        .map(ToOwned::to_owned)
183        .collect::<Vec<_>>();
184
185    if dependency_names.is_empty() {
186        return Vec::new();
187    }
188
189    let extracted_requirement = call.keyword.get("version").map(|expr| {
190        extract_string_values(expr)
191            .into_iter()
192            .filter(|value| !value.is_empty())
193            .collect::<Vec<_>>()
194            .join(", ")
195    });
196    let required = call.keyword.get("required").and_then(expr_as_bool);
197    let native = call.keyword.get("native").and_then(expr_as_bool);
198
199    dependency_names
200        .into_iter()
201        .map(|name| {
202            let mut extra_data = HashMap::new();
203
204            if let Some(requirement) = extracted_requirement
205                .as_ref()
206                .filter(|value| !value.is_empty())
207            {
208                extra_data.insert(
209                    "version".to_string(),
210                    JsonValue::String(requirement.clone()),
211                );
212            }
213            if let Some(required) = required {
214                extra_data.insert("required".to_string(), JsonValue::Bool(required));
215            }
216            if let Some(method) = call.keyword.get("method").and_then(expr_as_string) {
217                extra_data.insert("method".to_string(), JsonValue::String(method.to_string()));
218            }
219            if let Some(native) = native {
220                extra_data.insert("native".to_string(), JsonValue::Bool(native));
221            }
222
223            let modules = call
224                .keyword
225                .get("modules")
226                .map(extract_string_values)
227                .unwrap_or_default();
228            if !modules.is_empty() {
229                extra_data.insert(
230                    "modules".to_string(),
231                    JsonValue::Array(modules.into_iter().map(JsonValue::String).collect()),
232                );
233            }
234
235            let fallback = call
236                .keyword
237                .get("fallback")
238                .map(extract_string_values)
239                .unwrap_or_default();
240            if !fallback.is_empty() {
241                extra_data.insert(
242                    "fallback".to_string(),
243                    JsonValue::Array(fallback.into_iter().map(JsonValue::String).collect()),
244                );
245            }
246
247            Dependency {
248                purl: build_dependency_purl(&name),
249                extracted_requirement: extracted_requirement
250                    .clone()
251                    .filter(|value| !value.is_empty()),
252                scope: Some("dependencies".to_string()),
253                is_runtime: Some(native != Some(true)),
254                is_optional: Some(required == Some(false)),
255                is_pinned: Some(false),
256                is_direct: Some(true),
257                resolved_package: None,
258                extra_data: (!extra_data.is_empty()).then_some(extra_data),
259            }
260        })
261        .collect()
262}
263
264fn build_project_purl(name: &str, version: Option<&str>) -> Option<String> {
265    let mut purl = PackageUrl::new(PackageType::Meson.as_str(), name).ok()?;
266    if let Some(version) = version {
267        purl.with_version(version).ok()?;
268    }
269    Some(purl.to_string())
270}
271
272fn build_dependency_purl(name: &str) -> Option<String> {
273    let mut purl = PackageUrl::new("generic", name).ok()?;
274    purl.with_namespace("meson").ok()?;
275    Some(purl.to_string())
276}
277
278fn default_package_data() -> PackageData {
279    PackageData {
280        package_type: Some(PackageType::Meson),
281        datasource_id: Some(DatasourceId::MesonBuild),
282        ..Default::default()
283    }
284}
285
/// Returns `true` when `statement` starts an `if` or `foreach` block.
///
/// The keyword is recognized as the leading identifier of the statement, so
/// both `if cond` and `if(cond)` count as openers, while identifiers that
/// merely start with a keyword (`iffy`, `if_found`) do not.
fn is_block_opener(statement: &str) -> bool {
    let trimmed = statement.trim_start();
    // The leading identifier ends at the first character that cannot be part
    // of one; `find` returns a byte offset on a char boundary, so the slice
    // below is always valid.
    let keyword_end = trimmed
        .find(|ch: char| !(ch.is_ascii_alphanumeric() || ch == '_'))
        .unwrap_or(trimmed.len());
    matches!(&trimmed[..keyword_end], "if" | "foreach")
}
292
/// Returns `true` when `statement`, ignoring surrounding whitespace, is
/// exactly `endif` or `endforeach`.
fn is_block_closer(statement: &str) -> bool {
    let keyword = statement.trim();
    keyword == "endif" || keyword == "endforeach"
}
296
/// Removes `#` comments from `input` while leaving quoted strings untouched.
///
/// A string starts at `'` or `"` and ends at the next unescaped occurrence of
/// the same quote character; a backslash escapes the character after it.
/// A comment runs from `#` up to, but not including, the next newline.
///
/// # Errors
///
/// Returns `"unterminated string literal"` when the input ends inside a string.
fn strip_comments(input: &str) -> Result<String, String> {
    let mut output = String::with_capacity(input.len());
    // Quote character of the string currently being copied, if any.
    let mut open_quote: Option<char> = None;
    let mut escaped = false;
    let mut in_comment = false;

    for ch in input.chars() {
        if in_comment {
            if ch != '\n' {
                continue;
            }
            // The newline ends the comment and is handled as ordinary text below.
            in_comment = false;
        }

        if let Some(quote) = open_quote {
            output.push(ch);
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == quote {
                open_quote = None;
            }
            continue;
        }

        match ch {
            '\'' | '"' => {
                open_quote = Some(ch);
                output.push(ch);
            }
            '#' => in_comment = true,
            _ => output.push(ch),
        }
    }

    match open_quote {
        Some(_) => Err("unterminated string literal".to_string()),
        None => Ok(output),
    }
}
347
/// Splits `input` into trimmed, non-empty statements.
///
/// A statement ends at a newline that is outside any string and outside any
/// open `(`…`)` or `[`…`]` pair, so calls and arrays may span several lines.
/// Text remaining after the last newline forms a final statement.
fn split_statements(input: &str) -> Vec<String> {
    /// Moves the trimmed content of `buffer` into `out` (if non-empty) and resets `buffer`.
    fn flush(buffer: &mut String, out: &mut Vec<String>) {
        let statement = buffer.trim();
        if !statement.is_empty() {
            out.push(statement.to_owned());
        }
        buffer.clear();
    }

    let mut out = Vec::new();
    let mut buffer = String::new();
    let mut open_parens = 0usize;
    let mut open_brackets = 0usize;
    // Quote character of the string currently being scanned, if any.
    let mut open_quote: Option<char> = None;
    let mut escaped = false;

    for ch in input.chars() {
        buffer.push(ch);

        if let Some(quote) = open_quote {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == quote {
                open_quote = None;
            }
            continue;
        }

        match ch {
            '\'' | '"' => open_quote = Some(ch),
            '(' => open_parens += 1,
            ')' => open_parens = open_parens.saturating_sub(1),
            '[' => open_brackets += 1,
            ']' => open_brackets = open_brackets.saturating_sub(1),
            '\n' => {
                if open_parens == 0 && open_brackets == 0 {
                    flush(&mut buffer, &mut out);
                }
            }
            _ => {}
        }
    }

    flush(&mut buffer, &mut out);
    out
}
398
/// A single parsed `meson.build` statement, as produced by `parse_statement`.
#[derive(Debug, Clone)]
enum Statement {
    /// A bare expression, e.g. a function call on its own line.
    Expr(Expr),
    /// The right-hand side of `identifier = expression`; the assigned name is
    /// not kept.
    Assignment(Expr),
}
404
/// The subset of expressions the parser understands.
#[derive(Debug, Clone)]
enum Expr {
    /// A string literal: the text between the quotes exactly as written
    /// (escape sequences are not decoded).
    String(String),
    /// A `true` or `false` literal.
    Bool(bool),
    /// An array literal `[a, b, ...]`.
    Array(Vec<Expr>),
    /// A bare identifier that is not followed by `(`; its name is discarded.
    Identifier,
    /// A function call `name(...)`.
    Call(CallExpr),
}
413
/// A parsed function call.
#[derive(Debug, Clone)]
struct CallExpr {
    /// Name of the called function.
    name: String,
    /// Positional arguments in source order.
    positional: Vec<Expr>,
    /// Keyword arguments (`name: value`) keyed by argument name; when a name
    /// repeats, the last value wins.
    keyword: HashMap<String, Expr>,
}
420
/// Lexical tokens produced by `tokenize`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
    /// An identifier other than `true`/`false`.
    Ident(String),
    /// A quoted string; holds the raw text between the quotes.
    Str(String),
    /// The literal `true` or `false`.
    Bool(bool),
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `=`
    Equal,
}
434
435fn parse_statement(statement: &str) -> Result<Statement, String> {
436    let tokens = tokenize(statement)?;
437    if tokens.is_empty() {
438        return Err("empty statement".to_string());
439    }
440
441    if let [Token::Ident(name), Token::Equal, rest @ ..] = tokens.as_slice() {
442        let mut parser = Parser::new(rest);
443        let expr = parser.parse_expr()?;
444        parser.expect_end()?;
445        let _ = name;
446        return Ok(Statement::Assignment(expr));
447    }
448
449    let mut parser = Parser::new(&tokens);
450    let expr = parser.parse_expr()?;
451    parser.expect_end()?;
452    Ok(Statement::Expr(expr))
453}
454
455fn tokenize(input: &str) -> Result<Vec<Token>, String> {
456    let chars: Vec<char> = input.chars().collect();
457    let mut tokens = Vec::new();
458    let mut index = 0usize;
459
460    while index < chars.len() {
461        let ch = chars[index];
462        if ch.is_whitespace() {
463            index += 1;
464            continue;
465        }
466
467        match ch {
468            '(' => {
469                tokens.push(Token::LParen);
470                index += 1;
471            }
472            ')' => {
473                tokens.push(Token::RParen);
474                index += 1;
475            }
476            '[' => {
477                tokens.push(Token::LBracket);
478                index += 1;
479            }
480            ']' => {
481                tokens.push(Token::RBracket);
482                index += 1;
483            }
484            ':' => {
485                tokens.push(Token::Colon);
486                index += 1;
487            }
488            ',' => {
489                tokens.push(Token::Comma);
490                index += 1;
491            }
492            '=' => {
493                tokens.push(Token::Equal);
494                index += 1;
495            }
496            '\'' | '"' => {
497                let delimiter = ch;
498                index += 1;
499                let start = index;
500                let mut escaped = false;
501                while index < chars.len() {
502                    let current = chars[index];
503                    if escaped {
504                        escaped = false;
505                    } else if current == '\\' {
506                        escaped = true;
507                    } else if current == delimiter {
508                        break;
509                    }
510                    index += 1;
511                }
512
513                if index >= chars.len() {
514                    return Err("unterminated string token".to_string());
515                }
516
517                let value: String = chars[start..index].iter().collect();
518                tokens.push(Token::Str(value));
519                index += 1;
520            }
521            _ if is_ident_start(ch) => {
522                let start = index;
523                index += 1;
524                while index < chars.len() && is_ident_continue(chars[index]) {
525                    index += 1;
526                }
527                let ident: String = chars[start..index].iter().collect();
528                match ident.as_str() {
529                    "true" => tokens.push(Token::Bool(true)),
530                    "false" => tokens.push(Token::Bool(false)),
531                    _ => tokens.push(Token::Ident(ident)),
532                }
533            }
534            _ => {
535                return Err(format!("unsupported token '{}'", ch));
536            }
537        }
538    }
539
540    Ok(tokens)
541}
542
/// Returns `true` when `ch` may begin an identifier: an ASCII letter or `_`.
fn is_ident_start(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '_')
}
546
/// Returns `true` when `ch` may appear after the first character of an
/// identifier: an ASCII letter, an ASCII digit, or `_`.
fn is_ident_continue(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
550
/// Cursor over a borrowed slice of tokens, used to parse one expression.
struct Parser<'a> {
    /// The tokens being parsed.
    tokens: &'a [Token],
    /// Position in `tokens` of the next token to consume.
    index: usize,
}
555
556impl<'a> Parser<'a> {
557    fn new(tokens: &'a [Token]) -> Self {
558        Self { tokens, index: 0 }
559    }
560
561    fn parse_expr(&mut self) -> Result<Expr, String> {
562        match self.peek() {
563            Some(Token::Str(value)) => {
564                self.index += 1;
565                Ok(Expr::String(value.clone()))
566            }
567            Some(Token::Bool(value)) => {
568                self.index += 1;
569                Ok(Expr::Bool(*value))
570            }
571            Some(Token::LBracket) => self.parse_array(),
572            Some(Token::Ident(_)) => self.parse_identifier_or_call(),
573            Some(token) => Err(format!("unexpected token {:?}", token)),
574            None => Err("unexpected end of input".to_string()),
575        }
576    }
577
578    fn parse_array(&mut self) -> Result<Expr, String> {
579        self.expect(Token::LBracket)?;
580        let mut values = Vec::new();
581        while !matches!(self.peek(), Some(Token::RBracket)) {
582            values.push(self.parse_expr()?);
583            if matches!(self.peek(), Some(Token::Comma)) {
584                self.index += 1;
585            } else if !matches!(self.peek(), Some(Token::RBracket)) {
586                return Err("expected ',' or ']' in array".to_string());
587            }
588        }
589        self.expect(Token::RBracket)?;
590        Ok(Expr::Array(values))
591    }
592
593    fn parse_identifier_or_call(&mut self) -> Result<Expr, String> {
594        let Token::Ident(name) = self
595            .next()
596            .cloned()
597            .ok_or_else(|| "expected identifier".to_string())?
598        else {
599            return Err("expected identifier".to_string());
600        };
601
602        if !matches!(self.peek(), Some(Token::LParen)) {
603            let _ = name;
604            return Ok(Expr::Identifier);
605        }
606
607        self.expect(Token::LParen)?;
608        let mut positional = Vec::new();
609        let mut keyword = HashMap::new();
610
611        while !matches!(self.peek(), Some(Token::RParen)) {
612            if let (Some(Token::Ident(arg_name)), Some(Token::Colon)) =
613                (self.peek(), self.peek_n(1))
614            {
615                let arg_name = arg_name.clone();
616                self.index += 2;
617                let value = self.parse_expr()?;
618                keyword.insert(arg_name, value);
619            } else {
620                positional.push(self.parse_expr()?);
621            }
622
623            if matches!(self.peek(), Some(Token::Comma)) {
624                self.index += 1;
625            } else if !matches!(self.peek(), Some(Token::RParen)) {
626                return Err("expected ',' or ')' in call".to_string());
627            }
628        }
629
630        self.expect(Token::RParen)?;
631        Ok(Expr::Call(CallExpr {
632            name,
633            positional,
634            keyword,
635        }))
636    }
637
638    fn expect(&mut self, expected: Token) -> Result<(), String> {
639        match self.next() {
640            Some(token) if *token == expected => Ok(()),
641            Some(token) => Err(format!("expected {:?}, found {:?}", expected, token)),
642            None => Err(format!("expected {:?}, found end of input", expected)),
643        }
644    }
645
646    fn expect_end(&self) -> Result<(), String> {
647        if self.index == self.tokens.len() {
648            Ok(())
649        } else {
650            Err(format!(
651                "unexpected trailing tokens: {:?}",
652                &self.tokens[self.index..]
653            ))
654        }
655    }
656
657    fn peek(&self) -> Option<&'a Token> {
658        self.tokens.get(self.index)
659    }
660
661    fn peek_n(&self, offset: usize) -> Option<&'a Token> {
662        self.tokens.get(self.index + offset)
663    }
664
665    fn next(&mut self) -> Option<&'a Token> {
666        let token = self.tokens.get(self.index);
667        if token.is_some() {
668            self.index += 1;
669        }
670        token
671    }
672}
673
674fn expr_as_string(expr: &Expr) -> Option<&str> {
675    match expr {
676        Expr::String(value) => Some(value.as_str()),
677        _ => None,
678    }
679}
680
681fn expr_as_bool(expr: &Expr) -> Option<bool> {
682    match expr {
683        Expr::Bool(value) => Some(*value),
684        _ => None,
685    }
686}
687
688fn extract_string_values(expr: &Expr) -> Vec<String> {
689    match expr {
690        Expr::String(value) => vec![value.clone()],
691        Expr::Array(values) => values
692            .iter()
693            .filter_map(expr_as_string)
694            .map(ToOwned::to_owned)
695            .collect(),
696        _ => Vec::new(),
697    }
698}
699
// Registers metadata for this parser through the crate-level `register_parser!` macro.
// NOTE(review): the meaning of each positional argument is defined by the macro,
// which is not visible here — presumably description, path globs, package type,
// primary language (left empty) and documentation URL; confirm against the macro.
crate::register_parser!(
    "Meson meson.build manifest",
    &["**/meson.build"],
    "meson",
    "",
    Some("https://mesonbuild.com/Syntax.html"),
);