Skip to main content

provenant/parsers/
meson.rs

1use std::collections::HashMap;
2use std::fs;
3use std::path::Path;
4
5use crate::parser_warn as warn;
6use packageurl::PackageUrl;
7use serde_json::Value as JsonValue;
8
9use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
10
11use super::PackageParser;
12use super::license_normalization::normalize_spdx_declared_license;
13
14pub struct MesonParser;
15
16impl PackageParser for MesonParser {
17    const PACKAGE_TYPE: PackageType = PackageType::Meson;
18
19    fn is_match(path: &Path) -> bool {
20        path.file_name().is_some_and(|name| name == "meson.build")
21    }
22
23    fn extract_packages(path: &Path) -> Vec<PackageData> {
24        let content = match fs::read_to_string(path) {
25            Ok(content) => content,
26            Err(error) => {
27                warn!("Failed to read meson.build at {:?}: {}", path, error);
28                return vec![default_package_data()];
29            }
30        };
31
32        match parse_meson_build(&content) {
33            Ok(package) => vec![package],
34            Err(error) => {
35                warn!("Failed to parse meson.build at {:?}: {}", path, error);
36                vec![default_package_data()]
37            }
38        }
39    }
40}
41
42fn parse_meson_build(content: &str) -> Result<PackageData, String> {
43    let sanitized = strip_comments(content)?;
44    let statements = split_statements(&sanitized);
45
46    let mut package = default_package_data();
47    let mut extra_data = HashMap::new();
48    let mut dependencies = Vec::new();
49    let mut control_flow_depth = 0usize;
50
51    for statement in statements {
52        let trimmed = statement.trim();
53        if trimmed.is_empty() {
54            continue;
55        }
56
57        if is_block_closer(trimmed) {
58            control_flow_depth = control_flow_depth.saturating_sub(1);
59            continue;
60        }
61
62        if control_flow_depth > 0 {
63            if is_block_opener(trimmed) {
64                control_flow_depth += 1;
65            }
66            continue;
67        }
68
69        if is_block_opener(trimmed) {
70            control_flow_depth += 1;
71            continue;
72        }
73
74        let Ok(parsed) = parse_statement(trimmed) else {
75            continue;
76        };
77        match parsed {
78            Statement::Expr(expr) | Statement::Assignment(expr) => {
79                handle_top_level_expr(&expr, &mut package, &mut extra_data, &mut dependencies)
80            }
81        }
82    }
83
84    package.dependencies = dependencies;
85    package.extra_data = (!extra_data.is_empty()).then_some(extra_data);
86    package.purl = package
87        .name
88        .as_deref()
89        .and_then(|name| build_project_purl(name, package.version.as_deref()));
90
91    Ok(package)
92}
93
94fn handle_top_level_expr(
95    expr: &Expr,
96    package: &mut PackageData,
97    extra_data: &mut HashMap<String, JsonValue>,
98    dependencies: &mut Vec<Dependency>,
99) {
100    let Expr::Call(call) = expr else {
101        return;
102    };
103
104    match call.name.as_str() {
105        "project" if package.name.is_none() => apply_project_call(call, package, extra_data),
106        "dependency" => dependencies.extend(extract_dependencies_from_call(call)),
107        _ => {}
108    }
109}
110
111fn apply_project_call(
112    call: &CallExpr,
113    package: &mut PackageData,
114    extra_data: &mut HashMap<String, JsonValue>,
115) {
116    let Some(name) = call.positional.first().and_then(expr_as_string) else {
117        return;
118    };
119
120    package.package_type = Some(PackageType::Meson);
121    package.datasource_id = Some(DatasourceId::MesonBuild);
122    package.name = Some(name.to_string());
123
124    let languages = call
125        .positional
126        .iter()
127        .skip(1)
128        .flat_map(extract_string_values)
129        .collect::<Vec<_>>();
130    if let Some(primary_language) = languages.first() {
131        package.primary_language = Some(primary_language.clone());
132    }
133    if !languages.is_empty() {
134        extra_data.insert(
135            "languages".to_string(),
136            JsonValue::Array(languages.iter().cloned().map(JsonValue::String).collect()),
137        );
138    }
139
140    if let Some(version) = call.keyword.get("version").and_then(expr_as_string) {
141        package.version = Some(version.to_string());
142    }
143
144    let licenses = call
145        .keyword
146        .get("license")
147        .map(extract_string_values)
148        .unwrap_or_default();
149    if !licenses.is_empty() {
150        package.extracted_license_statement = Some(licenses.join("\n"));
151        if licenses.len() == 1 {
152            let (declared_license_expression, declared_license_expression_spdx, license_detections) =
153                normalize_spdx_declared_license(licenses.first().map(String::as_str));
154            package.declared_license_expression = declared_license_expression;
155            package.declared_license_expression_spdx = declared_license_expression_spdx;
156            package.license_detections = license_detections;
157        }
158    }
159
160    let license_files = call
161        .keyword
162        .get("license_files")
163        .map(extract_string_values)
164        .unwrap_or_default();
165    if !license_files.is_empty() {
166        extra_data.insert(
167            "license_files".to_string(),
168            JsonValue::Array(license_files.into_iter().map(JsonValue::String).collect()),
169        );
170    }
171
172    if let Some(meson_version) = call.keyword.get("meson_version").and_then(expr_as_string) {
173        extra_data.insert(
174            "meson_version".to_string(),
175            JsonValue::String(meson_version.to_string()),
176        );
177    }
178}
179
180fn extract_dependencies_from_call(call: &CallExpr) -> Vec<Dependency> {
181    let dependency_names = call
182        .positional
183        .iter()
184        .filter_map(expr_as_string)
185        .map(ToOwned::to_owned)
186        .collect::<Vec<_>>();
187
188    if dependency_names.is_empty() {
189        return Vec::new();
190    }
191
192    let extracted_requirement = call.keyword.get("version").map(|expr| {
193        extract_string_values(expr)
194            .into_iter()
195            .filter(|value| !value.is_empty())
196            .collect::<Vec<_>>()
197            .join(", ")
198    });
199    let required = call.keyword.get("required").and_then(expr_as_bool);
200    let native = call.keyword.get("native").and_then(expr_as_bool);
201
202    dependency_names
203        .into_iter()
204        .map(|name| {
205            let mut extra_data = HashMap::new();
206
207            if let Some(requirement) = extracted_requirement
208                .as_ref()
209                .filter(|value| !value.is_empty())
210            {
211                extra_data.insert(
212                    "version".to_string(),
213                    JsonValue::String(requirement.clone()),
214                );
215            }
216            if let Some(required) = required {
217                extra_data.insert("required".to_string(), JsonValue::Bool(required));
218            }
219            if let Some(method) = call.keyword.get("method").and_then(expr_as_string) {
220                extra_data.insert("method".to_string(), JsonValue::String(method.to_string()));
221            }
222            if let Some(native) = native {
223                extra_data.insert("native".to_string(), JsonValue::Bool(native));
224            }
225
226            let modules = call
227                .keyword
228                .get("modules")
229                .map(extract_string_values)
230                .unwrap_or_default();
231            if !modules.is_empty() {
232                extra_data.insert(
233                    "modules".to_string(),
234                    JsonValue::Array(modules.into_iter().map(JsonValue::String).collect()),
235                );
236            }
237
238            let fallback = call
239                .keyword
240                .get("fallback")
241                .map(extract_string_values)
242                .unwrap_or_default();
243            if !fallback.is_empty() {
244                extra_data.insert(
245                    "fallback".to_string(),
246                    JsonValue::Array(fallback.into_iter().map(JsonValue::String).collect()),
247                );
248            }
249
250            Dependency {
251                purl: build_dependency_purl(&name),
252                extracted_requirement: extracted_requirement
253                    .clone()
254                    .filter(|value| !value.is_empty()),
255                scope: Some("dependencies".to_string()),
256                is_runtime: Some(native != Some(true)),
257                is_optional: Some(required == Some(false)),
258                is_pinned: Some(false),
259                is_direct: Some(true),
260                resolved_package: None,
261                extra_data: (!extra_data.is_empty()).then_some(extra_data),
262            }
263        })
264        .collect()
265}
266
267fn build_project_purl(name: &str, version: Option<&str>) -> Option<String> {
268    let mut purl = PackageUrl::new(PackageType::Meson.as_str(), name).ok()?;
269    if let Some(version) = version {
270        purl.with_version(version).ok()?;
271    }
272    Some(purl.to_string())
273}
274
275fn build_dependency_purl(name: &str) -> Option<String> {
276    let mut purl = PackageUrl::new("generic", name).ok()?;
277    purl.with_namespace("meson").ok()?;
278    Some(purl.to_string())
279}
280
281fn default_package_data() -> PackageData {
282    PackageData {
283        package_type: Some(PackageType::Meson),
284        datasource_id: Some(DatasourceId::MesonBuild),
285        ..Default::default()
286    }
287}
288
/// Returns `true` when `statement` starts an `if` or `foreach` block.
///
/// The keyword is taken as the leading run of identifier characters, so it is
/// recognised whether it is followed by whitespace (`if cond`) or directly by
/// punctuation (`if(cond)`). Identifiers that merely start with a keyword
/// (`iffy`, `if_enabled`) are not treated as openers.
fn is_block_opener(statement: &str) -> bool {
    let statement = statement.trim_start();
    let keyword_len = statement
        .find(|ch: char| !(ch.is_ascii_alphanumeric() || ch == '_'))
        .unwrap_or(statement.len());
    matches!(&statement[..keyword_len], "if" | "foreach")
}
295
/// Returns `true` when `statement`, ignoring surrounding whitespace, is
/// exactly `endif` or `endforeach`.
fn is_block_closer(statement: &str) -> bool {
    let keyword = statement.trim();
    keyword == "endif" || keyword == "endforeach"
}
299
/// Removes `#` comments from `input` while leaving string literals intact.
///
/// A comment runs from `#` up to, but not including, the next newline.
/// Two kinds of string literal are copied through verbatim:
///
/// * `'''...'''` multiline strings, which end at the next `'''` and have no
///   escape processing, so apostrophes, backslashes and `#` inside them are
///   ordinary characters;
/// * `'...'` and `"..."` strings, in which a backslash escapes the character
///   that follows it.
///
/// # Errors
///
/// Returns `"unterminated string literal"` when a string is still open at the
/// end of the input.
fn strip_comments(input: &str) -> Result<String, String> {
    let chars: Vec<char> = input.chars().collect();
    let mut output = String::with_capacity(input.len());
    let mut index = 0usize;

    while index < chars.len() {
        let ch = chars[index];
        match ch {
            '#' => {
                // Drop everything up to the newline; the newline itself is kept.
                while index < chars.len() && chars[index] != '\n' {
                    index += 1;
                }
            }
            '\'' | '"' => {
                let start = index;
                if matches!(chars.get(index..index + 3), Some(['\'', '\'', '\''])) {
                    // Multiline string: raw text up to the next `'''`.
                    index += 3;
                    loop {
                        match chars.get(index..index + 3) {
                            Some(['\'', '\'', '\'']) => {
                                index += 3;
                                break;
                            }
                            Some(_) => index += 1,
                            None => return Err("unterminated string literal".to_string()),
                        }
                    }
                } else {
                    index += 1;
                    let mut escaped = false;
                    loop {
                        let Some(&inner) = chars.get(index) else {
                            return Err("unterminated string literal".to_string());
                        };
                        index += 1;
                        if escaped {
                            escaped = false;
                        } else if inner == '\\' {
                            escaped = true;
                        } else if inner == ch {
                            break;
                        }
                    }
                }
                output.extend(&chars[start..index]);
            }
            _ => {
                output.push(ch);
                index += 1;
            }
        }
    }

    Ok(output)
}
350
/// Splits comment-free Meson source into trimmed, non-empty statements.
///
/// A statement ends at a newline that sits outside every string literal and
/// outside all `(...)` and `[...]` groups. String literals are skipped as a
/// unit:
///
/// * `'''...'''` multiline strings end at the next `'''` and have no escape
///   processing;
/// * `'...'` and `"..."` strings treat a backslash as escaping the next
///   character.
///
/// An unterminated string swallows the rest of the input into the current
/// statement rather than failing.
fn split_statements(input: &str) -> Vec<String> {
    let chars: Vec<char> = input.chars().collect();
    let mut statements = Vec::new();
    let mut current = String::new();
    let mut paren_depth = 0usize;
    let mut bracket_depth = 0usize;
    let mut index = 0usize;

    while index < chars.len() {
        let ch = chars[index];

        if matches!(ch, '\'' | '"') {
            let start = index;
            if matches!(chars.get(index..index + 3), Some(['\'', '\'', '\''])) {
                index += 3;
                loop {
                    match chars.get(index..index + 3) {
                        Some(['\'', '\'', '\'']) => {
                            index += 3;
                            break;
                        }
                        Some(_) => index += 1,
                        None => {
                            // No closing `'''`: consume the remainder.
                            index = chars.len();
                            break;
                        }
                    }
                }
            } else {
                index += 1;
                let mut escaped = false;
                while index < chars.len() {
                    let inner = chars[index];
                    index += 1;
                    if escaped {
                        escaped = false;
                    } else if inner == '\\' {
                        escaped = true;
                    } else if inner == ch {
                        break;
                    }
                }
            }
            current.extend(&chars[start..index]);
            continue;
        }

        index += 1;

        if ch == '\n' && paren_depth == 0 && bracket_depth == 0 {
            let trimmed = current.trim();
            if !trimmed.is_empty() {
                statements.push(trimmed.to_string());
            }
            current.clear();
            continue;
        }

        match ch {
            '(' => paren_depth += 1,
            ')' => paren_depth = paren_depth.saturating_sub(1),
            '[' => bracket_depth += 1,
            ']' => bracket_depth = bracket_depth.saturating_sub(1),
            _ => {}
        }
        current.push(ch);
    }

    let trimmed = current.trim();
    if !trimmed.is_empty() {
        statements.push(trimmed.to_string());
    }

    statements
}
401
/// A parsed top-level statement.
///
/// Both variants carry only the expression; for an assignment that is the
/// right-hand side, and the assigned name is not kept.
#[derive(Clone, Debug)]
enum Statement {
    /// A bare expression statement.
    Expr(Expr),
    /// `name = <expr>`; holds `<expr>`.
    Assignment(Expr),
}

/// The subset of Meson expressions this parser understands.
#[derive(Clone, Debug)]
enum Expr {
    /// A string literal, stored without its quotes.
    String(String),
    /// `true` or `false`.
    Bool(bool),
    /// `[a, b, ...]`.
    Array(Vec<Expr>),
    /// A bare identifier; its name is not retained.
    Identifier,
    /// A function call such as `project(...)`.
    Call(CallExpr),
}

/// A function call with its arguments split by kind.
#[derive(Clone, Debug)]
struct CallExpr {
    /// Name of the called function.
    name: String,
    /// Positional arguments in source order.
    positional: Vec<Expr>,
    /// `key: value` arguments, keyed by `key`.
    keyword: HashMap<String, Expr>,
}
423
/// Lexical tokens produced by `tokenize`.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Token {
    /// An identifier other than `true`/`false`.
    Ident(String),
    /// A string literal, without its surrounding quotes.
    Str(String),
    /// The literal `true` or `false`.
    Bool(bool),
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `=`
    Equal,
}
437
438fn parse_statement(statement: &str) -> Result<Statement, String> {
439    let tokens = tokenize(statement)?;
440    if tokens.is_empty() {
441        return Err("empty statement".to_string());
442    }
443
444    if let [Token::Ident(name), Token::Equal, rest @ ..] = tokens.as_slice() {
445        let mut parser = Parser::new(rest);
446        let expr = parser.parse_expr()?;
447        parser.expect_end()?;
448        let _ = name;
449        return Ok(Statement::Assignment(expr));
450    }
451
452    let mut parser = Parser::new(&tokens);
453    let expr = parser.parse_expr()?;
454    parser.expect_end()?;
455    Ok(Statement::Expr(expr))
456}
457
458fn tokenize(input: &str) -> Result<Vec<Token>, String> {
459    let chars: Vec<char> = input.chars().collect();
460    let mut tokens = Vec::new();
461    let mut index = 0usize;
462
463    while index < chars.len() {
464        let ch = chars[index];
465        if ch.is_whitespace() {
466            index += 1;
467            continue;
468        }
469
470        match ch {
471            '(' => {
472                tokens.push(Token::LParen);
473                index += 1;
474            }
475            ')' => {
476                tokens.push(Token::RParen);
477                index += 1;
478            }
479            '[' => {
480                tokens.push(Token::LBracket);
481                index += 1;
482            }
483            ']' => {
484                tokens.push(Token::RBracket);
485                index += 1;
486            }
487            ':' => {
488                tokens.push(Token::Colon);
489                index += 1;
490            }
491            ',' => {
492                tokens.push(Token::Comma);
493                index += 1;
494            }
495            '=' => {
496                tokens.push(Token::Equal);
497                index += 1;
498            }
499            '\'' | '"' => {
500                let delimiter = ch;
501                index += 1;
502                let start = index;
503                let mut escaped = false;
504                while index < chars.len() {
505                    let current = chars[index];
506                    if escaped {
507                        escaped = false;
508                    } else if current == '\\' {
509                        escaped = true;
510                    } else if current == delimiter {
511                        break;
512                    }
513                    index += 1;
514                }
515
516                if index >= chars.len() {
517                    return Err("unterminated string token".to_string());
518                }
519
520                let value: String = chars[start..index].iter().collect();
521                tokens.push(Token::Str(value));
522                index += 1;
523            }
524            _ if is_ident_start(ch) => {
525                let start = index;
526                index += 1;
527                while index < chars.len() && is_ident_continue(chars[index]) {
528                    index += 1;
529                }
530                let ident: String = chars[start..index].iter().collect();
531                match ident.as_str() {
532                    "true" => tokens.push(Token::Bool(true)),
533                    "false" => tokens.push(Token::Bool(false)),
534                    _ => tokens.push(Token::Ident(ident)),
535                }
536            }
537            _ => {
538                return Err(format!("unsupported token '{}'", ch));
539            }
540        }
541    }
542
543    Ok(tokens)
544}
545
/// Returns `true` for characters that may begin an identifier: ASCII letters
/// and `_`.
fn is_ident_start(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '_')
}
549
/// Returns `true` for characters that may continue an identifier: ASCII
/// letters, ASCII digits and `_`.
fn is_ident_continue(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
553
554struct Parser<'a> {
555    tokens: &'a [Token],
556    index: usize,
557}
558
559impl<'a> Parser<'a> {
560    fn new(tokens: &'a [Token]) -> Self {
561        Self { tokens, index: 0 }
562    }
563
564    fn parse_expr(&mut self) -> Result<Expr, String> {
565        match self.peek() {
566            Some(Token::Str(value)) => {
567                self.index += 1;
568                Ok(Expr::String(value.clone()))
569            }
570            Some(Token::Bool(value)) => {
571                self.index += 1;
572                Ok(Expr::Bool(*value))
573            }
574            Some(Token::LBracket) => self.parse_array(),
575            Some(Token::Ident(_)) => self.parse_identifier_or_call(),
576            Some(token) => Err(format!("unexpected token {:?}", token)),
577            None => Err("unexpected end of input".to_string()),
578        }
579    }
580
581    fn parse_array(&mut self) -> Result<Expr, String> {
582        self.expect(Token::LBracket)?;
583        let mut values = Vec::new();
584        while !matches!(self.peek(), Some(Token::RBracket)) {
585            values.push(self.parse_expr()?);
586            if matches!(self.peek(), Some(Token::Comma)) {
587                self.index += 1;
588            } else if !matches!(self.peek(), Some(Token::RBracket)) {
589                return Err("expected ',' or ']' in array".to_string());
590            }
591        }
592        self.expect(Token::RBracket)?;
593        Ok(Expr::Array(values))
594    }
595
596    fn parse_identifier_or_call(&mut self) -> Result<Expr, String> {
597        let Token::Ident(name) = self
598            .next()
599            .cloned()
600            .ok_or_else(|| "expected identifier".to_string())?
601        else {
602            return Err("expected identifier".to_string());
603        };
604
605        if !matches!(self.peek(), Some(Token::LParen)) {
606            let _ = name;
607            return Ok(Expr::Identifier);
608        }
609
610        self.expect(Token::LParen)?;
611        let mut positional = Vec::new();
612        let mut keyword = HashMap::new();
613
614        while !matches!(self.peek(), Some(Token::RParen)) {
615            if let (Some(Token::Ident(arg_name)), Some(Token::Colon)) =
616                (self.peek(), self.peek_n(1))
617            {
618                let arg_name = arg_name.clone();
619                self.index += 2;
620                let value = self.parse_expr()?;
621                keyword.insert(arg_name, value);
622            } else {
623                positional.push(self.parse_expr()?);
624            }
625
626            if matches!(self.peek(), Some(Token::Comma)) {
627                self.index += 1;
628            } else if !matches!(self.peek(), Some(Token::RParen)) {
629                return Err("expected ',' or ')' in call".to_string());
630            }
631        }
632
633        self.expect(Token::RParen)?;
634        Ok(Expr::Call(CallExpr {
635            name,
636            positional,
637            keyword,
638        }))
639    }
640
641    fn expect(&mut self, expected: Token) -> Result<(), String> {
642        match self.next() {
643            Some(token) if *token == expected => Ok(()),
644            Some(token) => Err(format!("expected {:?}, found {:?}", expected, token)),
645            None => Err(format!("expected {:?}, found end of input", expected)),
646        }
647    }
648
649    fn expect_end(&self) -> Result<(), String> {
650        if self.index == self.tokens.len() {
651            Ok(())
652        } else {
653            Err(format!(
654                "unexpected trailing tokens: {:?}",
655                &self.tokens[self.index..]
656            ))
657        }
658    }
659
660    fn peek(&self) -> Option<&'a Token> {
661        self.tokens.get(self.index)
662    }
663
664    fn peek_n(&self, offset: usize) -> Option<&'a Token> {
665        self.tokens.get(self.index + offset)
666    }
667
668    fn next(&mut self) -> Option<&'a Token> {
669        let token = self.tokens.get(self.index);
670        if token.is_some() {
671            self.index += 1;
672        }
673        token
674    }
675}
676
677fn expr_as_string(expr: &Expr) -> Option<&str> {
678    match expr {
679        Expr::String(value) => Some(value.as_str()),
680        _ => None,
681    }
682}
683
684fn expr_as_bool(expr: &Expr) -> Option<bool> {
685    match expr {
686        Expr::Bool(value) => Some(*value),
687        _ => None,
688    }
689}
690
691fn extract_string_values(expr: &Expr) -> Vec<String> {
692    match expr {
693        Expr::String(value) => vec![value.clone()],
694        Expr::Array(values) => values
695            .iter()
696            .filter_map(expr_as_string)
697            .map(ToOwned::to_owned)
698            .collect(),
699        _ => Vec::new(),
700    }
701}
702
// Registers metadata for this parser through the crate's `register_parser!`
// macro. NOTE(review): the macro is defined elsewhere; the arguments appear to
// be a description, the path glob patterns, a package type name, an empty
// fourth value whose meaning is not visible here, and a documentation URL —
// confirm against the macro definition.
crate::register_parser!(
    "Meson meson.build manifest",
    &["**/meson.build"],
    "meson",
    "",
    Some("https://mesonbuild.com/Syntax.html"),
);