// provenant/parsers/meson.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use crate::parser_warn as warn;
5use packageurl::PackageUrl;
6use serde_json::Value as JsonValue;
7
8use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
9
10use super::PackageParser;
11use super::license_normalization::normalize_spdx_declared_license;
12use super::utils::{MAX_ITERATION_COUNT, RecursionGuard, read_file_to_string, truncate_field};
13
14pub struct MesonParser;
15
16impl PackageParser for MesonParser {
17    const PACKAGE_TYPE: PackageType = PackageType::Meson;
18
19    fn is_match(path: &Path) -> bool {
20        path.file_name().is_some_and(|name| name == "meson.build")
21    }
22
23    fn extract_packages(path: &Path) -> Vec<PackageData> {
24        let content = match read_file_to_string(path, None) {
25            Ok(content) => content,
26            Err(error) => {
27                warn!("Failed to read meson.build at {:?}: {}", path, error);
28                return vec![default_package_data()];
29            }
30        };
31
32        match parse_meson_build(&content) {
33            Ok(package) => vec![package],
34            Err(_) => vec![default_package_data()],
35        }
36    }
37}
38
39fn parse_meson_build(content: &str) -> Result<PackageData, String> {
40    let sanitized = strip_comments(content)?;
41    let statements = split_statements(&sanitized);
42
43    let mut package = default_package_data();
44    let mut extra_data = HashMap::new();
45    let mut dependencies = Vec::new();
46    let mut control_flow_depth = 0usize;
47
48    for statement in statements.into_iter().take(MAX_ITERATION_COUNT) {
49        let trimmed = statement.trim();
50        if trimmed.is_empty() {
51            continue;
52        }
53
54        if is_block_closer(trimmed) {
55            control_flow_depth = control_flow_depth.saturating_sub(1);
56            continue;
57        }
58
59        if control_flow_depth > 0 {
60            if is_block_opener(trimmed) {
61                control_flow_depth += 1;
62            }
63            continue;
64        }
65
66        if is_block_opener(trimmed) {
67            control_flow_depth += 1;
68            continue;
69        }
70
71        let Ok(parsed) = parse_statement(trimmed) else {
72            continue;
73        };
74        match parsed {
75            Statement::Expr(expr) | Statement::Assignment(expr) => {
76                handle_top_level_expr(&expr, &mut package, &mut extra_data, &mut dependencies)
77            }
78        }
79    }
80
81    package.dependencies = dependencies;
82    package.extra_data = (!extra_data.is_empty()).then_some(extra_data);
83    package.purl = package
84        .name
85        .as_deref()
86        .and_then(|name| build_project_purl(name, package.version.as_deref()));
87
88    Ok(package)
89}
90
91fn handle_top_level_expr(
92    expr: &Expr,
93    package: &mut PackageData,
94    extra_data: &mut HashMap<String, JsonValue>,
95    dependencies: &mut Vec<Dependency>,
96) {
97    let Expr::Call(call) = expr else {
98        return;
99    };
100
101    match call.name.as_str() {
102        "project" if package.name.is_none() => apply_project_call(call, package, extra_data),
103        "dependency" => dependencies.extend(extract_dependencies_from_call(call)),
104        _ => {}
105    }
106}
107
108fn apply_project_call(
109    call: &CallExpr,
110    package: &mut PackageData,
111    extra_data: &mut HashMap<String, JsonValue>,
112) {
113    let Some(name) = call.positional.first().and_then(expr_as_string) else {
114        return;
115    };
116
117    package.package_type = Some(PackageType::Meson);
118    package.datasource_id = Some(DatasourceId::MesonBuild);
119    package.name = Some(truncate_field(name.to_string()));
120
121    let languages = call
122        .positional
123        .iter()
124        .skip(1)
125        .flat_map(extract_string_values)
126        .collect::<Vec<_>>();
127    if let Some(primary_language) = languages.first() {
128        package.primary_language = Some(truncate_field(primary_language.clone()));
129    }
130    if !languages.is_empty() {
131        extra_data.insert(
132            "languages".to_string(),
133            JsonValue::Array(languages.iter().cloned().map(JsonValue::String).collect()),
134        );
135    }
136
137    if let Some(version) = call.keyword.get("version").and_then(expr_as_string) {
138        package.version = Some(truncate_field(version.to_string()));
139    }
140
141    let licenses = call
142        .keyword
143        .get("license")
144        .map(extract_string_values)
145        .unwrap_or_default();
146    if !licenses.is_empty() {
147        package.extracted_license_statement = Some(truncate_field(licenses.join("\n")));
148        if licenses.len() == 1 {
149            let (declared_license_expression, declared_license_expression_spdx, license_detections) =
150                normalize_spdx_declared_license(licenses.first().map(String::as_str));
151            package.declared_license_expression = declared_license_expression;
152            package.declared_license_expression_spdx = declared_license_expression_spdx;
153            package.license_detections = license_detections;
154        }
155    }
156
157    let license_files = call
158        .keyword
159        .get("license_files")
160        .map(extract_string_values)
161        .unwrap_or_default();
162    if !license_files.is_empty() {
163        extra_data.insert(
164            "license_files".to_string(),
165            JsonValue::Array(license_files.into_iter().map(JsonValue::String).collect()),
166        );
167    }
168
169    if let Some(meson_version) = call.keyword.get("meson_version").and_then(expr_as_string) {
170        extra_data.insert(
171            "meson_version".to_string(),
172            JsonValue::String(meson_version.to_string()),
173        );
174    }
175}
176
177fn extract_dependencies_from_call(call: &CallExpr) -> Vec<Dependency> {
178    let dependency_names = call
179        .positional
180        .iter()
181        .filter_map(expr_as_string)
182        .map(ToOwned::to_owned)
183        .collect::<Vec<_>>();
184
185    if dependency_names.is_empty() {
186        return Vec::new();
187    }
188
189    let extracted_requirement = call.keyword.get("version").map(|expr| {
190        extract_string_values(expr)
191            .into_iter()
192            .filter(|value| !value.is_empty())
193            .collect::<Vec<_>>()
194            .join(", ")
195    });
196    let required = call.keyword.get("required").and_then(expr_as_bool);
197    let native = call.keyword.get("native").and_then(expr_as_bool);
198
199    dependency_names
200        .into_iter()
201        .take(MAX_ITERATION_COUNT)
202        .map(|name| {
203            let mut extra_data = HashMap::new();
204
205            if let Some(requirement) = extracted_requirement
206                .as_ref()
207                .filter(|value| !value.is_empty())
208            {
209                extra_data.insert(
210                    "version".to_string(),
211                    JsonValue::String(requirement.clone()),
212                );
213            }
214            if let Some(required) = required {
215                extra_data.insert("required".to_string(), JsonValue::Bool(required));
216            }
217            if let Some(method) = call.keyword.get("method").and_then(expr_as_string) {
218                extra_data.insert("method".to_string(), JsonValue::String(method.to_string()));
219            }
220            if let Some(native) = native {
221                extra_data.insert("native".to_string(), JsonValue::Bool(native));
222            }
223
224            let modules = call
225                .keyword
226                .get("modules")
227                .map(extract_string_values)
228                .unwrap_or_default();
229            if !modules.is_empty() {
230                extra_data.insert(
231                    "modules".to_string(),
232                    JsonValue::Array(modules.into_iter().map(JsonValue::String).collect()),
233                );
234            }
235
236            let fallback = call
237                .keyword
238                .get("fallback")
239                .map(extract_string_values)
240                .unwrap_or_default();
241            if !fallback.is_empty() {
242                extra_data.insert(
243                    "fallback".to_string(),
244                    JsonValue::Array(fallback.into_iter().map(JsonValue::String).collect()),
245                );
246            }
247
248            Dependency {
249                purl: build_dependency_purl(&name),
250                extracted_requirement: extracted_requirement
251                    .clone()
252                    .filter(|value| !value.is_empty())
253                    .map(truncate_field),
254                scope: Some("dependencies".to_string()),
255                is_runtime: Some(native != Some(true)),
256                is_optional: Some(required == Some(false)),
257                is_pinned: Some(false),
258                is_direct: Some(true),
259                resolved_package: None,
260                extra_data: (!extra_data.is_empty()).then_some(extra_data),
261            }
262        })
263        .collect()
264}
265
266fn build_project_purl(name: &str, version: Option<&str>) -> Option<String> {
267    let mut purl = PackageUrl::new(PackageType::Meson.as_str(), name).ok()?;
268    if let Some(version) = version {
269        purl.with_version(version).ok()?;
270    }
271    Some(truncate_field(purl.to_string()))
272}
273
274fn build_dependency_purl(name: &str) -> Option<String> {
275    let mut purl = PackageUrl::new("generic", name).ok()?;
276    purl.with_namespace("meson").ok()?;
277    Some(truncate_field(purl.to_string()))
278}
279
280fn default_package_data() -> PackageData {
281    PackageData {
282        package_type: Some(PackageType::Meson),
283        datasource_id: Some(DatasourceId::MesonBuild),
284        ..Default::default()
285    }
286}
287
/// Returns `true` when `statement` begins an `if` or `foreach` block.
///
/// The decision is made on the statement's leading identifier (ASCII letters,
/// digits and `_`) rather than on its first whitespace-delimited word, so a
/// condition written without a space after the keyword, such as `if(cond)`,
/// is still recognised. Identifiers that merely start with a keyword
/// (`iffy`, `foreach_item`) do not match.
fn is_block_opener(statement: &str) -> bool {
    let trimmed = statement.trim_start();
    // End of the leading identifier; the whole string when it is all identifier characters.
    let keyword_end = trimmed
        .find(|ch: char| !(ch.is_ascii_alphanumeric() || ch == '_'))
        .unwrap_or(trimmed.len());
    matches!(&trimmed[..keyword_end], "if" | "foreach")
}
294
/// Returns `true` when `statement`, ignoring surrounding whitespace, is
/// exactly `endif` or `endforeach`.
fn is_block_closer(statement: &str) -> bool {
    let keyword = statement.trim();
    keyword == "endif" || keyword == "endforeach"
}
298
299fn strip_comments(input: &str) -> Result<String, String> {
300    let chars: Vec<char> = input.chars().collect();
301    let mut output = String::with_capacity(input.len());
302    let mut index = 0usize;
303    let mut in_string = false;
304    let mut string_delimiter = '\0';
305    let mut escaped = false;
306    let mut chars_processed = 0usize;
307
308    while index < chars.len() {
309        chars_processed += 1;
310        if chars_processed > MAX_ITERATION_COUNT {
311            break;
312        }
313        let ch = chars[index];
314
315        if in_string {
316            output.push(ch);
317            if escaped {
318                escaped = false;
319            } else if ch == '\\' {
320                escaped = true;
321            } else if ch == string_delimiter {
322                in_string = false;
323            }
324            index += 1;
325            continue;
326        }
327
328        if matches!(ch, '\'' | '"') {
329            in_string = true;
330            string_delimiter = ch;
331            output.push(ch);
332            index += 1;
333            continue;
334        }
335
336        if ch == '#' {
337            index += 1;
338            while index < chars.len() && chars[index] != '\n' {
339                index += 1;
340            }
341            continue;
342        }
343
344        output.push(ch);
345        index += 1;
346    }
347
348    if in_string {
349        return Err("unterminated string literal".to_string());
350    }
351
352    Ok(output)
353}
354
355fn split_statements(input: &str) -> Vec<String> {
356    let mut statements = Vec::new();
357    let mut current = String::new();
358    let mut paren_depth = 0usize;
359    let mut bracket_depth = 0usize;
360    let mut in_string = false;
361    let mut string_delimiter = '\0';
362    let mut escaped = false;
363    let mut chars_processed = 0usize;
364
365    for ch in input.chars() {
366        chars_processed += 1;
367        if chars_processed > MAX_ITERATION_COUNT {
368            break;
369        }
370        current.push(ch);
371
372        if in_string {
373            if escaped {
374                escaped = false;
375            } else if ch == '\\' {
376                escaped = true;
377            } else if ch == string_delimiter {
378                in_string = false;
379            }
380            continue;
381        }
382
383        match ch {
384            '\'' | '"' => {
385                in_string = true;
386                string_delimiter = ch;
387            }
388            '(' => paren_depth += 1,
389            ')' => paren_depth = paren_depth.saturating_sub(1),
390            '[' => bracket_depth += 1,
391            ']' => bracket_depth = bracket_depth.saturating_sub(1),
392            '\n' if paren_depth == 0 && bracket_depth == 0 => {
393                let trimmed = current.trim();
394                if !trimmed.is_empty() {
395                    statements.push(trimmed.to_string());
396                }
397                current.clear();
398            }
399            _ => {}
400        }
401    }
402
403    let trimmed = current.trim();
404    if !trimmed.is_empty() {
405        statements.push(trimmed.to_string());
406    }
407
408    statements
409}
410
/// A single parsed statement, as produced by `parse_statement`.
#[derive(Debug, Clone)]
enum Statement {
    /// A statement consisting of one expression.
    Expr(Expr),
    /// The right-hand side of an `identifier = expression` statement; the
    /// assigned identifier is not retained.
    Assignment(Expr),
}
416
/// The subset of expressions the parser understands.
#[derive(Debug, Clone)]
enum Expr {
    /// A string literal: the text between its delimiters, as tokenized.
    String(String),
    /// A `true` or `false` literal.
    Bool(bool),
    /// A bracketed, comma-separated list of expressions.
    Array(Vec<Expr>),
    /// A bare identifier; its name is not retained.
    Identifier,
    /// A function call with its arguments.
    Call(CallExpr),
}
425
/// A parsed function call such as `project('name', version: '1.0')`.
#[derive(Debug, Clone)]
struct CallExpr {
    /// Name of the called function.
    name: String,
    /// Arguments given without a `name:` prefix, in source order.
    positional: Vec<Expr>,
    /// Arguments given as `name: value`, keyed by name. A repeated name
    /// replaces the earlier value.
    keyword: HashMap<String, Expr>,
}
432
/// Lexical tokens produced by `tokenize`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
    /// An identifier other than `true`/`false`.
    Ident(String),
    /// A string literal: the raw text between its quotes, escape sequences
    /// left as written.
    Str(String),
    /// The identifier `true` or `false`.
    Bool(bool),
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `=`
    Equal,
}
446
447fn parse_statement(statement: &str) -> Result<Statement, String> {
448    let tokens = tokenize(statement)?;
449    if tokens.is_empty() {
450        return Err("empty statement".to_string());
451    }
452
453    if let [Token::Ident(name), Token::Equal, rest @ ..] = tokens.as_slice() {
454        let mut parser = Parser::new(rest);
455        let expr = parser.parse_expr()?;
456        parser.expect_end()?;
457        let _ = name;
458        return Ok(Statement::Assignment(expr));
459    }
460
461    let mut parser = Parser::new(&tokens);
462    let expr = parser.parse_expr()?;
463    parser.expect_end()?;
464    Ok(Statement::Expr(expr))
465}
466
467fn tokenize(input: &str) -> Result<Vec<Token>, String> {
468    let chars: Vec<char> = input.chars().collect();
469    let mut tokens = Vec::new();
470    let mut index = 0usize;
471
472    while index < chars.len() {
473        if tokens.len() >= MAX_ITERATION_COUNT {
474            break;
475        }
476        let ch = chars[index];
477        if ch.is_whitespace() {
478            index += 1;
479            continue;
480        }
481
482        match ch {
483            '(' => {
484                tokens.push(Token::LParen);
485                index += 1;
486            }
487            ')' => {
488                tokens.push(Token::RParen);
489                index += 1;
490            }
491            '[' => {
492                tokens.push(Token::LBracket);
493                index += 1;
494            }
495            ']' => {
496                tokens.push(Token::RBracket);
497                index += 1;
498            }
499            ':' => {
500                tokens.push(Token::Colon);
501                index += 1;
502            }
503            ',' => {
504                tokens.push(Token::Comma);
505                index += 1;
506            }
507            '=' => {
508                tokens.push(Token::Equal);
509                index += 1;
510            }
511            '\'' | '"' => {
512                let delimiter = ch;
513                index += 1;
514                let start = index;
515                let mut escaped = false;
516                while index < chars.len() {
517                    let current = chars[index];
518                    if escaped {
519                        escaped = false;
520                    } else if current == '\\' {
521                        escaped = true;
522                    } else if current == delimiter {
523                        break;
524                    }
525                    index += 1;
526                }
527
528                if index >= chars.len() {
529                    return Err("unterminated string token".to_string());
530                }
531
532                let value: String = chars[start..index].iter().collect();
533                tokens.push(Token::Str(value));
534                index += 1;
535            }
536            _ if is_ident_start(ch) => {
537                let start = index;
538                index += 1;
539                while index < chars.len() && is_ident_continue(chars[index]) {
540                    index += 1;
541                }
542                let ident: String = chars[start..index].iter().collect();
543                match ident.as_str() {
544                    "true" => tokens.push(Token::Bool(true)),
545                    "false" => tokens.push(Token::Bool(false)),
546                    _ => tokens.push(Token::Ident(ident)),
547                }
548            }
549            _ => {
550                return Err(format!("unsupported token '{}'", ch));
551            }
552        }
553    }
554
555    Ok(tokens)
556}
557
/// Returns `true` when `ch` may begin an identifier: an ASCII letter or `_`.
fn is_ident_start(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '_')
}
561
/// Returns `true` when `ch` may appear after the first character of an
/// identifier: an ASCII letter, an ASCII digit, or `_`.
fn is_ident_continue(ch: char) -> bool {
    matches!(ch, '0'..='9' | 'a'..='z' | 'A'..='Z' | '_')
}
565
566struct Parser<'a> {
567    tokens: &'a [Token],
568    index: usize,
569    guard: RecursionGuard<()>,
570}
571
572impl<'a> Parser<'a> {
573    fn new(tokens: &'a [Token]) -> Self {
574        Self {
575            tokens,
576            index: 0,
577            guard: RecursionGuard::depth_only(),
578        }
579    }
580
581    fn parse_expr(&mut self) -> Result<Expr, String> {
582        if self.guard.descend() {
583            return Err("recursion depth exceeded".to_string());
584        }
585        let result = match self.peek() {
586            Some(Token::Str(value)) => {
587                self.index += 1;
588                Ok(Expr::String(value.clone()))
589            }
590            Some(Token::Bool(value)) => {
591                self.index += 1;
592                Ok(Expr::Bool(*value))
593            }
594            Some(Token::LBracket) => self.parse_array(),
595            Some(Token::Ident(_)) => self.parse_identifier_or_call(),
596            Some(token) => Err(format!("unexpected token {:?}", token)),
597            None => Err("unexpected end of input".to_string()),
598        };
599        self.guard.ascend();
600        result
601    }
602
603    fn parse_array(&mut self) -> Result<Expr, String> {
604        self.expect(Token::LBracket)?;
605        let mut values = Vec::new();
606        let mut element_count = 0usize;
607        while !matches!(self.peek(), Some(Token::RBracket)) {
608            element_count += 1;
609            if element_count > MAX_ITERATION_COUNT {
610                break;
611            }
612            let expr = self.parse_expr()?;
613            values.push(expr);
614            if matches!(self.peek(), Some(Token::Comma)) {
615                self.index += 1;
616            } else if !matches!(self.peek(), Some(Token::RBracket)) {
617                return Err("expected ',' or ']' in array".to_string());
618            }
619        }
620        self.expect(Token::RBracket)?;
621        Ok(Expr::Array(values))
622    }
623
624    fn parse_identifier_or_call(&mut self) -> Result<Expr, String> {
625        let Token::Ident(name) = self
626            .next()
627            .cloned()
628            .ok_or_else(|| "expected identifier".to_string())?
629        else {
630            return Err("expected identifier".to_string());
631        };
632
633        if !matches!(self.peek(), Some(Token::LParen)) {
634            let _ = name;
635            return Ok(Expr::Identifier);
636        }
637
638        self.expect(Token::LParen)?;
639        let mut positional = Vec::new();
640        let mut keyword = HashMap::new();
641        let mut arg_count = 0usize;
642
643        while !matches!(self.peek(), Some(Token::RParen)) {
644            arg_count += 1;
645            if arg_count > MAX_ITERATION_COUNT {
646                break;
647            }
648            if let (Some(Token::Ident(arg_name)), Some(Token::Colon)) =
649                (self.peek(), self.peek_n(1))
650            {
651                let arg_name = arg_name.clone();
652                self.index += 2;
653                let value = self.parse_expr()?;
654                keyword.insert(arg_name, value);
655            } else {
656                let expr = self.parse_expr()?;
657                positional.push(expr);
658            }
659
660            if matches!(self.peek(), Some(Token::Comma)) {
661                self.index += 1;
662            } else if !matches!(self.peek(), Some(Token::RParen)) {
663                return Err("expected ',' or ')' in call".to_string());
664            }
665        }
666
667        self.expect(Token::RParen)?;
668        Ok(Expr::Call(CallExpr {
669            name,
670            positional,
671            keyword,
672        }))
673    }
674
675    fn expect(&mut self, expected: Token) -> Result<(), String> {
676        match self.next() {
677            Some(token) if *token == expected => Ok(()),
678            Some(token) => Err(format!("expected {:?}, found {:?}", expected, token)),
679            None => Err(format!("expected {:?}, found end of input", expected)),
680        }
681    }
682
683    fn expect_end(&self) -> Result<(), String> {
684        if self.index == self.tokens.len() {
685            Ok(())
686        } else {
687            Err(format!(
688                "unexpected trailing tokens: {:?}",
689                &self.tokens[self.index..]
690            ))
691        }
692    }
693
694    fn peek(&self) -> Option<&'a Token> {
695        self.tokens.get(self.index)
696    }
697
698    fn peek_n(&self, offset: usize) -> Option<&'a Token> {
699        self.tokens.get(self.index + offset)
700    }
701
702    fn next(&mut self) -> Option<&'a Token> {
703        let token = self.tokens.get(self.index);
704        if token.is_some() {
705            self.index += 1;
706        }
707        token
708    }
709}
710
711fn expr_as_string(expr: &Expr) -> Option<&str> {
712    match expr {
713        Expr::String(value) => Some(value.as_str()),
714        _ => None,
715    }
716}
717
718fn expr_as_bool(expr: &Expr) -> Option<bool> {
719    match expr {
720        Expr::Bool(value) => Some(*value),
721        _ => None,
722    }
723}
724
725fn extract_string_values(expr: &Expr) -> Vec<String> {
726    match expr {
727        Expr::String(value) => vec![value.clone()],
728        Expr::Array(values) => values
729            .iter()
730            .filter_map(expr_as_string)
731            .map(ToOwned::to_owned)
732            .collect(),
733        _ => Vec::new(),
734    }
735}
736
// Registers this parser's metadata with the crate-wide parser registry.
// NOTE(review): the arguments appear to be a description, the matched path
// globs, the package type, a primary language (left empty here) and a
// documentation URL — confirm against the `register_parser!` definition.
crate::register_parser!(
    "Meson meson.build manifest",
    &["**/meson.build"],
    "meson",
    "",
    Some("https://mesonbuild.com/Syntax.html"),
);