Skip to main content

provenant/parsers/
meson.rs

1use std::collections::HashMap;
2use std::fs;
3use std::path::Path;
4
5use log::warn;
6use packageurl::PackageUrl;
7use serde_json::Value as JsonValue;
8
9use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
10
11use super::PackageParser;
12
13pub struct MesonParser;
14
15impl PackageParser for MesonParser {
16    const PACKAGE_TYPE: PackageType = PackageType::Meson;
17
18    fn is_match(path: &Path) -> bool {
19        path.file_name().is_some_and(|name| name == "meson.build")
20    }
21
22    fn extract_packages(path: &Path) -> Vec<PackageData> {
23        let content = match fs::read_to_string(path) {
24            Ok(content) => content,
25            Err(error) => {
26                warn!("Failed to read meson.build at {:?}: {}", path, error);
27                return vec![default_package_data()];
28            }
29        };
30
31        match parse_meson_build(&content) {
32            Ok(package) => vec![package],
33            Err(error) => {
34                warn!("Failed to parse meson.build at {:?}: {}", path, error);
35                vec![default_package_data()]
36            }
37        }
38    }
39}
40
41fn parse_meson_build(content: &str) -> Result<PackageData, String> {
42    let sanitized = strip_comments(content)?;
43    let statements = split_statements(&sanitized);
44
45    let mut package = default_package_data();
46    let mut extra_data = HashMap::new();
47    let mut dependencies = Vec::new();
48    let mut control_flow_depth = 0usize;
49
50    for statement in statements {
51        let trimmed = statement.trim();
52        if trimmed.is_empty() {
53            continue;
54        }
55
56        if is_block_closer(trimmed) {
57            control_flow_depth = control_flow_depth.saturating_sub(1);
58            continue;
59        }
60
61        if control_flow_depth > 0 {
62            if is_block_opener(trimmed) {
63                control_flow_depth += 1;
64            }
65            continue;
66        }
67
68        if is_block_opener(trimmed) {
69            control_flow_depth += 1;
70            continue;
71        }
72
73        let Ok(parsed) = parse_statement(trimmed) else {
74            continue;
75        };
76        match parsed {
77            Statement::Expr(expr) | Statement::Assignment(expr) => {
78                handle_top_level_expr(&expr, &mut package, &mut extra_data, &mut dependencies)
79            }
80        }
81    }
82
83    package.dependencies = dependencies;
84    package.extra_data = (!extra_data.is_empty()).then_some(extra_data);
85    package.purl = package
86        .name
87        .as_deref()
88        .and_then(|name| build_project_purl(name, package.version.as_deref()));
89
90    Ok(package)
91}
92
93fn handle_top_level_expr(
94    expr: &Expr,
95    package: &mut PackageData,
96    extra_data: &mut HashMap<String, JsonValue>,
97    dependencies: &mut Vec<Dependency>,
98) {
99    let Expr::Call(call) = expr else {
100        return;
101    };
102
103    match call.name.as_str() {
104        "project" if package.name.is_none() => apply_project_call(call, package, extra_data),
105        "dependency" => dependencies.extend(extract_dependencies_from_call(call)),
106        _ => {}
107    }
108}
109
110fn apply_project_call(
111    call: &CallExpr,
112    package: &mut PackageData,
113    extra_data: &mut HashMap<String, JsonValue>,
114) {
115    let Some(name) = call.positional.first().and_then(expr_as_string) else {
116        return;
117    };
118
119    package.package_type = Some(PackageType::Meson);
120    package.datasource_id = Some(DatasourceId::MesonBuild);
121    package.name = Some(name.to_string());
122
123    let languages = call
124        .positional
125        .iter()
126        .skip(1)
127        .flat_map(extract_string_values)
128        .collect::<Vec<_>>();
129    if let Some(primary_language) = languages.first() {
130        package.primary_language = Some(primary_language.clone());
131    }
132    if !languages.is_empty() {
133        extra_data.insert(
134            "languages".to_string(),
135            JsonValue::Array(languages.iter().cloned().map(JsonValue::String).collect()),
136        );
137    }
138
139    if let Some(version) = call.keyword.get("version").and_then(expr_as_string) {
140        package.version = Some(version.to_string());
141    }
142
143    let licenses = call
144        .keyword
145        .get("license")
146        .map(extract_string_values)
147        .unwrap_or_default();
148    if !licenses.is_empty() {
149        package.extracted_license_statement = Some(licenses.join("\n"));
150    }
151
152    let license_files = call
153        .keyword
154        .get("license_files")
155        .map(extract_string_values)
156        .unwrap_or_default();
157    if !license_files.is_empty() {
158        extra_data.insert(
159            "license_files".to_string(),
160            JsonValue::Array(license_files.into_iter().map(JsonValue::String).collect()),
161        );
162    }
163
164    if let Some(meson_version) = call.keyword.get("meson_version").and_then(expr_as_string) {
165        extra_data.insert(
166            "meson_version".to_string(),
167            JsonValue::String(meson_version.to_string()),
168        );
169    }
170}
171
172fn extract_dependencies_from_call(call: &CallExpr) -> Vec<Dependency> {
173    let dependency_names = call
174        .positional
175        .iter()
176        .filter_map(expr_as_string)
177        .map(ToOwned::to_owned)
178        .collect::<Vec<_>>();
179
180    if dependency_names.is_empty() {
181        return Vec::new();
182    }
183
184    let extracted_requirement = call.keyword.get("version").map(|expr| {
185        extract_string_values(expr)
186            .into_iter()
187            .filter(|value| !value.is_empty())
188            .collect::<Vec<_>>()
189            .join(", ")
190    });
191    let required = call.keyword.get("required").and_then(expr_as_bool);
192    let native = call.keyword.get("native").and_then(expr_as_bool);
193
194    dependency_names
195        .into_iter()
196        .map(|name| {
197            let mut extra_data = HashMap::new();
198
199            if let Some(requirement) = extracted_requirement
200                .as_ref()
201                .filter(|value| !value.is_empty())
202            {
203                extra_data.insert(
204                    "version".to_string(),
205                    JsonValue::String(requirement.clone()),
206                );
207            }
208            if let Some(required) = required {
209                extra_data.insert("required".to_string(), JsonValue::Bool(required));
210            }
211            if let Some(method) = call.keyword.get("method").and_then(expr_as_string) {
212                extra_data.insert("method".to_string(), JsonValue::String(method.to_string()));
213            }
214            if let Some(native) = native {
215                extra_data.insert("native".to_string(), JsonValue::Bool(native));
216            }
217
218            let modules = call
219                .keyword
220                .get("modules")
221                .map(extract_string_values)
222                .unwrap_or_default();
223            if !modules.is_empty() {
224                extra_data.insert(
225                    "modules".to_string(),
226                    JsonValue::Array(modules.into_iter().map(JsonValue::String).collect()),
227                );
228            }
229
230            let fallback = call
231                .keyword
232                .get("fallback")
233                .map(extract_string_values)
234                .unwrap_or_default();
235            if !fallback.is_empty() {
236                extra_data.insert(
237                    "fallback".to_string(),
238                    JsonValue::Array(fallback.into_iter().map(JsonValue::String).collect()),
239                );
240            }
241
242            Dependency {
243                purl: build_dependency_purl(&name),
244                extracted_requirement: extracted_requirement
245                    .clone()
246                    .filter(|value| !value.is_empty()),
247                scope: Some("dependencies".to_string()),
248                is_runtime: Some(native != Some(true)),
249                is_optional: Some(required == Some(false)),
250                is_pinned: Some(false),
251                is_direct: Some(true),
252                resolved_package: None,
253                extra_data: (!extra_data.is_empty()).then_some(extra_data),
254            }
255        })
256        .collect()
257}
258
259fn build_project_purl(name: &str, version: Option<&str>) -> Option<String> {
260    let mut purl = PackageUrl::new(PackageType::Meson.as_str(), name).ok()?;
261    if let Some(version) = version {
262        purl.with_version(version).ok()?;
263    }
264    Some(purl.to_string())
265}
266
267fn build_dependency_purl(name: &str) -> Option<String> {
268    let mut purl = PackageUrl::new("generic", name).ok()?;
269    purl.with_namespace("meson").ok()?;
270    Some(purl.to_string())
271}
272
273fn default_package_data() -> PackageData {
274    PackageData {
275        package_type: Some(PackageType::Meson),
276        datasource_id: Some(DatasourceId::MesonBuild),
277        ..Default::default()
278    }
279}
280
/// Returns `true` when `statement` starts an `if` or `foreach` block.
///
/// The keyword must be a whole word: it may be followed by anything that cannot
/// continue an identifier (whitespace, `(`, end of input, ...). This means
/// `if(cond)` is recognised as an opener, while identifiers that merely start
/// with a keyword (`ifdef = 1`, `foreach_count = 2`) are not.
fn is_block_opener(statement: &str) -> bool {
    let statement = statement.trim_start();

    ["if", "foreach"].iter().any(|keyword| {
        statement.strip_prefix(keyword).is_some_and(|rest| {
            // Reject when the next character would extend the identifier.
            !rest
                .chars()
                .next()
                .is_some_and(|next| next.is_ascii_alphanumeric() || next == '_')
        })
    })
}
287
/// Returns `true` when `statement`, ignoring surrounding whitespace, is exactly
/// `endif` or `endforeach`.
fn is_block_closer(statement: &str) -> bool {
    let keyword = statement.trim();
    keyword == "endif" || keyword == "endforeach"
}
291
/// Removes `#` comments from Meson source while leaving string literals intact.
///
/// A comment runs from `#` up to, but not including, the next newline. String
/// literals are copied verbatim:
///
/// * `'''...'''` multi-line strings end at the next `'''`; no escape processing
///   is applied inside them, so apostrophes and `#` in the body are preserved.
/// * `'...'` (and, leniently, `"..."`) strings honour backslash escapes.
///
/// # Errors
///
/// Returns `"unterminated string literal"` when a string is still open at the
/// end of the input.
fn strip_comments(input: &str) -> Result<String, String> {
    const TRIPLE_QUOTE: &str = "'''";
    let unterminated = || "unterminated string literal".to_string();

    let mut output = String::with_capacity(input.len());
    let mut rest = input;

    while let Some(ch) = rest.chars().next() {
        // Each arm returns the number of bytes it consumed from `rest`.
        let consumed = match ch {
            // Must be tested before the single-quote arm: otherwise `'''` is
            // read as an empty string followed by a new string opener.
            '\'' if rest.starts_with(TRIPLE_QUOTE) => {
                let body = &rest[TRIPLE_QUOTE.len()..];
                let end = body.find(TRIPLE_QUOTE).ok_or_else(unterminated)?;
                let literal_len = end + 2 * TRIPLE_QUOTE.len();
                output.push_str(&rest[..literal_len]);
                literal_len
            }
            '\'' | '"' => {
                let mut escaped = false;
                let mut literal_len = None;
                for (offset, current) in rest.char_indices().skip(1) {
                    if escaped {
                        escaped = false;
                    } else if current == '\\' {
                        escaped = true;
                    } else if current == ch {
                        literal_len = Some(offset + current.len_utf8());
                        break;
                    }
                }
                let literal_len = literal_len.ok_or_else(unterminated)?;
                output.push_str(&rest[..literal_len]);
                literal_len
            }
            // Drop the comment but keep the newline that terminates it.
            '#' => rest.find('\n').unwrap_or(rest.len()),
            _ => {
                output.push(ch);
                ch.len_utf8()
            }
        };
        rest = &rest[consumed..];
    }

    Ok(output)
}
342
/// Splits comment-free Meson source into trimmed, non-empty statements.
///
/// A newline ends a statement only when it is outside every string literal and
/// outside all `()`, `[]` and `{}` nesting. String literals are skipped as a
/// unit: `'''...'''` multi-line strings run to the next `'''` with no escape
/// processing, and `'...'` / `"..."` strings honour backslash escapes. An
/// unterminated string swallows the remainder of the input into the last
/// statement.
fn split_statements(input: &str) -> Vec<String> {
    const TRIPLE_QUOTE: &str = "'''";

    let mut statements = Vec::new();
    let mut paren_depth = 0usize;
    let mut bracket_depth = 0usize;
    let mut brace_depth = 0usize;
    // Byte offset where the statement currently being scanned begins.
    let mut statement_start = 0usize;
    let mut pos = 0usize;

    while let Some(ch) = input[pos..].chars().next() {
        let rest = &input[pos..];

        match ch {
            // Must be tested before the single-quote arm: otherwise `'''` is
            // read as an empty string followed by a new string opener.
            '\'' if rest.starts_with(TRIPLE_QUOTE) => {
                pos += match rest[TRIPLE_QUOTE.len()..].find(TRIPLE_QUOTE) {
                    Some(end) => end + 2 * TRIPLE_QUOTE.len(),
                    None => rest.len(),
                };
                continue;
            }
            '\'' | '"' => {
                let mut escaped = false;
                let mut literal_len = rest.len();
                for (offset, current) in rest.char_indices().skip(1) {
                    if escaped {
                        escaped = false;
                    } else if current == '\\' {
                        escaped = true;
                    } else if current == ch {
                        literal_len = offset + current.len_utf8();
                        break;
                    }
                }
                pos += literal_len;
                continue;
            }
            '(' => paren_depth += 1,
            ')' => paren_depth = paren_depth.saturating_sub(1),
            '[' => bracket_depth += 1,
            ']' => bracket_depth = bracket_depth.saturating_sub(1),
            '{' => brace_depth += 1,
            '}' => brace_depth = brace_depth.saturating_sub(1),
            '\n' if paren_depth == 0 && bracket_depth == 0 && brace_depth == 0 => {
                let statement = input[statement_start..pos].trim();
                if !statement.is_empty() {
                    statements.push(statement.to_string());
                }
                statement_start = pos + 1;
            }
            _ => {}
        }

        pos += ch.len_utf8();
    }

    let trailing = input[statement_start..].trim();
    if !trailing.is_empty() {
        statements.push(trailing.to_string());
    }

    statements
}
393
394#[derive(Debug, Clone)]
395enum Statement {
396    Expr(Expr),
397    Assignment(Expr),
398}
399
400#[derive(Debug, Clone)]
401enum Expr {
402    String(String),
403    Bool(bool),
404    Array(Vec<Expr>),
405    Identifier,
406    Call(CallExpr),
407}
408
409#[derive(Debug, Clone)]
410struct CallExpr {
411    name: String,
412    positional: Vec<Expr>,
413    keyword: HashMap<String, Expr>,
414}
415
/// Lexical tokens produced by `tokenize`.
#[derive(Clone, Debug, PartialEq, Eq)]
enum Token {
    /// An identifier other than `true` / `false`.
    Ident(String),
    /// A string literal; holds the raw text between the quotes.
    Str(String),
    /// The `true` or `false` keyword.
    Bool(bool),
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `=`
    Equal,
}
429
430fn parse_statement(statement: &str) -> Result<Statement, String> {
431    let tokens = tokenize(statement)?;
432    if tokens.is_empty() {
433        return Err("empty statement".to_string());
434    }
435
436    if let [Token::Ident(name), Token::Equal, rest @ ..] = tokens.as_slice() {
437        let mut parser = Parser::new(rest);
438        let expr = parser.parse_expr()?;
439        parser.expect_end()?;
440        let _ = name;
441        return Ok(Statement::Assignment(expr));
442    }
443
444    let mut parser = Parser::new(&tokens);
445    let expr = parser.parse_expr()?;
446    parser.expect_end()?;
447    Ok(Statement::Expr(expr))
448}
449
450fn tokenize(input: &str) -> Result<Vec<Token>, String> {
451    let chars: Vec<char> = input.chars().collect();
452    let mut tokens = Vec::new();
453    let mut index = 0usize;
454
455    while index < chars.len() {
456        let ch = chars[index];
457        if ch.is_whitespace() {
458            index += 1;
459            continue;
460        }
461
462        match ch {
463            '(' => {
464                tokens.push(Token::LParen);
465                index += 1;
466            }
467            ')' => {
468                tokens.push(Token::RParen);
469                index += 1;
470            }
471            '[' => {
472                tokens.push(Token::LBracket);
473                index += 1;
474            }
475            ']' => {
476                tokens.push(Token::RBracket);
477                index += 1;
478            }
479            ':' => {
480                tokens.push(Token::Colon);
481                index += 1;
482            }
483            ',' => {
484                tokens.push(Token::Comma);
485                index += 1;
486            }
487            '=' => {
488                tokens.push(Token::Equal);
489                index += 1;
490            }
491            '\'' | '"' => {
492                let delimiter = ch;
493                index += 1;
494                let start = index;
495                let mut escaped = false;
496                while index < chars.len() {
497                    let current = chars[index];
498                    if escaped {
499                        escaped = false;
500                    } else if current == '\\' {
501                        escaped = true;
502                    } else if current == delimiter {
503                        break;
504                    }
505                    index += 1;
506                }
507
508                if index >= chars.len() {
509                    return Err("unterminated string token".to_string());
510                }
511
512                let value: String = chars[start..index].iter().collect();
513                tokens.push(Token::Str(value));
514                index += 1;
515            }
516            _ if is_ident_start(ch) => {
517                let start = index;
518                index += 1;
519                while index < chars.len() && is_ident_continue(chars[index]) {
520                    index += 1;
521                }
522                let ident: String = chars[start..index].iter().collect();
523                match ident.as_str() {
524                    "true" => tokens.push(Token::Bool(true)),
525                    "false" => tokens.push(Token::Bool(false)),
526                    _ => tokens.push(Token::Ident(ident)),
527                }
528            }
529            _ => {
530                return Err(format!("unsupported token '{}'", ch));
531            }
532        }
533    }
534
535    Ok(tokens)
536}
537
/// Returns `true` for characters that may begin an identifier: ASCII letters
/// and `_`.
fn is_ident_start(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '_')
}
541
/// Returns `true` for characters that may continue an identifier: ASCII
/// letters, ASCII digits and `_`.
fn is_ident_continue(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
545
546struct Parser<'a> {
547    tokens: &'a [Token],
548    index: usize,
549}
550
551impl<'a> Parser<'a> {
552    fn new(tokens: &'a [Token]) -> Self {
553        Self { tokens, index: 0 }
554    }
555
556    fn parse_expr(&mut self) -> Result<Expr, String> {
557        match self.peek() {
558            Some(Token::Str(value)) => {
559                self.index += 1;
560                Ok(Expr::String(value.clone()))
561            }
562            Some(Token::Bool(value)) => {
563                self.index += 1;
564                Ok(Expr::Bool(*value))
565            }
566            Some(Token::LBracket) => self.parse_array(),
567            Some(Token::Ident(_)) => self.parse_identifier_or_call(),
568            Some(token) => Err(format!("unexpected token {:?}", token)),
569            None => Err("unexpected end of input".to_string()),
570        }
571    }
572
573    fn parse_array(&mut self) -> Result<Expr, String> {
574        self.expect(Token::LBracket)?;
575        let mut values = Vec::new();
576        while !matches!(self.peek(), Some(Token::RBracket)) {
577            values.push(self.parse_expr()?);
578            if matches!(self.peek(), Some(Token::Comma)) {
579                self.index += 1;
580            } else if !matches!(self.peek(), Some(Token::RBracket)) {
581                return Err("expected ',' or ']' in array".to_string());
582            }
583        }
584        self.expect(Token::RBracket)?;
585        Ok(Expr::Array(values))
586    }
587
588    fn parse_identifier_or_call(&mut self) -> Result<Expr, String> {
589        let Token::Ident(name) = self
590            .next()
591            .cloned()
592            .ok_or_else(|| "expected identifier".to_string())?
593        else {
594            return Err("expected identifier".to_string());
595        };
596
597        if !matches!(self.peek(), Some(Token::LParen)) {
598            let _ = name;
599            return Ok(Expr::Identifier);
600        }
601
602        self.expect(Token::LParen)?;
603        let mut positional = Vec::new();
604        let mut keyword = HashMap::new();
605
606        while !matches!(self.peek(), Some(Token::RParen)) {
607            if let (Some(Token::Ident(arg_name)), Some(Token::Colon)) =
608                (self.peek(), self.peek_n(1))
609            {
610                let arg_name = arg_name.clone();
611                self.index += 2;
612                let value = self.parse_expr()?;
613                keyword.insert(arg_name, value);
614            } else {
615                positional.push(self.parse_expr()?);
616            }
617
618            if matches!(self.peek(), Some(Token::Comma)) {
619                self.index += 1;
620            } else if !matches!(self.peek(), Some(Token::RParen)) {
621                return Err("expected ',' or ')' in call".to_string());
622            }
623        }
624
625        self.expect(Token::RParen)?;
626        Ok(Expr::Call(CallExpr {
627            name,
628            positional,
629            keyword,
630        }))
631    }
632
633    fn expect(&mut self, expected: Token) -> Result<(), String> {
634        match self.next() {
635            Some(token) if *token == expected => Ok(()),
636            Some(token) => Err(format!("expected {:?}, found {:?}", expected, token)),
637            None => Err(format!("expected {:?}, found end of input", expected)),
638        }
639    }
640
641    fn expect_end(&self) -> Result<(), String> {
642        if self.index == self.tokens.len() {
643            Ok(())
644        } else {
645            Err(format!(
646                "unexpected trailing tokens: {:?}",
647                &self.tokens[self.index..]
648            ))
649        }
650    }
651
652    fn peek(&self) -> Option<&'a Token> {
653        self.tokens.get(self.index)
654    }
655
656    fn peek_n(&self, offset: usize) -> Option<&'a Token> {
657        self.tokens.get(self.index + offset)
658    }
659
660    fn next(&mut self) -> Option<&'a Token> {
661        let token = self.tokens.get(self.index);
662        if token.is_some() {
663            self.index += 1;
664        }
665        token
666    }
667}
668
669fn expr_as_string(expr: &Expr) -> Option<&str> {
670    match expr {
671        Expr::String(value) => Some(value.as_str()),
672        _ => None,
673    }
674}
675
676fn expr_as_bool(expr: &Expr) -> Option<bool> {
677    match expr {
678        Expr::Bool(value) => Some(*value),
679        _ => None,
680    }
681}
682
683fn extract_string_values(expr: &Expr) -> Vec<String> {
684    match expr {
685        Expr::String(value) => vec![value.clone()],
686        Expr::Array(values) => values
687            .iter()
688            .filter_map(expr_as_string)
689            .map(ToOwned::to_owned)
690            .collect(),
691        _ => Vec::new(),
692    }
693}
694
695crate::register_parser!(
696    "Meson meson.build manifest",
697    &["**/meson.build"],
698    "meson",
699    "",
700    Some("https://mesonbuild.com/Syntax.html"),
701);