Skip to main content

provenant/parsers/
composer.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//!
5//! Extracts package metadata and dependencies from PHP Composer manifests
6//! (composer.json) and lockfiles (composer.lock).
7//!
8//! # Supported Formats
9//! - composer.json (manifest)
10//! - composer.lock (lockfile)
11//!
12//! # Key Features
13//! - Dependency extraction from require and require-dev
14//! - PSR-4 autoload and repository metadata capture
15//! - Locked dependency versions with dist/source hashes
16//!
17//! # Implementation Notes
18//! - Uses serde_json for parsing
19//! - Graceful error handling with warn!()
20//! - Package URL (purl) generation via packageurl
21//!
22use std::collections::HashMap;
23use std::path::Path;
24
25use crate::parser_warn as warn;
26use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
27use packageurl::PackageUrl;
28use serde_json::Value;
29
30use crate::models::{
31    DatasourceId, Dependency, LicenseDetection, PackageData, PackageType, Party, ResolvedPackage,
32    Sha1Digest, Sha256Digest, Sha512Digest,
33};
34
35use super::PackageParser;
36use super::license_normalization::{
37    DeclaredLicenseMatchMetadata, build_declared_license_data_from_pair,
38    normalize_spdx_declared_license,
39};
40
// Keys read from `source` / `dist` objects and from the lockfile package lists.
const FIELD_SOURCE: &str = "source";
const FIELD_DIST: &str = "dist";
const FIELD_PACKAGES: &str = "packages";
const FIELD_PACKAGES_DEV: &str = "packages-dev";

// Descriptive keys of a manifest or of a locked package entry.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_TYPE: &str = "type";
const FIELD_DESCRIPTION: &str = "description";
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_KEYWORDS: &str = "keywords";
const FIELD_LICENSE: &str = "license";
const FIELD_AUTHORS: &str = "authors";
const FIELD_SUPPORT: &str = "support";

// Package relationship sections (name -> constraint maps).
const FIELD_REQUIRE: &str = "require";
const FIELD_REQUIRE_DEV: &str = "require-dev";
const FIELD_PROVIDE: &str = "provide";
const FIELD_CONFLICT: &str = "conflict";
const FIELD_REPLACE: &str = "replace";
const FIELD_SUGGEST: &str = "suggest";

// Keys whose raw JSON is copied into `extra_data`.
const FIELD_AUTOLOAD: &str = "autoload";
const FIELD_PSR4: &str = "psr-4";
const FIELD_REPOSITORIES: &str = "repositories";
64
65/// Composer manifest parser for composer.json files.
66pub struct ComposerJsonParser;
67
68impl PackageParser for ComposerJsonParser {
69    const PACKAGE_TYPE: PackageType = PackageType::Composer;
70
71    fn extract_packages(path: &Path) -> Vec<PackageData> {
72        let json_content = match read_json_file(path) {
73            Ok(content) => content,
74            Err(e) => {
75                warn!("Failed to read composer.json at {:?}: {}", path, e);
76                return vec![default_package_data(Some(DatasourceId::PhpComposerJson))];
77            }
78        };
79
80        let full_name = json_content
81            .get(FIELD_NAME)
82            .and_then(|value| value.as_str())
83            .map(|value| value.trim())
84            .filter(|value| !value.is_empty());
85
86        let (namespace, name) = split_optional_namespace_name(full_name);
87        let is_private = name.is_none();
88
89        let version = json_content
90            .get(FIELD_VERSION)
91            .and_then(|value| value.as_str())
92            .map(|value| truncate_field(value.trim().to_string()));
93
94        let description = json_content
95            .get(FIELD_DESCRIPTION)
96            .and_then(|value| value.as_str())
97            .map(|value| truncate_field(value.trim().to_string()))
98            .filter(|value| !value.is_empty());
99
100        let homepage_url = json_content
101            .get(FIELD_HOMEPAGE)
102            .and_then(|value| value.as_str())
103            .map(|value| truncate_field(value.trim().to_string()))
104            .filter(|value| !value.is_empty());
105
106        let keywords = extract_keywords(&json_content);
107
108        let (
109            extracted_license_statement,
110            declared_license_expression,
111            declared_license_expression_spdx,
112            license_detections,
113        ) = extract_license_data(&json_content, is_private);
114
115        let dependencies =
116            extract_dependencies(&json_content, FIELD_REQUIRE, "require", true, false);
117        let dev_dependencies =
118            extract_dependencies(&json_content, FIELD_REQUIRE_DEV, "require-dev", false, true);
119        let provide_dependencies =
120            extract_dependencies(&json_content, FIELD_PROVIDE, "provide", true, false);
121        let conflict_dependencies =
122            extract_dependencies(&json_content, FIELD_CONFLICT, "conflict", true, true);
123        let replace_dependencies =
124            extract_dependencies(&json_content, FIELD_REPLACE, "replace", true, true);
125        let suggest_dependencies =
126            extract_dependencies(&json_content, FIELD_SUGGEST, "suggest", true, true);
127
128        let (bug_tracking_url, code_view_url) = extract_support(&json_content);
129        let vcs_url = extract_source_vcs_url(&json_content);
130        let download_url = extract_dist_download_url(&json_content);
131        let extra_data = build_extra_data(&json_content);
132        let parties = extract_parties(&json_content, &namespace);
133
134        vec![PackageData {
135            package_type: Some(Self::PACKAGE_TYPE),
136            namespace: namespace.clone(),
137            name: name.clone(),
138            version: version.clone(),
139            qualifiers: None,
140            subpath: None,
141            primary_language: Some("PHP".to_string()),
142            description,
143            release_date: None,
144            parties,
145            keywords,
146            homepage_url,
147            download_url,
148            size: None,
149            sha1: None,
150            md5: None,
151            sha256: None,
152            sha512: None,
153            bug_tracking_url,
154            code_view_url,
155            vcs_url,
156            copyright: None,
157            holder: None,
158            declared_license_expression,
159            declared_license_expression_spdx,
160            license_detections,
161            other_license_expression: None,
162            other_license_expression_spdx: None,
163            other_license_detections: Vec::new(),
164            extracted_license_statement,
165            notice_text: None,
166            source_packages: Vec::new(),
167            file_references: Vec::new(),
168            is_private,
169            is_virtual: false,
170            extra_data,
171            dependencies: [
172                dependencies,
173                dev_dependencies,
174                provide_dependencies,
175                conflict_dependencies,
176                replace_dependencies,
177                suggest_dependencies,
178            ]
179            .concat(),
180            repository_homepage_url: build_repository_homepage_url(&namespace, &name),
181            repository_download_url: None,
182            api_data_url: build_api_data_url(&namespace, &name),
183            datasource_id: Some(DatasourceId::PhpComposerJson),
184            purl: build_package_purl(&namespace, &name, &version),
185        }]
186    }
187
188    fn is_match(path: &Path) -> bool {
189        path.file_name()
190            .and_then(|name| name.to_str())
191            .is_some_and(is_composer_manifest_filename)
192    }
193
194    fn metadata() -> Vec<super::metadata::ParserMetadata> {
195        vec![super::metadata::ParserMetadata {
196            description: "PHP composer manifest",
197            file_patterns: &["**/*composer.json", "**/composer.*.json"],
198            package_type: "composer",
199            primary_language: "PHP",
200            documentation_url: Some("https://getcomposer.org/doc/04-schema.md"),
201        }]
202    }
203}
204
205/// Composer lockfile parser for composer.lock files.
206pub struct ComposerLockParser;
207
208impl PackageParser for ComposerLockParser {
209    const PACKAGE_TYPE: PackageType = PackageType::Composer;
210
211    fn extract_packages(path: &Path) -> Vec<PackageData> {
212        let json_content = match read_json_file(path) {
213            Ok(content) => content,
214            Err(e) => {
215                warn!("Failed to read composer.lock at {:?}: {}", path, e);
216                return vec![default_package_data(Some(DatasourceId::PhpComposerLock))];
217            }
218        };
219
220        let dependencies = extract_lock_dependencies(&json_content);
221
222        let mut package_data = default_package_data(Some(DatasourceId::PhpComposerLock));
223        package_data.dependencies = dependencies;
224
225        let mut packages = vec![package_data];
226        packages.extend(extract_lock_packages(&json_content));
227        packages
228    }
229
230    fn is_match(path: &Path) -> bool {
231        path.file_name()
232            .and_then(|name| name.to_str())
233            .is_some_and(is_composer_lock_filename)
234    }
235
236    fn metadata() -> Vec<super::metadata::ParserMetadata> {
237        vec![super::metadata::ParserMetadata {
238            description: "PHP composer lockfile",
239            file_patterns: &["**/*composer.lock", "**/composer.*.lock"],
240            package_type: "composer",
241            primary_language: "PHP",
242            documentation_url: Some(
243                "https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
244            ),
245        }]
246    }
247}
248
/// Returns `true` for `composer.json`, any name ending in `.composer.json`,
/// and any `composer.<something>.json` variant.
fn is_composer_manifest_filename(name: &str) -> bool {
    // Every accepted shape ends in `.json`, so reject anything else up front.
    if !name.ends_with(".json") {
        return false;
    }

    name.starts_with("composer.") || name.ends_with(".composer.json") || name == "composer.json"
}
254
/// Returns `true` for `composer.lock`, any name ending in `.composer.lock`,
/// and any `composer.<something>.lock` variant.
fn is_composer_lock_filename(name: &str) -> bool {
    // Every accepted shape ends in `.lock`, so reject anything else up front.
    if !name.ends_with(".lock") {
        return false;
    }

    name.starts_with("composer.") || name.ends_with(".composer.lock") || name == "composer.lock"
}
260
261fn read_json_file(path: &Path) -> Result<Value, String> {
262    let content = read_file_to_string(path, None).map_err(|e| e.to_string())?;
263    serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))
264}
265
266fn extract_dependencies(
267    json_content: &Value,
268    field: &str,
269    scope: &str,
270    is_runtime: bool,
271    is_optional: bool,
272) -> Vec<Dependency> {
273    json_content
274        .get(field)
275        .and_then(|value| value.as_object())
276        .map_or_else(Vec::new, |deps| {
277            deps.iter()
278                .take(MAX_ITERATION_COUNT)
279                .filter_map(|(name, requirement)| {
280                    let requirement_str = requirement.as_str()?;
281                    let (namespace, package_name) = split_namespace_name(name);
282                    let is_pinned = is_composer_version_pinned(requirement_str);
283                    let version_for_purl = if is_pinned {
284                        Some(normalize_requirement_version(requirement_str))
285                    } else {
286                        None
287                    };
288
289                    let purl = build_dependency_purl(
290                        namespace.as_deref(),
291                        &package_name,
292                        version_for_purl.as_deref(),
293                    );
294
295                    Some(Dependency {
296                        purl,
297                        extracted_requirement: Some(truncate_field(requirement_str.to_string())),
298                        scope: Some(truncate_field(scope.to_string())),
299                        is_runtime: Some(is_runtime),
300                        is_optional: Some(is_optional),
301                        is_pinned: Some(is_pinned),
302                        is_direct: Some(true),
303                        resolved_package: None,
304                        extra_data: None,
305                    })
306                })
307                .collect()
308        })
309}
310
311fn extract_lock_dependencies(json_content: &Value) -> Vec<Dependency> {
312    let mut dependencies = Vec::new();
313
314    let packages = json_content
315        .get(FIELD_PACKAGES)
316        .and_then(|value| value.as_array())
317        .map(|packages| packages.as_slice())
318        .unwrap_or(&[]);
319    let packages_dev = json_content
320        .get(FIELD_PACKAGES_DEV)
321        .and_then(|value| value.as_array())
322        .map(|packages| packages.as_slice())
323        .unwrap_or(&[]);
324
325    dependencies.reserve(packages.len() + packages_dev.len());
326    dependencies.extend(extract_lock_package_list(packages, "require", true, false));
327    dependencies.extend(extract_lock_package_list(
328        packages_dev,
329        "require-dev",
330        false,
331        true,
332    ));
333
334    dependencies
335}
336
337fn extract_lock_packages(json_content: &Value) -> Vec<PackageData> {
338    let packages = json_content
339        .get(FIELD_PACKAGES)
340        .and_then(|value| value.as_array())
341        .map(|packages| packages.as_slice())
342        .unwrap_or(&[]);
343    let packages_dev = json_content
344        .get(FIELD_PACKAGES_DEV)
345        .and_then(|value| value.as_array())
346        .map(|packages| packages.as_slice())
347        .unwrap_or(&[]);
348
349    let mut extracted = Vec::with_capacity(packages.len() + packages_dev.len());
350
351    for package in packages.iter().take(MAX_ITERATION_COUNT) {
352        if let Some(package_data) = build_lock_package_data(package, false) {
353            extracted.push(package_data);
354        }
355    }
356
357    for package in packages_dev.iter().take(MAX_ITERATION_COUNT) {
358        if let Some(package_data) = build_lock_package_data(package, true) {
359            extracted.push(package_data);
360        }
361    }
362
363    extracted
364}
365
366fn extract_lock_package_list(
367    packages: &[Value],
368    scope: &str,
369    is_runtime: bool,
370    is_optional: bool,
371) -> Vec<Dependency> {
372    let mut dependencies = Vec::new();
373
374    for package in packages.iter().take(MAX_ITERATION_COUNT) {
375        if let Some(dependency) = build_lock_dependency(package, scope, is_runtime, is_optional) {
376            dependencies.push(dependency);
377        }
378
379        dependencies.extend(extract_lock_package_relationships(package));
380    }
381
382    dependencies
383}
384
385fn extract_lock_package_relationships(package: &Value) -> Vec<Dependency> {
386    [
387        extract_dependencies(package, FIELD_REQUIRE, "require", true, false),
388        extract_dependencies(package, FIELD_REQUIRE_DEV, "require-dev", false, true),
389        extract_dependencies(package, FIELD_PROVIDE, "provide", true, false),
390        extract_dependencies(package, FIELD_CONFLICT, "conflict", true, true),
391        extract_dependencies(package, FIELD_REPLACE, "replace", true, true),
392        extract_dependencies(package, FIELD_SUGGEST, "suggest", true, true),
393    ]
394    .concat()
395}
396
397fn build_lock_dependency(
398    package: &Value,
399    scope: &str,
400    is_runtime: bool,
401    is_optional: bool,
402) -> Option<Dependency> {
403    let name = package.get(FIELD_NAME).and_then(|value| value.as_str())?;
404    let version = package
405        .get(FIELD_VERSION)
406        .and_then(|value| value.as_str())?;
407    let package_type = package.get(FIELD_TYPE).and_then(|value| value.as_str());
408
409    let (namespace, package_name) = split_namespace_name(name);
410    let purl = build_dependency_purl(namespace.as_deref(), &package_name, Some(version));
411
412    let source = package
413        .get(FIELD_SOURCE)
414        .and_then(|value| value.as_object());
415    let dist = package.get(FIELD_DIST).and_then(|value| value.as_object());
416
417    let (sha1, sha256, sha512, dist_shasum) = extract_dist_hashes(dist);
418    let dist_url = dist
419        .and_then(|map| map.get("url"))
420        .and_then(|value| value.as_str())
421        .map(|value| truncate_field(value.to_string()));
422
423    let mut extra_data = HashMap::new();
424
425    if let Some(package_type) = package_type {
426        extra_data.insert("type".to_string(), Value::String(package_type.to_string()));
427    }
428
429    if let Some(source_map) = source {
430        if let Some(source_reference) = source_map.get("reference").and_then(|value| value.as_str())
431        {
432            extra_data.insert(
433                "source_reference".to_string(),
434                Value::String(source_reference.to_string()),
435            );
436        }
437
438        if let Some(source_url) = source_map.get("url").and_then(|value| value.as_str()) {
439            extra_data.insert(
440                "source_url".to_string(),
441                Value::String(source_url.to_string()),
442            );
443        }
444
445        if let Some(source_type) = source_map.get("type").and_then(|value| value.as_str()) {
446            extra_data.insert(
447                "source_type".to_string(),
448                Value::String(source_type.to_string()),
449            );
450        }
451    }
452
453    if let Some(dist_map) = dist {
454        if let Some(dist_reference) = dist_map.get("reference").and_then(|value| value.as_str()) {
455            extra_data.insert(
456                "dist_reference".to_string(),
457                Value::String(dist_reference.to_string()),
458            );
459        }
460
461        if let Some(dist_url) = dist_map.get("url").and_then(|value| value.as_str()) {
462            extra_data.insert("dist_url".to_string(), Value::String(dist_url.to_string()));
463        }
464
465        if let Some(dist_type) = dist_map.get("type").and_then(|value| value.as_str()) {
466            extra_data.insert(
467                "dist_type".to_string(),
468                Value::String(dist_type.to_string()),
469            );
470        }
471    }
472
473    if let Some(shasum) = dist_shasum {
474        extra_data.insert("dist_shasum".to_string(), Value::String(shasum));
475    }
476
477    let extra_data = if extra_data.is_empty() {
478        None
479    } else {
480        Some(extra_data)
481    };
482
483    let resolved_package = ResolvedPackage {
484        primary_language: Some("PHP".to_string()),
485        download_url: dist_url,
486        sha1: sha1.and_then(|h| Sha1Digest::from_hex(&h).ok()),
487        sha256: sha256.and_then(|h| Sha256Digest::from_hex(&h).ok()),
488        sha512: sha512.and_then(|h| Sha512Digest::from_hex(&h).ok()),
489        md5: None,
490        is_virtual: true,
491        extra_data: None,
492        dependencies: Vec::new(),
493        repository_homepage_url: None,
494        repository_download_url: None,
495        api_data_url: None,
496        datasource_id: Some(DatasourceId::PhpComposerLock),
497        purl: None,
498        ..ResolvedPackage::new(
499            ComposerLockParser::PACKAGE_TYPE,
500            namespace.clone().unwrap_or_default(),
501            package_name.clone(),
502            version.to_string(),
503        )
504    };
505
506    Some(Dependency {
507        purl,
508        extracted_requirement: None,
509        scope: Some(truncate_field(scope.to_string())),
510        is_runtime: Some(is_runtime),
511        is_optional: Some(is_optional),
512        is_pinned: Some(true),
513        is_direct: Some(true),
514        resolved_package: Some(Box::new(resolved_package)),
515        extra_data,
516    })
517}
518
519fn build_lock_package_data(package: &Value, is_dev_package: bool) -> Option<PackageData> {
520    let dependency = build_lock_dependency(
521        package,
522        if is_dev_package {
523            "packages-dev"
524        } else {
525            "packages"
526        },
527        !is_dev_package,
528        is_dev_package,
529    )?;
530    let resolved = dependency.resolved_package.as_deref()?;
531
532    let mut package_data = default_package_data(Some(DatasourceId::PhpComposerLock));
533    package_data.package_type = Some(ComposerLockParser::PACKAGE_TYPE);
534    package_data.namespace = (!resolved.namespace.is_empty()).then(|| resolved.namespace.clone());
535    package_data.name = Some(truncate_field(resolved.name.clone()));
536    package_data.version = Some(truncate_field(resolved.version.clone()));
537    package_data.primary_language = resolved.primary_language.clone();
538    package_data.description = resolved.description.clone();
539    package_data.release_date = resolved.release_date.clone();
540    package_data.parties = resolved.parties.clone();
541    package_data.keywords = resolved.keywords.clone();
542    package_data.homepage_url = resolved.homepage_url.clone();
543    package_data.download_url = resolved.download_url.clone();
544    package_data.sha1 = resolved.sha1;
545    package_data.md5 = resolved.md5;
546    package_data.sha256 = resolved.sha256;
547    package_data.sha512 = resolved.sha512;
548    package_data.bug_tracking_url = resolved.bug_tracking_url.clone();
549    package_data.code_view_url = resolved.code_view_url.clone();
550    package_data.vcs_url = resolved.vcs_url.clone();
551    package_data.copyright = resolved.copyright.clone();
552    package_data.holder = resolved.holder.clone();
553    package_data.declared_license_expression = resolved.declared_license_expression.clone();
554    package_data.declared_license_expression_spdx =
555        resolved.declared_license_expression_spdx.clone();
556    package_data.license_detections = resolved.license_detections.clone();
557    package_data.other_license_expression = resolved.other_license_expression.clone();
558    package_data.other_license_expression_spdx = resolved.other_license_expression_spdx.clone();
559    package_data.other_license_detections = resolved.other_license_detections.clone();
560    package_data.extracted_license_statement = resolved.extracted_license_statement.clone();
561    package_data.notice_text = resolved.notice_text.clone();
562    package_data.source_packages = resolved.source_packages.clone();
563    package_data.file_references = resolved.file_references.clone();
564    package_data.is_private = resolved.is_private;
565    package_data.is_virtual = resolved.is_virtual;
566    package_data.extra_data = dependency
567        .extra_data
568        .clone()
569        .or_else(|| resolved.extra_data.clone());
570    package_data.dependencies = resolved.dependencies.clone();
571    package_data.repository_homepage_url = resolved.repository_homepage_url.clone();
572    package_data.repository_download_url = resolved.repository_download_url.clone();
573    package_data.api_data_url = resolved.api_data_url.clone();
574    package_data.purl = dependency.purl.clone();
575
576    Some(package_data)
577}
578
579fn extract_dist_hashes(
580    dist: Option<&serde_json::Map<String, Value>>,
581) -> (
582    Option<String>,
583    Option<String>,
584    Option<String>,
585    Option<String>,
586) {
587    let mut sha1 = None;
588    let mut sha256 = None;
589    let mut sha512 = None;
590    let mut raw_shasum = None;
591
592    if let Some(dist) = dist {
593        if let Some(shasum) = dist.get("shasum").and_then(|value| value.as_str()) {
594            let trimmed = shasum.trim();
595            if !trimmed.is_empty() {
596                raw_shasum = Some(trimmed.to_string());
597                let (parsed_sha1, parsed_sha256, parsed_sha512) = parse_hash_value(trimmed);
598                sha1 = parsed_sha1;
599                sha256 = parsed_sha256;
600                sha512 = parsed_sha512;
601            }
602        }
603
604        if let Some(value) = dist.get("sha1").and_then(|value| value.as_str())
605            && is_hex_hash(value)
606        {
607            sha1 = Some(value.to_string());
608        }
609        if let Some(value) = dist.get("sha256").and_then(|value| value.as_str())
610            && is_hex_hash(value)
611        {
612            sha256 = Some(value.to_string());
613        }
614        if let Some(value) = dist.get("sha512").and_then(|value| value.as_str())
615            && is_hex_hash(value)
616        {
617            sha512 = Some(value.to_string());
618        }
619    }
620
621    (sha1, sha256, sha512, raw_shasum)
622}
623
624fn parse_hash_value(hash: &str) -> (Option<String>, Option<String>, Option<String>) {
625    let trimmed = hash.trim();
626    if trimmed.is_empty() || !is_hex_hash(trimmed) {
627        return (None, None, None);
628    }
629
630    match trimmed.len() {
631        40 => (Some(trimmed.to_string()), None, None),
632        64 => (None, Some(trimmed.to_string()), None),
633        128 => (None, None, Some(trimmed.to_string())),
634        _ => (None, None, None),
635    }
636}
637
/// Returns `true` when `value` is non-empty and consists solely of ASCII
/// hexadecimal digits.
///
/// The empty string is rejected explicitly: `all` over an empty iterator is
/// vacuously `true`, which would let an empty digest field count as a hash.
fn is_hex_hash(value: &str) -> bool {
    !value.is_empty() && value.bytes().all(|byte| byte.is_ascii_hexdigit())
}
641
642fn extract_license_statement(json_content: &Value) -> Option<String> {
643    let mut licenses = Vec::new();
644
645    if let Some(license_value) = json_content.get(FIELD_LICENSE) {
646        match license_value {
647            Value::String(value) => {
648                let trimmed = value.trim();
649                if !trimmed.is_empty() {
650                    licenses.push(trimmed.to_string());
651                }
652            }
653            Value::Array(values) => {
654                for value in values {
655                    if let Some(license_str) = value.as_str() {
656                        let trimmed = license_str.trim();
657                        if !trimmed.is_empty() {
658                            licenses.push(trimmed.to_string());
659                        }
660                    }
661                }
662            }
663            _ => {}
664        }
665    }
666
667    if licenses.is_empty() {
668        return None;
669    }
670
671    if licenses.len() == 1 {
672        Some(truncate_field(licenses[0].clone()))
673    } else {
674        Some(truncate_field(licenses.join(" OR ")))
675    }
676}
677
678fn extract_license_data(
679    json_content: &Value,
680    is_private: bool,
681) -> (
682    Option<String>,
683    Option<String>,
684    Option<String>,
685    Vec<LicenseDetection>,
686) {
687    let extracted_license_statement = extract_license_statement(json_content)
688        .or_else(|| is_private.then(|| "proprietary-license".to_string()));
689    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
690        normalize_composer_license_data(extracted_license_statement.as_deref());
691
692    (
693        extracted_license_statement,
694        declared_license_expression,
695        declared_license_expression_spdx,
696        license_detections,
697    )
698}
699
700fn normalize_composer_license_data(
701    extracted_license_statement: Option<&str>,
702) -> (Option<String>, Option<String>, Vec<LicenseDetection>) {
703    let Some(extracted_license_statement) = extracted_license_statement
704        .map(str::trim)
705        .filter(|value| !value.is_empty())
706    else {
707        return super::license_normalization::empty_declared_license_data();
708    };
709
710    if extracted_license_statement.eq_ignore_ascii_case("proprietary") {
711        return build_declared_license_data_from_pair(
712            "proprietary-license",
713            "LicenseRef-scancode-proprietary-license",
714            DeclaredLicenseMatchMetadata::single_line(extracted_license_statement),
715        );
716    }
717
718    if extracted_license_statement.eq_ignore_ascii_case("proprietary-license") {
719        return build_declared_license_data_from_pair(
720            "proprietary-license",
721            "LicenseRef-scancode-proprietary-license",
722            DeclaredLicenseMatchMetadata::single_line(extracted_license_statement),
723        );
724    }
725
726    normalize_spdx_declared_license(Some(extracted_license_statement))
727}
728
729fn extract_keywords(json_content: &Value) -> Vec<String> {
730    json_content
731        .get(FIELD_KEYWORDS)
732        .and_then(|value| value.as_array())
733        .map(|values| {
734            values
735                .iter()
736                .take(MAX_ITERATION_COUNT)
737                .filter_map(|value| {
738                    value
739                        .as_str()
740                        .map(|value| truncate_field(value.to_string()))
741                })
742                .collect()
743        })
744        .unwrap_or_default()
745}
746
747fn extract_parties(json_content: &Value, namespace: &Option<String>) -> Vec<Party> {
748    let mut parties = Vec::new();
749
750    if let Some(authors) = json_content
751        .get(FIELD_AUTHORS)
752        .and_then(|value| value.as_array())
753    {
754        for author in authors.iter().take(MAX_ITERATION_COUNT) {
755            if let Some(author) = author.as_object() {
756                let name = author
757                    .get("name")
758                    .and_then(|value| value.as_str())
759                    .map(|value| truncate_field(value.to_string()));
760                let role = author
761                    .get("role")
762                    .and_then(|value| value.as_str())
763                    .map(|value| truncate_field(value.to_string()))
764                    .or(Some("author".to_string()));
765                let email = author
766                    .get("email")
767                    .and_then(|value| value.as_str())
768                    .map(|value| truncate_field(value.to_string()));
769                let url = author
770                    .get("homepage")
771                    .and_then(|value| value.as_str())
772                    .map(|value| truncate_field(value.to_string()));
773
774                if name.is_some() || email.is_some() || url.is_some() {
775                    parties.push(Party {
776                        r#type: Some("person".to_string()),
777                        role,
778                        name,
779                        email,
780                        url,
781                        organization: None,
782                        organization_url: None,
783                        timezone: None,
784                    });
785                }
786            }
787        }
788    }
789
790    if let Some(vendor) = namespace
791        .as_ref()
792        .map(|value| value.trim())
793        .filter(|value| !value.is_empty())
794    {
795        parties.push(Party {
796            r#type: Some("person".to_string()),
797            role: Some("vendor".to_string()),
798            name: Some(truncate_field(vendor.to_string())),
799            email: None,
800            url: None,
801            organization: None,
802            organization_url: None,
803            timezone: None,
804        });
805    }
806
807    parties
808}
809
810fn extract_support(json_content: &Value) -> (Option<String>, Option<String>) {
811    let support = json_content.get(FIELD_SUPPORT).and_then(|v| v.as_object());
812
813    if let Some(support_obj) = support {
814        let bug_tracking_url = support_obj
815            .get("issues")
816            .and_then(|v| v.as_str())
817            .map(|s| truncate_field(s.to_string()));
818
819        let code_view_url = support_obj
820            .get("source")
821            .and_then(|v| v.as_str())
822            .map(|s| truncate_field(s.to_string()));
823
824        (bug_tracking_url, code_view_url)
825    } else {
826        (None, None)
827    }
828}
829
830fn build_extra_data(json_content: &Value) -> Option<HashMap<String, Value>> {
831    let mut extra_data = HashMap::new();
832
833    if let Some(package_type) = json_content
834        .get(FIELD_TYPE)
835        .and_then(|value| value.as_str())
836    {
837        extra_data.insert("type".to_string(), Value::String(package_type.to_string()));
838    }
839
840    if let Some(autoload) = json_content
841        .get(FIELD_AUTOLOAD)
842        .and_then(|value| value.as_object())
843        && let Some(psr4) = autoload.get(FIELD_PSR4)
844    {
845        extra_data.insert("autoload_psr4".to_string(), psr4.clone());
846    }
847
848    if let Some(repositories) = json_content.get(FIELD_REPOSITORIES) {
849        extra_data.insert("repositories".to_string(), repositories.clone());
850    }
851
852    if extra_data.is_empty() {
853        None
854    } else {
855        Some(extra_data)
856    }
857}
858
859fn extract_source_vcs_url(json_content: &Value) -> Option<String> {
860    let source = json_content.get(FIELD_SOURCE)?.as_object()?;
861    let source_type = source.get("type")?.as_str()?.trim();
862    let source_url = source.get("url")?.as_str()?.trim();
863    let source_reference = source
864        .get("reference")
865        .and_then(|value| value.as_str())
866        .map(str::trim)
867        .filter(|value| !value.is_empty());
868
869    if source_type.is_empty() || source_url.is_empty() {
870        return None;
871    }
872
873    Some(truncate_field(match source_reference {
874        Some(reference) => format!("{}+{}@{}", source_type, source_url, reference),
875        None => format!("{}+{}", source_type, source_url),
876    }))
877}
878
879fn extract_dist_download_url(json_content: &Value) -> Option<String> {
880    json_content
881        .get(FIELD_DIST)
882        .and_then(|value| value.as_object())
883        .and_then(|dist| dist.get("url"))
884        .and_then(|value| value.as_str())
885        .map(|value| truncate_field(value.trim().to_string()))
886        .filter(|value| !value.is_empty())
887}
888
/// Builds the packagist.org package page URL for a package.
///
/// A missing or empty `namespace` produces a URL containing only `name`.
/// Returns `None` when `name` is absent.
fn build_repository_homepage_url(
    namespace: &Option<String>,
    name: &Option<String>,
) -> Option<String> {
    let name = name.as_ref()?;

    let url = match namespace.as_deref() {
        Some(ns) if !ns.is_empty() => format!("https://packagist.org/packages/{}/{}", ns, name),
        _ => format!("https://packagist.org/packages/{}", name),
    };
    Some(url)
}
902
/// Builds the packagist.org `p/packages/...json` URL for a package.
///
/// A missing or empty `namespace` produces a URL containing only `name`.
/// Returns `None` when `name` is absent.
fn build_api_data_url(namespace: &Option<String>, name: &Option<String>) -> Option<String> {
    let name = name.as_ref()?;
    let vendor = namespace.as_deref().filter(|ns| !ns.is_empty());

    Some(match vendor {
        Some(ns) => format!("https://packagist.org/p/packages/{}/{}.json", ns, name),
        None => format!("https://packagist.org/p/packages/{}.json", name),
    })
}
914
915fn build_package_purl(
916    namespace: &Option<String>,
917    name: &Option<String>,
918    version: &Option<String>,
919) -> Option<String> {
920    let name = name.as_ref()?;
921    let mut package_url = match PackageUrl::new(ComposerJsonParser::PACKAGE_TYPE.as_str(), name) {
922        Ok(purl) => purl,
923        Err(e) => {
924            warn!(
925                "Failed to create PackageUrl for composer package '{}': {}",
926                name, e
927            );
928            return None;
929        }
930    };
931
932    if let Some(namespace) = namespace.as_ref().filter(|value| !value.is_empty())
933        && let Err(e) = package_url.with_namespace(namespace)
934    {
935        warn!(
936            "Failed to set namespace '{}' for composer package '{}': {}",
937            namespace, name, e
938        );
939        return None;
940    }
941
942    if let Some(version) = version.as_ref()
943        && let Err(e) = package_url.with_version(version)
944    {
945        warn!(
946            "Failed to set version '{}' for composer package '{}': {}",
947            version, name, e
948        );
949        return None;
950    }
951
952    Some(package_url.to_string())
953}
954
955fn build_dependency_purl(
956    namespace: Option<&str>,
957    name: &str,
958    version: Option<&str>,
959) -> Option<String> {
960    let mut package_url = match PackageUrl::new(ComposerJsonParser::PACKAGE_TYPE.as_str(), name) {
961        Ok(purl) => purl,
962        Err(e) => {
963            warn!(
964                "Failed to create PackageUrl for composer package '{}': {}",
965                name, e
966            );
967            return None;
968        }
969    };
970
971    if let Some(namespace) = namespace.filter(|value| !value.is_empty())
972        && let Err(e) = package_url.with_namespace(namespace)
973    {
974        warn!(
975            "Failed to set namespace '{}' for composer package '{}': {}",
976            namespace, name, e
977        );
978        return None;
979    }
980
981    if let Some(version) = version
982        && let Err(e) = package_url.with_version(version)
983    {
984        warn!(
985            "Failed to set version '{}' for composer package '{}': {}",
986            version, name, e
987        );
988        return None;
989    }
990
991    Some(package_url.to_string())
992}
993
994fn split_optional_namespace_name(full_name: Option<&str>) -> (Option<String>, Option<String>) {
995    match full_name {
996        Some(full_name) => {
997            let (namespace, name) = split_namespace_name(full_name);
998            (namespace, Some(name))
999        }
1000        None => (None, None),
1001    }
1002}
1003
1004fn split_namespace_name(full_name: &str) -> (Option<String>, String) {
1005    let mut iter = full_name.splitn(2, '/');
1006    let first = iter.next().unwrap_or("");
1007    let second = iter.next();
1008
1009    if let Some(name) = second {
1010        (
1011            Some(truncate_field(first.to_string())),
1012            truncate_field(name.to_string()),
1013        )
1014    } else {
1015        (None, truncate_field(first.to_string()))
1016    }
1017}
1018
/// Strips surrounding whitespace and any leading `=` characters from a
/// requirement string, then trims again so whitespace after the `=` is
/// removed as well.
fn normalize_requirement_version(requirement: &str) -> String {
    requirement
        .trim()
        .trim_start_matches('=')
        .trim()
        .to_owned()
}
1023
/// Reports whether a requirement string names one exact version.
///
/// A requirement counts as pinned only when, after trimming, it:
/// - contains none of the range markers ` - `, `|`, `,`, `^`, `~`, `>`, `<`, `*`,
/// - is non-empty once leading `=` characters and a single leading `v` are removed,
/// - does not contain `dev` (case-insensitive),
/// - consists solely of ASCII digits, `.`, `-` and `+`,
/// - contains at least two `.` characters.
fn is_composer_version_pinned(version: &str) -> bool {
    const RANGE_MARKERS: [&str; 8] = [" - ", "|", ",", "^", "~", ">", "<", "*"];

    let constraint = version.trim();
    if constraint.is_empty()
        || RANGE_MARKERS
            .iter()
            .any(|marker| constraint.contains(*marker))
    {
        return false;
    }

    let bare = constraint.trim_start_matches('=').trim();
    let bare = bare.strip_prefix('v').unwrap_or(bare);

    let is_version_char = |c: char| c.is_ascii_digit() || matches!(c, '.' | '-' | '+');

    !bare.is_empty()
        && !bare.to_lowercase().contains("dev")
        && bare.chars().all(is_version_char)
        && bare.matches('.').count() >= 2
}
1062
1063fn default_package_data(datasource_id: Option<DatasourceId>) -> PackageData {
1064    PackageData {
1065        package_type: Some(ComposerJsonParser::PACKAGE_TYPE),
1066        primary_language: Some("PHP".to_string()),
1067        datasource_id,
1068        ..Default::default()
1069    }
1070}