Skip to main content

ferro_maven_layout/
layout.rs

// SPDX-License-Identifier: Apache-2.0
//! Maven layout path parser.
//!
//! Translates between:
//!
//! - an incoming URL path such as
//!   `com/example/foo/1.0/foo-1.0.jar`, and
//! - a structured [`Coordinate`] plus a [`PathClass`] marker that
//!   distinguishes the artifact itself, a checksum sidecar, or a
//!   `maven-metadata.xml` document.
//!
//! Spec: Maven Repository Layout —
//! <https://maven.apache.org/repository/layout.html>.
14
15use crate::checksum::ChecksumAlgo;
16use crate::coordinate::Coordinate;
17use crate::error::MavenError;
18use crate::snapshot::is_snapshot_version;
19
20/// Result of parsing a Maven repository path.
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub struct LayoutPath {
23    /// The coordinate identified by the path.
24    pub coordinate: Coordinate,
25    /// What kind of resource the path addresses.
26    pub class: PathClass,
27}
28
29/// Classification of a Maven layout path.
30#[derive(Debug, Clone, PartialEq, Eq)]
31pub enum PathClass {
32    /// A main artifact (jar, pom, war, tar.gz, ...).
33    Artifact,
34    /// A checksum sidecar alongside an artifact.
35    Checksum(ChecksumAlgo),
36    /// A `maven-metadata.xml` or `maven-metadata.xml.sha1` etc. under
37    /// the artifactId directory (groupLevel = false) or an optional
38    /// version-level metadata for SNAPSHOT builds.
39    Metadata {
40        /// Whether the metadata path is under a `version/` directory
41        /// (true = SNAPSHOT timestamp metadata, false = artifact index).
42        version_level: bool,
43        /// Optional checksum algorithm for metadata sidecars.
44        checksum: Option<ChecksumAlgo>,
45    },
46}
47
48/// Parse a layout path into a structured form.
49///
50/// Accepts paths with or without a leading slash. The path must contain
51/// at least three segments: `{groupPath}/{artifactId}/{version}/{filename}`.
52///
53/// # Errors
54///
55/// Returns [`MavenError::InvalidPath`] if the path has fewer than four
56/// segments or the filename does not match the expected
57/// `{artifactId}-{version}[-{classifier}].{extension}` shape. Paths that
58/// point at a `maven-metadata.xml` (with or without checksum sidecar
59/// suffix) are classified with [`PathClass::Metadata`] regardless of
60/// filename shape.
61pub fn parse_layout_path(path: &str) -> Result<LayoutPath, MavenError> {
62    let trimmed = path.trim_start_matches('/');
63    let segments: Vec<&str> = trimmed.split('/').filter(|s| !s.is_empty()).collect();
64
65    if segments.len() < 3 {
66        return Err(MavenError::InvalidPath(format!(
67            "path `{path}` has fewer than 3 segments"
68        )));
69    }
70
71    let filename = segments
72        .last()
73        .copied()
74        .ok_or_else(|| MavenError::InvalidPath("path has no filename".into()))?;
75
76    // Detect metadata documents: the filename begins with
77    // `maven-metadata.xml` and may optionally be followed by a checksum
78    // extension.
79    if let Some(kind) = maven_metadata_suffix(filename) {
80        let checksum = match kind {
81            MetadataKind::Raw => None,
82            MetadataKind::Sidecar(a) => Some(a),
83        };
84        return classify_metadata(&segments, checksum);
85    }
86
87    // Otherwise the path is {groupPath..}/{artifactId}/{version}/{filename}.
88    // Require at least 4 segments (group has >=1 segment).
89    if segments.len() < 4 {
90        return Err(MavenError::InvalidPath(format!(
91            "artifact path `{path}` has fewer than 4 segments"
92        )));
93    }
94
95    let version = segments[segments.len() - 2];
96    let artifact_id = segments[segments.len() - 3];
97    let group_segments = &segments[..segments.len() - 3];
98    let group_id = group_segments.join(".");
99
100    let (stripped, checksum) = strip_checksum_suffix(filename);
101    let (classifier, extension) = split_filename(artifact_id, version, stripped)?;
102
103    let coordinate = Coordinate::new(group_id, artifact_id, version, classifier, extension)
104        .map_err(|e| MavenError::InvalidPath(format!("{e}")))?;
105
106    let class = match checksum {
107        Some(algo) => PathClass::Checksum(algo),
108        None => PathClass::Artifact,
109    };
110
111    Ok(LayoutPath { coordinate, class })
112}
113
114fn classify_metadata(
115    segments: &[&str],
116    checksum: Option<ChecksumAlgo>,
117) -> Result<LayoutPath, MavenError> {
118    // Metadata can live at either
119    //   groupPath/artifactId/maven-metadata.xml   (>=2 segments before file)
120    //   groupPath/artifactId/version/maven-metadata.xml (>=3 segments before file)
121    // We heuristically classify by whether the penultimate segment looks
122    // like a version (contains a digit).
123    if segments.len() < 3 {
124        return Err(MavenError::InvalidPath(
125            "metadata path must have at least 2 path components before the filename".into(),
126        ));
127    }
128    let before_file = &segments[..segments.len() - 1];
129    let last = before_file.last().copied().unwrap_or_default();
130    let version_level = last.chars().any(|c| c.is_ascii_digit());
131
132    if version_level && before_file.len() >= 3 {
133        let version = last.to_string();
134        let artifact_id = before_file[before_file.len() - 2].to_string();
135        let group_id = before_file[..before_file.len() - 2].join(".");
136        let coordinate = Coordinate::new(group_id, artifact_id, version, None::<String>, "pom")
137            .map_err(|e| MavenError::InvalidPath(format!("{e}")))?;
138        Ok(LayoutPath {
139            coordinate,
140            class: PathClass::Metadata {
141                version_level: true,
142                checksum,
143            },
144        })
145    } else {
146        let artifact_id = last.to_string();
147        let group_id = before_file[..before_file.len() - 1].join(".");
148        // Use an "index" placeholder version for the coordinate since
149        // artifactId-level metadata is not tied to a specific version.
150        let coordinate = Coordinate::new(group_id, artifact_id, "index", None::<String>, "pom")
151            .map_err(|e| MavenError::InvalidPath(format!("{e}")))?;
152        Ok(LayoutPath {
153            coordinate,
154            class: PathClass::Metadata {
155                version_level: false,
156                checksum,
157            },
158        })
159    }
160}
161
162/// Marker for a `maven-metadata.xml` classification decision.
163enum MetadataKind {
164    /// Raw metadata document.
165    Raw,
166    /// Checksum sidecar alongside the metadata.
167    Sidecar(ChecksumAlgo),
168}
169
170fn maven_metadata_suffix(name: &str) -> Option<MetadataKind> {
171    if name == "maven-metadata.xml" {
172        return Some(MetadataKind::Raw);
173    }
174    let rest = name.strip_prefix("maven-metadata.xml.")?;
175    ChecksumAlgo::from_extension(rest).map(MetadataKind::Sidecar)
176}
177
178fn strip_checksum_suffix(name: &str) -> (&str, Option<ChecksumAlgo>) {
179    if let Some((stem, ext)) = name.rsplit_once('.')
180        && let Some(algo) = ChecksumAlgo::from_extension(ext)
181    {
182        return (stem, Some(algo));
183    }
184    (name, None)
185}
186
187/// Split `{artifactId}-{version}[-{classifier}].{extension}` into its
188/// classifier and extension.
189fn split_filename(
190    artifact_id: &str,
191    version: &str,
192    filename: &str,
193) -> Result<(Option<String>, String), MavenError> {
194    // Strip the prefix `{artifactId}-{version}` first.
195    let prefix = format!("{artifact_id}-{version}");
196    let rest = filename.strip_prefix(&prefix).ok_or_else(|| {
197        MavenError::InvalidPath(format!(
198            "filename `{filename}` does not start with `{prefix}`"
199        ))
200    })?;
201
202    if let Some(tail) = rest.strip_prefix('-') {
203        // Classifier present: `-{classifier}.{extension}`.
204        // Extension is everything after the last dot; support
205        // compound extensions (`tar.gz`, `tar.bz2`) by matching known
206        // patterns.
207        let (classifier, extension) = split_classifier_and_extension(tail).ok_or_else(|| {
208            MavenError::InvalidPath(format!(
209                "filename tail `{tail}` must be `classifier.extension`"
210            ))
211        })?;
212        Ok((Some(classifier), extension))
213    } else if let Some(tail) = rest.strip_prefix('.') {
214        Ok((None, tail.to_string()))
215    } else {
216        Err(MavenError::InvalidPath(format!(
217            "filename `{filename}` has no extension separator"
218        )))
219    }
220}
221
/// Split a `classifier.extension` tail into its two parts.
///
/// The compound extensions `tar.gz`, `tar.bz2`, `tar.xz` and `tar.zst`
/// are recognised as a unit; for any other tail the extension is the
/// text after the last `.`.
///
/// Returns `None` when the tail contains no `.`, or when either the
/// classifier or the extension would be empty. This includes a tail that
/// is nothing but a compound extension (e.g. `.tar.gz`): it is rejected
/// rather than being reinterpreted as classifier `.tar` with extension
/// `gz`.
fn split_classifier_and_extension(tail: &str) -> Option<(String, String)> {
    const COMPOUND: &[&str] = &["tar.gz", "tar.bz2", "tar.xz", "tar.zst"];

    // Recognise compound extensions first; stripping the extension and
    // then the separating dot avoids building a temporary `.ext` string.
    let compound_split = COMPOUND.iter().find_map(|ext| {
        let classifier = tail.strip_suffix(*ext)?.strip_suffix('.')?;
        Some((classifier, *ext))
    });

    let (classifier, extension) = match compound_split {
        Some(parts) => parts,
        None => tail.rsplit_once('.')?,
    };

    if classifier.is_empty() || extension.is_empty() {
        return None;
    }
    Some((classifier.to_string(), extension.to_string()))
}
241
242/// Convenience: check whether a parsed [`LayoutPath`] sits on a SNAPSHOT
243/// version.
244#[must_use]
245pub fn layout_is_snapshot(path: &LayoutPath) -> bool {
246    is_snapshot_version(&path.coordinate.version)
247}
248
#[cfg(test)]
mod tests {
    use super::{PathClass, parse_layout_path};
    use crate::checksum::ChecksumAlgo;

    /// A plain `group/artifact/version/file` path yields every coordinate part.
    #[test]
    fn parses_simple_jar_path() {
        let parsed = parse_layout_path("com/example/foo/1.0/foo-1.0.jar").expect("ok");
        let coordinate = &parsed.coordinate;
        assert_eq!(coordinate.group_id, "com.example");
        assert_eq!(coordinate.artifact_id, "foo");
        assert_eq!(coordinate.version, "1.0");
        assert_eq!(coordinate.extension, "jar");
        assert!(coordinate.classifier.is_none());
        assert_eq!(parsed.class, PathClass::Artifact);
    }

    /// A `-sources` suffix is reported as the classifier.
    #[test]
    fn parses_classifier_jar() {
        let parsed = parse_layout_path("com/example/foo/1.0/foo-1.0-sources.jar").expect("ok");
        assert_eq!(parsed.coordinate.classifier.as_deref(), Some("sources"));
        assert_eq!(parsed.coordinate.extension, "jar");
    }

    /// A `.sha1` sidecar keeps the artifact extension and is classified as a checksum.
    #[test]
    fn parses_sha1_sidecar() {
        let parsed = parse_layout_path("com/example/foo/1.0/foo-1.0.jar.sha1").expect("ok");
        assert_eq!(parsed.coordinate.extension, "jar");
        assert_eq!(parsed.class, PathClass::Checksum(ChecksumAlgo::Sha1));
    }

    /// POM files parse like any other artifact.
    #[test]
    fn parses_pom() {
        let parsed = parse_layout_path("com/example/foo/1.0/foo-1.0.pom").expect("ok");
        assert_eq!(parsed.coordinate.extension, "pom");
    }

    /// Metadata directly under the artifactId directory is artifact-level.
    #[test]
    fn parses_metadata_under_artifact_id() {
        let parsed = parse_layout_path("com/example/foo/maven-metadata.xml").expect("ok");
        assert_eq!(
            parsed.class,
            PathClass::Metadata {
                version_level: false,
                checksum: None,
            }
        );
    }

    /// Metadata under a SNAPSHOT version directory is version-level.
    #[test]
    fn parses_metadata_under_version() {
        let parsed =
            parse_layout_path("com/example/foo/1.0-SNAPSHOT/maven-metadata.xml").expect("ok");
        let is_version_level = matches!(
            parsed.class,
            PathClass::Metadata {
                version_level: true,
                ..
            }
        );
        assert!(is_version_level);
    }

    /// Two segments are never enough for any layout path.
    #[test]
    fn rejects_too_short_path() {
        let err = parse_layout_path("foo/1.0").expect_err("reject");
        let message = err.to_string();
        assert!(message.contains("fewer than 3 segments"));
    }

    /// `tar.gz` after a classifier stays a single compound extension.
    #[test]
    fn compound_tar_gz_extension_preserved() {
        let parsed = parse_layout_path("com/example/foo/1.0/foo-1.0-dist.tar.gz").expect("ok");
        assert_eq!(parsed.coordinate.extension, "tar.gz");
        assert_eq!(parsed.coordinate.classifier.as_deref(), Some("dist"));
    }

    /// Parsing a path and rendering the coordinate again gives the same path.
    #[test]
    fn round_trip_path_to_coordinate_and_back() {
        let original = "com/example/foo/1.0/foo-1.0-sources.jar";
        let parsed = parse_layout_path("com/example/foo/1.0/foo-1.0-sources.jar").expect("ok");
        assert_eq!(parsed.coordinate.repository_path(), original);
    }
}