dockerfile_parser/
image.rs

1// (C) Copyright 2019-2020 Hewlett Packard Enterprise Development LP
2
3use std::collections::{HashMap, HashSet};
4use std::fmt;
5use std::iter::FromIterator;
6
7use lazy_static::lazy_static;
8use regex::Regex;
9
10use crate::{Dockerfile, Span, Splicer};
11
/// A Docker image reference, broken into its component parts.
///
/// Formatting an `ImageRef` through its `Display` impl (for example with
/// `to_string()`) renders it back into a plain image string:
/// ```
/// use dockerfile_parser::ImageRef;
///
/// let image = ImageRef::parse("alpine:3.11");
/// assert_eq!(image.registry, None);
/// assert_eq!(image.image, "alpine");
/// assert_eq!(image.tag, Some("3.11".to_string()));
/// assert_eq!(image.hash, None);
/// assert_eq!(image.to_string(), "alpine:3.11");
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ImageRef {
  /// The registry the image lives on, e.g. `quay.io` or `example.com:1234`.
  /// When unset, this generally means Docker Hub.
  pub registry: Option<String>,

  /// The image name, which may include a user or organization prefix such as
  /// `org/name`.
  pub image: String,

  /// The tag written after the colon (e.g. `1.2.3` in `alpine:1.2.3`), if
  /// any. An unset tag is generally inferred to mean `:latest`.
  pub tag: Option<String>,

  /// The embedded image hash written after the `@` (e.g. `sha256:...`), if
  /// any. Conflicts with `tag`.
  pub hash: Option<String>,
}
40
/// Reports whether an `ImageRef` token names a registry host.
///
/// A token is treated as a registry when it is exactly `localhost`, or when
/// it contains a `.` or a `:` anywhere.
///
/// Based on rules from https://stackoverflow.com/a/42116190
fn is_registry(token: &str) -> bool {
  match token {
    "localhost" => true,
    other => other.chars().any(|c| c == '.' || c == ':')
  }
}
47
48/// Given a map of key/value pairs, perform variable substitution on a given
49/// input string. `max_recursion_depth` controls the maximum allowed recursion
50/// depth if variables refer to other strings themselves containing variable
51/// references. A small number but reasonable is recommended by default, e.g.
52/// 16.
53/// If None is returned, substitution was impossible, either because a
54/// referenced variable did not exist, or recursion depth was exceeded.
55pub fn substitute<'a, 'b>(
56  s: &'a str,
57  vars: &'b HashMap<&'b str, &'b str>,
58  used_vars: &mut HashSet<String>,
59  max_recursion_depth: u8
60) -> Option<String> {
61  lazy_static! {
62    static ref VAR: Regex = Regex::new(r"\$(?:([A-Za-z0-9_]+)|\{([A-Za-z0-9_]+)\})").unwrap();
63  }
64
65  // note: docker also allows defaults in FROMs, e.g.
66  //   ARG tag
67  //   FROM alpine:${tag:-3.12}
68  // this isn't currently supported.
69
70  let mut splicer = Splicer::from_str(s);
71
72  for caps in VAR.captures_iter(s) {
73    if max_recursion_depth == 0 {
74      // can't substitute, so give up
75      return None;
76    }
77
78    let full_range = caps.get(0)?.range();
79    let var_name = caps.get(1).or_else(|| caps.get(2))?;
80    let var_content = vars.get(var_name.as_str())?;
81    let substituted_content = substitute(
82      var_content,
83      vars,
84      used_vars,
85      max_recursion_depth.saturating_sub(1)
86    )?;
87    used_vars.insert(var_name.as_str().to_string());
88
89    // splice the substituted content back into the output string
90    splicer.splice(&Span::new(full_range.start, full_range.end), &substituted_content);
91  }
92
93  Some(splicer.content)
94}
95
96impl ImageRef {
97  /// Parses an `ImageRef` from a string.
98  ///
99  /// This is not fallible, however malformed image strings may return
100  /// unexpected results.
101  pub fn parse(s: &str) -> ImageRef {
102    // tags may be one of:
103    // foo (implies registry.hub.docker.com/library/foo:latest)
104    // foo:bar (implies registry.hub.docker.com/library/foo:bar)
105    // org/foo:bar (implies registry.hub.docker.com/org/foo:bar)
106
107    // per https://stackoverflow.com/a/42116190, some extra rules are needed to
108    // disambiguate external registries
109    // localhost/foo:bar is allowed (localhost is special)
110    // example.com/foo:bar is allowed
111    // host/foo:bar is not allowed (conflicts with docker hub)
112    // host:443/foo:bar is allowed (':' or '.' make it unambiguous)
113
114    // we don't attempt to actually validate tags otherwise, so invalid
115    // characters could slip through
116
117    let parts: Vec<&str> = s.splitn(2, '/').collect();
118    let (registry, image_full) = if parts.len() == 2 && is_registry(parts[0]) {
119      // some 3rd party registry
120      (Some(parts[0].to_string()), parts[1])
121    } else {
122      // some other image on the default registry; return the original string
123      (None, s)
124    };
125
126    if let Some(at_pos) = image_full.find('@') {
127      // parts length is guaranteed to be at least 1 given an empty string
128      let (image, hash) = image_full.split_at(at_pos);
129
130      ImageRef {
131        registry,
132        image: image.to_string(),
133        hash: Some(hash[1..].to_string()),
134        tag: None
135      }
136    } else {
137      // parts length is guaranteed to be at least 1 given an empty string
138      let parts: Vec<&str> = image_full.splitn(2, ':').collect();
139      let image = parts[0].to_string();
140      let tag = parts.get(1).map(|p| String::from(*p));
141
142      ImageRef { registry, image, tag, hash: None }
143    }
144  }
145
146  /// Given a Dockerfile (and its global `ARG`s), perform any necessary
147  /// variable substitution to resolve any variable references in this
148  /// `ImageRef` and returns a list of variables included in the end result.
149  ///
150  /// If this `ImageRef` contains any unknown variables or if any references are
151  /// excessively recursive, returns None; otherwise, returns the
152  /// fully-substituted string.
153  pub fn resolve_vars_with_context<'a>(
154    &self, dockerfile: &'a Dockerfile
155  ) -> Option<(ImageRef, HashSet<String>)> {
156    let vars: HashMap<&'a str, &'a str> = HashMap::from_iter(
157      dockerfile.global_args
158        .iter()
159        .filter_map(|a| match a.value.as_ref() {
160          Some(v) => Some((a.name.as_ref(), v.as_ref())),
161          None => None
162        })
163    );
164
165    let mut used_vars = HashSet::new();
166
167    if let Some(s) = substitute(&self.to_string(), &vars, &mut used_vars, 16) {
168      Some((ImageRef::parse(&s), used_vars))
169    } else {
170      None
171    }
172  }
173
174  /// Given a Dockerfile (and its global `ARG`s), perform any necessary
175  /// variable substitution to resolve any variable references in this
176  /// `ImageRef`.
177  ///
178  /// If this `ImageRef` contains any unknown variables or if any references are
179  /// excessively recursive, returns None; otherwise, returns the
180  /// fully-substituted string.
181  pub fn resolve_vars(&self, dockerfile: &Dockerfile) -> Option<ImageRef> {
182    self.resolve_vars_with_context(dockerfile).map(|(image, _vars)| image)
183  }
184}
185
186impl fmt::Display for ImageRef {
187  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
188    if let Some(registry) = &self.registry {
189      write!(f, "{}/", registry)?;
190    }
191
192    write!(f, "{}", self.image)?;
193
194    if let Some(tag) = &self.tag {
195      write!(f, ":{}", tag)?;
196    } else if let Some(hash) = &self.hash {
197      write!(f, "@{}", hash)?;
198    }
199
200    Ok(())
201  }
202}
203
#[cfg(test)]
mod tests {
  use super::*;

  use std::convert::TryInto;
  use indoc::indoc;
  use crate::instructions::*;

  /// Builds the `ImageRef` that a parse is expected to produce.
  fn image_ref(
    registry: Option<&str>, image: &str, tag: Option<&str>, hash: Option<&str>
  ) -> ImageRef {
    ImageRef {
      registry: registry.map(String::from),
      image: image.to_string(),
      tag: tag.map(String::from),
      hash: hash.map(String::from)
    }
  }

  /// Asserts that parsing `input` yields exactly `expected`.
  fn assert_parses_to(input: &str, expected: ImageRef) {
    assert_eq!(ImageRef::parse(input), expected, "parsing {:?}", input);
  }

  /// Collects variable names into the set type filled in by `substitute`.
  fn name_set(names: &[&str]) -> HashSet<String> {
    names.iter().map(|name| String::from(*name)).collect()
  }

  /// Parses `source` as a Dockerfile and resolves the variables of the `FROM`
  /// instruction located at position `index`.
  fn resolve_from_at(source: &str, index: usize) -> Option<ImageRef> {
    let dockerfile = Dockerfile::parse(source).unwrap();

    let from: &FromInstruction = dockerfile.instructions
      .get(index).unwrap()
      .try_into().unwrap();

    from.image_parsed.resolve_vars(&dockerfile)
  }

  #[test]
  fn test_image_parse_dockerhub() {
    assert_parses_to("alpine:3.10", image_ref(None, "alpine", Some("3.10"), None));
    assert_parses_to("foo/bar", image_ref(None, "foo/bar", None, None));
    assert_parses_to("clux/muslrust", image_ref(None, "clux/muslrust", None, None));
    assert_parses_to(
      "clux/muslrust:1.41.0-stable",
      image_ref(None, "clux/muslrust", Some("1.41.0-stable"), None)
    );
    assert_parses_to(
      "fake_project/fake_image@fake_hash",
      image_ref(None, "fake_project/fake_image", None, Some("fake_hash"))
    );

    // invalid hashes, but should still not panic
    assert_parses_to(
      "fake_project/fake_image@",
      image_ref(None, "fake_project/fake_image", None, Some(""))
    );
    assert_parses_to(
      "fake_project/fake_image@sha256:",
      image_ref(None, "fake_project/fake_image", None, Some("sha256:"))
    );
  }

  #[test]
  fn test_image_parse_registry() {
    assert_parses_to(
      "quay.io/prometheus/node-exporter:v0.18.1",
      image_ref(Some("quay.io"), "prometheus/node-exporter", Some("v0.18.1"), None)
    );
    assert_parses_to(
      "gcr.io/fake_project/fake_image:fake_tag",
      image_ref(Some("gcr.io"), "fake_project/fake_image", Some("fake_tag"), None)
    );
    assert_parses_to(
      "gcr.io/fake_project/fake_image",
      image_ref(Some("gcr.io"), "fake_project/fake_image", None, None)
    );
    assert_parses_to(
      "gcr.io/fake_image",
      image_ref(Some("gcr.io"), "fake_image", None, None)
    );
    assert_parses_to(
      "gcr.io/fake_image:fake_tag",
      image_ref(Some("gcr.io"), "fake_image", Some("fake_tag"), None)
    );
    assert_parses_to(
      "quay.io/fake_project/fake_image@fake_hash",
      image_ref(Some("quay.io"), "fake_project/fake_image", None, Some("fake_hash"))
    );
  }

  #[test]
  fn test_image_parse_localhost() {
    assert_parses_to(
      "localhost/foo",
      image_ref(Some("localhost"), "foo", None, None)
    );
    assert_parses_to(
      "localhost/foo:bar",
      image_ref(Some("localhost"), "foo", Some("bar"), None)
    );
    assert_parses_to(
      "localhost/foo/bar",
      image_ref(Some("localhost"), "foo/bar", None, None)
    );
    assert_parses_to(
      "localhost/foo/bar:baz",
      image_ref(Some("localhost"), "foo/bar", Some("baz"), None)
    );
  }

  #[test]
  fn test_image_parse_registry_port() {
    assert_parses_to(
      "example.com:1234/foo",
      image_ref(Some("example.com:1234"), "foo", None, None)
    );
    assert_parses_to(
      "example.com:1234/foo:bar",
      image_ref(Some("example.com:1234"), "foo", Some("bar"), None)
    );
    assert_parses_to(
      "example.com:1234/foo/bar",
      image_ref(Some("example.com:1234"), "foo/bar", None, None)
    );
    assert_parses_to(
      "example.com:1234/foo/bar:baz",
      image_ref(Some("example.com:1234"), "foo/bar", Some("baz"), None)
    );

    // docker hub doesn't allow it, but other registries can allow arbitrarily
    // nested images
    assert_parses_to(
      "example.com:1234/foo/bar/baz:qux",
      image_ref(Some("example.com:1234"), "foo/bar/baz", Some("qux"), None)
    );
  }

  #[test]
  fn test_substitute() {
    let vars: HashMap<&str, &str> = [
      ("foo", "bar"),
      ("baz", "qux"),
      ("lorem", "$foo"),
      ("ipsum", "${lorem}"),
      ("recursion1", "$recursion2"),
      ("recursion2", "$recursion1")
    ].iter().cloned().collect();

    // runs a single substitution against a fresh, empty set of used variables
    let run = |input: &str, max_depth: u8| {
      let mut used_vars = HashSet::new();
      let output = substitute(input, &vars, &mut used_vars, max_depth);
      (output, used_vars)
    };

    // no references at all: the input passes through unchanged
    let (output, _) = run("hello world", 16);
    assert_eq!(output.as_deref(), Some("hello world"));

    // bare reference
    let (output, used_vars) = run("hello $foo", 16);
    assert_eq!(output.as_deref(), Some("hello bar"));
    assert_eq!(used_vars, name_set(&["foo"]));

    // a depth of zero allows no substitution
    let (output, used_vars) = run("hello $foo", 0);
    assert_eq!(output.as_deref(), None);
    assert!(used_vars.is_empty());

    // braced reference
    let (output, used_vars) = run("hello ${foo}", 16);
    assert_eq!(output.as_deref(), Some("hello bar"));
    assert_eq!(used_vars, name_set(&["foo"]));

    // several references in one input
    let (output, used_vars) = run("$baz $foo", 16);
    assert_eq!(output.as_deref(), Some("qux bar"));
    assert_eq!(used_vars, name_set(&["baz", "foo"]));

    // one level of nesting
    let (output, used_vars) = run("hello $lorem", 16);
    assert_eq!(output.as_deref(), Some("hello bar"));
    assert_eq!(used_vars, name_set(&["foo", "lorem"]));

    // one level of nesting, but not enough depth to follow it
    let (output, used_vars) = run("hello $lorem", 1);
    assert_eq!(output.as_deref(), None);
    assert!(used_vars.is_empty());

    // two levels of nesting
    let (output, used_vars) = run("hello $ipsum", 16);
    assert_eq!(output.as_deref(), Some("hello bar"));
    assert_eq!(used_vars, name_set(&["foo", "lorem", "ipsum"]));

    // two levels of nesting, but not enough depth to follow them
    let (output, used_vars) = run("hello $ipsum", 2);
    assert_eq!(output.as_deref(), None);
    assert!(used_vars.is_empty());

    // mutually recursive variables can never resolve
    let (output, used_vars) = run("hello $recursion1", 16);
    assert_eq!(output.as_deref(), None);
    assert!(used_vars.is_empty());
  }

  #[test]
  fn test_resolve_vars() {
    let resolved = resolve_from_at(indoc!(r#"
      ARG image=alpine:3.12
      FROM $image
    "#), 1);

    assert_eq!(resolved, Some(ImageRef::parse("alpine:3.12")));
  }

  #[test]
  fn test_resolve_vars_nested() {
    let resolved = resolve_from_at(indoc!(r#"
      ARG image=alpine
      ARG unnecessarily_nested=${image}
      ARG tag=3.12
      FROM ${unnecessarily_nested}:${tag}
    "#), 3);

    assert_eq!(resolved, Some(ImageRef::parse("alpine:3.12")));
  }

  #[test]
  fn test_resolve_vars_technically_invalid() {
    // docker allows this, but we can't give an answer
    let resolved = resolve_from_at(indoc!(r#"
      ARG image
      FROM $image
    "#), 1);

    assert_eq!(resolved, None);
  }

  #[test]
  fn test_resolve_vars_typo() {
    // docker allows this, but we can't give an answer
    let resolved = resolve_from_at(indoc!(r#"
      ARG image="alpine:3.12"
      FROM $foo
    "#), 1);

    assert_eq!(resolved, None);
  }

  #[test]
  fn test_resolve_vars_out_of_order() {
    // docker allows this, but we can't give an answer
    let resolved = resolve_from_at(indoc!(r#"
      FROM $image
      ARG image="alpine:3.12"
    "#), 0);

    assert_eq!(resolved, None);
  }
}