b2_client/
validate.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2   License, v. 2.0. If a copy of the MPL was not distributed with this
3   file, You can obtain one at http://mozilla.org/MPL/2.0/.
4*/
5
6//! Validation functions used throughout the crate.
7
8use std::collections::HashMap;
9
10use crate::{
11    bucket::{LifecycleRule, ServerSideEncryption},
12    error::*,
13};
14
15use http_types::{
16    cache::{CacheControl, Expires},
17    Trailers,
18};
19
20
21/// Returns the provided HTTP header if it's valid; otherwise a ValidationError.
22///
23/// An HTTP header:
24///
25/// * must be valid ASCII.
26/// * must not include whitespace.
27/// * must not include ASCII codes outside alphanumeric codes or any of
28///   "!" "#" "$" "%" "&" "'" "*" "+" "-" "." "^" "_" "`" "|" or "~".
29///
30/// # Notes
31///
32/// We validate headers according to
33/// [RFC 7230](https://www.rfc-editor.org/rfc/rfc7230), notably
34/// [Section 3.2.6](https://www.rfc-editor.org/rfc/rfc7230#section-3.2.6). It is
35/// possible that the B2 API is more lenient.
36pub(crate) fn validated_http_header(header: &str) -> Result<&str, BadHeaderName>
37{
38    let is_valid = |c: char| "!#$%&'*+-.^_`|~".contains(c);
39
40    let invalid = header.chars()
41        .find(|c| !(c.is_ascii_alphanumeric() || is_valid(*c)));
42
43    if let Some(ch) = invalid {
44        Err(BadHeaderName {
45            header: header.to_owned(),
46            invalid_char: ch,
47        })
48    } else {
49        Ok(header)
50    }
51}
52
53pub(crate) fn validated_bucket_name(name: impl Into<String>)
54-> Result<String, BucketValidationError> {
55    let name = name.into();
56
57    if name.len() < 6 || name.len() > 50 {
58        return Err(BucketValidationError::BadNameLength(name.len()));
59    }
60
61    let invalid_char = |c: &char| !(c.is_ascii_alphanumeric() || *c == '-');
62
63    match name.chars().find(invalid_char) {
64        None => Ok(name),
65        Some(ch) => Err(BucketValidationError::InvalidChar(ch)),
66    }
67}
68
69/// Ensure a filename is valid.
70///
71/// Note that B2 disallows ASCII control characters, but other control
72/// characters defined by Unicode are allowed.
73pub(crate) fn validated_file_name(name: &str)
74-> Result<&str, FileNameValidationError> {
75    for ch in name.chars() {
76        if ch.is_ascii_control() {
77            return Err(FileNameValidationError::InvalidChar(ch));
78        }
79    }
80
81    if name.len() < 1024 {
82        Ok(name)
83    } else {
84        Err(FileNameValidationError::BadLength(name.len()))
85    }
86}
87
88pub(crate) fn validated_cors_rule_name(name: impl Into<String>)
89-> Result<String, CorsRuleValidationError> {
90    // The rules are the same as for bucket names.
91    validated_bucket_name(name)
92}
93
94/// Ensure that file metadata fits within the B2 length requirements.
95pub(crate) fn validate_file_metadata_size(
96    file_name: &str,
97    file_info: Option<&serde_json::Value>,
98    enc: Option<&ServerSideEncryption>
99) -> Result<(), ValidationError> {
100    let limit = match enc {
101        Some(&ServerSideEncryption::NoEncryption) => 7000,
102        _ => 2048,
103    };
104
105    // Only the keys and values count against the max limit, so we need to
106    // add them up rather than convert the entire Value to a string and
107    // check its length.
108    let info_len = file_info
109        .map(|v| v.as_object())
110        .flatten()
111        .map(|obj| obj.iter()
112            .fold(0, |acc, (k, v)| acc + k.len() + v.to_string().len())
113        )
114        .unwrap_or(0);
115
116    let name_len = file_name.len();
117
118    if info_len + name_len <= limit {
119        Ok(())
120    } else {
121        Err(ValidationError::OutOfBounds(format!(
122            "file_name and file_info lengths must not exceed {} bytes",
123            limit
124        )))
125    }
126}
127
128/// Ensure the keys and values of file metadata is correct.
129///
130/// We do not check the byte length limit since the limit applies to both the
131/// file info and the name. Use [validate_file_metadata_size] to check the
132/// length limit.
133pub(crate) fn validated_file_info(info: serde_json::Value)
134-> Result<serde_json::Value, ValidationError> {
135    let obj = info.as_object()
136        .ok_or_else(||
137            ValidationError::BadFormat("file_info is not an object".into())
138        )?;
139
140    if obj.len() > 10 {
141        return Err(ValidationError::BadFormat(
142            "file_info cannot contain more than 10 items".into()
143        ));
144    }
145
146    for (key, val) in obj {
147        validate_info_key_val(key, val)?;
148    }
149
150    Ok(info)
151}
152
153fn validate_info_key_val(key: &str, val: &serde_json::Value)
154-> Result<(), ValidationError> {
155    if key.len() > 50 {
156        return Err(ValidationError::BadFormat(format!(
157            "Key cannot exceed 50 bytes, but is {}", key.len()
158        )));
159    }
160
161    if key.starts_with("b2-") {
162        validate_info_val(key, val)?
163    }
164
165    let is_valid = |c: char| c.is_alphanumeric()
166        || ['-', '_', '.', '`', '~', '!', '#', '$', '%', '^', '&', '*', '\'',
167            '|', '+'].contains(&c);
168
169    for ch in key.chars() {
170        if ! is_valid(ch) {
171            return Err(ValidationError::BadFormat(format!(
172                "Invalid character in key: '{}'", ch
173            )));
174        }
175    }
176
177    Ok(())
178}
179
180/// Validate the file_info for B2-specific metadata.
181pub fn validate_info_val(key: &str, val: &serde_json::Value)
182-> Result<(), ValidationError> {
183    let val = val.as_str().ok_or_else(||
184        ValidationError::BadFormat(format!("{} value must be a string", key))
185    )?;
186
187    // TODO: We can likely validate the stuff I'm using http_types to validate
188    // more efficiently by doing it manually.
189    match key {
190        "b2-content-disposition" => {
191            validate_content_disposition(val, false)
192        },
193        "b2-content-language" => {
194            for ch in val.chars() {
195                if ! (ch.is_ascii_alphabetic() || ch == '-') {
196                    return Err(ValidationError::BadFormat(format!(
197                        "Invalid character in Content-Language: {}", ch
198                    )));
199                }
200            }
201            Ok(())
202        },
203        "b2-expires" => {
204            let mut hdr = Trailers::new();
205            hdr.insert("Expires", val);
206
207            Expires::from_headers(hdr.as_ref())
208                .map_err(|_| ValidationError::BadFormat(format!(
209                    "Invalid Expires value: {}", val
210                )))?;
211
212            Ok(())
213        },
214        "b2-cache-control" => {
215            // TODO: CacheControl type doesn't seem to validate cache-extension
216            // properly. See
217            // https://datatracker.ietf.org/doc/html/rfc2616#section-14.9
218            let mut hdr = Trailers::new();
219            hdr.insert("CacheControl", val);
220
221            CacheControl::from_headers(hdr.as_ref())
222                .map_err(|_| ValidationError::BadFormat(format!(
223                    "Invalid CacheControl value: {}", val
224                )))?;
225
226            Ok(())
227        },
228        "b2-content-encoding" => {
229            // B2 documentation says this must conform to RFC 2616, which seems
230            // to be more restrictive than RFC 7231, which supercedes it. We're
231            // going to validate that the value is a valid token, but not worry
232            // about the value itself.
233            if is_valid_token(val) {
234                Ok(())
235            } else {
236                Err(ValidationError::BadFormat(format!(
237                    "Invalid ContentEncoding: {}", val
238                )))
239            }
240        },
241        _ => Err(ValidationError::BadFormat(format!(
242            "Invalid key name: {}", key
243        ))),
244    }
245}
246
247pub fn validate_content_disposition(text: &str, allow_star: bool)
248-> Result<(), ValidationError> {
249    let sep_idx = text.find(';');
250
251    if sep_idx.is_none() {
252        // Lack of a ';' means the value is a simple token.
253        return if is_valid_token(text) {
254            Ok(())
255        } else {
256            Err(ValidationError::BadFormat(format!(
257                "Illegal Content-Disposition type: {}", text
258            )))
259        };
260    } else if text.ends_with(';') {
261        return Err(ValidationError::BadFormat(
262            "Content-Disposition cannot end with a semicolon".into()
263        ));
264    }
265    let sep_idx = sep_idx.unwrap();
266
267    for param in text[sep_idx+1..].split(';') {
268        if let Some((field, value)) = param.split_once('=') {
269            let field = field.trim();
270
271            if ! is_valid_token(field) {
272                return Err(ValidationError::BadFormat(format!(
273                    "Illegal character in field name: {}", field
274                )));
275            }
276
277            if ! allow_star && field == "*" {
278                return Err(ValidationError::BadFormat(
279                    "Asterisk ('*') is not allowed in a field name".into()
280                ));
281            }
282
283            let value = value.trim();
284
285            // TODO: We need to also verify that if the value is an ext-value as
286            // defined at
287            // https://datatracker.ietf.org/doc/html/rfc5987#section-3.2 that it
288            // is valid. We currently assume it's valid. Also see restrictions
289            // listed at https://www.backblaze.com/b2/docs/files.html
290            if ! (is_valid_token(value) || is_valid_quoted_string(value)) {
291                return Err(ValidationError::BadFormat(
292                    "Invalid field value".into()
293                ));
294            }
295        }
296    }
297
298    Ok(())
299}
300
/// Returns `true` if `s` is a valid HTTP token.
///
/// A token is a non-empty sequence of ASCII characters, none of which is a
/// control character or one of the separators
/// `( ) < > @ , ; : \ " / [ ] ? = { }`, space, or horizontal tab. See
/// [RFC 2616 Section 2.2](https://datatracker.ietf.org/doc/html/rfc2616#section-2.2).
///
/// Punctuation that is not a separator (for example `-`, `*`, `.`, or `_`) is
/// therefore allowed.
fn is_valid_token(s: &str) -> bool {
    const SEPARATORS: &[char] = &[
        '(', ')', '<', '>', '@', ',', ';', ':', '\\', '"', '/', '[', ']', '?',
        '=', '{', '}', ' ', '\t',
    ];

    ! s.is_empty() && s.chars().all(|ch|
        ch.is_ascii() && ! ch.is_ascii_control() && ! SEPARATORS.contains(&ch)
    )
}
319
/// Returns `true` if `s` is a valid HTTP quoted string.
///
/// A quoted string:
///
/// * begins and ends with a double quote (`"`); the two quotes must be
///   distinct characters, so a lone `"` is not a quoted string.
/// * contains only ASCII characters that are not control characters.
/// * may contain a double quote or backslash only when it is escaped with a
///   preceding backslash.
///
/// A backslash escapes exactly the one character that follows it. As a
/// result `"a\\"` (an escaped backslash) is valid, while `"a\"` is not: its
/// final quote is escaped, leaving the string unterminated.
fn is_valid_quoted_string(s: &str) -> bool {
    let bytes = s.as_bytes();

    // Need at least the opening and the closing quote.
    if bytes.len() < 2
        || bytes[0] != b'"'
        || bytes[bytes.len() - 1] != b'"'
    {
        return false;
    }

    // True while the previous byte was an unescaped backslash.
    let mut escaped = false;

    for &byte in &bytes[1..bytes.len() - 1] {
        if ! byte.is_ascii() || byte.is_ascii_control() {
            return false;
        }

        if escaped {
            escaped = false;
        } else if byte == b'\\' {
            escaped = true;
        } else if byte == b'"' {
            return false;
        }
    }

    // A pending escape would swallow the closing quote.
    ! escaped
}
338
339/// Return the provided list of [LifecycleRule]s or a map of errors.
340///
341/// No file within a bucket can be subject to multiple lifecycle rules. If any
342/// of the rules provided apply to multiple files or folders, we return the
343/// conflicting rules. The map's key is the broadest rule (highest in the
344/// hierarchy). The map may have duplicate entries when subfolders are
345/// involved.
346///
347/// The empty string (`""`) matches all paths, so if provided it must be the
348/// only lifecycle rule. If it is provided along with other rules, all of those
349/// rules will be listed as a conflict.
350pub(crate) fn validated_lifecycle_rules(rules: impl Into<Vec<LifecycleRule>>)
351-> Result<Vec<LifecycleRule>, LifecycleRuleValidationError> {
352    let mut rules = rules.into();
353
354    if rules.len() <= 1 {
355        Ok(rules)
356    } else if rules.len() > 100 {
357        Err(LifecycleRuleValidationError::TooManyRules(rules.len()))
358    } else {
359        rules.sort();
360
361        // TODO: May be worthwhile to reserve rules.len()/2 or something.
362        let mut checked: Vec<Vec<&LifecycleRule>> = vec![vec![&rules[0]]];
363
364        for rule in rules.iter().skip(1) {
365            for i in 0 .. checked.len() {
366                let root = &checked[i][0];
367
368                if rule.file_name_prefix.starts_with(&root.file_name_prefix) {
369                    checked[i].push(rule);
370                }  else {
371                    checked.push(vec![rule]);
372                }
373            }
374        }
375
376        let mut map = HashMap::new();
377
378        checked.into_iter()
379            .filter(|list| list.len() > 1) // Keep only conflicts.
380            .for_each(|list| {
381                let key = list[0].file_name_prefix.to_owned();
382
383                let val = list[1..].iter()
384                    .map(|v| (*v).to_owned())
385                    .collect::<Vec<LifecycleRule>>();
386
387                map.insert(key, val);
388            });
389
390        if ! map.is_empty() {
391            Err(LifecycleRuleValidationError::ConflictingRules(map))
392        } else {
393            Ok(rules)
394        }
395    }
396}
397
398/// Validate a list of origins for a CORS rule.
399///
400/// See [CorsRuleBuilder::with_allowed_origins] for the rules concerning a valid
401/// origin.
402pub(crate) fn validated_origins(origins: impl Into<Vec<String>>)
403-> Result<Vec<String>, ValidationError> {
404    let origins = origins.into();
405
406    if origins.is_empty() {
407        return Err(ValidationError::MissingData(
408            "There must be at least one origin covered by the rule".into()
409        ));
410    }
411
412    if ! (origins.len() == 1 && origins[0] == "*") {
413        let mut found_https = false;
414
415        for origin in origins.iter() {
416            // `http` and `https` are valid origins, but `Url::parse()` won't
417            // parse them, so we check them separately.
418            if origin == "https" {
419                if found_https {
420                    return Err(ValidationError::Incompatible(
421                        "There can only be one HTTPS rule".into()
422                    ));
423                }
424                found_https = true;
425            } else if origin != "http" {
426                if origin.chars().filter(|c| *c == '*').count() > 1 {
427                    return Err(ValidationError::BadFormat(
428                        "A URL cannot have more than one '*'".into()
429                    ));
430                }
431
432                let url = url::Url::parse(origin)?;
433
434                if url.scheme() == "https" {
435                    if found_https {
436                        return Err(ValidationError::Incompatible(
437                            "There can only be one HTTPS rule".into()
438                        ));
439                    }
440                    found_https = true;
441                }
442
443                if ! (url.scheme() == "https" || url.scheme() == "http") {
444                    return Err(ValidationError::BadUrl(url.to_string()));
445                }
446            }
447        }
448    }
449
450    Ok(origins)
451}
452
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;


    /// Builds a rule for `prefix` that deletes files three days after they
    /// are hidden.
    fn make_rule(prefix: &str) -> LifecycleRule {
        LifecycleRule::builder()
            .filename_prefix(prefix).unwrap()
            .delete_after_hide(chrono::Duration::days(3)).unwrap()
            .build().unwrap()
    }

    /// Builds one rule per prefix, keeping the given order.
    fn make_rules(prefixes: &[&str]) -> Vec<LifecycleRule> {
        prefixes.iter().map(|prefix| make_rule(prefix)).collect()
    }

    /// Lists the file name prefixes of `rules`, keeping their order.
    fn prefixes(rules: &[LifecycleRule]) -> Vec<&str> {
        rules.iter().map(|rule| rule.file_name_prefix.as_str()).collect()
    }

    #[test]
    fn validate_good_lifecycle_rules() {
        let input = make_rules(&["Docs/Photos/", "Legal/", "Archive/"]);

        // Valid rules come back sorted.
        let validated = validated_lifecycle_rules(input).unwrap();
        assert_eq!(
            prefixes(&validated),
            ["Archive/", "Docs/Photos/", "Legal/"]
        );
    }

    #[test]
    fn validate_single_rule() {
        let input = make_rules(&["Docs/Photos/"]);

        let validated = validated_lifecycle_rules(input).unwrap();
        assert_eq!(prefixes(&validated), ["Docs/Photos/"]);
    }

    #[test]
    fn validate_one_lifecycle_rule_conflicts() {
        let input = make_rules(&[
            "Docs/Photos/",
            "Legal/",
            "Legal/Taxes/",
            "Archive/",
        ]);

        match validated_lifecycle_rules(input).unwrap_err() {
            LifecycleRuleValidationError::ConflictingRules(conflicts) => {
                assert_eq!(conflicts.len(), 1);
                assert_eq!(prefixes(&conflicts["Legal/"]), ["Legal/Taxes/"]);
            },
            e => panic!("Unexpected error: {}", e),
        }
    }

    #[test]
    fn validate_many_lifecycle_rules_conflict() {
        let input = make_rules(&[
            "Docs/Photos/",
            "Docs/",
            "Docs/Documents/",
            "Archive/Temporary/",
            "Legal/Taxes/",
            "Legal/Other/",
            "Docs/Photos/Vacations/",
            "Archive/",
        ]);

        match validated_lifecycle_rules(input).unwrap_err() {
            LifecycleRuleValidationError::ConflictingRules(c) => {
                assert_eq!(c.len(), 3);

                assert_eq!(
                    prefixes(&c["Docs/"]),
                    [
                        "Docs/Documents/",
                        "Docs/Photos/",
                        "Docs/Photos/Vacations/",
                    ]
                );

                // This is a duplicated record owing its existence to the way
                // we've happened to implement the loops. I don't want to
                // iterate the vectors yet again to eliminate it, and I think
                // I'm OK with the duplication.
                assert_eq!(
                    prefixes(&c["Docs/Photos/"]),
                    ["Docs/Photos/Vacations/"]
                );

                assert_eq!(prefixes(&c["Archive/"]), ["Archive/Temporary/"]);
            },
            e => panic!("Unexpected error: {}", e),
        }
    }

    #[test]
    fn validate_many_lifecycle_rules_multiple_conflicts() {
        let input = make_rules(&[
            "Docs/Photos/Vacations/",
            "Docs/Photos/Buildings/",
            "Docs/Photos/",
            "Docs/",
            "Docs/Documents/",
        ]);

        match validated_lifecycle_rules(input).unwrap_err() {
            LifecycleRuleValidationError::ConflictingRules(conflicts) => {
                assert_eq!(conflicts.len(), 1);
                assert_eq!(
                    prefixes(&conflicts["Docs/"]),
                    [
                        "Docs/Documents/",
                        "Docs/Photos/",
                        "Docs/Photos/Buildings/",
                        "Docs/Photos/Vacations/",
                    ]
                );
            },
            e => panic!("Unexpected error: {}", e),
        }
    }

    #[test]
    fn validate_empty_lifecycle_rule_alone_is_good() {
        let input = make_rules(&[""]);

        let validated = validated_lifecycle_rules(input).unwrap();
        assert_eq!(prefixes(&validated), [""]);
    }

    #[test]
    fn validate_empty_lifecycle_rule_with_others_is_error() {
        let input = make_rules(&[
            "Docs/Photos/",
            "",
            "Legal/",
            "Legal/Taxes/",
            "Archive/",
        ]);

        match validated_lifecycle_rules(input).unwrap_err() {
            LifecycleRuleValidationError::ConflictingRules(conflicts) => {
                assert_eq!(conflicts.len(), 1);
                assert_eq!(
                    prefixes(&conflicts[""]),
                    ["Archive/", "Docs/Photos/", "Legal/", "Legal/Taxes/"]
                );
            },
            e => panic!("Unexpected error: {}", e),
        }
    }

    #[test]
    fn validate_quoted_string() {
        let valid = ["\"\"", "\"a\"", "\"abcde\"", "\"ab\\\"cde\""];

        for text in valid.iter() {
            assert!(is_valid_quoted_string(text));
        }

        // An unescaped quote inside the string is not allowed.
        assert!(! is_valid_quoted_string("\"ab\"cd\""));
    }

    #[test]
    fn validate_info_key_val_filters_disallowed_chars() {
        let value = json!("asdf");

        let good_keys = ["good-sep", "good#sep", "$goodsep", "good-sep%"];
        let bad_keys = ["bad@sep", "bad(sep", "{badsep", "badsep]"];

        for key in good_keys.iter() {
            validate_info_key_val(key, &value).unwrap();
        }

        for key in bad_keys.iter() {
            validate_info_key_val(key, &value).unwrap_err();
        }
    }

    #[test]
    fn validate_content_disposition_fields() {
        let key = "b2-content-disposition";

        let valid = [
            "inline",
            "attachment; filename=\"myfile.txt\"",
            "attachment; something=value",
            "attachment; filename=\"myfile.txt\"; something=value",
        ];

        for value in valid.iter() {
            validate_info_val(key, &json!(value)).unwrap();
        }

        // RFC 6266 says that the semicolon without at least one field is
        // illegal. It wouldn't surprise me if many clients allow it, but we're
        // going to enforce the standard.
        let invalid = ["inline;", "inline; f="];

        for value in invalid.iter() {
            validate_info_val(key, &json!(value)).unwrap_err();
        }
    }

    #[test]
    fn validate_content_language() {
        let key = "b2-content-language";

        for value in ["en", "lang-dialect"].iter() {
            validate_info_val(key, &json!(value)).unwrap();
        }

        for value in ["bad-lang/text", "bad+lang"].iter() {
            validate_info_val(key, &json!(value)).unwrap_err();
        }
    }

    #[test]
    fn validate_expires() {
        let good = json!("Thu, 01 Dec 1994 16:00:00 GMT");
        let bad = json!("2021-1-1");

        validate_info_val("b2-expires", &good).unwrap();
        validate_info_val("b2-expires", &bad).unwrap_err();
    }

    #[test]
    fn validate_cache_control() {
        let good = json!("no-store");

        validate_info_val("b2-cache-control", &good).unwrap();

        // TODO: Implement cache-extension validation to test:
        //validate_info_val("b2-cache-control", &json!("(not-valid-token)"))
        //    .unwrap_err();
    }
}
707}