Skip to main content

protify/validators/
bytes.rs

1use crate::validators::string::well_known_strings::*;
2mod builder;
3pub use builder::BytesValidatorBuilder;
4
5use ::bytes::Bytes;
6#[cfg(feature = "regex")]
7use regex::bytes::Regex;
8
9use super::*;
10
11impl_proto_type!(Bytes, Bytes);
12impl_proto_type!(Vec<u8>, Bytes);
13
14/// Validator for the [`Bytes`] type.
15#[non_exhaustive]
16#[derive(Clone, Debug, Default)]
17#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
18pub struct BytesValidator {
19	/// Adds custom validation using one or more [`CelRule`]s to this field.
20	pub cel: Vec<CelProgram>,
21
22	/// The conditions upon which this validator should be skipped.
23	pub ignore: Ignore,
24
25	/// A well known byte format that the target should fit in.
26	pub well_known: Option<WellKnownBytes>,
27
28	/// Specifies that the field must be set (if optional) or not equal to its zero value (if not optional) in order to be valid.
29	pub required: bool,
30
31	/// Specifies that the given `bytes` field must be of this exact length.
32	pub len: Option<usize>,
33
34	/// Specifies that the given `bytes` field must have a length that is equal to or higher than the given value.
35	pub min_len: Option<usize>,
36
37	/// Specifies that the given `bytes` field must have a length that is equal to or lower than the given value.
38	pub max_len: Option<usize>,
39
40	#[cfg(feature = "regex")]
41	#[cfg_attr(
42		feature = "serde",
43		serde(with = "crate::serde_impls::bytes_regex_serde")
44	)]
45	/// Specifies a regex pattern that must be matches by the value to pass validation.
46	pub pattern: Option<Regex>,
47
48	/// Specifies a prefix that the value must start with in order to pass validation.
49	pub prefix: Option<Bytes>,
50
51	/// Specifies a suffix that the value must end with in order to pass validation.
52	pub suffix: Option<Bytes>,
53
54	/// Specifies a subset of bytes that the value must contain in order to pass validation.
55	pub contains: Option<Bytes>,
56
57	/// Specifies that only the values in this list will be considered valid for this field.
58	pub in_: Option<SortedList<Bytes>>,
59
60	/// Specifies that the values in this list will be considered NOT valid for this field.
61	pub not_in: Option<SortedList<Bytes>>,
62
63	/// Specifies that only this specific value will be considered valid for this field.
64	pub const_: Option<Bytes>,
65
66	/// A map of custom error messages.
67	pub error_messages: Option<ErrorMessages<BytesViolation>>,
68}
69
70impl Eq for BytesValidator {}
71
72impl Hash for BytesValidator {
73	fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
74		self.cel.hash(state);
75		self.ignore.hash(state);
76		self.well_known.hash(state);
77		self.required.hash(state);
78		self.len.hash(state);
79		self.min_len.hash(state);
80		self.max_len.hash(state);
81		#[cfg(feature = "regex")]
82		self.pattern
83			.as_ref()
84			.map(|r| r.as_str())
85			.hash(state);
86		self.prefix.hash(state);
87		self.suffix.hash(state);
88		self.contains.hash(state);
89		self.in_.hash(state);
90		self.not_in.hash(state);
91		self.const_.hash(state);
92		self.error_messages.hash(state);
93	}
94}
95
96impl PartialEq for BytesValidator {
97	fn eq(&self, other: &Self) -> bool {
98		#[cfg(feature = "regex")]
99		let baseline =
100			self.pattern.as_ref().map(|r| r.as_str()) == other.pattern.as_ref().map(|r| r.as_str());
101		#[cfg(not(feature = "regex"))]
102		let baseline = true;
103
104		baseline
105			&& self.cel == other.cel
106			&& self.ignore == other.ignore
107			&& self.well_known == other.well_known
108			&& self.required == other.required
109			&& self.len == other.len
110			&& self.min_len == other.min_len
111			&& self.max_len == other.max_len
112			&& self.prefix == other.prefix
113			&& self.suffix == other.suffix
114			&& self.contains == other.contains
115			&& self.in_ == other.in_
116			&& self.not_in == other.not_in
117			&& self.const_ == other.const_
118			&& self.error_messages == other.error_messages
119	}
120}
121
122impl BytesValidator {
123	const fn has_pattern(&self) -> bool {
124		#[cfg(feature = "regex")]
125		{
126			self.pattern.is_some()
127		}
128		#[cfg(not(feature = "regex"))]
129		{
130			false
131		}
132	}
133}
134
135impl Validator<Bytes> for BytesValidator {
136	type Target = [u8];
137
138	#[inline(never)]
139	#[cold]
140	fn check_consistency(&self) -> Result<(), Vec<ConsistencyError>> {
141		let mut errors = Vec::new();
142
143		macro_rules! check_prop_some {
144      ($($id:ident),*) => {
145        $(self.$id.is_some()) ||*
146      };
147    }
148
149		if self.const_.is_some()
150			&& (!self.cel.is_empty()
151				|| check_prop_some!(
152					len, min_len, max_len, prefix, suffix, contains, in_, not_in, well_known
153				) || self.has_pattern())
154		{
155			errors.push(ConsistencyError::ConstWithOtherRules);
156		}
157
158		if let Some(custom_messages) = self.error_messages.as_deref() {
159			let mut unused_messages: Vec<String> = Vec::new();
160
161			for key in custom_messages.keys() {
162				macro_rules! check_unused_messages {
163          ($($name:ident),*) => {
164            paste! {
165              match key {
166                BytesViolation::Required => self.required,
167                BytesViolation::In => self.in_.is_some(),
168                BytesViolation::Const => self.const_.is_some(),
169                BytesViolation::Ip
170                | BytesViolation::Ipv4
171                | BytesViolation::Ipv6
172                | BytesViolation::Uuid => self.well_known.is_some(),
173                #[cfg(feature = "regex")]
174                BytesViolation::Pattern => self.pattern.is_some(),
175                $(BytesViolation::[< $name:camel >] => self.$name.is_some(),)*
176                _ => true,
177              }
178            }
179          };
180        }
181
182				let is_used =
183					check_unused_messages!(len, min_len, max_len, contains, prefix, suffix, not_in);
184
185				if !is_used {
186					unused_messages.push(format!("{key:?}"));
187				}
188			}
189
190			if !unused_messages.is_empty() {
191				errors.push(ConsistencyError::UnusedCustomMessages(unused_messages));
192			}
193		}
194
195		#[cfg(feature = "cel")]
196		if let Err(e) = self.__check_cel_programs() {
197			errors.extend(e.into_iter().map(ConsistencyError::from));
198		}
199
200		if let Err(e) = check_list_rules(self.in_.as_ref(), self.not_in.as_ref()) {
201			errors.push(e.into());
202		}
203
204		if let Err(e) = check_length_rules(
205			Some(length_rule_value!("len", self.len)),
206			length_rule_value!("min_len", self.min_len),
207			length_rule_value!("max_len", self.max_len),
208		) {
209			errors.push(e);
210		}
211
212		if errors.is_empty() {
213			Ok(())
214		} else {
215			Err(errors)
216		}
217	}
218
219	#[doc(hidden)]
220	#[inline(never)]
221	#[cold]
222	fn __cel_rules(&self) -> Vec<CelRule> {
223		self.cel
224			.iter()
225			.map(|p| p.rule().clone())
226			.collect()
227	}
228
229	#[cfg(feature = "cel")]
230	#[inline(never)]
231	#[cold]
232	#[doc(hidden)]
233	fn __check_cel_programs(&self) -> Result<(), Vec<CelError>> {
234		self.check_cel_programs_with(vec![])
235	}
236
237	#[cfg(feature = "cel")]
238	#[inline(never)]
239	#[cold]
240	fn check_cel_programs_with(
241		&self,
242		val: <Self::Target as ToOwned>::Owned,
243	) -> Result<(), Vec<CelError>> {
244		if self.cel.is_empty() {
245			Ok(())
246		} else {
247			// This one needs a special impl because Bytes does not support Into<Value>
248			test_programs(&self.cel, val)
249		}
250	}
251
252	fn execute_validation(
253		&self,
254		ctx: &mut ValidationCtx,
255		val: Option<&Self::Target>,
256	) -> ValidationResult {
257		handle_ignore_always!(&self.ignore);
258		handle_ignore_if_zero_value!(&self.ignore, val.is_none_or(|v| v.is_empty()));
259
260		let mut is_valid = IsValid::Yes;
261
262		macro_rules! handle_violation {
263			($id:ident, $default:expr) => {
264				is_valid &= ctx.add_violation(
265					ViolationKind::Bytes(BytesViolation::$id),
266					self.error_messages
267						.as_deref()
268						.and_then(|map| map.get(&BytesViolation::$id))
269						.map(|m| Cow::Borrowed(m.as_ref()))
270						.unwrap_or_else(|| Cow::Owned($default)),
271				)?;
272			};
273		}
274
275		if self.required && val.is_none_or(|v| v.is_empty()) {
276			handle_violation!(Required, "is required".to_string());
277			return Ok(is_valid);
278		}
279
280		if let Some(val) = val {
281			if let Some(const_val) = &self.const_ {
282				if *val != *const_val {
283					handle_violation!(
284						Const,
285						format!("must be equal to \"{}\"", const_val.escape_ascii())
286					);
287				}
288
289				// Using `const` implies no other rules
290				return Ok(is_valid);
291			}
292
293			if let Some(len) = self.len
294				&& val.len() != len
295			{
296				handle_violation!(
297					Len,
298					format!("must be exactly {len} byte{} long", pluralize!(len))
299				);
300			}
301
302			if let Some(min_len) = self.min_len
303				&& val.len() < min_len
304			{
305				handle_violation!(
306					MinLen,
307					format!(
308						"must be at least {min_len} byte{} long",
309						pluralize!(min_len)
310					)
311				);
312			}
313
314			if let Some(max_len) = self.max_len
315				&& val.len() > max_len
316			{
317				handle_violation!(
318					MaxLen,
319					format!(
320						"cannot be longer than {max_len} byte{}",
321						pluralize!(max_len)
322					)
323				);
324			}
325
326			if let Some(prefix) = &self.prefix
327				&& !val.starts_with(prefix)
328			{
329				handle_violation!(
330					Prefix,
331					format!("must start with \"{}\"", prefix.escape_ascii())
332				);
333			}
334
335			if let Some(suffix) = &self.suffix
336				&& !val.ends_with(suffix)
337			{
338				handle_violation!(
339					Suffix,
340					format!("must end with \"{}\"", suffix.escape_ascii())
341				);
342			}
343
344			if let Some(substring) = &self.contains
345				&& !val
346					.windows(val.len())
347					.any(|slice| slice == substring)
348			{
349				handle_violation!(
350					Contains,
351					format!("must contain \"{}\"", substring.escape_ascii())
352				);
353			}
354
355			#[cfg(feature = "regex")]
356			if let Some(pattern) = &self.pattern
357				&& !pattern.is_match(val)
358			{
359				handle_violation!(Pattern, format!("must match the pattern `{pattern}`"));
360			}
361
362			if let Some(allowed_list) = &self.in_
363				&& !allowed_list.contains(val.as_ref())
364			{
365				handle_violation!(
366					In,
367					format!(
368						"must be one of these values: {}",
369						Bytes::__format_list(allowed_list)
370					)
371				);
372			}
373
374			if let Some(forbidden_list) = &self.not_in
375				&& forbidden_list.contains(val.as_ref())
376			{
377				handle_violation!(
378					NotIn,
379					format!(
380						"cannot be one of these values: {}",
381						Bytes::__format_list(forbidden_list)
382					)
383				);
384			}
385
386			if let Some(well_known) = &self.well_known {
387				let byte_str = core::str::from_utf8(val).unwrap_or("");
388
389				match well_known {
390					#[cfg(feature = "regex")]
391					WellKnownBytes::Uuid => {
392						if !is_valid_uuid(byte_str) {
393							handle_violation!(Uuid, "must be a valid UUID".to_string());
394						}
395					}
396					WellKnownBytes::Ip => {
397						if !is_valid_ip(byte_str) {
398							handle_violation!(Ip, "must be a valid ip address".to_string());
399						}
400					}
401					WellKnownBytes::Ipv4 => {
402						if !is_valid_ipv4(byte_str) {
403							handle_violation!(Ipv4, "must be a valid ipv4 address".to_string());
404						}
405					}
406					WellKnownBytes::Ipv6 => {
407						if !is_valid_ipv6(byte_str) {
408							handle_violation!(Ipv6, "must be a valid ipv6 address".to_string());
409						}
410					}
411				};
412			}
413
414			#[cfg(feature = "cel")]
415			if !self.cel.is_empty() {
416				let cel_ctx = ProgramsExecutionCtx {
417					programs: &self.cel,
418					value: val.to_vec(),
419					ctx,
420				};
421
422				is_valid &= cel_ctx.execute_programs()?;
423			}
424		}
425
426		Ok(is_valid)
427	}
428
429	#[inline(never)]
430	#[cold]
431	fn schema(&self) -> Option<ValidatorSchema> {
432		Some(ValidatorSchema {
433			schema: self.clone().into(),
434			cel_rules: self.__cel_rules(),
435			imports: vec!["buf/validate/validate.proto".into()],
436		})
437	}
438}
439
440impl From<BytesValidator> for ProtoOption {
441	#[inline(never)]
442	#[cold]
443	fn from(validator: BytesValidator) -> Self {
444		let mut rules = OptionMessageBuilder::new();
445
446		macro_rules! set_options {
447      ($($name:ident),*) => {
448        rules
449        $(
450          .maybe_set(stringify!($name), validator.$name)
451        )*
452      };
453    }
454
455		set_options!(min_len, max_len, len, contains, prefix, suffix);
456
457		#[cfg(feature = "regex")]
458		if let Some(pattern) = validator.pattern {
459			rules.set("pattern", OptionValue::String(pattern.to_string().into()));
460		}
461
462		rules
463			.maybe_set("const", validator.const_)
464			.maybe_set(
465				"in",
466				validator.in_.map(|list| {
467					OptionValue::List(
468						list.items
469							.iter()
470							.map(|b| {
471								OptionValue::String(format_bytes_as_proto_string_literal(b).into())
472							})
473							.collect(),
474					)
475				}),
476			)
477			.maybe_set(
478				"not_in",
479				validator.not_in.map(|list| {
480					OptionValue::List(
481						list.items
482							.iter()
483							.map(|b| {
484								OptionValue::String(format_bytes_as_proto_string_literal(b).into())
485							})
486							.collect(),
487					)
488				}),
489			);
490
491		if let Some(well_known) = validator.well_known {
492			let (name, val) = well_known.to_option();
493			rules.set(name, val);
494		}
495
496		let mut outer_rules = OptionMessageBuilder::new();
497
498		if !rules.is_empty() {
499			outer_rules.set("bytes", OptionValue::Message(rules.into()));
500		}
501
502		outer_rules
503			.add_cel_options(validator.cel)
504			.set_required(validator.required)
505			.set_ignore(validator.ignore);
506
507		Self {
508			name: "(buf.validate.field)".into(),
509			value: OptionValue::Message(outer_rules.into()),
510		}
511	}
512}
513
/// The well known formats that a `bytes` value can be required to adhere to.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub enum WellKnownBytes {
	/// The value must be a valid UUID. Only available with the `regex` feature.
	#[cfg(feature = "regex")]
	Uuid,
	/// The value must be a valid ip address.
	Ip,
	/// The value must be a valid ipv4 address.
	Ipv4,
	/// The value must be a valid ipv6 address.
	Ipv6,
}
525
526impl WellKnownBytes {
527	#[inline(never)]
528	#[cold]
529	pub(crate) fn to_option(self) -> (FixedStr, OptionValue) {
530		let name = match self {
531			#[cfg(feature = "regex")]
532			Self::Uuid => "uuid",
533			Self::Ip => "ip",
534			Self::Ipv4 => "ipv4",
535			Self::Ipv6 => "ipv6",
536		};
537
538		(name.into(), OptionValue::Bool(true))
539	}
540}
541
/// Renders `bytes` as the contents of a double-quoted string literal.
///
/// Printable ASCII (space through `~`) is copied verbatim, except for `"` and
/// `\`, which are backslash-escaped. Every other byte is written as a
/// two-digit lowercase hex escape (`\xNN`).
#[inline(never)]
#[cold]
pub(crate) fn format_bytes_as_proto_string_literal(bytes: &[u8]) -> String {
	bytes.iter().fold(String::new(), |mut literal, &b| {
		match b {
			b'"' => literal.push_str("\\\""),
			b'\\' => literal.push_str("\\\\"),
			// The two special characters above were handled already, so the
			// rest of the printable range can be emitted as-is.
			b' '..=b'~' => literal.push(char::from(b)),
			_ => {
				// Writing into a `String` cannot fail.
				let _ = write!(literal, "\\x{b:02x}");
			}
		}

		literal
	})
}