use std::fmt;
use std::str::FromStr;
use serde::{Deserialize, Serialize};
use thiserror::Error;
/// Maximum number of characters accepted in a validated identifier.
///
/// Passed as the length limit whenever a `DagId` or `TaskId` is constructed;
/// longer values are rejected with [`IdentifierError::TooLong`].
pub const MAX_IDENTIFIER_LEN: usize = 250;
/// Reasons a candidate identifier fails validation.
///
/// Every variant carries `kind`, the static label of the identifier being
/// validated (for example `"dag_id"`), which is interpolated into the
/// rendered message.
#[derive(Debug, Clone, Error, PartialEq, Eq)]
#[non_exhaustive]
pub enum IdentifierError {
/// The candidate value was the empty string.
#[error("{kind} must not be empty")]
Empty {
/// Label of the identifier kind that was being validated.
kind: &'static str,
},
/// The candidate value has more characters than the allowed maximum.
#[error("{kind} must be at most {max_len} characters (got {len})")]
TooLong {
/// Label of the identifier kind that was being validated.
kind: &'static str,
/// Maximum number of characters that would have been accepted.
max_len: usize,
/// Number of characters the rejected value actually had.
len: usize,
},
/// The candidate value contains a character outside the allowed set.
#[error("{kind} contains invalid character {bad:?}; allowed: [a-zA-Z0-9_\\-\\.]")]
InvalidCharacter {
/// Label of the identifier kind that was being validated.
kind: &'static str,
/// The first offending character found in the value.
bad: char,
},
}
/// Reports whether `c` may appear in a validated identifier.
///
/// The allowed set is ASCII letters, ASCII digits, `_`, `-` and `.`; every
/// other character (including all non-ASCII ones) is rejected.
#[inline]
const fn is_safe_airflow_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '.')
}
/// Checks that `value` is an acceptable identifier.
///
/// `kind` is a static label (for example `"dag_id"`) copied into any error
/// that is produced; `max_len` is the maximum permitted number of characters.
///
/// # Errors
///
/// Checks run in this order and the first failure is returned:
/// * [`IdentifierError::Empty`] when `value` is empty,
/// * [`IdentifierError::TooLong`] when `value` has more than `max_len`
///   characters,
/// * [`IdentifierError::InvalidCharacter`] carrying the first character that
///   is not accepted by `is_safe_airflow_char`.
fn validate_safe_identifier(
    kind: &'static str,
    value: &str,
    max_len: usize,
) -> Result<(), IdentifierError> {
    // Length is measured in `char`s rather than bytes; a count of zero is
    // exactly the empty-string case.
    let len = value.chars().count();
    if len == 0 {
        return Err(IdentifierError::Empty { kind });
    }
    if len > max_len {
        return Err(IdentifierError::TooLong { kind, max_len, len });
    }

    match value.chars().find(|&c| !is_safe_airflow_char(c)) {
        Some(bad) => Err(IdentifierError::InvalidCharacter { kind, bad }),
        None => Ok(()),
    }
}
/// Defines a validated identifier newtype around `String`.
///
/// `$name` is the generated type and `$kind` is the static label placed in
/// any [`IdentifierError`] it produces (for example `"dag_id"`). Attributes
/// written before `$name`, doc comments included, are forwarded to the
/// generated struct.
///
/// Every way of obtaining a value (`new`, `FromStr`, `TryFrom<String>`,
/// `TryFrom<&str>` and serde deserialization) runs
/// `validate_safe_identifier` with `MAX_IDENTIFIER_LEN`, so an instance
/// always holds a validated string.
macro_rules! define_safe_identifier {
    ($(#[$meta:meta])* $name:ident, $kind:literal) => {
        $(#[$meta])*
        #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Deserialize)]
        // Deserialize through `TryFrom<String>` so untrusted input is
        // validated. A derived transparent impl would wrap whatever string
        // it was handed and skip `new`, breaking the type's invariant.
        #[serde(try_from = "String")]
        pub struct $name(String);

        impl $name {
            /// Validates `value` and wraps it.
            ///
            /// # Errors
            ///
            /// Returns an [`IdentifierError`] when the value is empty, has
            /// more than `MAX_IDENTIFIER_LEN` characters, or contains a
            /// character outside `[a-zA-Z0-9_.-]`.
            pub fn new(value: impl Into<String>) -> Result<Self, IdentifierError> {
                let s = value.into();
                validate_safe_identifier($kind, &s, MAX_IDENTIFIER_LEN)?;
                Ok(Self(s))
            }

            /// Borrows the validated identifier as a string slice.
            #[must_use]
            pub fn as_str(&self) -> &str {
                &self.0
            }

            /// Consumes the identifier and returns the owned string.
            #[must_use]
            pub fn into_inner(self) -> String {
                self.0
            }
        }

        // Hand-written so the serialized form is a bare string, with no
        // newtype wrapper around it.
        impl Serialize for $name {
            fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
            where
                S: serde::Serializer,
            {
                serializer.serialize_str(&self.0)
            }
        }

        impl fmt::Display for $name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                f.write_str(&self.0)
            }
        }

        impl AsRef<str> for $name {
            fn as_ref(&self) -> &str {
                &self.0
            }
        }

        impl FromStr for $name {
            type Err = IdentifierError;

            fn from_str(s: &str) -> Result<Self, Self::Err> {
                Self::new(s)
            }
        }

        impl TryFrom<String> for $name {
            type Error = IdentifierError;

            fn try_from(value: String) -> Result<Self, Self::Error> {
                Self::new(value)
            }
        }

        impl TryFrom<&str> for $name {
            type Error = IdentifierError;

            fn try_from(value: &str) -> Result<Self, Self::Error> {
                // `new` accepts anything `Into<String>`, so no explicit
                // `to_owned()` is needed here.
                Self::new(value)
            }
        }
    };
}
define_safe_identifier!(
/// Validated identifier of a DAG.
///
/// Validation failures are reported with the label `"dag_id"`.
DagId,
"dag_id"
);
define_safe_identifier!(
/// Validated identifier of a task.
///
/// Validation failures are reported with the label `"task_id"`.
TaskId,
"task_id"
);
/// Serialisable summary of one DAG definition.
///
/// NOTE(review): the code that populates this struct is not in this file, so
/// the field descriptions below are inferred from names and types — confirm
/// against the parser backend.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct ExtractedDag {
/// Validated DAG identifier, when one is available.
pub dag_id: Option<DagId>,
/// Validated task identifiers belonging to the DAG.
pub task_ids: Vec<TaskId>,
/// Schedule kept as a raw, unvalidated string (e.g. `"@daily"`).
pub schedule: Option<String>,
/// Presumably whether the DAG supplies `default_args` — TODO confirm.
pub has_default_args: bool,
/// Pairs of task ids describing dependencies; presumably
/// `(upstream, downstream)` — TODO confirm the direction.
pub deps_edges: Vec<(TaskId, TaskId)>,
/// Line range associated with the DAG, when known.
pub source_span: Option<SourceSpan>,
}
/// A range of source lines.
///
/// NOTE(review): whether line numbers are 0- or 1-based and whether
/// `end_line` is inclusive is not established in this file — confirm against
/// the code that builds spans.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct SourceSpan {
/// First line of the span.
pub start_line: u32,
/// Last line of the span.
pub end_line: u32,
}
/// Errors reported while parsing a source file into an [`ExtractedDag`].
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ParseError {
/// The Python source could not be parsed; the payload is the parser's
/// message.
#[error("python parse error: {0}")]
Parse(String),
/// A string taken from the source is not a valid identifier.
#[error("invalid {kind} {value:?}: {reason}")]
InvalidIdentifier {
/// Label of the identifier kind (e.g. `"dag_id"` or `"task_id"`).
kind: &'static str,
/// The rejected value, exactly as it was supplied.
value: String,
/// Human-readable explanation of why the value was rejected.
reason: String,
},
/// An unexpected failure inside the parser itself.
#[error("internal parser error: {0}")]
Internal(String),
/// Reading the file at `path` failed.
#[error("io error reading {path:?}: {source}")]
Io {
/// Path of the file that was being read.
path: std::path::PathBuf,
/// The underlying I/O error, also exposed as the error source.
#[source]
source: std::io::Error,
},
/// The crate was built without any parser backend feature.
#[error("no parser backend enabled (compile with the `parser-ruff` feature)")]
NoBackend,
}
impl ParseError {
    /// Builds a [`ParseError::InvalidIdentifier`] from a failed validation.
    ///
    /// `kind` and `value` are stored as given; `reason` is the `Display`
    /// rendering of `err`.
    pub(crate) fn from_id_error(kind: &'static str, value: String, err: &IdentifierError) -> Self {
        let reason = err.to_string();
        Self::InvalidIdentifier {
            kind,
            value,
            reason,
        }
    }
}
/// Appends `name` to `into` unless an equal task id is already present.
///
/// Existing entries are never moved, so the vector keeps first-seen order.
/// Membership is checked with a linear scan.
#[cfg(any(feature = "parser-ruff", test))]
pub(crate) fn push_unique_task(into: &mut Vec<TaskId>, name: TaskId) {
    // `TaskId` equality is the derived comparison of the wrapped strings.
    if into.contains(&name) {
        return;
    }
    into.push(name);
}
pub(crate) fn make_dag_id(value: String) -> Result<DagId, ParseError> {
DagId::new(value.clone()).map_err(|e| ParseError::from_id_error("dag_id", value, &e))
}
pub(crate) fn make_task_id(value: String) -> Result<TaskId, ParseError> {
TaskId::new(value.clone()).map_err(|e| ParseError::from_id_error("task_id", value, &e))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A fully populated `ExtractedDag` survives a JSON round trip unchanged.
    #[test]
    fn extracted_dag_round_trips_through_json() {
        let dag = ExtractedDag {
            dag_id: Some(DagId::new("hello").unwrap()),
            task_ids: vec![TaskId::new("a").unwrap(), TaskId::new("b").unwrap()],
            schedule: Some("@daily".into()),
            has_default_args: true,
            deps_edges: vec![(TaskId::new("a").unwrap(), TaskId::new("b").unwrap())],
            source_span: Some(SourceSpan {
                start_line: 1,
                end_line: 8,
            }),
        };
        let json = serde_json::to_string(&dag).expect("serialise");
        let back: ExtractedDag = serde_json::from_str(&json).expect("deserialise");
        assert_eq!(dag, back);
    }

    /// A space is outside the allowed character set and is reported as the
    /// offending character.
    #[test]
    fn dag_id_rejects_invalid_chars() {
        let e = DagId::new("has space").expect_err("must reject");
        assert!(matches!(
            e,
            IdentifierError::InvalidCharacter {
                kind: "dag_id",
                bad: ' '
            }
        ));
    }

    /// One character over the limit is rejected.
    #[test]
    fn dag_id_rejects_too_long() {
        let long = "a".repeat(MAX_IDENTIFIER_LEN + 1);
        let e = DagId::new(long).expect_err("must reject");
        assert!(matches!(e, IdentifierError::TooLong { kind: "dag_id", .. }));
    }

    /// The `TooLong` error carries both the limit and the actual length.
    #[test]
    fn dag_id_too_long_reports_lengths() {
        let long = "a".repeat(MAX_IDENTIFIER_LEN + 1);
        let e = DagId::new(long).expect_err("must reject");
        assert_eq!(
            e,
            IdentifierError::TooLong {
                kind: "dag_id",
                max_len: MAX_IDENTIFIER_LEN,
                len: MAX_IDENTIFIER_LEN + 1,
            }
        );
    }

    /// The limit is inclusive: exactly `MAX_IDENTIFIER_LEN` characters pass.
    #[test]
    fn dag_id_accepts_exactly_max_len() {
        let edge = "a".repeat(MAX_IDENTIFIER_LEN);
        let id = DagId::new(edge).expect("value at the limit must be accepted");
        assert_eq!(id.as_str().len(), MAX_IDENTIFIER_LEN);
    }

    /// The empty string is rejected with the dedicated variant.
    #[test]
    fn dag_id_rejects_empty() {
        let e = DagId::new("").expect_err("must reject");
        assert!(matches!(e, IdentifierError::Empty { kind: "dag_id" }));
    }

    /// Dots, dashes, underscores and digits are all allowed.
    #[test]
    fn task_id_accepts_dotted_dashed_underscored() {
        for ok in &["a", "a.b", "a-b", "a_b", "a.b-c_d.0"] {
            TaskId::new(*ok).unwrap_or_else(|_| panic!("must accept {ok:?}"));
        }
    }

    /// Letters outside ASCII are rejected even though they are alphabetic.
    #[test]
    fn task_id_rejects_non_ascii() {
        let e = TaskId::new("t\u{e2}che").expect_err("must reject");
        assert_eq!(
            e,
            IdentifierError::InvalidCharacter {
                kind: "task_id",
                bad: '\u{e2}',
            }
        );
    }

    /// `Display` and `as_str` both expose the wrapped string verbatim.
    #[test]
    fn dag_id_displays_inner() {
        let id = DagId::new("hello").unwrap();
        assert_eq!(id.to_string(), "hello");
        assert_eq!(id.as_str(), "hello");
    }

    /// `str::parse` goes through `FromStr` and yields the same value.
    #[test]
    fn dag_id_from_str_round_trips() {
        let parsed: DagId = "ok".parse().unwrap();
        assert_eq!(parsed.as_str(), "ok");
    }

    /// `TryFrom<String>` accepts a valid owned string.
    #[test]
    fn task_id_try_from_string_round_trips() {
        let s = String::from("foo");
        let id = TaskId::try_from(s).unwrap();
        assert_eq!(id.as_str(), "foo");
    }

    /// `TryFrom<&str>` accepts a valid borrowed string.
    #[test]
    fn task_id_try_from_str_round_trips() {
        let id = TaskId::try_from("foo").unwrap();
        assert_eq!(id.as_str(), "foo");
    }

    /// `make_dag_id` converts a validation failure into
    /// `ParseError::InvalidIdentifier`, keeping the rejected value.
    #[test]
    fn make_dag_id_wraps_validation_error() {
        let e = make_dag_id("has space".into()).expect_err("must reject");
        match e {
            ParseError::InvalidIdentifier { kind, value, .. } => {
                assert_eq!(kind, "dag_id");
                assert_eq!(value, "has space");
            }
            other => panic!("expected InvalidIdentifier, got {other:?}"),
        }
    }

    /// `make_task_id` labels its errors `"task_id"` and carries the
    /// validation message as the reason.
    #[test]
    fn make_task_id_wraps_validation_error() {
        let e = make_task_id(String::new()).expect_err("must reject");
        match e {
            ParseError::InvalidIdentifier {
                kind,
                value,
                reason,
            } => {
                assert_eq!(kind, "task_id");
                assert_eq!(value, "");
                assert_eq!(reason, "task_id must not be empty");
            }
            other => panic!("expected InvalidIdentifier, got {other:?}"),
        }
    }

    /// Valid input to `make_task_id` yields the id unchanged.
    #[test]
    fn make_task_id_accepts_valid_value() {
        let id = make_task_id("extract.load-1".into()).expect("must accept");
        assert_eq!(id.as_str(), "extract.load-1");
    }

    /// Duplicates are dropped and the first occurrence keeps its position.
    #[test]
    fn push_unique_task_keeps_first_seen_order() {
        let mut v = Vec::<TaskId>::new();
        push_unique_task(&mut v, TaskId::new("x").unwrap());
        push_unique_task(&mut v, TaskId::new("y").unwrap());
        push_unique_task(&mut v, TaskId::new("x").unwrap());
        assert_eq!(v.len(), 2);
        assert_eq!(v[0].as_str(), "x");
        assert_eq!(v[1].as_str(), "y");
    }
}