use crate::Depfile;
use logos::{Logos, Span};
use std::borrow::Cow;
impl<'a> TryFrom<&'a str> for Depfile<'a> {
    /// Byte offset into the input at which parsing failed.
    type Error = usize;

    /// Parses Makefile-style dependency rules (`target...: dep...`) from `value`.
    ///
    /// Each line holds one or more whitespace-separated targets, a `:`, and
    /// zero or more whitespace-separated dependencies. `\:`, `\\` and
    /// backslash-space/tab insert the escaped character into the current path,
    /// and a backslash followed by a newline acts as whitespace. A `:` after the
    /// separating colon is treated as part of a dependency (e.g. `C:/dir/file`).
    ///
    /// # Errors
    ///
    /// Returns the start offset of the offending token when:
    /// * a `:` appears before any target,
    /// * a bare carriage return or an unrecognised sequence (such as a trailing
    ///   backslash) is encountered,
    /// * the input ends after target text without a `:` (the offset reported is
    ///   the one the lexer gives at end of input).
    fn try_from(value: &'a str) -> Result<Self, Self::Error> {
        #[derive(PartialEq)]
        enum State {
            /// Start of a rule: no target text has been seen yet.
            ExpectTarget,
            /// Target text has been seen; more targets or the `:` may follow.
            ContinueTargetOrColon,
            /// Past the `:`; collecting dependencies until the end of the line.
            ExpectDeps,
        }

        let mut state = State::ExpectTarget;
        let mut lex = Token::lexer(value);
        let mut rules = Vec::new();
        let mut target = Vec::<Cow<'_, str>>::new();
        let mut buf = Buf::None;
        let mut deps = Vec::new();

        // Not a `for` loop: the lexer must stay accessible for `span()`/`slice()`.
        while let Some(token) = lex.next() {
            match token {
                Err(()) | Ok(Token::Cr) => return Err(lex.span().start),
                Ok(Token::Lit) => {
                    buf.update(lex.span(), value);
                    if state == State::ExpectTarget {
                        state = State::ContinueTargetOrColon;
                    }
                }
                Ok(escape @ (Token::EscapeColon | Token::EscapeBackslash | Token::EscapeSpace)) => {
                    let literal = match escape {
                        Token::EscapeColon => ':',
                        Token::EscapeBackslash => '\\',
                        // `EscapeSpace` matches backslash-space and backslash-tab;
                        // keep whichever character was actually escaped.
                        _ if lex.slice().ends_with('\t') => '\t',
                        _ => ' ',
                    };
                    buf.push(literal, value);
                    // An escaped character starts a target just like a literal
                    // does; otherwise a target consisting only of escapes would
                    // be rejected at the colon or silently dropped.
                    if state == State::ExpectTarget {
                        state = State::ContinueTargetOrColon;
                    }
                }
                Ok(Token::Colon) => match state {
                    State::ExpectTarget => return Err(lex.span().start),
                    State::ContinueTargetOrColon => {
                        if !buf.is_empty() {
                            target.push(buf.take_into_cow(value));
                        }
                        debug_assert!(
                            !target.is_empty(),
                            "target should be non empty to enter ContinueTargetOrColon state"
                        );
                        state = State::ExpectDeps;
                    }
                    // After the separator a colon is ordinary path text.
                    State::ExpectDeps => buf.update(lex.span(), value),
                },
                Ok(separator @ (Token::Space | Token::EscapeNewline | Token::Newline)) => {
                    match state {
                        State::ExpectTarget => {}
                        State::ContinueTargetOrColon => {
                            // Any separator, including a plain newline, ends the
                            // current target. Without the flush the next literal
                            // would extend the buffered span across the newline.
                            if !buf.is_empty() {
                                target.push(buf.take_into_cow(value));
                            }
                        }
                        State::ExpectDeps => {
                            if !buf.is_empty() {
                                deps.push(buf.take_into_cow(value));
                            }
                            // Only an unescaped newline terminates the rule.
                            if separator == Token::Newline {
                                add_rule(&mut rules, &mut target, &mut deps);
                                state = State::ExpectTarget;
                            }
                        }
                    }
                }
            }
        }

        // End of input.
        match state {
            State::ExpectTarget => {}
            // Target text without a `:` is not a rule.
            State::ContinueTargetOrColon => return Err(lex.span().start),
            State::ExpectDeps => {
                if !buf.is_empty() {
                    deps.push(buf.take_into_cow(value));
                }
                add_rule(&mut rules, &mut target, &mut deps);
            }
        }

        Ok(Self { rules })
    }
}
/// Accumulates the text of the path (target or dependency) being parsed.
///
/// The text is kept as a span of the source for as long as it is one
/// contiguous slice, so it can be borrowed; it is turned into an owned
/// `String` only once that is no longer possible (for example after an escape
/// sequence has been replaced by its literal character).
enum Buf {
    /// Nothing accumulated yet.
    None,
    /// The text is exactly this byte range of the source.
    Span(Span),
    /// The text had to be copied and is owned here.
    String(String),
}

impl Buf {
    /// Returns `true` when no text has been accumulated.
    fn is_empty(&self) -> bool {
        match self {
            Buf::None => true,
            Buf::Span(range) => range.start == range.end,
            Buf::String(s) => s.is_empty(),
        }
    }

    /// Extracts the accumulated text, leaving the buffer as `Buf::None`.
    fn take_into_cow<'a>(&mut self, source: &'a str) -> Cow<'a, str> {
        std::mem::replace(self, Buf::None).into_cow(source)
    }

    /// Converts the buffer into its text, borrowing from `source` when the
    /// buffer is still a span.
    ///
    /// # Panics
    ///
    /// Panics if a stored span is not a valid range of `source`.
    fn into_cow(self, source: &str) -> Cow<'_, str> {
        match self {
            Self::None => Cow::Borrowed(""),
            Self::Span(span) => Cow::Borrowed(&source[span]),
            Self::String(string) => Cow::Owned(string),
        }
    }

    /// Appends the text at `new_span` of `source` to the buffer.
    ///
    /// The buffer stays a borrowed span only while `new_span` starts exactly
    /// where the current span ends; otherwise both pieces are copied into an
    /// owned string so that source text lying between them is not included.
    ///
    /// # Panics
    ///
    /// Panics if a span is not a valid range of `source`.
    fn update(&mut self, new_span: Span, source: &str) {
        match self {
            Self::None => *self = Self::Span(new_span),
            Self::Span(span) if span.end == new_span.start => span.end = new_span.end,
            Self::Span(span) => {
                let mut joined = String::from(&source[span.clone()]);
                joined.push_str(&source[new_span]);
                *self = Self::String(joined);
            }
            Self::String(string) => string.push_str(&source[new_span]),
        }
    }

    /// Appends the single character `c`, which does not appear verbatim in the
    /// source at this position, forcing the buffer into owned form.
    ///
    /// # Panics
    ///
    /// Panics if a stored span is not a valid range of `source`.
    fn push(&mut self, c: char, source: &str) {
        match self {
            Self::None => *self = Self::String(String::from(c)),
            Self::Span(span) => {
                let prefix = &source[span.clone()];
                let mut owned = String::with_capacity(prefix.len() + c.len_utf8());
                owned.push_str(prefix);
                owned.push(c);
                *self = Self::String(owned);
            }
            Self::String(string) => string.push(c),
        }
    }
}
/// Turns the targets and dependencies collected for one rule into entries of
/// `rules`.
///
/// One `(target, deps)` pair is appended per target, in the order the targets
/// were collected. Every target receives the full dependency list; the list is
/// cloned for all but the last target, which takes ownership of it. On return
/// `targets` and `deps` are both empty.
///
/// If `targets` is empty nothing is appended and `deps` is left untouched.
fn add_rule<'a>(
    rules: &mut Vec<(Cow<'a, str>, Vec<Cow<'a, str>>)>,
    targets: &mut Vec<Cow<'a, str>>,
    deps: &mut Vec<Cow<'a, str>>,
) {
    // Set the last target aside so it can take `deps` by move instead of by clone.
    let last = match targets.pop() {
        Some(target) => target,
        None => return,
    };
    rules.reserve(targets.len() + 1);
    // Drain from the front so rules keep the order in which targets were listed.
    for target in targets.drain(..) {
        rules.push((target, deps.clone()));
    }
    rules.push((last, std::mem::take(deps)));
}
/// Lexical tokens of a depfile, recognised by the `logos`-derived lexer.
#[derive(Logos, Debug, PartialEq)]
enum Token {
/// A backslash immediately followed by a line break (LF, optionally preceded by CR).
#[regex(r"\\\r?\n")]
EscapeNewline,
/// A backslash followed by a single space or tab.
#[regex(r"\\[ \t]")]
EscapeSpace,
/// Two consecutive backslashes.
#[token(r"\\")]
EscapeBackslash,
/// A backslash followed by a colon.
#[token(r"\:")]
EscapeColon,
/// A line break: LF, optionally preceded by CR.
#[regex(r"\r?\n")]
Newline,
/// A carriage return character on its own; the parser in this file rejects it.
#[token("\r")]
Cr,
/// A single space or tab.
#[regex(r"[\t ]")]
Space,
/// An unescaped colon.
#[token(r":")]
Colon,
/// One or more characters other than backslash, CR, LF, space, tab and colon.
#[regex(r"[^\\\r\n \t:]+")]
Lit,
}
// Unit tests for the lexer (`Token`) and for parsing through `crate::parse`.
#[cfg(test)]
mod tests {
use super::*;
// Checks token kinds, byte spans and slices produced by the lexer.
#[test]
fn test_token() {
// Empty input yields no tokens.
let mut lex = Token::lexer("");
assert_eq!(lex.next(), None);
// A plain rule: literals separated by a colon and single spaces.
let mut lex = Token::lexer("target: dep1 dep2 dep3");
assert_eq!(lex.next(), Some(Ok(Token::Lit)));
assert_eq!(lex.span(), 0..6);
assert_eq!(lex.slice(), "target");
assert_eq!(lex.next(), Some(Ok(Token::Colon)));
assert_eq!(lex.span(), 6..7);
assert_eq!(lex.next(), Some(Ok(Token::Space)));
assert_eq!(lex.span(), 7..8);
assert_eq!(lex.next(), Some(Ok(Token::Lit)));
assert_eq!(lex.span(), 8..12);
assert_eq!(lex.slice(), "dep1");
assert_eq!(lex.next(), Some(Ok(Token::Space)));
assert_eq!(lex.span(), 12..13);
assert_eq!(lex.next(), Some(Ok(Token::Lit)));
assert_eq!(lex.span(), 13..17);
assert_eq!(lex.slice(), "dep2");
assert_eq!(lex.next(), Some(Ok(Token::Space)));
assert_eq!(lex.span(), 17..18);
assert_eq!(lex.next(), Some(Ok(Token::Lit)));
assert_eq!(lex.span(), 18..22);
assert_eq!(lex.slice(), "dep3");
assert_eq!(lex.next(), None);
// Escapes: escaped space, escaped colon, and escaped newlines in both LF and CRLF form.
let mut lex = Token::lexer("tar\\ get: \\:dep1 \\\n dep2\\\r\n dep3");
assert_eq!(lex.next(), Some(Ok(Token::Lit)));
assert_eq!(lex.span(), 0..3);
assert_eq!(lex.slice(), "tar");
assert_eq!(lex.next(), Some(Ok(Token::EscapeSpace)));
assert_eq!(lex.span(), 3..5);
assert_eq!(lex.next(), Some(Ok(Token::Lit)));
assert_eq!(lex.span(), 5..8);
assert_eq!(lex.slice(), "get");
assert_eq!(lex.next(), Some(Ok(Token::Colon)));
assert_eq!(lex.span(), 8..9);
assert_eq!(lex.next(), Some(Ok(Token::Space)));
assert_eq!(lex.span(), 9..10);
assert_eq!(lex.next(), Some(Ok(Token::EscapeColon)));
assert_eq!(lex.span(), 10..12);
assert_eq!(lex.next(), Some(Ok(Token::Lit)));
assert_eq!(lex.span(), 12..16);
assert_eq!(lex.slice(), "dep1");
assert_eq!(lex.next(), Some(Ok(Token::Space)));
assert_eq!(lex.span(), 16..17);
// Backslash + LF is a two-byte token.
assert_eq!(lex.next(), Some(Ok(Token::EscapeNewline)));
assert_eq!(lex.span(), 17..19);
assert_eq!(lex.next(), Some(Ok(Token::Space)));
assert_eq!(lex.span(), 19..20);
assert_eq!(lex.next(), Some(Ok(Token::Lit)));
assert_eq!(lex.span(), 20..24);
assert_eq!(lex.slice(), "dep2");
// Backslash + CRLF is a three-byte token.
assert_eq!(lex.next(), Some(Ok(Token::EscapeNewline)));
assert_eq!(lex.span(), 24..27);
assert_eq!(lex.next(), Some(Ok(Token::Space)));
assert_eq!(lex.span(), 27..28);
assert_eq!(lex.next(), Some(Ok(Token::Lit)));
assert_eq!(lex.span(), 28..32);
assert_eq!(lex.slice(), "dep3");
assert_eq!(lex.next(), None);
}
// Empty input and input made only of spaces/newlines parse to an empty depfile.
#[test]
fn test_parse_empty() {
let df = crate::parse("").unwrap();
assert!(df.is_empty());
let df = crate::parse(" ").unwrap();
assert!(df.is_empty());
let df = crate::parse("\n").unwrap();
assert!(df.is_empty());
let df = crate::parse("\n\n \n").unwrap();
assert!(df.is_empty());
}
// Builds the value the tests compare against `find`: `Some` of a slice made
// from the given literals (each converted with `.into()`), or of an empty slice.
macro_rules! deps {
() => {
Some(vec![].as_ref())
};
($($x:literal),* $(,)?) => {
Some(vec![$( $x.into() ),*].as_ref())
};
}
// Rules with a single target: with and without dependencies, with escapes in
// the target and in the dependencies, and with escaped-newline continuations.
#[test]
fn test_parse_single_target() {
let df = crate::parse("target:").unwrap();
assert_eq!(df.len(), 1);
assert_eq!(df.find("target"), deps![]);
let df = crate::parse("tar\\ get:").unwrap();
assert_eq!(df.len(), 1);
assert_eq!(df.find("tar get"), deps![]);
let df = crate::parse("tar\\:get:").unwrap();
assert_eq!(df.len(), 1);
assert_eq!(df.find("tar:get"), deps![]);
// Looking up a target that was not declared yields `None`.
assert_eq!(df.find("foo"), None);
let df = crate::parse("target:dep").unwrap();
assert_eq!(df.len(), 1);
assert_eq!(df.find("target"), deps!["dep"]);
let df = crate::parse("tar\\ get:dep").unwrap();
assert_eq!(df.len(), 1);
assert_eq!(df.find("tar get"), deps!["dep"]);
let df = crate::parse("tar\\:get:dep").unwrap();
assert_eq!(df.len(), 1);
assert_eq!(df.find("tar:get"), deps!["dep"]);
let df = crate::parse("target: dep").unwrap();
assert_eq!(df.len(), 1);
assert_eq!(df.find("target"), deps!["dep"]);
let df = crate::parse("tar\\ get: dep dep2.c").unwrap();
assert_eq!(df.len(), 1);
assert_eq!(df.find("tar get"), deps!["dep", "dep2.c"]);
// An escaped backslash followed by a plain space: the space still separates dependencies.
let df = crate::parse("tar\\:get: dep dep\\\\ 2.c").unwrap();
assert_eq!(df.len(), 1);
assert_eq!(df.find("tar:get"), deps!["dep", "dep\\", "2.c"]);
// Escaped newlines continue the dependency list on the next line.
let df = crate::parse("t\\ ar\\:get: dep \\\n dep2 \\\n 2.c").unwrap();
assert_eq!(df.len(), 1);
assert_eq!(df.find("t ar:get"), deps!["dep", "dep2", "2.c"]);
let df = crate::parse(r"my\ project\ with\:files.o: C\:\\path\ with\ spaces\\file\:name.c")
.unwrap();
assert_eq!(df.len(), 1);
assert_eq!(
df.find("my project with:files.o"),
deps![r"C:\path with spaces\file:name.c"]
);
}
// Forward-slash paths, escaped-backslash paths, and drive-letter style paths
// whose colon appears after the rule separator.
#[test]
fn test_parse_path_styles() {
let df = crate::parse(r"build/output/main.o: src/core/main.c include/header.h").unwrap();
assert_eq!(df.len(), 1);
assert_eq!(
df.find("build/output/main.o"),
deps![r"src/core/main.c", r"include/header.h"]
);
let df =
crate::parse(r"build\\output\\main.o: src\\core\\main.c include\\header.h").unwrap();
assert_eq!(df.len(), 1);
assert_eq!(
df.find(r"build\output\main.o"),
deps![r"src\core\main.c", r"include\header.h"]
);
// Unescaped colons inside dependencies are kept as part of the path.
let df = crate::parse(r"target.o: C:/Users/Me/documents/file.c D:\\Project/Module\\sub.c")
.unwrap();
assert_eq!(df.len(), 1);
assert_eq!(
df.find(r"target.o"),
deps![r"C:/Users/Me/documents/file.c", r"D:\Project/Module\sub.c"]
);
}
// Non-ASCII (multi-byte UTF-8) characters in targets and dependencies.
#[test]
fn test_parse_non_ascii() {
let df = crate::parse(r"prôjet/débût/файл.o: src/main.c").unwrap();
assert_eq!(df.len(), 1);
assert_eq!(df.find("prôjet/débût/файл.o"), deps![r"src/main.c"]);
let df = crate::parse(
r"répertoire\ de\ travail/fichier\ final.o: dossier\ source/你好module spécial.c",
)
.unwrap();
assert_eq!(df.len(), 1);
assert_eq!(
df.find("répertoire de travail/fichier final.o"),
deps![r"dossier source/你好module", "spécial.c"]
);
}
// Several targets on one line, and several rules separated by newlines.
#[test]
fn test_multiple_targets() {
// Every target of a multi-target rule gets the full dependency list.
let df = crate::parse(r"tar\ get1.o target2.o: dep1.c dep2.c").unwrap();
assert_eq!(df.len(), 2);
assert_eq!(df.find("tar get1.o"), deps![r"dep1.c", "dep2.c"]);
assert_eq!(df.find("target2.o"), deps![r"dep1.c", "dep2.c"]);
// Includes a space before the colon and an escaped newline between a target and its colon.
let df = crate::parse(
r"targetA.o : depA1.c depA2.c
targetB.o\
: depB1.c
targetC.o: depC1.c depC2.c depC3.c",
)
.unwrap();
assert_eq!(df.len(), 3);
assert_eq!(df.find("targetA.o"), deps![r"depA1.c", "depA2.c"]);
assert_eq!(df.find("targetB.o"), deps![r"depB1.c"]);
assert_eq!(
df.find("targetC.o"),
deps![r"depC1.c", "depC2.c", "depC3.c"]
);
// Mixes a continued dependency list with a multi-target rule.
let df = crate::parse(
r"a: b
c: d \
e
f g: d c",
)
.unwrap();
assert_eq!(df.len(), 4);
assert_eq!(df.find("a"), deps!["b"]);
assert_eq!(df.find("c"), deps!["d", "e"]);
assert_eq!(df.find("f"), deps!["d", "c"]);
assert_eq!(df.find("g"), deps!["d", "c"]);
}
// Malformed input; the error value is compared as an offset into the input.
#[test]
fn test_invalid() {
// No colon at all: the error is reported at the end of the input.
let df = crate::parse("target.0 depc");
assert_eq!(df, Err(13));
// A colon after the separator is accepted and becomes part of the dependency.
let df = crate::parse("target: :invalid").unwrap();
assert_eq!(df.len(), 1);
assert_eq!(df.find("target"), deps![":invalid"]);
// A trailing lone backslash is rejected at its own offset.
let df = crate::parse("a:b\\");
assert_eq!(df, Err(3));
// A colon with no preceding target is rejected.
let df = crate::parse(":b");
assert_eq!(df, Err(0));
}
}