use futures::FutureExt;
use futures::future::BoxFuture;
use indexmap::IndexMap;
use itertools::EitherOrBoth;
use itertools::Itertools;
use tokio::fs;
use tokio::io::AsyncBufReadExt;
use tokio::io::BufReader;
use wdl_analysis::stdlib::STDLIB as ANALYSIS_STDLIB;
use wdl_analysis::types::PrimitiveType;
use wdl_ast::Diagnostic;
use wdl_grammar::lexer::v1::is_ident;
use super::CallContext;
use super::Callback;
use super::Function;
use super::Signature;
use crate::Array;
use crate::Object;
use crate::PrimitiveValue;
use crate::Value;
use crate::diagnostics::function_call_failed;
use crate::stdlib::download_file;
const FUNCTION_NAME: &str = "read_objects";
fn read_objects(context: CallContext<'_>) -> BoxFuture<'_, Result<Value, Diagnostic>> {
async move {
debug_assert!(context.arguments.len() == 1);
debug_assert!(context.return_type_eq(ANALYSIS_STDLIB.array_object_type().clone()));
let path = context
.coerce_argument(0, PrimitiveType::File)
.unwrap_file();
let file_path = download_file(context.transferer(), context.base_dir(), &path)
.await
.map_err(|e| function_call_failed(FUNCTION_NAME, e, context.arguments[0].span))?;
let read_error = |e: std::io::Error| {
function_call_failed(
FUNCTION_NAME,
format!(
"failed to read file `{path}`: {e}",
path = file_path.display()
),
context.call_site,
)
};
let file = fs::File::open(&file_path).await.map_err(read_error)?;
let mut lines = BufReader::new(file).lines();
let names = match lines.next_line().await.map_err(read_error)? {
Some(line) => line,
None => {
return Ok(Array::new_unchecked(
ANALYSIS_STDLIB.array_object_type().clone(),
Vec::new(),
)
.into());
}
};
for name in names.split('\t') {
if !is_ident(name) {
return Err(function_call_failed(
FUNCTION_NAME,
format!(
"line 1 of file `{path}` contains invalid column name `{name}`: column \
name must be a valid WDL identifier"
),
context.call_site,
));
}
}
let mut objects = Vec::new();
let mut i = 2;
while let Some(line) = lines.next_line().await.map_err(read_error)? {
let mut members = IndexMap::new();
for e in names.split('\t').zip_longest(line.split('\t')) {
match e {
EitherOrBoth::Both(name, value) => {
if members
.insert(name.to_string(), PrimitiveValue::new_string(value).into())
.is_some()
{
return Err(function_call_failed(
FUNCTION_NAME,
format!(
"line 1 of file `{path}` contains duplicate column name \
`{name}`"
),
context.call_site,
));
}
}
EitherOrBoth::Left(_) | EitherOrBoth::Right(_) => {
return Err(function_call_failed(
FUNCTION_NAME,
format!(
"line {i} of file `{path}` does not contain the expected number \
of columns"
),
context.call_site,
));
}
}
}
objects.push(Object::new(members).into());
i += 1;
}
Ok(Array::new_unchecked(ANALYSIS_STDLIB.array_object_type().clone(), objects).into())
}
.boxed()
}
pub const fn descriptor() -> Function {
Function::new(
const {
&[Signature::new(
"(file: File) -> Array[Object]",
Callback::Async(read_objects),
)]
},
)
}
#[cfg(test)]
mod test {
use pretty_assertions::assert_eq;
use wdl_ast::version::V1;
use crate::v1::test::TestEnv;
use crate::v1::test::eval_v1_expr;
#[tokio::test]
async fn read_objects() {
let env = TestEnv::default();
env.write_file("empty.tsv", "");
env.write_file(
"objects.tsv",
"k0\tk1\tk2\na0\ta1\ta2\nb0\tb1\tb2\nc0\tc1\tc2\n",
);
env.write_file("only-header.tsv", "foo\tbar\n");
env.write_file("too-few-columns.tsv", "foo\tbar\nbaz\n");
env.write_file("too-many-columns.tsv", "foo\tbar\nbaz\tqux\twrong\n");
env.write_file("duplicate.tsv", "foo\tbar\tfoo\nbaz\tqux\tfoo\n");
env.write_file("invalid-name.tsv", "foo\tbar-wrong\tfoo\nbaz\tqux\tfoo\n");
let value = eval_v1_expr(&env, V1::Two, "read_objects('empty.tsv')")
.await
.unwrap();
assert!(value.unwrap_array().is_empty());
let value = eval_v1_expr(&env, V1::Two, "read_objects('only-header.tsv')")
.await
.unwrap();
assert!(value.unwrap_array().is_empty());
let diagnostic = eval_v1_expr(&env, V1::Two, "read_objects('too-many-columns.tsv')")
.await
.unwrap_err();
assert_eq!(
diagnostic.message(),
"call to function `read_objects` failed: line 2 of file `too-many-columns.tsv` does \
not contain the expected number of columns"
);
let diagnostic = eval_v1_expr(&env, V1::Two, "read_objects('too-few-columns.tsv')")
.await
.unwrap_err();
assert_eq!(
diagnostic.message(),
"call to function `read_objects` failed: line 2 of file `too-few-columns.tsv` does \
not contain the expected number of columns"
);
let diagnostic = eval_v1_expr(&env, V1::Two, "read_objects('duplicate.tsv')")
.await
.unwrap_err();
assert_eq!(
diagnostic.message(),
"call to function `read_objects` failed: line 1 of file `duplicate.tsv` contains \
duplicate column name `foo`"
);
let diagnostic = eval_v1_expr(&env, V1::Two, "read_objects('invalid-name.tsv')")
.await
.unwrap_err();
assert_eq!(
diagnostic.message(),
"call to function `read_objects` failed: line 1 of file `invalid-name.tsv` contains \
invalid column name `bar-wrong`: column name must be a valid WDL identifier"
);
for file in ["objects.tsv", "https://example.com/objects.tsv"] {
let value = eval_v1_expr(&env, V1::Two, &format!("read_objects('{file}')"))
.await
.unwrap();
assert_eq!(
value.unwrap_array().to_string(),
r#"[object {k0: "a0", k1: "a1", k2: "a2"}, object {k0: "b0", k1: "b1", k2: "b2"}, object {k0: "c0", k1: "c1", k2: "c2"}]"#
);
}
}
}