use std::borrow::Cow;
use std::path::Path;
use anyhow::Context;
use anyhow::Result;
use anyhow::bail;
use futures::FutureExt;
use futures::future::BoxFuture;
use tokio::fs;
use url::Url;
use wdl_analysis::types::PrimitiveType;
use wdl_ast::Diagnostic;
use super::CallContext;
use super::Callback;
use super::Function;
use super::Signature;
use crate::CompoundValue;
use crate::EvaluationPath;
use crate::EvaluationPathKind;
use crate::HiddenValue;
use crate::PrimitiveValue;
use crate::StorageUnit;
use crate::Value;
use crate::diagnostics::function_call_failed;
use crate::http::Transferer;
use crate::is_file_url;
use crate::is_supported_url;
use crate::stdlib::ensure_local_path;
/// The name of the function implemented in this file; it is attached to the
/// diagnostics produced when a call fails.
const FUNCTION_NAME: &str = "size";
fn size(context: CallContext<'_>) -> BoxFuture<'_, Result<Value, Diagnostic>> {
async move {
debug_assert!(!context.arguments.is_empty() && context.arguments.len() < 3);
debug_assert!(context.return_type_eq(PrimitiveType::Float));
let unit = if context.arguments.len() == 2 {
let unit = context
.coerce_argument(1, PrimitiveType::String)
.unwrap_string();
unit.parse().map_err(|_| {
function_call_failed(
FUNCTION_NAME,
format!(
"invalid storage unit `{unit}`: supported units are `B`, `KB`, `K`, `MB`, \
`M`, `GB`, `G`, `TB`, `T`, `KiB`, `Ki`, `MiB`, `Mi`, `GiB`, `Gi`, `TiB`, \
and `Ti`",
),
context.arguments[1].span,
)
})?
} else {
StorageUnit::default()
};
let value = match context.arguments[0].value.as_string() {
Some(_) => {
let path = context
.coerce_argument(0, PrimitiveType::File)
.unwrap_file();
if !is_file_url(&path.0) && is_supported_url(&path.0) {
PrimitiveValue::File(path).into()
} else {
let local_path =
ensure_local_path(context.base_dir(), &path.0).map_err(|e| {
function_call_failed(FUNCTION_NAME, format!("{e:?}"), context.call_site)
})?;
let metadata = fs::metadata(&local_path)
.await
.with_context(|| {
format!(
"failed to read metadata for path `{path}`",
path = local_path.display()
)
})
.map_err(|e| {
function_call_failed(FUNCTION_NAME, format!("{e:?}"), context.call_site)
})?;
if metadata.is_dir() {
PrimitiveValue::Directory(path).into()
} else {
PrimitiveValue::File(path).into()
}
}
}
_ => context.arguments[0].value.clone(),
};
calculate_disk_size(context.transferer(), &value, unit, context.base_dir())
.await
.map_err(|e| function_call_failed(FUNCTION_NAME, format!("{e:?}"), context.call_site))
.map(Into::into)
}
.boxed()
}
/// Returns the length, in bytes, of the file at the given local path.
///
/// # Errors
///
/// Fails when the metadata for the path cannot be read or when the path does
/// not refer to a file.
async fn file_size(path: impl AsRef<Path>) -> Result<u64> {
    let file = path.as_ref();
    let metadata = fs::metadata(file).await.with_context(|| {
        format!(
            "failed to read metadata for file `{path}`",
            path = file.display()
        )
    })?;

    if metadata.is_file() {
        Ok(metadata.len())
    } else {
        bail!("path `{path}` is not a file", path = file.display())
    }
}
/// Asks the transferer for the size of the resource at the given URL.
///
/// # Errors
///
/// Fails when the transferer cannot determine the size, or when it reports
/// that the size is unknown.
async fn resource_size(transferer: &dyn Transferer, url: &Url) -> Result<u64> {
    // The outer layer is the request outcome; the inner option is the size.
    let content_length = transferer
        .size(url)
        .await
        .with_context(|| format!("failed to determine content length of URL `{url}`"))?;

    content_length.with_context(|| format!("URL `{url}` has an unknown content length"))
}
async fn file_path_size(
transferer: &dyn Transferer,
base_dir: &EvaluationPath,
path: &str,
) -> Result<u64> {
if is_supported_url(path)
&& let Ok(url) = path.parse()
{
return resource_size(transferer, &url).await;
}
if Path::new(path).is_absolute() {
return file_size(path).await;
}
match base_dir.join(path)?.kind() {
EvaluationPathKind::Local(path) => file_size(path).await,
EvaluationPathKind::Remote(url) => resource_size(transferer, url).await,
}
}
/// Calculates the disk size of a value, expressed in the given unit.
///
/// `None` measures as `0.0`. Primitive and compound values are delegated to
/// [`primitive_disk_size`] and [`compound_disk_size`]. Every other kind of
/// value has no size and produces an error.
///
/// The future is boxed because compound values call back into this function
/// for their elements.
fn calculate_disk_size<'a>(
    transferer: &'a dyn Transferer,
    value: &'a Value,
    unit: StorageUnit,
    base_dir: &'a EvaluationPath,
) -> BoxFuture<'a, Result<f64>> {
    async move {
        match value {
            Value::None(_) => Ok(0.0),
            Value::Primitive(primitive) => {
                primitive_disk_size(transferer, primitive, unit, base_dir).await
            }
            Value::Compound(compound) => {
                compound_disk_size(transferer, compound, unit, base_dir).await
            }
            // None of the hidden values can be measured; each gets its own
            // message.
            Value::Hidden(hidden) => match hidden {
                HiddenValue::Hints(_) => {
                    bail!("the size of a hints value cannot be calculated")
                }
                HiddenValue::Input(_) => {
                    bail!("the size of an input value cannot be calculated")
                }
                HiddenValue::Output(_) => {
                    bail!("the size of an output value cannot be calculated")
                }
                HiddenValue::TaskPreEvaluation(_) | HiddenValue::TaskPostEvaluation(_) => {
                    bail!("the size of a task variable cannot be calculated")
                }
                HiddenValue::PreviousTaskData(_) => {
                    bail!("the size of a task.previous value cannot be calculated")
                }
            },
            Value::Call(_) => bail!("the size of a call value cannot be calculated"),
            Value::TypeNameRef(_) => {
                bail!("the size of a type name reference cannot be calculated")
            }
        }
    }
    .boxed()
}
/// Calculates the disk size of a primitive value, expressed in the given
/// unit.
///
/// Files are measured through [`file_path_size`]. Directories are resolved to
/// a local path and measured through [`calculate_directory_size`]. Every
/// other primitive measures as `0.0`.
async fn primitive_disk_size(
    transferer: &dyn Transferer,
    value: &PrimitiveValue,
    unit: StorageUnit,
    base_dir: &EvaluationPath,
) -> Result<f64> {
    let size = match value {
        PrimitiveValue::File(file) => {
            let bytes = file_path_size(transferer, base_dir, file.as_str()).await?;
            unit.units(bytes)
        }
        PrimitiveValue::Directory(directory) => {
            let local = ensure_local_path(base_dir, directory.as_str())?;
            calculate_directory_size(&local, unit).await?
        }
        _ => 0.0,
    };

    Ok(size)
}
/// Calculates the disk size of a compound value, expressed in the given unit,
/// by summing the sizes of the values it contains.
///
/// Map keys are measured as primitives alongside their values. Object and
/// struct member names are not measured. Enum variants have no size and
/// produce an error.
async fn compound_disk_size(
    transferer: &dyn Transferer,
    value: &CompoundValue,
    unit: StorageUnit,
    base_dir: &EvaluationPath,
) -> Result<f64> {
    // Running total for the arms that iterate over a collection.
    let mut total = 0.0;

    match value {
        CompoundValue::Pair(pair) => {
            let left = calculate_disk_size(transferer, pair.left(), unit, base_dir).await?;
            let right = calculate_disk_size(transferer, pair.right(), unit, base_dir).await?;
            return Ok(left + right);
        }
        CompoundValue::Array(array) => {
            for element in array.as_slice() {
                total += calculate_disk_size(transferer, element, unit, base_dir).await?;
            }
        }
        CompoundValue::Map(map) => {
            for (key, element) in map.iter() {
                let key_size = primitive_disk_size(transferer, key, unit, base_dir).await?;
                let element_size =
                    calculate_disk_size(transferer, element, unit, base_dir).await?;
                total += key_size + element_size;
            }
        }
        CompoundValue::Object(object) => {
            for (_, member) in object.iter() {
                total += calculate_disk_size(transferer, member, unit, base_dir).await?;
            }
        }
        CompoundValue::Struct(structure) => {
            for (_, member) in structure.iter() {
                total += calculate_disk_size(transferer, member, unit, base_dir).await?;
            }
        }
        CompoundValue::EnumVariant(_) => {
            bail!("the size of an enum variant cannot be calculated")
        }
    }

    Ok(total)
}
/// Calculates the total size of everything beneath a local directory,
/// expressed in the given unit.
///
/// The directory tree is walked iteratively with an explicit work list. Each
/// entry that is not a directory contributes its metadata length, converted
/// to the unit, to the total.
///
/// # Errors
///
/// Fails when the path is not a directory, or when a directory or one of its
/// entries cannot be read.
async fn calculate_directory_size(path: &Path, unit: StorageUnit) -> Result<f64> {
    // `symlink_metadata` does not follow symbolic links, so a link to a
    // directory is rejected below rather than traversed.
    let root = fs::symlink_metadata(path).await.with_context(|| {
        format!(
            "failed to read metadata for directory `{path}`",
            path = path.display()
        )
    })?;

    if !root.is_dir() {
        bail!("path `{path}` is not a directory", path = path.display());
    }

    // Directories still to be visited, starting with the root itself.
    let mut pending: Vec<Cow<'_, Path>> = vec![Cow::Borrowed(path)];
    let mut total = 0.0;

    while let Some(current) = pending.pop() {
        let mut entries = fs::read_dir(&current).await.with_context(|| {
            format!(
                "failed to read entry of directory `{path}`",
                path = current.display()
            )
        })?;

        loop {
            let next = entries.next_entry().await.with_context(|| {
                format!(
                    "failed to read entry of directory `{path}`",
                    path = current.display()
                )
            })?;
            let Some(entry) = next else {
                break;
            };

            let metadata = entry.metadata().await.with_context(|| {
                format!(
                    "failed to read metadata for file `{path}`",
                    path = entry.path().display()
                )
            })?;

            if metadata.is_dir() {
                pending.push(Cow::Owned(entry.path()));
            } else {
                total += unit.units(metadata.len());
            }
        }
    }

    Ok(total)
}
pub const fn descriptor() -> Function {
Function::new(
const {
&[
Signature::new(
"(value: None, <unit: String>) -> Float",
Callback::Async(size),
),
Signature::new(
"(value: File?, <unit: String>) -> Float",
Callback::Async(size),
),
Signature::new(
"(value: String?, <unit: String>) -> Float",
Callback::Async(size),
),
Signature::new(
"(value: Directory?, <unit: String>) -> Float",
Callback::Async(size),
),
Signature::new(
"(value: X, <unit: String>) -> Float where `X`: any compound type that \
recursively contains a `File` or `Directory`",
Callback::Async(size),
),
]
},
)
}
#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;
    use wdl_ast::version::V1;

    use crate::PrimitiveValue;
    use crate::v1::test::TestEnv;
    use crate::v1::test::eval_v1_expr;

    /// Evaluates an expression with `V1::Two` that is expected to succeed and
    /// produce a float, returning that float.
    async fn eval_float(env: &TestEnv, source: &str) -> f64 {
        eval_v1_expr(env, V1::Two, source)
            .await
            .unwrap()
            .unwrap_float()
    }

    /// Exercises `size` with invalid units, a URL, a missing path, every
    /// supported unit, and primitive and compound values.
    #[tokio::test]
    async fn size() {
        let mut env = TestEnv::default();

        // Three files of 10, 20 and 30 bytes: 60 bytes in the base directory.
        for (name, contents) in [
            ("foo", "0123456789"),
            ("bar", "01234567890123456789"),
            ("baz", "012345678901234567890123456789"),
        ] {
            env.write_file(name, contents);
        }

        let bar_path = env.base_dir().join("bar").unwrap().unwrap_local();
        let bar_file = PrimitiveValue::new_file(bar_path.to_str().expect("should be UTF-8"));
        env.insert_name("file", bar_file);

        let base_directory = PrimitiveValue::new_directory(env.base_dir().to_string());
        env.insert_name("dir", base_directory);

        // An unknown unit is rejected with the full list of supported units.
        let diagnostic = eval_v1_expr(&env, V1::Two, "size('foo', 'invalid')")
            .await
            .unwrap_err();
        assert_eq!(
            diagnostic.message(),
            "call to function `size` failed: invalid storage unit `invalid`: supported units are \
             `B`, `KB`, `K`, `MB`, `M`, `GB`, `G`, `TB`, `T`, `KiB`, `Ki`, `MiB`, `Mi`, `GiB`, \
             `Gi`, `TiB`, and `Ti`"
        );

        // A remote URL.
        approx::assert_relative_eq!(
            eval_float(&env, "size('https://example.com/foo')").await,
            1234.0
        );

        // A path that does not exist fails while reading its metadata.
        let diagnostic = eval_v1_expr(&env, V1::Two, "size('does-not-exist', 'B')")
            .await
            .unwrap_err();
        assert!(
            diagnostic
                .message()
                .starts_with("call to function `size` failed: failed to read metadata for path")
        );

        // A string naming the base directory is measured as a directory.
        let source = format!("size('{path}', 'B')", path = env.base_dir());
        approx::assert_relative_eq!(eval_float(&env, &source).await, 60.0);

        // Every unit, for both the relative and the absolute path of `foo`.
        let foo_path = env.base_dir().join("foo").unwrap().unwrap_local();
        for (expected, unit) in [
            (10.0, "B"),
            (0.01, "K"),
            (0.01, "KB"),
            (0.00001, "M"),
            (0.00001, "MB"),
            (0.00000001, "G"),
            (0.00000001, "GB"),
            (0.00000000001, "T"),
            (0.00000000001, "TB"),
            (0.009765625, "Ki"),
            (0.009765625, "KiB"),
            (0.0000095367431640625, "Mi"),
            (0.0000095367431640625, "MiB"),
            (0.000000009313225746154785, "Gi"),
            (0.000000009313225746154785, "GiB"),
            (0.000000000009094947017729282, "Ti"),
            (0.000000000009094947017729282, "TiB"),
        ] {
            for source in [
                format!("size('foo', '{unit}')"),
                format!(
                    "size('{path}', '{unit}')",
                    path = foo_path.display()
                ),
            ] {
                approx::assert_relative_eq!(eval_float(&env, &source).await, expected);
            }
        }

        // `None`, named primitives, and compound values built from them.
        for (source, expected) in [
            ("size(None, 'B')", 0.0),
            ("size(file, 'B')", 20.0),
            ("size(dir, 'B')", 60.0),
            ("size((dir, dir), 'B')", 120.0),
            ("size([file, file, file], 'B')", 60.0),
            ("size({ 'a': file, 'b': file, 'c': file }, 'B')", 60.0),
            ("size(object { a: file, b: file, c: file }, 'B')", 60.0),
        ] {
            approx::assert_relative_eq!(eval_float(&env, source).await, expected);
        }
    }
}