use crate::{TestCli, TestDataset, TestSource};
use anyhow::anyhow;
use backoff::{retry, ExponentialBackoff};
use chrono::DateTime;
use pretty_assertions::assert_eq;
use reinfer_client::{AnnotatedComment, Comment, NewAnnotatedComment, NewComment};
/// Runs the comment lifecycle check against the `basic.jsonl` sample, uploading it with
/// `--allow-duplicates --yes`.
#[test]
fn test_comments_lifecycle_basic() {
    let sample = include_str!("./samples/basic.jsonl");
    let upload_flags = vec!["--allow-duplicates", "--yes"];
    check_comments_lifecycle(sample, upload_flags);
}
/// Runs the comment lifecycle check against the `labelling.jsonl` sample, uploading it with
/// `--allow-duplicates --yes`.
#[test]
fn test_comments_lifecycle_labellings() {
    let sample = include_str!("./samples/labelling.jsonl");
    let upload_flags = vec!["--allow-duplicates", "--yes"];
    check_comments_lifecycle(sample, upload_flags);
}
/// Runs the comment lifecycle check against the `legacy_labelling.jsonl` sample, uploading it
/// with `--allow-duplicates --yes`.
#[test]
fn test_comments_lifecycle_legacy_labelling() {
    let sample = include_str!("./samples/legacy_labelling.jsonl");
    let upload_flags = vec!["--allow-duplicates", "--yes"];
    check_comments_lifecycle(sample, upload_flags);
}
/// Runs the comment lifecycle check against the `moon_forms.jsonl` sample twice: first with
/// the default upload flags, then again with `--use-moon-forms` appended.
#[test]
fn test_comments_lifecycle_moon_forms() {
    let sample = include_str!("./samples/moon_forms.jsonl");
    // Order matters only in that the plain upload is exercised before the moon-forms one.
    let flag_sets = [
        vec!["--allow-duplicates", "--yes"],
        vec!["--allow-duplicates", "--yes", "--use-moon-forms"],
    ];
    for upload_flags in flag_sets {
        check_comments_lifecycle(sample, upload_flags);
    }
}
/// Runs the comment lifecycle check against the `audio.jsonl` sample, uploading it with
/// `--allow-duplicates --yes`.
#[test]
fn test_comments_lifecycle_audio() {
    let sample = include_str!("./samples/audio.jsonl");
    let upload_flags = vec!["--allow-duplicates", "--yes"];
    check_comments_lifecycle(sample, upload_flags);
}
/// Drives a create -> get -> delete round trip through the CLI for the given comments.
///
/// * `comments_str` - JSONL text; every line must deserialize to a [`NewAnnotatedComment`].
///   The same text is piped to `create comments` on stdin.
/// * `args` - extra flags appended after `create comments --source=<id>`.
///
/// Steps performed against a freshly created [`TestSource`]:
/// 1. upload all comments and expect no CLI output;
/// 2. list the comments and compare id, messages and timestamp with the input (by id order);
/// 3. fetch the first input comment on its own and additionally compare user properties;
/// 4. delete that single comment and expect the listing to shrink by one;
/// 5. delete every input comment id and expect the listing to be empty.
///
/// # Panics
///
/// Panics (failing the calling test) if the sample cannot be parsed, if it is empty, or if
/// any of the assertions above does not hold.
fn check_comments_lifecycle(comments_str: &str, args: Vec<&str>) {
    let annotated_comments: Vec<NewAnnotatedComment> = comments_str
        .lines()
        .map(serde_json::from_str)
        .collect::<Result<_, _>>()
        .expect("sample contains a line that is not a valid annotated comment");

    let cli = TestCli::get();
    let source = TestSource::new();
    // Built once and reused by every sub-command that takes the `--source=<id>` form.
    let source_flag = format!("--source={}", source.identifier());

    // Upload: fixed sub-command first, caller-supplied flags last.
    let output = cli.run_with_stdin(
        ["create", "comments", source_flag.as_str()]
            .into_iter()
            .chain(args)
            .collect::<Vec<&str>>(),
        comments_str.as_bytes(),
    );
    assert!(output.is_empty());

    // List everything back; one output line is expected per uploaded comment.
    let output = cli.run(["get", "comments", source.identifier()]);
    assert_eq!(output.lines().count(), annotated_comments.len());

    let mut output_comments: Vec<Comment> = output
        .lines()
        .map(|line| serde_json::from_str(line).expect("invalid comment"))
        .map(|annotated_comment: AnnotatedComment| annotated_comment.comment)
        .collect();
    output_comments.sort_by(|a, b| a.id.cmp(&b.id));

    // Borrow the inputs instead of cloning them; they are only sorted and compared.
    let mut input_comments: Vec<&NewComment> = annotated_comments
        .iter()
        .map(|annotated_comment| &annotated_comment.comment)
        .collect();
    input_comments.sort_by(|a, b| a.id.cmp(&b.id));

    // Both sides are sorted by id and have equal length (asserted above), so a pairwise
    // walk compares each uploaded comment with its fetched counterpart.
    for (input_comment, output_comment) in input_comments.iter().zip(output_comments.iter()) {
        assert_eq!(input_comment.id, output_comment.id);
        assert_eq!(input_comment.messages, output_comment.messages);
        assert_eq!(input_comment.timestamp, output_comment.timestamp);
    }

    // Fetch a single comment by id.
    let test_comment = &annotated_comments
        .first()
        .expect("sample must contain at least one comment")
        .comment;
    let output = cli.run([
        "get",
        "comment",
        source_flag.as_str(),
        test_comment.id.0.as_str(),
    ]);
    let fetched_comment: AnnotatedComment =
        serde_json::from_str(&output).expect("invalid annotated comment fetched");
    assert_eq!(test_comment.id, fetched_comment.comment.id);
    assert_eq!(test_comment.messages, fetched_comment.comment.messages);
    assert_eq!(test_comment.timestamp, fetched_comment.comment.timestamp);
    assert_eq!(
        test_comment.user_properties,
        fetched_comment.comment.user_properties
    );

    // Delete that single comment.
    let output = cli.run([
        "delete",
        "comments",
        source_flag.as_str(),
        test_comment.id.0.as_str(),
    ]);
    assert!(output.is_empty());

    let output = cli.run(["get", "comments", source.identifier()]);
    assert_eq!(output.lines().count(), annotated_comments.len() - 1);

    // Delete all comments by id. The list still includes the id removed above, exactly as
    // the ids appear in the sample.
    let mut delete_all_args = vec!["delete", "comments", "--source", source.identifier()];
    delete_all_args.extend(
        annotated_comments
            .iter()
            .map(|annotated_comment| annotated_comment.comment.id.0.as_str()),
    );
    let output = cli.run(&delete_all_args);
    assert!(output.is_empty());

    let output = cli.run(["get", "comments", source.identifier()]);
    assert!(output.is_empty());
}
/// Exercises `delete bulk` on a source attached to a dataset, in three stages.
///
/// After uploading `many.jsonl`, the test expects:
/// 1. a timestamp-bounded delete with `--include-annotated=false` to remove only the
///    comments without annotations whose timestamp lies within the (inclusive) range;
/// 2. an unbounded delete with `--include-annotated=false` to leave only the annotated
///    comments;
/// 3. an unbounded delete with `--include-annotated=true` to leave nothing.
///
/// Listings after each delete are polled via `get_comments_with_delay` until the expected
/// line count is reached.
#[test]
fn test_delete_comments_in_range() {
    let sample = include_str!("./samples/many.jsonl");
    let parsed: Vec<NewAnnotatedComment> = sample
        .lines()
        .map(serde_json::from_str)
        .collect::<Result<_, _>>()
        .unwrap();
    let total = parsed.len();
    let annotated_total = parsed
        .iter()
        .filter(|entry| entry.has_annotations())
        .count();

    let cli = TestCli::get();
    let source = TestSource::new();
    let source_flag = format!("--source={}", source.identifier());
    let dataset1 = TestDataset::new_args(&[&source_flag]);
    let dataset_flag = format!("--dataset={}", dataset1.identifier());

    // Upload every comment into the source, with annotations going to the dataset.
    let upload_output = cli.run_with_stdin(
        [
            "create",
            "comments",
            "--allow-duplicates",
            "--yes",
            &source_flag,
            &dataset_flag,
        ],
        sample.as_bytes(),
    );
    assert!(upload_output.is_empty());

    let uploaded_all = cli.run(["get", "comments", source.identifier()]);
    assert_eq!(uploaded_all.lines().count(), total);

    let uploaded_annotated = cli.run([
        "get",
        "comments",
        "--reviewed-only",
        "true",
        "--dataset",
        dataset1.identifier(),
        source.identifier(),
    ]);
    assert_eq!(uploaded_annotated.lines().count(), annotated_total);

    // The same listing command is polled after each of the three deletes below.
    let list_via_dataset = [
        "get",
        "comments",
        "--dataset",
        dataset1.identifier(),
        source.identifier(),
    ];

    // Stage 1: delete unannotated comments inside a timestamp range.
    let from_timestamp_str = "2020-01-03T00:00:00Z";
    let to_timestamp_str = "2020-02-01T00:00:00Z";
    let from_timestamp = DateTime::parse_from_rfc3339(from_timestamp_str).unwrap();
    let to_timestamp = DateTime::parse_from_rfc3339(to_timestamp_str).unwrap();
    cli.run([
        "delete",
        "bulk",
        "--source",
        source.identifier(),
        "--from-timestamp",
        from_timestamp_str,
        "--to-timestamp",
        to_timestamp_str,
        "--include-annotated=false",
    ]);

    // Locally count what the range delete is expected to have removed.
    let deleted_in_range = parsed
        .iter()
        .filter(|entry| {
            !entry.has_annotations()
                && entry.comment.timestamp <= to_timestamp
                && entry.comment.timestamp >= from_timestamp
        })
        .count();
    let expected_after_range = total - deleted_in_range;

    let after_deleting_range =
        get_comments_with_delay(cli, &list_via_dataset, expected_after_range);
    assert_eq!(after_deleting_range.lines().count(), expected_after_range);

    // Stage 2: delete every remaining unannotated comment.
    cli.run([
        "delete",
        "bulk",
        "--source",
        source.identifier(),
        "--include-annotated=false",
    ]);
    let after_deleting_unannotated =
        get_comments_with_delay(cli, &list_via_dataset, annotated_total);
    assert_eq!(after_deleting_unannotated.lines().count(), annotated_total);

    // Stage 3: delete everything, annotated comments included.
    cli.run([
        "delete",
        "bulk",
        &source_flag,
        "--include-annotated=true",
    ]);
    let after_deleting_all = get_comments_with_delay(cli, &list_via_dataset, 0);
    assert_eq!(after_deleting_all.lines().count(), 0);
}
fn get_comments_with_delay(cli: &TestCli, command: &[&str], expected_count: usize) -> String {
let run_command = || {
let result = cli.run(command);
let actual_count = result.lines().count();
if actual_count == expected_count {
Ok(result)
} else {
Err(backoff::Error::transient(anyhow!(
"Expected {} results got {}",
expected_count,
actual_count
)))
}
};
retry(ExponentialBackoff::default(), run_command).unwrap()
}