use serde_json::json;
use std::collections::HashMap;
/// Regroups the per-field index entries of an execution result into
/// one record per `row_id`.
///
/// Every entry is bucketed first by its `row_id` and then by the name of the
/// field it was listed under. A field that collected exactly one value for a
/// row is stored as that bare value; a field that collected several values is
/// stored as a JSON array of them. Each record's `metadata` is left empty.
///
/// The intermediate grouping uses `HashMap`s, so the order of the returned
/// records is unspecified.
///
/// # Errors
///
/// This implementation has no failing path and always returns `Ok`.
fn convert_execution_result_to_records(
    execution_result: &datafold::transform::result_types::ExecutionResult,
) -> Result<
    Vec<datafold::fold_db_core::query::formatter::Record>,
    datafold::schema::types::SchemaError,
> {
    // row_id -> field name -> values collected for that field in that row.
    let mut grouped: HashMap<String, HashMap<String, Vec<serde_json::Value>>> = HashMap::new();
    for (field_name, entries) in &execution_result.index_entries {
        for entry in entries {
            grouped
                .entry(entry.row_id.clone())
                .or_default()
                .entry(field_name.clone())
                .or_default()
                .push(entry.value.clone());
        }
    }

    let records = grouped
        .into_values()
        .map(|fields_by_name| {
            let fields = fields_by_name
                .into_iter()
                .map(|(name, mut values)| {
                    // A lone value is unwrapped; anything else stays an array.
                    let value = match values.len() {
                        1 => values.swap_remove(0),
                        _ => serde_json::Value::Array(values),
                    };
                    (name, value)
                })
                .collect();
            datafold::fold_db_core::query::formatter::Record {
                fields,
                metadata: HashMap::new(),
            }
        })
        .collect();

    Ok(records)
}
/// End-to-end check: parse the transform expressions of a declarative
/// `BlogPostWordIndex` schema, execute them over typed input for two blog
/// posts, convert the execution result to records, and sanity-check the
/// shape of those records.
#[test]
fn execute_engine_and_convert_to_records() {
    // Declarative HashRange schema; `word` is derived from the post content
    // through `split_by_word()`, the other fields are copied from `BlogPost`.
    let transform_schema_json = json!({
        "name": "BlogPostWordIndex",
        "schema_type": "HashRange",
        "key": {"hash_field": "word", "range_field": "publish_date"},
        "transform_fields": {
            "word": "BlogPost.content.split_by_word()",
            "publish_date": "BlogPost.publish_date",
            "author": "BlogPost.author",
            "title": "BlogPost.title"
        }
    });
    let transform_schema: datafold::schema::types::DeclarativeSchemaDefinition =
        serde_json::from_value(transform_schema_json).unwrap();

    // Resolve each field to its expression source via the schema's hash tables.
    let field_to_hash = transform_schema.get_field_to_hash_code();
    let hash_to_code = transform_schema.hash_to_code();
    let expressions: Vec<(String, String)> = field_to_hash
        .iter()
        .map(|(field, hash)| (field.clone(), hash_to_code.get(hash).unwrap().clone()))
        .collect();

    let parsed =
        datafold::transform::shared_utilities::parse_expressions_batch(&expressions).unwrap();
    let chains_map: HashMap<String, datafold::transform::chain_parser::ParsedChain> = parsed
        .iter()
        .map(|(field, chain)| (field.clone(), chain.clone()))
        .collect();

    type FV = datafold::schema::types::field::FieldValue;
    type KV = datafold::schema::types::key_value::KeyValue;

    // Typed engine input: one map per source field, holding a value for each
    // of the two blog posts (keyed by k1 / k2).
    let mut typed_input: HashMap<String, HashMap<KV, FV>> = HashMap::new();
    let k1 = KV::new(Some("h1".to_string()), Some("r1".to_string()));
    let k2 = KV::new(Some("h2".to_string()), Some("r2".to_string()));
    let mut insert_field = |name: &str, v1: serde_json::Value, v2: serde_json::Value| {
        let mut m: HashMap<KV, FV> = HashMap::new();
        m.insert(
            k1.clone(),
            FV {
                value: v1,
                atom_uuid: "a1".to_string(),
                source_file_name: None,
            },
        );
        m.insert(
            k2.clone(),
            FV {
                value: v2,
                atom_uuid: "a2".to_string(),
                source_file_name: None,
            },
        );
        typed_input.insert(name.to_string(), m);
    };
    insert_field(
        "BlogPost.content",
        json!("Rust empowers fearless concurrency"),
        json!("Tests validate iterator stacks"),
    );
    insert_field(
        "BlogPost.publish_date",
        json!("2024-12-31"),
        json!("2025-01-05"),
    );
    insert_field("BlogPost.author", json!("Carol"), json!("Dylan"));
    insert_field("BlogPost.title", json!("First"), json!("Second"));

    let exec = datafold::transform::iterator_stack_typed::adapter::execute_fields_typed(
        &chains_map,
        &typed_input,
    );
    let records = convert_execution_result_to_records(&exec).expect("convert ok");
    assert!(!records.is_empty());

    // Classify each record by whether it carries a `word` field.
    let mut word_records = 0;
    let mut other_records = 0;
    for record in &records {
        let fields = &record.fields;
        match fields.get("word") {
            Some(word_val) => {
                word_records += 1;
                assert!(word_val.is_string(), "word field should be a string");
            }
            None => {
                other_records += 1;
                assert!(
                    fields.contains_key("title")
                        || fields.contains_key("author")
                        || fields.contains_key("publish_date"),
                    "Record should contain expected fields: {:?}",
                    fields.keys().collect::<Vec<_>>()
                );
            }
        }
    }

    assert!(
        word_records > 0,
        "Should have word records from word splitting"
    );
    assert!(
        other_records > 0,
        "Should have parent records with other fields"
    );
    assert!(
        records.len() >= 8,
        "Should have at least 8 records (words from 2 blog posts)"
    );
}