use std::sync::Arc;
use manifoldb_core::{CollectionId, Entity, Value};
use manifoldb_query::exec::row::{Row, Schema};
use manifoldb_query::exec::CollectionVectorProvider;
/// Collection-specific information a [`StorageScan`] uses to resolve vector
/// columns through a provider instead of reading them from entity properties.
pub struct CollectionContext {
/// Identifier of the collection, passed to the provider on every vector lookup.
pub collection_id: CollectionId,
/// Names of the columns that are fetched from `provider`; any other column
/// is read from the entity's properties.
pub vector_names: Vec<String>,
/// Source of vector data, queried by collection id, entity id and vector name.
pub provider: Arc<dyn CollectionVectorProvider>,
}
/// Scan over an in-memory list of entities that projects each entity into a
/// [`Row`] with a fixed set of columns.
pub struct StorageScan {
/// Entities the scan walks over, in the order rows are produced.
entities: Vec<Entity>,
/// Names of the projected columns, in output order.
columns: Vec<String>,
/// Schema built from `columns` at construction time; every produced row
/// holds a clone of this `Arc`.
schema: Arc<Schema>,
/// Index into `entities` of the next entity `next_row` will convert.
position: usize,
/// Optional context enabling vector-column lookups; `None` means every
/// non-special column is read from entity properties.
collection_context: Option<CollectionContext>,
}
impl StorageScan {
    /// Creates a scan over `entities` that projects the given `columns`.
    ///
    /// The row schema is built once from `columns` and shared by every row
    /// the scan produces. The new scan starts at the first entity and has no
    /// collection context attached.
    #[must_use]
    pub fn new(entities: Vec<Entity>, columns: Vec<String>) -> Self {
        let schema = Arc::new(Schema::new(columns.clone()));
        Self { entities, columns, schema, position: 0, collection_context: None }
    }

    /// Attaches a collection context so that columns listed in its
    /// `vector_names` are resolved through its provider.
    ///
    /// Any previously attached context is replaced.
    #[must_use]
    pub fn with_collection_context(mut self, context: CollectionContext) -> Self {
        self.collection_context = Some(context);
        self
    }

    /// Returns a shared handle to the schema used by the produced rows.
    #[must_use]
    pub fn schema(&self) -> Arc<Schema> {
        Arc::clone(&self.schema)
    }

    /// Returns the projected column names in output order.
    #[must_use]
    pub fn columns(&self) -> &[String] {
        &self.columns
    }

    /// Returns the total number of entities in the scan, independent of how
    /// many rows have already been consumed.
    #[must_use]
    pub fn len(&self) -> usize {
        self.entities.len()
    }

    /// Returns `true` when the scan holds no entities at all.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.entities.is_empty()
    }

    /// Computes the cell values for `entity`, one per projected column and in
    /// column order.
    ///
    /// Column resolution:
    /// - `_rowid` yields the entity id as an integer;
    /// - `_labels` yields the entity's labels joined with `,`;
    /// - a column named in the collection context's `vector_names` is fetched
    ///   from the context's provider;
    /// - every other column is read from the entity's properties, with
    ///   `Value::Null` standing in for a missing property.
    fn entity_values(&self, entity: &Entity) -> Vec<Value> {
        self.columns
            .iter()
            .map(|col| match col.as_str() {
                // NOTE(review): assuming `as_u64` returns `u64`, ids above
                // `i64::MAX` wrap to negative values with this cast.
                "_rowid" => Value::Int(entity.id.as_u64() as i64),
                "_labels" => Value::String(
                    entity.labels.iter().map(|l| l.as_str()).collect::<Vec<_>>().join(","),
                ),
                _ => match &self.collection_context {
                    Some(ctx) if ctx.vector_names.contains(col) => {
                        match ctx.provider.get_vector(ctx.collection_id, entity.id, col) {
                            Ok(Some(data)) => vector_data_to_value(&data),
                            // A missing vector and a provider error both
                            // surface as NULL rather than aborting the scan.
                            Ok(None) | Err(_) => Value::Null,
                        }
                    }
                    _ => entity.get_property(col).cloned().unwrap_or(Value::Null),
                },
            })
            .collect()
    }

    /// Converts `entity` into a row bound to the scan's shared schema.
    fn entity_to_row(&self, entity: &Entity) -> Row {
        Row::new(Arc::clone(&self.schema), self.entity_values(entity))
    }

    /// Produces the row for the next unconsumed entity and advances the scan.
    ///
    /// Returns `None` once every entity has been consumed.
    pub fn next_row(&mut self) -> Option<Row> {
        let entity = self.entities.get(self.position)?;
        let row = self.entity_to_row(entity);
        self.position += 1;
        Some(row)
    }

    /// Converts every entity into a row, regardless of the current scan
    /// position and without advancing it.
    #[must_use]
    pub fn collect_rows(&self) -> Vec<Row> {
        self.entities.iter().map(|e| self.entity_to_row(e)).collect()
    }

    /// Converts every entity into a plain vector of cell values, regardless of
    /// the current scan position and without advancing it.
    ///
    /// The values are computed directly, so no intermediate `Row` is built
    /// and then copied out of.
    #[must_use]
    pub fn collect_values(&self) -> Vec<Vec<Value>> {
        self.entities.iter().map(|e| self.entity_values(e)).collect()
    }
}
/// Converts vector data obtained from a collection provider into the
/// corresponding query [`Value`].
///
/// Dense, sparse and multi vectors map to `Value::Vector`,
/// `Value::SparseVector` and `Value::MultiVector` respectively; binary data
/// maps to `Value::Bytes`. The payload is cloned, leaving `data` untouched.
fn vector_data_to_value(data: &manifoldb_vector::types::VectorData) -> Value {
    use manifoldb_vector::types::VectorData as Data;

    // Deliberately no wildcard arm: a new `VectorData` variant should fail to
    // compile here until it is given a mapping.
    match data {
        Data::Dense(dense) => Value::Vector(dense.clone()),
        Data::Sparse(sparse) => Value::SparseVector(sparse.clone()),
        Data::Multi(multi) => Value::MultiVector(multi.clone()),
        Data::Binary(bytes) => Value::Bytes(bytes.clone()),
    }
}
/// Iterates over the scan's rows, starting at the current scan position.
impl Iterator for StorageScan {
    type Item = Row;

    /// Yields the next row; equivalent to calling `next_row`.
    fn next(&mut self) -> Option<Self::Item> {
        self.next_row()
    }

    /// Reports the exact number of rows still to be produced, which lets
    /// consumers such as `collect` reserve capacity up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        // One row is produced per entity at or after `position`.
        let remaining = self.entities.len().saturating_sub(self.position);
        (remaining, Some(remaining))
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use manifoldb_core::EntityId;

    /// Two entities projected onto `_rowid`, `name` and `age` come back in
    /// order, and the scan is exhausted afterwards.
    #[test]
    fn test_storage_scan_basic() {
        let alice = Entity::new(EntityId::new(1))
            .with_label("Person")
            .with_property("name", "Alice")
            .with_property("age", 30i64);
        let bob = Entity::new(EntityId::new(2))
            .with_label("Person")
            .with_property("name", "Bob")
            .with_property("age", 25i64);
        let projection: Vec<String> =
            ["_rowid", "name", "age"].iter().map(|c| (*c).to_string()).collect();

        let mut scan = StorageScan::new(vec![alice, bob], projection);
        assert_eq!(scan.len(), 2);

        let first = scan.next_row().unwrap();
        assert_eq!(first.get(0), Some(&Value::Int(1)));
        assert_eq!(first.get(1), Some(&Value::String("Alice".to_string())));
        assert_eq!(first.get(2), Some(&Value::Int(30)));

        let second = scan.next_row().unwrap();
        assert_eq!(second.get(0), Some(&Value::Int(2)));
        assert_eq!(second.get(1), Some(&Value::String("Bob".to_string())));

        assert!(scan.next_row().is_none());
    }

    /// A projected column with no matching property is reported as NULL.
    #[test]
    fn test_storage_scan_missing_property() {
        let alice =
            Entity::new(EntityId::new(1)).with_label("Person").with_property("name", "Alice");
        let projection = vec![String::from("name"), String::from("missing")];

        let mut scan = StorageScan::new(vec![alice], projection);
        let only = scan.next_row().unwrap();

        assert_eq!(only.get(0), Some(&Value::String("Alice".to_string())));
        assert_eq!(only.get(1), Some(&Value::Null));
    }
}