flatdata 0.3.0

Rust implementation of flatdata
Documentation

Implementation of heremaps/flatdata in Rust.

Flatdata is a library providing data structures for convenient creation, storage and access of packed memory-mappable structures with minimal overhead.

The idea is that the user defines a schema of the data format using flatdata's very simple schema language, which supports plain structs, vectors and multivectors. The schema is then used to generate builders and readers for serialization and deserialization of the data. The data is serialized in a portable way which allows zero-overhead random access to it by using memory-mapped storage. The memory-mapped approach makes it possible to use the operating system's facilities for loading, caching and paging of the data and, most importantly, to access it as if it were in memory. Read more in "Why flatdata?".

This crate provides:

The generator is part of the main heremaps/flatdata repository; the generate helper function is provided as a convenience wrapper.

For a comprehensive example, cf. coappearances schema and the corresponding usage.

Examples

First you design a schema for the data you want to store, e.g. if you want to store a list of prime factors for each natural number:

namespace prime {
// Represents a single prime factor of a number and how often it occurs.
// `value` is the prime itself (32 bits), `count` is its multiplicity (8 bits).
struct Factor {
value : u32 : 32;
count : u32 : 8;
}

// Points towards the beginning of a number's list of prime factors:
// `first_factor_ref` is an index into `Archive.factors`.
struct Number {
first_factor_ref : u32;
}

// Stores a list of prime factors for numbers from 0 to N.
// The factors of number n are the entries of `factors` starting at
// `numbers[n].first_factor_ref` and ending before `numbers[n + 1].first_factor_ref`.
archive Archive {
@explicit_reference( Number.first_factor_ref, factors )
numbers : vector< Number >;

factors : vector< Factor >;
}
}

Optionally, create a diagram using the dot generator from heremaps/flatdata:

diag

Then you generate code using e.g. the generate utility in a build.rs script and include it in your project. Now you can create a (disk-based) archive and fill it with data:

# #[macro_use] extern crate flatdata;
#
# // Do not edit: This code was generated by flatdata's generator. It embeds the `prime` schema as strings and declares the structs and the archive via flatdata's macros.
# pub mod prime {
#
#     pub mod schema {
#         pub mod structs {
#             pub const FACTOR: &str = r#"namespace prime {
# struct Factor
# {
#     value : u32 : 32;
#     count : u32 : 8;
# }
# }
#
# "#;
#             pub const NUMBER: &str = r#"namespace prime {
# struct Number
# {
#     first_factor_ref : u32 : 32;
# }
# }
#
# "#;
#         }
#         pub mod archive {
#             pub const ARCHIVE: &str = r#"namespace prime {
# struct Number
# {
#     first_factor_ref : u32 : 32;
# }
# }
#
# namespace prime {
# struct Factor
# {
#     value : u32 : 32;
#     count : u32 : 8;
# }
# }
#
# namespace prime {
# archive Archive
# {
#     @explicit_reference( .prime.Number.first_factor_ref, .prime.Archive.factors )
#     numbers : vector< .prime.Number >;
#     factors : vector< .prime.Factor >;
# }
# }
#
# "#;
#             pub mod resources {
#                 pub const NUMBERS: &str = r#"namespace prime {
# struct Number
# {
#     first_factor_ref : u32 : 32;
# }
# }
#
# namespace prime {
# archive Archive
# {
#     @explicit_reference( .prime.Number.first_factor_ref, .prime.Archive.factors )
#     numbers : vector< .prime.Number >;
# }
# }
#
# "#;
#                 pub const FACTORS: &str = r#"namespace prime {
# struct Factor
# {
#     value : u32 : 32;
#     count : u32 : 8;
# }
# }
#
# namespace prime {
# archive Archive
# {
#     factors : vector< .prime.Factor >;
# }
# }
#
# "#;
#             }
#         }
#     }
#     // Represents a single prime factor of a number and how often it occurs. NOTE(review): `5` looks like the struct size in bytes and the trailing number pairs like (bit offset, bit width) - confirm against `define_struct!`.
#     define_struct!(
#         Factor,
#         RefFactor,
#         RefMutFactor,
#         schema::structs::FACTOR,
#         5,
#         (value, set_value, u32, u32, 0, 32),
#         (count, set_count, u32, u32, 32, 8)
#     );
#
#     // Points towards the beginning of a number's list of prime factors (an index into the `factors` resource).
#     define_struct!(
#         Number,
#         RefNumber,
#         RefMutNumber,
#         schema::structs::NUMBER,
#         4,
#         (first_factor_ref, set_first_factor_ref, u32, u32, 0, 32)
#     );
#     // Declares `Archive` and `ArchiveBuilder` with the two vector resources `numbers` and `factors`, each tied to its embedded schema string.
#     define_archive!(Archive, ArchiveBuilder, schema::archive::ARCHIVE;
#         vector(numbers, false, schema::archive::resources::NUMBERS, set_numbers, start_numbers, super::prime::Number),
#         vector(factors, false, schema::archive::resources::FACTORS, set_factors, start_factors, super::prime::Factor),
#     );
#
# }
/// Fills the `numbers` and `factors` resources of the archive with the prime
/// factorization of every number in `0..=max_number`.
///
/// `numbers` receives `max_number + 2` entries: entry `n` holds the index of the
/// first factor of `n` in `factors`, and the final entry marks the end of the
/// last list, so the factors of `n` are always the range
/// `numbers[n].first_factor_ref .. numbers[n + 1].first_factor_ref`.
///
/// # Errors
///
/// Propagates any error returned by `start_numbers`, `start_factors` or `grow`.
///
/// # Panics
///
/// Panics if closing either resource fails.
pub fn calculate_prime_factors(
    builder: &mut prime::ArchiveBuilder,
    max_number: u32,
) -> std::io::Result<()> {
    let mut numbers = builder.start_numbers()?;
    let mut factors = builder.start_factors()?;
    // The factor list of the first number starts at index 0.
    numbers.grow()?.set_first_factor_ref(0);
    for mut x in 0..=max_number {
        // Let's calculate prime factors in a very inefficient way.
        // The range is inclusive so that a prime `x` is recorded as its own
        // factor; its bounds are fixed before `x` is divided down below.
        for y in 2..=x {
            let mut count = 0;
            while x % y == 0 {
                count += 1;
                x /= y;
            }
            if count > 0 {
                let mut factor = factors.grow()?;
                factor.set_value(y);
                factor.set_count(count);
            }
        }
        // The end of this number's factor list is the start of the next one.
        numbers.grow()?.set_first_factor_ref(factors.len() as u32);
    }
    numbers.close().expect("Failed to close");
    factors.close().expect("Failed to close");
    Ok(())
}

# pub fn main() {
#     use flatdata::{Archive, ArchiveBuilder, MemoryResourceStorage};
#     let storage = MemoryResourceStorage::new("/primes");
    let mut builder =
        prime::ArchiveBuilder::new(storage.clone()).expect("failed to create builder");
    calculate_prime_factors(&mut builder, 10000).expect("Failed to write archive");
    // store archive for re-use
    // ...
    // in a different application open archive for use:
    let archive = prime::Archive::open(storage).expect("failed to open archive");
    let number = 1234;
    // The factors of `number` span from its own first-factor index up to the
    // first-factor index of the next number.
    let factor_range = archive.numbers().at(number).first_factor_ref() as usize
        ..archive.numbers().at(number + 1).first_factor_ref() as usize;
    let factors: Vec<_> = archive
        .factors()
        .slice(factor_range)
        .iter()
        // Repeat each prime `count` times so multiplicities show up in the list.
        .flat_map(|x| std::iter::repeat(x.value()).take(x.count() as usize))
        .collect();
    println!("List of prime factors for {}: {:?}", number, factors);
# }

This will print

List of prime factors for 1234: [2, 617]