use clap::{Parser, Subcommand};
use foldhash::fast::FixedState;
use massmap::{
MassMap, MassMapBuilder, MassMapDefaultHashLoader, MassMapHashConfig, MassMapHashLoader,
MassMapInner, MassMapMerger, MassMapReader,
};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::fmt::Display;
use std::fs::File;
use std::io::{BufReader, Error, ErrorKind, Result};
use std::path::{Path, PathBuf};
fn main() -> Result<()> {
let cli = Cli::parse();
match cli.command {
Command::Info(args) => run_info(args),
Command::Convert(args) => run_convert(args),
Command::Merge(args) => run_merge(args),
}
}
// Top-level command-line interface of the tool.
//
// A subcommand is mandatory (`subcommand_required`), and invoking the binary
// with no arguments at all prints the help text (`arg_required_else_help`).
// Plain `//` comments are used here on purpose: the user-facing description
// is already set explicitly through `about = ...`.
#[derive(Parser)]
#[command(
author,
version,
about = "massmap utility for inspecting and creating massmap files",
subcommand_required = true,
arg_required_else_help = true
)]
struct Cli {
// The subcommand selected on the command line.
#[command(subcommand)]
command: Command,
}
// Subcommands offered by the tool. The `///` comments on the variants are
// picked up by clap and shown as each subcommand's description in `--help`.
#[derive(Subcommand)]
enum Command {
    /// Print a massmap file's info as JSON and optionally query a key or a bucket
    Info(InfoArgs),
    /// Build a massmap file from a JSON document with string keys
    Convert(ConvertArgs),
    /// Merge several massmap files into a single output file
    Merge(MergeArgs),
}
// Arguments of the `info` subcommand. The `///` comments on the fields are
// picked up by clap and shown as the help text of each argument.
#[derive(clap::Args)]
struct InfoArgs {
    /// Massmap file to inspect
    #[arg(value_name = "FILE")]
    input: PathBuf,
    /// Look up this key (parsed according to the file's key type) and print the result
    #[arg(short, long)]
    key: Option<String>,
    /// Print every entry stored in the bucket with this index
    #[arg(short, long)]
    bucket: Option<u64>,
}
// Arguments of the `convert` subcommand. The `///` comments on the fields are
// picked up by clap and shown as the help text of each argument.
#[derive(clap::Args)]
struct ConvertArgs {
    /// JSON input: an object, or an array of {"key", "value"} objects or [key, value] pairs
    #[arg(short, long, value_name = "FILE")]
    input: PathBuf,
    /// Path of the massmap file to create
    #[arg(short, long, value_name = "FILE")]
    output: PathBuf,
    /// Hash seed passed to the massmap builder
    #[arg(long, value_name = "SEED", default_value_t = 0)]
    hash_seed: u64,
    /// Bucket count passed to the massmap builder
    #[arg(long, value_name = "COUNT", default_value_t = 1 << 16)]
    bucket_count: u64,
    /// Writer buffer size in bytes
    #[arg(long, value_name = "BYTES", default_value_t = 16 << 20)]
    buffer_size: usize,
}
// Arguments of the `merge` subcommand. The `///` comments on the fields are
// picked up by clap and shown as the help text of each argument.
#[derive(clap::Args)]
struct MergeArgs {
    /// Massmap file to merge; repeat the option to add more inputs
    #[arg(short, long, value_name = "FILE")]
    input: Vec<PathBuf>,
    /// Path of the merged massmap file to create
    #[arg(short, long, value_name = "FILE")]
    output: PathBuf,
    /// Writer buffer size in bytes
    #[arg(long, value_name = "BYTES", default_value_t = 16 << 20)]
    buffer_size: usize,
}
/// Lenient [`MassMapHashLoader`] used by this tool.
///
/// Its `load` implementation never rejects a hash configuration: it always
/// builds a foldhash [`FixedState`] and only emits a warning when the
/// configured hash name is not the default one.
#[derive(Debug, Default)]
pub struct MassMapTolerableHashLoader;
impl MassMapHashLoader for MassMapTolerableHashLoader {
    type BuildHasher = FixedState;

    /// Builds a foldhash [`FixedState`] from `config`.
    ///
    /// A hash name other than `MassMapDefaultHashLoader::NAME` is tolerated:
    /// a warning is printed and foldhash is used anyway. The seed is taken
    /// from the `"seed"` parameter and falls back to `0` when that parameter
    /// is missing or is not an unsigned 64-bit integer.
    ///
    /// This implementation never returns an error.
    fn load(config: &MassMapHashConfig) -> Result<Self::BuildHasher> {
        if config.name != MassMapDefaultHashLoader::NAME {
            // Diagnostics belong on stderr: stdout carries the JSON and query
            // output of the subcommands and must stay clean for piping.
            eprintln!(
                "Warning: Unsupported hash type: {}, defaulting to foldhash",
                config.name
            );
        }
        let seed = config
            .parameters
            .get("seed")
            .and_then(|v| v.as_u64())
            .unwrap_or(0);
        Ok(FixedState::with_seed(seed))
    }
}
/// Runs the optional queries of the `info` subcommand against `map`.
///
/// * `key` — when present, the key is looked up and the result is printed
///   using its `Debug` representation.
/// * `bucket` — when present, all entries of that bucket are printed as
///   pretty JSON.
///
/// # Errors
///
/// Returns `ErrorKind::InvalidInput` when the bucket index is not smaller
/// than `map.bucket_count()`, propagates errors from `get` / `get_bucket`,
/// and reports JSON formatting failures.
fn do_query<K, R>(
    map: MassMap<K, serde_json::Value, R, MassMapTolerableHashLoader>,
    key: Option<K>,
    bucket: Option<u64>,
) -> Result<()>
where
    K: Serialize + for<'de> Deserialize<'de> + Display + std::hash::Hash + Eq,
    R: MassMapReader,
{
    if let Some(key) = key {
        println!("Get {}: {:?}", key, map.get(&key)?);
    }
    if let Some(bucket_index) = bucket {
        let bucket_count = map.bucket_count();
        // Compare in u64 space: widening the count is lossless, whereas
        // narrowing the index to usize first could wrap an out-of-range
        // value back into range on targets where usize is narrower than u64.
        if bucket_index >= bucket_count as u64 {
            return Err(Error::new(
                ErrorKind::InvalidInput,
                format!(
                    "Bucket index {} out of range >= {}",
                    bucket_index, bucket_count
                ),
            ));
        }
        // The range check above guarantees the index fits in usize.
        let entries = map.get_bucket(bucket_index as usize)?;
        let json = serde_json::to_string_pretty(&entries)
            .map_err(|e| Error::other(format!("Failed to format JSON: {e}")))?;
        println!("Bucket {} entries:\n{}", bucket_index, json);
    }
    Ok(())
}
fn run_info(args: InfoArgs) -> Result<()> {
let file = File::open(&args.input)?;
let map = MassMapInner::<_, MassMapTolerableHashLoader>::load(file)?;
let json = serde_json::to_string_pretty(&map.info())
.map_err(|e| Error::other(format!("Failed to format JSON: {e}")))?;
println!("{}", json);
match map.meta.key_type.as_str() {
"u8" => do_query(
map.cast::<u8, _>(),
args.key.map(|x| x.parse().unwrap()),
args.bucket,
)?,
"u16" => do_query(
map.cast::<u16, _>(),
args.key.map(|x| x.parse().unwrap()),
args.bucket,
)?,
"u32" => do_query(
map.cast::<u32, _>(),
args.key.map(|x| x.parse().unwrap()),
args.bucket,
)?,
"u64" => do_query(
map.cast::<u64, _>(),
args.key.map(|x| x.parse().unwrap()),
args.bucket,
)?,
"u128" => do_query(
map.cast::<u128, _>(),
args.key.map(|x| x.parse().unwrap()),
args.bucket,
)?,
_ if map.meta.key_type == std::any::type_name::<String>() => {
do_query(map.cast::<String, _>(), args.key, args.bucket)?
}
_ => {
assert!(
args.key.is_none() && args.bucket.is_none(),
"Unsupported key type: {}",
map.meta.key_type
);
}
}
Ok(())
}
/// Implements the `convert` subcommand.
///
/// Reads the JSON entries from `args.input`, creates `args.output`, feeds the
/// entries to a `MassMapBuilder` configured from the command-line options,
/// and prints the value returned by the builder as pretty JSON.
fn run_convert(args: ConvertArgs) -> Result<()> {
    // The input is parsed before the output is created, so a broken input
    // never leaves a freshly created output file behind.
    let entries = load_entries_from_json(&args.input)?;
    let writer = File::create(&args.output)?;

    let info = MassMapBuilder::default()
        .with_hash_seed(args.hash_seed)
        .with_bucket_count(args.bucket_count)
        .with_writer_buffer_size(args.buffer_size)
        .build(&writer, entries.iter())?;

    match serde_json::to_string_pretty(&info) {
        Ok(json) => {
            println!("{}", json);
            Ok(())
        }
        Err(e) => Err(Error::other(format!("Failed to format JSON: {e}"))),
    }
}
/// Reads the JSON document at `path` and converts it into key/value entries.
///
/// # Errors
///
/// Propagates the error from opening the file, reports unparsable JSON as an
/// `InvalidData` error, and forwards any error from `extract_entries`.
fn load_entries_from_json(path: &Path) -> Result<Vec<(String, Value)>> {
    let reader = BufReader::new(File::open(path)?);
    match serde_json::from_reader::<_, Value>(reader) {
        Ok(value) => extract_entries(value),
        Err(e) => Err(invalid_json(format!("Failed to parse JSON input: {e}"))),
    }
}
/// Turns a parsed JSON document into a list of `(key, value)` entries.
///
/// Accepted shapes:
/// * a top-level object — every member becomes one entry;
/// * a top-level array whose items are either objects with a `"key"` and a
///   `"value"` member, or two-element arrays `[key, value]`. Keys must be
///   JSON strings.
///
/// # Errors
///
/// Returns an `InvalidData` error describing the first offending entry, or
/// the top-level value when it is neither an object nor an array.
fn extract_entries(value: Value) -> Result<Vec<(String, Value)>> {
    let items = match value {
        Value::Object(map) => return Ok(map.into_iter().collect::<Vec<_>>()),
        Value::Array(items) => items,
        other => {
            return Err(invalid_json(format!(
                "unsupported JSON top-level type: {other}"
            )));
        }
    };

    // Collecting into `Result` stops at the first entry that fails.
    items
        .into_iter()
        .enumerate()
        .map(|(index, item)| -> Result<(String, Value)> {
            match item {
                Value::Object(mut obj) => {
                    let key = match obj.remove("key") {
                        Some(key) => key,
                        None => {
                            return Err(invalid_json(format!(
                                "entry {index} missing 'key' field"
                            )));
                        }
                    };
                    let value = match obj.remove("value") {
                        Some(value) => value,
                        None => {
                            return Err(invalid_json(format!(
                                "entry {index} missing 'value' field"
                            )));
                        }
                    };
                    Ok((expect_string(key, index)?, value))
                }
                Value::Array(pair) => {
                    // Exactly two elements: a third `next()` must yield nothing.
                    let mut parts = pair.into_iter();
                    match (parts.next(), parts.next(), parts.next()) {
                        (Some(key), Some(value), None) => {
                            Ok((expect_string(key, index)?, value))
                        }
                        _ => Err(invalid_json(format!(
                            "entry {index} expected array of length 2"
                        ))),
                    }
                }
                other => Err(invalid_json(format!(
                    "unsupported entry format at index {index}: {other}"
                ))),
            }
        })
        .collect()
}
/// Unwraps a JSON string used as the key of entry number `index`.
///
/// Returns the contained `String`, or an `InvalidData` error mentioning the
/// entry index and the offending value when `value` is not a JSON string.
fn expect_string(value: Value, index: usize) -> Result<String> {
    if let Value::String(s) = value {
        return Ok(s);
    }
    let other = value;
    Err(invalid_json(format!(
        "entry {index} expects string key, found {other}"
    )))
}
/// Builds the I/O error used for malformed JSON input: an error of kind
/// `InvalidData` carrying `message` as its payload.
fn invalid_json(message: String) -> Error {
    let kind = ErrorKind::InvalidData;
    Error::new(kind, message)
}
/// Implements the `merge` subcommand.
///
/// Loads every file listed in `args.input` as a massmap with `String` keys
/// and JSON values, creates `args.output`, merges the loaded maps into it
/// with a `MassMapMerger`, and prints the value returned by the merger as
/// pretty JSON.
///
/// # Errors
///
/// Errors from opening or loading an input keep their original
/// `ErrorKind` but are prefixed with the path of the offending file.
/// Errors from creating the output, merging, or formatting the JSON are
/// propagated as they are.
fn run_merge(args: MergeArgs) -> Result<()> {
    let maps = args
        .input
        .iter()
        .map(|path| {
            // Several inputs are opened in a row; without the path in the
            // message the user cannot tell which one failed.
            let with_path =
                |e: Error| Error::new(e.kind(), format!("{}: {e}", path.display()));
            let file = File::open(path).map_err(with_path)?;
            MassMap::<String, serde_json::Value, _, MassMapTolerableHashLoader>::load(file)
                .map_err(with_path)
        })
        .collect::<Result<Vec<_>>>()?;
    let writer = File::create(&args.output)?;
    let info = MassMapMerger::default()
        .with_writer_buffer_size(args.buffer_size)
        .merge(&writer, maps)?;
    let json = serde_json::to_string_pretty(&info)
        .map_err(|e| Error::other(format!("Failed to format JSON: {e}")))?;
    println!("{}", json);
    Ok(())
}