pub fn iter_origins( dataset_dir: &PathBuf, ) -> Result<impl ParallelIterator<Item = (String, String)>>