fcb_core 0.7.6

FlatCityBuf is a library for reading and writing CityJSON with FlatBuffers. Contains code derived from FlatGeobuf (BSD-2-Clause) for spatial indexing.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
use anyhow::{Context, Result};
use clap::{Parser, ValueEnum};
use fcb_core::FcbReader;
use prettytable::{Cell, Row, Table};
use std::{
    collections::HashSet,
    fs::File,
    io::{BufRead, BufReader},
    path::{Path, PathBuf},
};

/// Command-line arguments for the `fcb_stats` tool.
///
/// Parsed via clap's derive API; each field documents its own flag.
#[derive(Debug, Parser)]
#[command(
    name = "fcb_stats",
    about = "Calculate statistics for FlatCityBuf files compared to CityJSONSeq"
)]
struct Args {
    /// Directory containing pairs of .fcb and .city.jsonl files (defaults to current directory)
    #[arg(short, long, default_value = ".")]
    dir: PathBuf,

    /// Recursively search subdirectories for files
    #[arg(short, long)]
    recursive: bool,

    /// Output format (table by default)
    #[arg(short, long, value_enum, default_value_t = OutputFormat::Table)]
    format: OutputFormat,

    /// List of specific city prefixes to process (e.g., "NYC,Zurich")
    #[arg(short, long)]
    cities: Option<String>,
}

/// Supported rendering formats for the collected statistics,
/// selectable on the command line via `--format`.
#[derive(Debug, Clone, ValueEnum)]
enum OutputFormat {
    /// Human-readable table
    Table,
    /// CSV format
    Csv,
    /// JSON format
    Json,
}

/// Aggregated statistics for one FCB/CityJSONSeq dataset pair.
#[derive(Debug)]
struct FileStats {
    // City name derived from the .fcb file stem.
    city_name: String,
    // On-disk size of the .fcb file, in bytes.
    fcb_file_size: u64,
    // On-disk size of the companion .jsonl file, in bytes.
    jsonl_file_size: u64,
    // Relative size reduction: (jsonl - fcb) / jsonl (0.25 == 25% smaller).
    compression_factor: f64,
    // Feature count as declared in the FCB header.
    feature_count: u64,
    // Total CityObjects summed over all features.
    city_object_count: usize,
    // Total vertices summed over all features.
    vertex_count: usize,
    // Number of attribute columns in the FCB header schema.
    attribute_count: usize,
    // Number of semantic-surface attribute columns in the header schema.
    semantic_attribute_count: usize,
    avg_vertices_per_object: f64,
    avg_vertices_per_feature: f64,
    // Mean encoded FCB feature size, in bytes.
    avg_feature_size: f64,
    // Mean JSONL feature line length (newline excluded), in bytes.
    avg_cjseq_feature_size: f64,
    // Per-feature analogue of `compression_factor`.
    feature_compression_factor: f64,
}

/// Render a byte count as a human-readable string (B, KB, MB or GB),
/// using binary (1024-based) units and two decimal places above bytes.
fn format_size(size_bytes: u64) -> String {
    const KB: u64 = 1024;
    const MB: u64 = KB * 1024;
    const GB: u64 = MB * 1024;

    match size_bytes {
        b if b < KB => format!("{b} B"),
        b if b < MB => format!("{:.2} KB", b as f64 / KB as f64),
        b if b < GB => format!("{:.2} MB", b as f64 / MB as f64),
        b => format!("{:.2} GB", b as f64 / GB as f64),
    }
}

/// Calculate statistics for an FCB file.
///
/// Returns `(feature_count, city_object_count, vertex_count,
/// attribute_count, semantic_attribute_count, avg_feature_size_bytes)`.
///
/// # Errors
/// Fails if the file cannot be opened or the FCB stream cannot be read.
fn analyze_fcb(path: &Path) -> Result<(u64, usize, usize, usize, usize, f64)> {
    println!("analyzing fcb file: {}", path.display());

    let file = File::open(path).context("failed to open fcb file")?;
    let reader = BufReader::new(file);

    let mut fcb_reader = FcbReader::open(reader)
        .context("failed to open fcb reader")?
        .select_all()
        .context("failed to select all from fcb reader")?;

    let header = fcb_reader.header();
    let feature_count = header.features_count();

    // Attribute schemas are stored once in the header, not per feature.
    let attribute_count = header.columns().map(|col| col.len()).unwrap_or(0);
    let semantic_attribute_count = header.semantic_columns().map(|col| col.len()).unwrap_or(0);

    // Accumulate per-feature counts across the whole file.
    let mut city_object_count = 0;
    let mut vertex_count = 0;
    let mut feat_num = 0;
    let mut total_feature_size = 0;

    while let Some(feat_buf) = fcb_reader.next().context("failed to read next feature")? {
        let feature = feat_buf.cur_feature();
        total_feature_size += feat_buf.cur_feature_len();

        // Count vertices in this feature.
        if let Some(vertices) = feature.vertices() {
            vertex_count += vertices.len();
        }

        // Count city objects in this feature.
        if let Some(objects) = feature.objects() {
            city_object_count += objects.len();
        }

        feat_num += 1;
        // Stop after the header-declared feature count rather than relying
        // on the reader signalling end-of-stream on its own.
        if feat_num == feature_count {
            break;
        }
    }

    // Guard against division by zero for files with no features
    // (previously produced NaN).
    let avg_feature_size = if feature_count > 0 {
        total_feature_size as f64 / feature_count as f64
    } else {
        0.0
    };

    Ok((
        feature_count,
        city_object_count,
        vertex_count,
        attribute_count,
        semantic_attribute_count,
        avg_feature_size,
    ))
}

/// Analyze a CityJSONSeq (.jsonl) file.
///
/// Returns `(file_size_bytes, avg_feature_line_length_bytes)`. The first
/// line (the CityJSON metadata header) is excluded from the average, and
/// line lengths do not include the trailing newline.
///
/// # Errors
/// Fails if the file cannot be opened or its metadata cannot be read.
fn analyze_jsonl(path: &Path) -> Result<(u64, f64)> {
    println!("analyzing jsonl file: {}", path.display());

    let file = File::open(path).context("failed to open jsonl file")?;
    let file_size = file
        .metadata()
        .context("failed to get jsonl metadata")?
        .len();

    // Reuse the already-open handle instead of opening the file a second
    // time (previously the first handle was used only for metadata).
    let reader = BufReader::new(file);
    let mut lines = reader.lines();

    // Skip the header line (CityJSON metadata object).
    if let Some(Ok(_)) = lines.next() {
        let mut feature_count: u64 = 0;
        let mut total_feature_size: u64 = 0;

        // Unreadable lines are skipped best-effort, as before.
        for line_content in lines.flatten() {
            total_feature_size += line_content.len() as u64;
            feature_count += 1;
        }

        let avg_feature_size = if feature_count > 0 {
            total_feature_size as f64 / feature_count as f64
        } else {
            0.0
        };

        Ok((file_size, avg_feature_size))
    } else {
        // Empty or unreadable past the first byte: report size only.
        Ok((file_size, 0.0))
    }
}

/// Collect `.fcb` files under `dir` into `fcb_files` as
/// `(path, city_name)` pairs, where the city name is the file stem.
/// Subdirectories are descended into only when `recursive` is true.
/// When `filter_cities` is supplied, only stems starting with one of
/// the given prefixes are kept.
fn find_fcb_files(
    dir: &Path,
    recursive: bool,
    filter_cities: Option<&HashSet<String>>,
    fcb_files: &mut Vec<(PathBuf, String)>,
) -> Result<()> {
    let entries =
        std::fs::read_dir(dir).context(format!("failed to read directory: {}", dir.display()))?;

    for entry in entries {
        let path = entry.context("failed to read directory entry")?.path();

        // Descend into subdirectories when recursion was requested.
        if path.is_dir() && recursive {
            find_fcb_files(&path, recursive, filter_cities, fcb_files)?;
            continue;
        }

        // Only entries with an .fcb extension are of interest.
        let has_fcb_ext = path.extension().map(|ext| ext == "fcb").unwrap_or(false);
        if !has_fcb_ext {
            continue;
        }

        if let Some(stem) = path.file_stem() {
            let city_name = stem.to_string_lossy().to_string();

            // Apply the optional city-prefix filter.
            let keep = match filter_cities {
                Some(filter) => filter.iter().any(|prefix| city_name.starts_with(prefix)),
                None => true,
            };

            if keep {
                fcb_files.push((path, city_name));
            }
        }
    }

    Ok(())
}

/// Pair each discovered `.fcb` file with its CityJSONSeq counterpart
/// in the same directory.
///
/// For every `<name>.fcb` this looks for `<name>.city.jsonl` first and
/// then falls back to `<name>.jsonl`. Files with no match produce a
/// warning and are skipped.
fn find_dataset_pairs(
    dir: &Path,
    recursive: bool,
    filter_cities: Option<&HashSet<String>>,
) -> Result<Vec<(PathBuf, PathBuf, String)>> {
    let mut fcb_files = Vec::new();
    find_fcb_files(dir, recursive, filter_cities, &mut fcb_files)?;

    let mut pairs = Vec::new();
    for (fcb_path, city_name) in fcb_files {
        let parent_dir = fcb_path.parent().unwrap_or(Path::new(""));

        // Candidate companion files, in order of preference.
        let primary = parent_dir.join(format!("{city_name}.city.jsonl"));
        let fallback = parent_dir.join(format!("{city_name}.jsonl"));

        if primary.exists() {
            pairs.push((fcb_path, primary, city_name));
        } else if fallback.exists() {
            pairs.push((fcb_path, fallback, city_name));
        } else {
            println!("warning: no matching jsonl file found for {city_name}");
        }
    }

    Ok(pairs)
}

fn main() -> Result<()> {
    let args = Args::parse();

    // Parse city filter if provided
    let filter_cities = args.cities.map(|cities_str| {
        cities_str
            .split(',')
            .map(|s| s.trim().to_string())
            .collect::<HashSet<String>>()
    });

    // Find all pairs of FCB and JSONL files
    let pairs = find_dataset_pairs(&args.dir, args.recursive, filter_cities.as_ref())?;
    if pairs.is_empty() {
        println!("no matching dataset pairs found in {}", args.dir.display());
        return Ok(());
    }

    println!("found {} dataset pairs", pairs.len());

    // Analyze each pair
    let mut stats = Vec::new();
    for (fcb_path, jsonl_path, city_name) in pairs {
        println!("processing city: {city_name}");

        // Analyze FCB file
        let (
            feature_count,
            city_object_count,
            vertex_count,
            attribute_count,
            semantic_attribute_count,
            avg_feature_size,
        ) = match analyze_fcb(&fcb_path) {
            Ok(stats) => stats,
            Err(e) => {
                println!("error analyzing {}: {}", fcb_path.display(), e);
                continue;
            }
        };

        // Get file sizes
        let fcb_file_size = match File::open(&fcb_path) {
            Ok(file) => match file.metadata() {
                Ok(metadata) => metadata.len(),
                Err(e) => {
                    println!("error getting metadata for {}: {}", fcb_path.display(), e);
                    continue;
                }
            },
            Err(e) => {
                println!("error opening {}: {}", fcb_path.display(), e);
                continue;
            }
        };

        let (jsonl_file_size, avg_cjseq_feature_size) = match analyze_jsonl(&jsonl_path) {
            Ok((size, avg)) => (size, avg),
            Err(e) => {
                println!("error analyzing {}: {}", jsonl_path.display(), e);
                continue;
            }
        };

        // Calculate compression factor
        let compression_factor = if jsonl_file_size > 0 {
            (jsonl_file_size as f64 - fcb_file_size as f64) / jsonl_file_size as f64
        } else {
            0.0
        };

        // Calculate average vertices per city object
        let avg_vertices_per_object = if city_object_count > 0 {
            vertex_count as f64 / city_object_count as f64
        } else {
            0.0
        };

        let avg_vertices_per_feature = if feature_count > 0 {
            vertex_count as f64 / feature_count as f64
        } else {
            0.0
        };

        // Calculate feature-level compression factor
        let feature_compression_factor = if avg_cjseq_feature_size > 0.0 {
            (avg_cjseq_feature_size - avg_feature_size) / avg_cjseq_feature_size
        } else {
            0.0
        };

        stats.push(FileStats {
            city_name,
            fcb_file_size,
            jsonl_file_size,
            compression_factor,
            feature_count,
            city_object_count,
            vertex_count,
            attribute_count,
            semantic_attribute_count,
            avg_vertices_per_object,
            avg_vertices_per_feature,
            avg_feature_size,
            avg_cjseq_feature_size,
            feature_compression_factor,
        });
    }

    // Sort stats by city name for consistent output
    stats.sort_by(|a, b| a.city_name.cmp(&b.city_name));

    // Output results based on selected format
    match args.format {
        OutputFormat::Table => output_table(&stats),
        OutputFormat::Csv => output_csv(&stats)?,
        OutputFormat::Json => output_json(&stats)?,
    }

    Ok(())
}

fn output_table(stats: &[FileStats]) {
    let mut table = Table::new();

    // Add header row
    table.add_row(Row::new(vec![
        Cell::new("City"),
        Cell::new("FCB Size"),
        Cell::new("JSONL Size"),
        Cell::new("Compression"),
        Cell::new("Features"),
        Cell::new("City Objects"),
        Cell::new("Vertices"),
        Cell::new("Vertices/Object"),
        Cell::new("Vertices/Feature"),
        Cell::new("Attributes"),
        Cell::new("Semantic Attributes"),
        Cell::new("Avg FCB Feat Size"),
        Cell::new("Avg JSONL Feat Size"),
        Cell::new("Feature Compression"),
    ]));

    // Add data rows
    for stat in stats {
        table.add_row(Row::new(vec![
            Cell::new(&stat.city_name),
            Cell::new(&format_size(stat.fcb_file_size)),
            Cell::new(&format_size(stat.jsonl_file_size)),
            Cell::new(&format!("{:.2}%", stat.compression_factor * 100.0)),
            Cell::new(&stat.feature_count.to_string()),
            Cell::new(&stat.city_object_count.to_string()),
            Cell::new(&stat.vertex_count.to_string()),
            Cell::new(&format!("{:.2}", stat.avg_vertices_per_object)),
            Cell::new(&format!("{:.2}", stat.avg_vertices_per_feature)),
            Cell::new(&stat.attribute_count.to_string()),
            Cell::new(&stat.semantic_attribute_count.to_string()),
            Cell::new(&format_size(stat.avg_feature_size as u64)),
            Cell::new(&format_size(stat.avg_cjseq_feature_size as u64)),
            Cell::new(&format!("{:.2}%", stat.feature_compression_factor * 100.0)),
        ]));
    }

    // Print the table
    table.printstd();
}

/// Print the collected statistics as CSV on stdout.
///
/// The header row matches the 14 data fields column-for-column
/// (previously it listed only 12 names, so columns after
/// `vertices_per_object` were misaligned).
fn output_csv(stats: &[FileStats]) -> Result<()> {
    println!("city,fcb_size,jsonl_size,compression,features,city_objects,vertices,vertices_per_object,vertices_per_feature,attributes,semantic_attributes,avg_fcb_feature_size,avg_jsonl_feature_size,feature_compression");

    for stat in stats {
        // Integer columns use plain `{}`; ratios use 4 decimals and
        // byte averages 2, consistently for both compression factors.
        println!(
            "{},{},{},{:.4},{},{},{},{:.2},{:.2},{},{},{:.2},{:.2},{:.4}",
            stat.city_name,
            stat.fcb_file_size,
            stat.jsonl_file_size,
            stat.compression_factor,
            stat.feature_count,
            stat.city_object_count,
            stat.vertex_count,
            stat.avg_vertices_per_object,
            stat.avg_vertices_per_feature,
            stat.attribute_count,
            stat.semantic_attribute_count,
            stat.avg_feature_size,
            stat.avg_cjseq_feature_size,
            stat.feature_compression_factor
        );
    }

    Ok(())
}

/// Print the collected statistics as a pretty-printed JSON array on stdout.
///
/// # Errors
/// Fails if serialization fails (not expected for these plain values).
fn output_json(stats: &[FileStats]) -> Result<()> {
    let json = serde_json::to_string_pretty(
        &stats
            .iter()
            .map(|s| {
                serde_json::json!({
                    "city": s.city_name,
                    "fcb_size": s.fcb_file_size,
                    "jsonl_size": s.jsonl_file_size,
                    "compression": s.compression_factor,
                    "features": s.feature_count,
                    "city_objects": s.city_object_count,
                    "vertices": s.vertex_count,
                    "vertices_per_object": s.avg_vertices_per_object,
                    "vertices_per_feature": s.avg_vertices_per_feature,
                    "attributes": s.attribute_count,
                    "semantic_attributes": s.semantic_attribute_count,
                    "avg_fcb_feature_size": s.avg_feature_size,
                    "avg_jsonl_feature_size": s.avg_cjseq_feature_size,
                    "feature_compression": s.feature_compression_factor
                })
            })
            .collect::<Vec<_>>(),
    )?;

    // Bug fix: the serialized document was previously bound to `_json`
    // and discarded, so `--format json` produced no output at all.
    println!("{json}");

    Ok(())
}