evlib 0.8.1

Event Camera Data Processing Library
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
#!/usr/bin/env python3
"""
E2VID Video Reconstruction CLI

Command-line tool for reconstructing videos from event camera data using the E2VID model.
Supports multiple event formats (EVT2, H5, text) and provides configurable reconstruction parameters.

Example usage:
    python scripts/e2vid.py --input data/prophersee/samples/evt2/80_balls.raw --output 80_balls_reconstructed.mp4
    python scripts/e2vid.py --input data/slider_depth/events.txt --output slider_depth.mp4 --fps 30 --duration 2.0
"""

import argparse
import sys
import time
from pathlib import Path
from typing import Optional, List, Tuple
import logging

import numpy as np
import cv2

try:
    import evlib
    import evlib.models
    import torch
except ImportError as e:
    print(f"Error: Could not import required packages: {e}")
    print("Please ensure evlib and torch are installed: pip install -e . torch")
    sys.exit(1)


def setup_logging(verbose: bool = False) -> logging.Logger:
    """Configure the logging module and return this module's logger.

    Args:
        verbose: Request DEBUG level when true, INFO otherwise.

    Returns:
        The logger obtained from ``logging.getLogger(__name__)``.
    """
    log_level = logging.INFO
    if verbose:
        log_level = logging.DEBUG

    config = {
        "level": log_level,
        "format": "%(asctime)s - %(levelname)s - %(message)s",
        "datefmt": "%H:%M:%S",
    }
    logging.basicConfig(**config)

    return logging.getLogger(__name__)


def find_active_periods(
    input_path: str, stats: dict, logger: logging.Logger, min_events: int = 1000
) -> List[Tuple[float, float]]:
    """Find periods with significant event activity using sampling.

    The recording is probed with 0.5 s windows (at most 40 of them, spread
    evenly when the recording is longer than 20 s). A window counts as active
    when its event count reaches ``max(min_events, 75th percentile)``.
    Consecutive active windows are joined, padded by 0.5 s on each side
    (clamped to the recording bounds) and merged when less than 1 s apart.

    Args:
        input_path: Path handed to ``evlib.load_events`` for every window.
        stats: Mapping providing ``"start_time"`` and ``"end_time"`` in seconds.
        logger: Logger used for progress and summary messages.
        min_events: Lower bound for the per-window activity threshold.

    Returns:
        A list of ``(start, end)`` tuples in seconds. Empty when the recording
        is too short to hold a single window or when no window is active.
    """
    window_size = 0.5  # 500ms windows for faster analysis
    max_windows = 40  # Limit analysis to 40 windows maximum

    data_start = stats["start_time"]
    data_end = stats["end_time"]
    duration = data_end - data_start

    if duration > max_windows * window_size:
        # Long recording: sample evenly spaced windows across the duration
        step_size = duration / max_windows
        num_windows = max_windows
    else:
        # Short recording: back-to-back windows
        step_size = window_size
        num_windows = int(duration / window_size)

    sample_times = [data_start + i * step_size for i in range(num_windows)]

    if not sample_times:
        # Without this guard the percentile below would run on an empty list.
        logger.warning(
            f"Recording too short ({duration:.3f}s) to sample "
            f"{window_size * 1000:.0f}ms windows; no active periods detected"
        )
        return []

    logger.info(
        f"Sampling {len(sample_times)} windows across {duration:.1f}s to find active periods..."
    )

    # Quick sampling to find activity levels
    window_activities = []
    for t_start in sample_times:
        t_end = t_start + window_size
        window_events = evlib.load_events(input_path, t_start=t_start, t_end=t_end)
        event_count = len(window_events.collect())
        window_activities.append((t_start, event_count))

        if len(window_activities) % 10 == 0:
            logger.info(
                f"  Analyzed {len(window_activities)}/{len(sample_times)} windows..."
            )

    active_threshold = max(
        min_events, np.percentile([count for _, count in window_activities], 75)
    )
    logger.info(
        f"Using activity threshold: {active_threshold:,} events per {window_size * 1000:.0f}ms window"
    )

    # Find continuous active regions; a region ends at the start of the first
    # window that falls below the threshold.
    active_periods = []
    current_start = None
    for t_start, count in window_activities:
        if count >= active_threshold:
            if current_start is None:
                current_start = t_start
        elif current_start is not None:
            active_periods.append((current_start, t_start))
            current_start = None

    # Handle case where file ends during active period
    if current_start is not None:
        active_periods.append((current_start, data_end))

    # Merge nearby periods and extend slightly
    merged_periods = []
    for start, end in active_periods:
        extended_start = max(data_start, start - 0.5)
        extended_end = min(data_end, end + 0.5)

        # Merge with previous if close
        if merged_periods and extended_start - merged_periods[-1][1] < 1.0:
            merged_periods[-1] = (merged_periods[-1][0], extended_end)
        else:
            merged_periods.append((extended_start, extended_end))

    total_active_duration = sum(end - start for start, end in merged_periods)
    logger.info(
        f"Found {len(merged_periods)} active periods totaling {total_active_duration:.1f}s"
    )

    for i, (start, end) in enumerate(merged_periods):
        logger.info(f"  Period {i + 1}: {start:.3f} - {end:.3f}s ({end - start:.1f}s)")

    return merged_periods


def load_and_analyze_events(
    input_path: str, logger: logging.Logger
) -> Tuple[object, dict]:
    """Load events and analyze temporal/spatial properties.

    Args:
        input_path: Path handed to ``evlib.load_events``.
        logger: Logger that receives the dataset summary.

    Returns:
        A tuple ``(all_events, stats)``. ``all_events`` is the object returned
        by ``evlib.load_events``; ``stats`` holds ``num_events``, ``duration``,
        ``width``, ``height``, ``event_rate``, ``start_time``, ``end_time``,
        ``pos_events`` and ``neg_events``. Times are the ``timestamp`` column
        divided by 1e6 (presumably microseconds -> seconds; confirm against
        the evlib schema).

    Raises:
        ValueError: If the input contains no events.
    """
    logger.info(f"Loading events from: {input_path}")

    # Load all events to analyze properties
    all_events = evlib.load_events(input_path)
    events_df = all_events.collect()

    num_events = len(events_df)
    if num_events == 0:
        # The min/max reductions below have no identity on empty data; fail
        # early with a message that names the actual problem.
        raise ValueError(f"No events found in input file: {input_path}")

    timestamps_sec = events_df["timestamp"].to_numpy().astype(np.float64) / 1e6
    start_time = timestamps_sec.min()
    end_time = timestamps_sec.max()
    duration = end_time - start_time

    # Resolution is inferred from the largest coordinates seen
    width = int(events_df["x"].max()) + 1
    height = int(events_df["y"].max()) + 1

    # Polarity distribution: negative events may be encoded as 0 or -1
    polarities = events_df["polarity"].to_numpy()
    pos_events = np.sum(polarities == 1)
    neg_events = np.sum(polarities == 0) + np.sum(polarities == -1)

    stats = {
        "num_events": num_events,
        "duration": duration,
        "width": width,
        "height": height,
        "event_rate": num_events / duration if duration > 0 else 0,
        "start_time": start_time,
        "end_time": end_time,
        "pos_events": pos_events,
        "neg_events": neg_events,
    }

    logger.info("Dataset analysis:")
    logger.info(f"  Events: {num_events:,}")
    logger.info(f"  Duration: {duration:.3f} seconds")
    logger.info(f"  Resolution: {width}x{height}")
    logger.info(f"  Event rate: {stats['event_rate']:.0f} events/second")
    logger.info(f"  Polarity: {pos_events:,} positive, {neg_events:,} negative")

    return all_events, stats


def detect_and_setup_device(device: Optional[str], logger: logging.Logger) -> str:
    """Pick the inference device, honouring an explicit request when valid.

    Args:
        device: ``None`` or ``"auto"`` to auto-detect, otherwise a device
            string that ``torch.device`` can parse.
        logger: Logger that receives the decision.

    Returns:
        The requested device string when it parses; otherwise ``"cuda"``,
        ``"mps"`` or ``"cpu"`` in that order of preference.
    """
    explicit_request = device is not None and device != "auto"

    if explicit_request:
        try:
            torch.device(device)
        except Exception as e:
            # An unparsable request falls through to auto-detection
            logger.warning(f"Invalid device '{device}': {e}. Auto-detecting...")
        else:
            logger.info(f"Using user-specified device: {device}")
            return device

    # Auto-detection: CUDA first, then MPS, then CPU
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
        logger.info(f"Using CUDA GPU: {gpu_name} ({gpu_memory:.1f}GB)")
        return "cuda"

    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
        logger.info("Using Metal Performance Shaders (MPS) for GPU acceleration")
        return "mps"

    logger.info("Using CPU (no GPU acceleration available)")
    return "cpu"


def create_model(pretrained: bool, device: str, logger: logging.Logger) -> object:
    """Build an ``evlib.models.E2VID`` instance and place it on *device*.

    Args:
        pretrained: Forwarded as ``pretrained=`` to the E2VID constructor.
        device: Device string used for ``.to(device)`` and ``torch.device``.
        logger: Logger that receives progress messages.

    Returns:
        The constructed model wrapper.
    """
    logger.info("Initializing E2VID model...")

    model = evlib.models.E2VID(pretrained=pretrained)

    # The wrapper may expose its network as ``_model``; relocate it only
    # when that attribute exists and is populated.
    wrapped_network = getattr(model, "_model", None)
    if wrapped_network is not None:
        model._model = wrapped_network.to(device)
        model._device = torch.device(device)
        logger.info(f"Model moved to device: {device}")

    logger.info(f"Model: {model}")

    return model


def _normalize_frame(frame: np.ndarray) -> np.ndarray:
    """Min-max scale *frame* to [0, 1]; a constant (or NaN) frame becomes black."""
    low = frame.min()
    span = frame.max() - low
    if span > 0:
        return (frame - low) / span
    # Zero dynamic range would divide by zero and yield NaNs.
    return np.zeros(frame.shape, dtype=np.float32)


def _fallback_frame(frames: List[np.ndarray], height: int, width: int) -> np.ndarray:
    """Return a copy of the last frame, or a black frame when none exists yet."""
    if frames:
        return frames[-1].copy()
    return np.zeros((height, width), dtype=np.float32)


def reconstruct_frames(
    input_path: str,
    model: object,
    stats: dict,
    fps: float,
    duration: Optional[float],
    start_time: Optional[float],
    max_resolution: Optional[int],
    logger: logging.Logger,
) -> List[np.ndarray]:
    """Reconstruct video frames from events.

    The reconstruction window is ``[start, start + duration]`` relative to the
    first event, clamped to the data bounds, and is cut into ``1 / fps`` long
    slices. Each slice is loaded with ``evlib.load_events`` and passed to
    ``model.reconstruct``; the result is optionally resized and min-max
    normalised. Slices without events, or slices whose reconstruction raises,
    reuse the previous frame (or a black frame if there is none yet).

    Args:
        input_path: Path handed to ``evlib.load_events``.
        model: Object exposing ``reconstruct(events)``; the result is assumed
            to be a 2-D NumPy array (TODO confirm against evlib).
        stats: Mapping with ``start_time``, ``end_time``, ``width``, ``height``.
        fps: Output frame rate; must be positive.
        duration: Seconds to reconstruct, or ``None`` for "until end of data".
        start_time: Offset in seconds from the first event, or ``None``.
        max_resolution: Upper bound for the longer image side, or ``None``.
        logger: Logger for progress and diagnostics.

    Returns:
        One frame per time slice, or an empty list when the time range is
        invalid or shorter than a single frame.
    """
    # Calculate temporal parameters
    frame_duration = 1.0 / fps
    data_start_time = stats["start_time"]
    data_end_time = stats["end_time"]

    # Determine reconstruction window
    recon_start = data_start_time
    if start_time is not None:
        recon_start = data_start_time + start_time

    recon_end = data_end_time
    if duration is not None:
        recon_end = recon_start + duration

    # Ensure we don't exceed data bounds
    recon_start = max(recon_start, data_start_time)
    recon_end = min(recon_end, data_end_time)

    if recon_start >= recon_end:
        logger.error(f"Invalid time range: {recon_start:.3f} - {recon_end:.3f}")
        return []

    actual_duration = recon_end - recon_start
    num_frames = int(actual_duration / frame_duration)

    if num_frames == 0:
        # Nothing to reconstruct; also avoids dividing by zero in the
        # per-frame average reported at the end.
        logger.error(
            f"Time range of {actual_duration:.3f}s is shorter than one frame "
            f"({frame_duration:.3f}s at {fps} FPS)"
        )
        return []

    # Resolution limiting for performance
    original_width, original_height = stats["width"], stats["height"]
    needs_resize = bool(max_resolution) and (
        max(original_width, original_height) > max_resolution
    )
    if needs_resize:
        # Calculate scale factor to limit resolution
        scale_factor = max_resolution / max(original_width, original_height)
        target_width = int(original_width * scale_factor)
        target_height = int(original_height * scale_factor)
        logger.info(
            f"Limiting resolution: {original_width}x{original_height} → {target_width}x{target_height}"
        )
    else:
        target_width, target_height = original_width, original_height

    logger.info("Reconstruction parameters:")
    logger.info(f"  Time range: {recon_start:.3f} - {recon_end:.3f} seconds")
    logger.info(f"  Duration: {actual_duration:.3f} seconds")
    logger.info(f"  Resolution: {target_width}x{target_height}")
    logger.info(f"  FPS: {fps}")
    logger.info(f"  Frame duration: {frame_duration * 1000:.0f}ms")
    logger.info(f"  Target frames: {num_frames}")

    # Analyze event distribution to warn about sparse periods
    logger.info("Analyzing event distribution...")
    sample_windows = min(20, num_frames)
    empty_windows = 0
    for i in range(sample_windows):
        t_start_sample = recon_start + (i / sample_windows) * actual_duration
        t_end_sample = t_start_sample + frame_duration
        sample_events = evlib.load_events(
            input_path, t_start=t_start_sample, t_end=t_end_sample
        )
        if len(sample_events.collect()) == 0:
            empty_windows += 1

    if empty_windows > sample_windows * 0.3:  # >30% empty
        logger.warning(
            f"Detected sparse event data: {empty_windows}/{sample_windows} sample windows are empty"
        )
        logger.warning("This is normal for event cameras during static periods")
        logger.warning(
            "Consider using longer frame duration (lower FPS) or selecting active time segments"
        )

    # Reconstruct frames
    frames = []
    successful_frames = 0
    progress_step = max(1, num_frames // 20)

    logger.info("Starting frame reconstruction...")
    start_recon_time = time.time()

    for i in range(num_frames):
        t_start = recon_start + i * frame_duration
        t_end = t_start + frame_duration

        try:
            # Load events for this time window
            frame_events = evlib.load_events(input_path, t_start=t_start, t_end=t_end)
            events_count = len(frame_events.collect())

            if events_count == 0:
                logger.warning(
                    f"Frame {i + 1}/{num_frames}: No events, using previous frame or black"
                )
                frames.append(_fallback_frame(frames, target_height, target_width))
                continue

            # Reconstruct frame
            frame = model.reconstruct(frame_events)

            # Apply resolution limiting if specified
            if needs_resize:
                frame = cv2.resize(
                    frame,
                    (target_width, target_height),
                    interpolation=cv2.INTER_LANCZOS4,
                )

            # Enhance contrast for better visualization
            frames.append(_normalize_frame(frame))
            successful_frames += 1

            # Progress update
            if (i + 1) % progress_step == 0 or i == num_frames - 1:
                elapsed = time.time() - start_recon_time
                progress = (i + 1) / num_frames
                eta = elapsed / progress - elapsed if progress > 0 else 0

                logger.info(
                    f"Progress: {i + 1}/{num_frames} ({progress * 100:.1f}%) - "
                    f"{events_count:,} events - ETA: {eta:.1f}s"
                )

        except Exception as e:
            # Best effort: one bad slice must not abort the whole video
            logger.error(f"Error reconstructing frame {i + 1}: {e}")
            frames.append(_fallback_frame(frames, target_height, target_width))

    total_recon_time = time.time() - start_recon_time
    logger.info(
        f"Reconstruction complete: {successful_frames}/{num_frames} frames in {total_recon_time:.1f}s"
    )
    logger.info(f"Average: {total_recon_time / len(frames):.3f}s per frame")

    return frames


def save_video(
    frames: List[np.ndarray],
    output_path: str,
    fps: float,
    quality: str,
    logger: logging.Logger,
) -> bool:
    """Save frames as MP4 video.

    Args:
        frames: 2-D grayscale frames; values are expected in [0, 1] and are
            clipped to that range before conversion to 8-bit.
        output_path: Destination file; parent directories are created.
        fps: Frame rate passed to the video writer.
        quality: One of ``low``/``medium``/``high``/``lossless``. Currently
            only logged; the bitrate table is kept for future use.
        logger: Logger for progress and errors.

    Returns:
        ``True`` when the output file exists after writing, ``False`` when
        there are no frames, the writer cannot be opened, or the file is
        missing afterwards.
    """
    if not frames:
        logger.error("No frames to save")
        return False

    logger.info(f"Saving video to: {output_path}")

    # Ensure output directory exists
    output_file = Path(output_path)
    output_file.parent.mkdir(parents=True, exist_ok=True)

    # Video parameters
    height, width = frames[0].shape

    # Quality settings
    quality_settings = {
        "low": {"bitrate": 1000, "crf": 28},
        "medium": {"bitrate": 2000, "crf": 23},
        "high": {"bitrate": 5000, "crf": 18},
        "lossless": {"bitrate": 10000, "crf": 0},
    }

    # Get quality settings (for future bitrate configuration)
    _ = quality_settings.get(quality, quality_settings["medium"])

    # Create video writer
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    video_writer = cv2.VideoWriter(
        output_path, fourcc, fps, (width, height), isColor=False
    )

    # Release the writer on every exit path, including a failing write.
    try:
        if not video_writer.isOpened():
            logger.error("Failed to open video writer")
            return False

        logger.info("Video parameters:")
        logger.info(f"  Resolution: {width}x{height}")
        logger.info(f"  FPS: {fps}")
        logger.info(f"  Frames: {len(frames)}")
        logger.info(f"  Quality: {quality}")

        progress_step = max(1, len(frames) // 10)
        for i, frame in enumerate(frames):
            # Convert to 8-bit grayscale. Clip first: values outside [0, 1]
            # (or NaN) would otherwise wrap around in the uint8 cast.
            unit_frame = np.clip(np.nan_to_num(frame), 0.0, 1.0)
            frame_8bit = (unit_frame * 255).astype(np.uint8)
            video_writer.write(frame_8bit)

            if (i + 1) % progress_step == 0:
                logger.info(f"Writing: {i + 1}/{len(frames)} frames")
    finally:
        video_writer.release()

    # Verify output file
    if output_file.exists():
        file_size_mb = output_file.stat().st_size / (1024 * 1024)
        logger.info(f"Video saved successfully: {file_size_mb:.1f} MB")
        return True

    logger.error("Failed to save video file")
    return False


def main(argv: Optional[List[str]] = None):
    """Main CLI entry point.

    Parses the command line, validates it, then runs the pipeline: analyse the
    input, optionally pick the most active period, create the model,
    reconstruct frames and write the video. Exits with status 1 on any
    validation or processing failure.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description="E2VID Video Reconstruction from Event Data",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Basic reconstruction
  python scripts/e2vid.py --input data/events.txt --output video.mp4

  # High quality, 30 FPS reconstruction
  python scripts/e2vid.py --input data/80_balls.raw --output 80_balls.mp4 --fps 30 --quality high

  # Reconstruct specific time segment
  python scripts/e2vid.py --input data/events.h5 --output segment.mp4 --start 1.0 --duration 5.0

  # Fast reconstruction without pretrained weights
  python scripts/e2vid.py --input data/events.txt --output video.mp4 --no-pretrained --fps 15

  # GPU-accelerated reconstruction (auto-detects CUDA/MPS)
  python scripts/e2vid.py --input data/80_balls.raw --output 80_balls.mp4 --device auto

  # Force CPU inference
  python scripts/e2vid.py --input data/events.txt --output video.mp4 --device cpu

  # Auto-detect most active period (good for sparse data)
  python scripts/e2vid.py --input data/pedestrians.raw --output pedestrians.mp4 --auto-active
        """,
    )

    # Required arguments
    parser.add_argument(
        "--input",
        "-i",
        required=True,
        type=str,
        help="Input event data file (EVT2, H5, text formats supported)",
    )
    parser.add_argument(
        "--output", "-o", required=True, type=str, help="Output video file path (.mp4)"
    )

    # Reconstruction parameters
    parser.add_argument(
        "--fps", type=float, default=20.0, help="Output video frame rate (default: 20)"
    )
    parser.add_argument(
        "--duration",
        type=float,
        default=None,
        help="Duration to reconstruct in seconds (default: entire file)",
    )
    parser.add_argument(
        "--start",
        type=float,
        default=None,
        help="Start time offset in seconds (default: beginning of file)",
    )

    # Model parameters
    parser.add_argument(
        "--no-pretrained",
        action="store_true",
        help="Use randomly initialized weights instead of pretrained",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="auto",
        help="Device for inference: auto, cpu, cuda, mps (default: auto)",
    )
    parser.add_argument(
        "--max-resolution",
        type=int,
        default=None,
        help="Limit maximum resolution for performance (e.g., 1024)",
    )
    parser.add_argument(
        "--auto-active",
        action="store_true",
        help="Automatically detect and use most active time period",
    )

    # Output parameters
    parser.add_argument(
        "--quality",
        choices=["low", "medium", "high", "lossless"],
        default="medium",
        help="Video quality setting (default: medium)",
    )

    # General options
    parser.add_argument(
        "--verbose", "-v", action="store_true", help="Enable verbose logging"
    )

    args = parser.parse_args(argv)

    # Setup logging
    logger = setup_logging(args.verbose)

    # Validate arguments
    if not Path(args.input).exists():
        logger.error(f"Input file does not exist: {args.input}")
        sys.exit(1)

    if not args.output.endswith(".mp4"):
        logger.warning("Output file should have .mp4 extension for best compatibility")

    if args.fps <= 0 or args.fps > 120:
        logger.error("FPS must be between 0 and 120")
        sys.exit(1)

    # Reject a non-positive duration before the (potentially slow) full load;
    # it could only ever produce an empty reconstruction window.
    if args.duration is not None and args.duration <= 0:
        logger.error("Duration must be greater than 0")
        sys.exit(1)

    # A negative limit would yield negative target dimensions downstream.
    if args.max_resolution is not None and args.max_resolution < 0:
        logger.error("Max resolution must not be negative")
        sys.exit(1)

    # Start processing
    logger.info("=== E2VID Video Reconstruction ===")
    total_start_time = time.time()

    try:
        # Step 1: Load and analyze events (only the statistics are needed here)
        _, stats = load_and_analyze_events(args.input, logger)

        # Step 1b: Handle auto-active period detection
        if args.auto_active:
            if args.start is not None or args.duration is not None:
                # An explicit time range wins; say so instead of silently
                # dropping the flag.
                logger.warning(
                    "--auto-active ignored because --start/--duration was specified"
                )
            else:
                logger.info("Auto-detecting most active period...")
                active_periods = find_active_periods(args.input, stats, logger)

                if active_periods:
                    # Use the longest active period
                    longest_period = max(active_periods, key=lambda x: x[1] - x[0])
                    args.start = (
                        longest_period[0] - stats["start_time"]
                    )  # Convert to offset
                    args.duration = min(
                        longest_period[1] - longest_period[0], 30.0
                    )  # Max 30s
                    logger.info(
                        f"Using most active period: {args.start:.3f}s offset, {args.duration:.1f}s duration"
                    )
                else:
                    logger.warning("No active periods found, using default time range")

        # Step 2: Setup device and create model
        device = detect_and_setup_device(args.device, logger)
        model = create_model(not args.no_pretrained, device, logger)

        # Step 3: Reconstruct frames
        frames = reconstruct_frames(
            args.input,
            model,
            stats,
            args.fps,
            args.duration,
            args.start,
            args.max_resolution,
            logger,
        )

        if not frames:
            logger.error("No frames were reconstructed")
            sys.exit(1)

        # Step 4: Save video
        success = save_video(frames, args.output, args.fps, args.quality, logger)

        if not success:
            logger.error("Failed to save video")
            sys.exit(1)

        # Summary
        total_time = time.time() - total_start_time
        logger.info("=== Reconstruction Complete ===")
        logger.info(f"Total time: {total_time:.1f} seconds")
        logger.info(f"Output: {args.output}")

    except KeyboardInterrupt:
        logger.info("Reconstruction interrupted by user")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report, optionally with traceback, then exit
        logger.error(f"Reconstruction failed: {e}")
        if args.verbose:
            import traceback

            traceback.print_exc()
        sys.exit(1)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()