soundview 0.3.0

Live analyzer/voiceprint visualization of system audio
Documentation
use anyhow::{anyhow, Result};
use clap::{builder::PossibleValue, Arg};
use git_testament::{git_testament, render_testament};
use lazy_static::lazy_static;
use regex::Regex;
use tracing::{debug, info};

use soundview::{events, fourier, logging, recorder, webgpu::Orientation};

use std::str::FromStr;
use std::thread;

// Declares `TESTAMENT` via the `git_testament` crate's macro.
// NOTE(review): presumably this captures the git state of the build tree at compile time —
// confirm against the git_testament documentation.
git_testament!(TESTAMENT);

lazy_static! {
    // Rendered form of `TESTAMENT`, computed once on first access. Used as the `--version`
    // string for the commandline parser and in the startup log line.
    static ref TESTAMENT_VERSION: String = render_testament!(TESTAMENT);
}

/// Parsed commandline options, as produced by `get_args`.
#[derive(Debug)]
struct Args {
    /// Number of frequency buckets to calculate (`--buckets`), restricted to powers of two.
    buckets: usize,
    /// Percentage of frequency buckets to discard (`--discard`), from 0 to 99.
    discard: i32,
    /// Voiceprint scroll speed adjustment in pixels/sample (`--scroll`).
    scroll: f32,

    /// Optional regular expression selecting the initial input device (`--device`).
    device: Option<Regex>,
    /// Requested sample frequency in Hz (`--freq`). Values `<= 0` mean "use the device default".
    frequency: i32,

    /// Initial display orientation (`--orientation`).
    orientation: Orientation,
    /// Whether to start in fullscreen mode (`--fullscreen`).
    fullscreen: bool,
}

/// Parses commandline arguments and returns the parsed result.
fn get_args() -> Result<Args> {
    let matches = clap::Command::new("soundview")
        .author("Nick Parker, nick@nickbp.com")
        .version(TESTAMENT_VERSION.as_str())
        .about("Audio Voiceprint and Analyzer - don't miss a beat")
        .args(&[
            // Audio config
            Arg::new("buckets")
                .display_order(1)
                .long("buckets")
                .long_help("The number of frequency buckets to calculate. Higher values will increase granularity and graphics load. Must be a power of two, with a theoretical max of 32768.")
                .default_value("2048")
                .value_parser([
                    // Keep things simple and just enumerate the list of legal base2 values.
                    PossibleValue::new("32"),
                    PossibleValue::new("64"),
                    PossibleValue::new("128"),
                    PossibleValue::new("256"),
                    PossibleValue::new("512"),
                    PossibleValue::new("1024"),
                    PossibleValue::new("2048"),
                    PossibleValue::new("4096"),
                    PossibleValue::new("8192"),
                    PossibleValue::new("16384"),
                    PossibleValue::new("32768"),
                ]),
            Arg::new("discard")
                .display_order(2)
                .long("discard")
                .long_help("The percentage of frequency buckets to discard. Higher values will drop more high-frequency audio from the display and reduce graphics load. Must be a value from 0 to 99.")
                .default_value("20")
                .value_parser(clap::value_parser!(i32).range(0..100)),
            Arg::new("scroll")
                .display_order(3)
                .long("scroll")
                .long_help("A separate adjustment of the voiceprint scroll speed, in pixels/sample. Higher values will increase voiceprint speed.")
                .default_value("2")
                // Ideally we'd avoid values under 0.1 or so since that results in texture size errors,
                // but that seems to be tricky in clap. Just allow any value for now...
                .value_parser(clap::value_parser!(f32)),

            // Device config
            Arg::new("device")
                .display_order(4)
                .long("device")
                .long_help("Name of the initial input device to select, as a regular expression. After startup, use leftarrow/rightarrow keys to switch between input devices, including ones that don't match this filter.")
                .value_parser(clap::builder::NonEmptyStringValueParser::new()),
            Arg::new("freq")
                .display_order(5)
                .long("freq")
                .long_help("Sample frequency in Hz for retrieving audio from all input devices. Higher values will result in faster scrolling at the same graphics load. May be <=0 to use suboptimal device-specific default sample rates.")
                .default_value("96000")
                .allow_negative_numbers(true)
                .value_parser(clap::value_parser!(i32)),

            // Display config
            Arg::new("orientation")
                .display_order(6)
                .long("orientation")
                .long_help("Initial display orientation. After startup, use the space bar or R key ('Rotate') to toggle the orientation.")
                .default_value("horiz")
                .value_parser([
                    PossibleValue::new("h"),
                    PossibleValue::new("horiz"),
                    PossibleValue::new("v"),
                    PossibleValue::new("vert"),
                ]),
            Arg::new("fullscreen")
                .display_order(7)
                .long("fullscreen")
                .long_help("Enables starting in fullscreen mode. After startup, use the F11 or F keys to toggle fullscreen mode.")
                .action(clap::ArgAction::SetTrue)
        ])
        .override_usage("[LOG_LEVEL=debug|info|warn] soundview [OPTIONS]")
        .get_matches();

    Ok(Args {
        buckets: usize::from_str(
            matches
                .get_one::<String>("buckets")
                .expect("missing default buckets")
                .as_str(),
        )
        .expect("invalid buckets uncaught by custom validator"),
        discard: *matches
            .get_one::<i32>("discard")
            .expect("missing default discard"),
        scroll: *matches
            .get_one::<f32>("scroll")
            .expect("missing default scroll"),
        device: matches.get_one::<String>("device").map(|pattern| {
            Regex::new(pattern).expect(format!("Invalid device regex: '{:?}'", pattern).as_str())
        }),
        // we check for negative values below (interpreted as "use device default")
        frequency: *matches
            .get_one::<i32>("freq")
            .expect("missing default freq"),
        orientation: Orientation::from_str(
            matches
                .get_one::<String>("orientation")
                .expect("missing default orientation"),
        )
        .expect("invalid orientation"),
        fullscreen: *matches
            .get_one::<bool>("fullscreen")
            .expect("missing fullscreen bool"),
    })
}

/// Main entry point. The UI and event handler runs on this thread, while separate threads
/// are launched for sampling device audio, and for running a fourier transform on that audio.
///
/// # Errors
///
/// Returns an error if argument parsing fails, if SDL or its audio subsystem cannot be
/// initialized, if the requested bucket count needs a sample buffer larger than a `u16` can
/// describe, if the fourier thread cannot be spawned, if no input device can be started,
/// or if the event loop itself fails.
fn main() -> Result<()> {
    logging::init_logging();
    let args = get_args()?;
    debug!("{:?}", args);

    let sdl_version = sdl2::version::version();
    info!(
        "Soundview version {}, SDL {}.{}.{}",
        TESTAMENT_VERSION.as_str(),
        sdl_version.major,
        sdl_version.minor,
        sdl_version.patch
    );

    let (send_audio, recv_audio) = crossbeam_channel::bounded::<Vec<f32>>(100);
    let (send_processed, recv_processed) = crossbeam_channel::bounded::<Vec<f32>>(100);

    // Non-positive frequencies mean "let the device pick its default sample rate".
    let frequency = match args.frequency {
        f if f <= 0 => None,
        f => Some(f),
    };

    // The recorder's sample count MUST be a power of 2, and must be 2x the size of the
    // fourier output. It is a u16 (max 65535), so do the doubling in usize and convert
    // with a check: `2 * buckets as u16` would overflow for buckets=32768 (panic in debug
    // builds, silent wrap to 0 in release builds).
    let buckets = args.buckets;
    let sample_count = u16::try_from(2 * buckets).map_err(|_| {
        anyhow!(
            "--buckets {} requires an audio buffer of {} samples, which exceeds the maximum of {}",
            buckets,
            2 * buckets,
            u16::MAX
        )
    })?;

    let sdl_context = sdl2::init().map_err(|e| anyhow!(e))?;
    let mut rec = recorder::Recorder::new(
        recorder::init_audio(&sdl_context)?,
        frequency,
        Some(sample_count),
        send_audio,
    );

    let fourier_thread = thread::Builder::new()
        .name("fourier".to_string())
        .spawn(move || {
            fourier::process_audio_loop(buckets, frequency, recv_audio, send_processed)
        })?;

    // Recording internally runs on a separate thread
    rec.autoselect_start(args.device)?;

    // Only the retained (non-discarded) share of the buckets is displayed.
    let retain_ratio = 1.0 - (args.discard as f32 / 100.);
    let texture_width = (args.buckets as f32 * retain_ratio) as usize;
    events::process_event_loop(
        &sdl_context,
        recv_processed,
        args.orientation,
        args.fullscreen,
        args.scroll,
        texture_width,
        rec,
        fourier_thread,
    )
}