sensorlm-rs 0.1.0

//! Three-level hierarchical caption generation pipeline.
//!
//! SensorLM's key insight is that **paired (sensor, text) training data** can
//! be generated automatically from unlabelled wearable recordings, eliminating
//! the need for human annotation at scale.
//!
//! ## Caption levels
//!
//! | Level | Module | Description | Token budget |
//! |-------|--------|-------------|--------------|
//! | 1 – Statistical | [`statistical`] | Mean/max/min/std per channel | 512 |
//! | 2 – Structural  | [`structural`]  | Trends & anomaly events      | 512 |
//! | 3 – Semantic    | [`semantic`]    | Activities, sleep, mood      | 256–1024 |
//!
//! ## Combination keys
//!
//! The training pipeline selects one of eight caption variants for each batch:
//!
//! ```text
//! low_level_caption             → level 1 only
//! middle_level_caption          → level 2 only
//! high_level_summary_caption    → level 3 only (short)
//! high_level_all_caption        → level 3 (full)
//! middle_low_level_caption      → levels 2 + 1
//! high_low_level_caption        → levels 3 + 1
//! high_middle_level_caption     → levels 3 + 2
//! high_middle_low_level_caption → levels 3 + 2 + 1
//! ```

pub mod semantic;
pub mod statistical;
pub mod structural;
pub mod templates;

use ndarray::{Array2, ArrayView2};
use rand::Rng;

use crate::config::CaptionKey;
use crate::error::Result;
use semantic::{ActivityEvent, MoodEvent, SleepEvent};

// ---------------------------------------------------------------------------
// High-level entry point
// ---------------------------------------------------------------------------

/// All contextual information needed to produce a full multi-level caption.
///
/// Both [`CaptionContext::new`] and [`Default::default`] produce empty event
/// lists together with the documented limits below, so struct-update syntax
/// such as `CaptionContext { activities, ..Default::default() }` keeps
/// sensible limits instead of silently zeroing them.
#[derive(Debug)]
pub struct CaptionContext {
    /// Labelled activity events.
    pub activities: Vec<ActivityEvent>,
    /// Sleep intervals.
    pub sleep: Vec<SleepEvent>,
    /// Self-reported mood entries.
    pub moods: Vec<MoodEvent>,
    /// Maximum number of activity events to include (default: 8).
    pub top_k_activity: usize,
    /// Maximum number of sleep intervals to include (default: 2).
    pub top_k_sleep: usize,
    /// Minimum activity duration in minutes (default: 20).
    pub min_activity_duration: usize,
    /// Maximum insights per structural category (default: 7).
    pub max_structural_per_category: usize,
}

impl Default for CaptionContext {
    /// Empty event lists plus the documented default limits.
    ///
    /// Implemented by hand rather than derived: a derived `Default` would set
    /// every `usize` limit to `0`, contradicting the per-field documentation
    /// and disagreeing with [`CaptionContext::new`].
    fn default() -> Self {
        Self {
            activities: Vec::new(),
            sleep: Vec::new(),
            moods: Vec::new(),
            top_k_activity: 8,
            top_k_sleep: 2,
            min_activity_duration: 20,
            max_structural_per_category: 7,
        }
    }
}

impl CaptionContext {
    /// Build a context with sensible defaults.
    ///
    /// Equivalent to [`Default::default`]: no activities, sleep intervals or
    /// moods, and the default limit documented on each field.
    pub fn new() -> Self {
        Self::default()
    }
}

/// Assemble the caption text selected by a [`CaptionKey`].
///
/// Only the caption levels that `key` asks for are generated. The non-empty
/// results are then joined with a single `\n`, ordered from most abstract to
/// most granular: semantic, then structural, then statistical.
///
/// # Arguments
///
/// * `x_norm`  – Z-score normalised sensor tensor, shape `(1440, C)`.
/// * `mask`    – Optional missingness mask (1 = imputed), same shape as
///   `x_norm`. It is forwarded to the statistical generator only.
/// * `ctx`     – Semantic context (activities, sleep, moods) plus the limits
///   handed to the structural and semantic generators.
/// * `key`     – Which caption variant to produce.
/// * `rng`     – Random number generator for template selection.
///
/// # Errors
///
/// Propagates any error returned by the statistical or structural generator.
///
/// NOTE(review): `CaptionKey::HighLevelSummary` and `CaptionKey::HighLevelAll`
/// follow the same path in this function (both request the semantic level
/// with identical arguments) — confirm whether the short/full distinction is
/// meant to be applied elsewhere.
pub fn generate_caption<R: Rng>(
    x_norm: &ArrayView2<f64>,
    mask: Option<&Array2<u8>>,
    ctx: &CaptionContext,
    key: CaptionKey,
    rng: &mut R,
) -> Result<String> {
    // Work out which of the three levels this key is composed of.
    let wants_statistical = matches!(
        key,
        CaptionKey::LowLevel
            | CaptionKey::MiddleLow
            | CaptionKey::HighLow
            | CaptionKey::HighMiddleLow
    );
    let wants_structural = matches!(
        key,
        CaptionKey::MiddleLevel
            | CaptionKey::MiddleLow
            | CaptionKey::HighMiddle
            | CaptionKey::HighMiddleLow
    );
    let wants_semantic = matches!(
        key,
        CaptionKey::HighLevelSummary
            | CaptionKey::HighLevelAll
            | CaptionKey::HighLow
            | CaptionKey::HighMiddle
            | CaptionKey::HighMiddleLow
    );

    // Levels are generated in a fixed order (statistical, structural,
    // semantic) because every generator draws from the same `rng`; a level
    // that is not requested is left as an empty string and does no work.
    let statistical_text = wants_statistical
        .then(|| statistical::generate_statistical_caption(x_norm, mask, &mut *rng))
        .transpose()?
        .unwrap_or_default();

    let structural_text = wants_structural
        .then(|| {
            structural::generate_structural_caption(
                x_norm,
                ctx.max_structural_per_category,
                &mut *rng,
            )
        })
        .transpose()?
        .unwrap_or_default();

    let semantic_text = wants_semantic
        .then(|| {
            semantic::generate_semantic_caption(
                &ctx.activities,
                &ctx.sleep,
                &ctx.moods,
                ctx.top_k_activity,
                ctx.top_k_sleep,
                ctx.min_activity_duration,
                &mut *rng,
            )
        })
        .unwrap_or_default();

    // Stitch the sections together, most abstract first, skipping any level
    // that was not requested or that came back empty.
    let mut caption = String::new();
    for section in [&semantic_text, &structural_text, &statistical_text] {
        if section.is_empty() {
            continue;
        }
        if !caption.is_empty() {
            caption.push('\n');
        }
        caption.push_str(section);
    }

    Ok(caption)
}