iocaine 2.2.0

The deadliest poison known to AI
Documentation
// SPDX-FileCopyrightText: 2025 Gergely Nagy
// SPDX-FileContributor: Gergely Nagy
//
// SPDX-License-Identifier: MIT

use anyhow::Result;
use minijinja::{
    Environment, Error, ErrorKind, State,
    value::{Kwargs, Value, ViaDeserialize},
};
use rand::{Rng, seq::IndexedRandom};
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;

use crate::{
    bullshit::{
        GargleBargle, GobbledyGook, WurstsalatGeneratorPro, qr_journey,
        wurstsalat_generator_pro::join_words,
    },
    config::Config,
};

/// Page generator that pairs a configuration with a template engine.
///
/// Instances are created with [`AssembledStatisticalSequences::new`], which
/// stores a clone of the configuration and builds the engine from it.
#[derive(Debug)]
pub struct AssembledStatisticalSequences {
    /// Clone of the configuration this generator was created from.
    pub config: Config,
    /// Template environment with the custom template functions and filters
    /// registered.
    pub engine: Environment<'static>,
}

/// Render context handed to a template when a page is generated.
///
/// The struct is serialized, so the field names below are the variable names
/// visible to templates.
#[derive(Debug, Serialize)]
struct PageContext<'a> {
    /// Initial content type; `generate` sets this to `text/html` and reads the
    /// value back from the template state after rendering.
    pub content_type: &'a str,
    /// Seed string derived from the host, path, configured initial seed and
    /// request parameters.
    pub static_seed: String,
    /// Host the request was made to.
    pub request_host: &'a str,
    /// Path of the request.
    pub request_uri: &'a str,
    /// Request parameters, keyed by name.
    pub params: BTreeMap<String, String>,
}

impl AssembledStatisticalSequences {
    /// Registers every custom template filter and function on `engine`.
    ///
    /// `chain` is handed to the `markov_gen` function and `words` to the
    /// `href_gen` function; both are moved into the respective closures.
    fn add_template_helpers(
        engine: &mut Environment<'static>,
        chain: WurstsalatGeneratorPro,
        words: GargleBargle,
    ) {
        // Filters.
        Self::add_template_filter_sanitize_filename(engine);
        Self::add_template_filter_matches(engine);

        // Functions that carry no captured data.
        Self::add_template_function_parse_file(engine);
        Self::add_template_function_qr(engine);
        Self::add_template_function_regex_gen(engine);
        Self::add_template_function_rand(engine);

        // Functions that take ownership of the loaded training data.
        Self::add_template_function_href_gen(engine, words);
        Self::add_template_function_markov_gen(engine, chain);
    }

    /// Registers the `rand` template function.
    ///
    /// The function builds a [`GobbledyGook`] from the template state and the
    /// keyword arguments, then returns a number drawn from the inclusive range
    /// `min..=max` exposed by it.
    fn add_template_function_rand(engine: &mut Environment<'static>) {
        engine.add_function(
            "rand",
            |state: &State, options: Kwargs| -> Result<u32, Error> {
                // NOTE(review): "default" is presumably the seed group used by
                // GobbledyGook — confirm against its implementation.
                GobbledyGook::from(state, &options, "default")
                    .map(|mut rng| rng.generator.random_range(rng.min..=rng.max))
            },
        );
    }

    fn add_template_function_regex_gen(engine: &mut Environment<'static>) {
        engine.add_function(
            "regex_gen",
            |state: &State, pattern: &str, options: Kwargs| -> Result<String, Error> {
                let mut rng = GobbledyGook::from(state, &options, "regex")?;
                let generator = rand_regex::Regex::compile(pattern, options.get("max")?)
                    .map_err(|e| Error::new(ErrorKind::BadSerialization, e.to_string()))?;

                Ok(rng.generator.sample(&generator))
            },
        );
    }

    /// Registers the `markov_gen` template function, which owns `chain`.
    ///
    /// The number of words comes from the optional `words` keyword argument;
    /// when it is absent, a count is drawn from the inclusive `min..=max` range
    /// of the [`GobbledyGook`] built for the call.
    fn add_template_function_markov_gen(
        engine: &mut Environment<'static>,
        chain: WurstsalatGeneratorPro,
    ) {
        engine.add_function(
            "markov_gen",
            move |state: &State, options: Kwargs| -> Result<String, Error> {
                let mut rng = GobbledyGook::from(state, &options, "markov")?;

                let word_count = match options.get::<Option<usize>>("words")? {
                    Some(explicit) => explicit,
                    None => rng.generator.random_range(rng.min..=rng.max) as usize,
                };

                // The generator is moved into the chain; `rng` is not used afterwards.
                let generated = chain.generate(rng.generator).take(word_count);
                Ok(join_words(generated))
            },
        );
    }

    fn add_template_function_href_gen(engine: &mut Environment<'static>, words: GargleBargle) {
        engine.add_function(
            "href_gen",
            move |state: &State, options: Kwargs| -> Result<String, Error> {
                let mut rng = GobbledyGook::from(state, &options, "href")?;
                let count = options
                    .get::<Option<usize>>("words")?
                    .unwrap_or_else(|| rng.generator.random_range(rng.min..=rng.max) as usize);

                let result = (1..=count)
                    .map(|_| words.0.choose(&mut rng.generator).unwrap().as_str())
                    .collect::<Vec<_>>()
                    .join("-");
                Ok(result)
            },
        );
    }

    fn add_template_function_qr(engine: &mut Environment<'static>) {
        engine.add_function(
            "qr",
            |state: &State, content: String, options: Kwargs| -> Result<String, Error> {
                qr_journey::generate(state, &content, &options)
            },
        );
    }

    fn add_template_function_parse_file(engine: &mut Environment<'static>) {
        engine.add_function(
            "parse_file",
            |file: String, options: Kwargs| -> Result<Value, Error> {
                #[derive(Debug, Deserialize)]
                enum ParseFileFormat {
                    #[serde(rename = "json")]
                    Json,
                    #[serde(rename = "yaml")]
                    Yaml,
                }

                let format: ParseFileFormat = options
                    .get::<ViaDeserialize<ParseFileFormat>>("format")
                    .unwrap_or(ViaDeserialize(ParseFileFormat::Json))
                    .0;
                let data = std::fs::read_to_string(&file)
                    .map_err(|err| Error::new(ErrorKind::CannotDeserialize, err.to_string()))?;

                let data: BTreeMap<Value, Value> = match format {
                    ParseFileFormat::Json => serde_json::from_str(&data)
                        .map_err(|err| Error::new(ErrorKind::CannotDeserialize, err.to_string()))?,
                    ParseFileFormat::Yaml => serde_yml::from_str(&data)
                        .map_err(|err| Error::new(ErrorKind::CannotDeserialize, err.to_string()))?,
                };

                Ok(Value::from_serialize(&data))
            },
        );
    }

    fn add_template_filter_matches(engine: &mut Environment<'static>) {
        engine.add_filter(
            "matches",
            |haystack: String, needle: String| -> Result<bool, Error> {
                let re = Regex::new(&needle)
                    .map_err(|e| Error::new(ErrorKind::BadSerialization, e.to_string()))?;
                Ok(re.is_match(&haystack))
            },
        );
    }

    /// Registers the `sanitize_filename` template filter.
    ///
    /// The filter runs the value through `sanitize_filename`, using the
    /// optional argument as the replacement string and an empty string when it
    /// is not given. All other sanitizer options keep their defaults.
    fn add_template_filter_sanitize_filename(engine: &mut Environment<'static>) {
        engine.add_filter(
            "sanitize_filename",
            |filename: String, replacement: Option<&str>| -> String {
                let substitute = match replacement {
                    Some(text) => text,
                    None => "",
                };
                let sanitizer_options = sanitize_filename::Options {
                    replacement: substitute,
                    ..Default::default()
                };

                sanitize_filename::sanitize_with_options(filename, sanitizer_options)
            },
        );
    }

    /// Builds the template environment described by `config`.
    ///
    /// Templates come from exactly one place, checked in this order:
    ///
    /// 1. `config.templates.directory`, loaded on demand through a path loader;
    /// 2. `config.templates.main`, registered as `main.jinja`;
    /// 3. the templates embedded at build time.
    ///
    /// The custom template functions and filters are registered afterwards,
    /// taking ownership of `chain` and `words`.
    ///
    /// # Panics
    ///
    /// Panics when the inline `config.templates.main` template cannot be added
    /// to the environment.
    fn make_engine(
        config: &Config,
        chain: WurstsalatGeneratorPro,
        words: GargleBargle,
    ) -> Environment<'static> {
        let mut engine = Environment::new();

        if let Some(dir) = &config.templates.directory {
            engine.set_loader(minijinja::path_loader(dir));
        } else if let Some(main) = &config.templates.main {
            // The template text comes from user configuration, so say which
            // input was rejected instead of panicking with a bare unwrap.
            engine
                .add_template_owned("main.jinja", main.clone())
                .expect("failed to add the configured inline main template as main.jinja");
        } else {
            minijinja_embed::load_templates!(&mut engine);
        }
        Self::add_template_helpers(&mut engine, chain, words);

        engine
    }

    /// Creates a generator from `config`.
    ///
    /// Loads the markov training data from `config.sources.markov` and the
    /// word list from `config.sources.words`, then builds the template engine.
    /// The configuration is cloned into the returned value.
    ///
    /// # Panics
    ///
    /// Panics when either data source cannot be loaded, or when the template
    /// engine cannot be built.
    pub fn new(config: &Config) -> Self {
        tracing::info!("Loading training data");
        // Both inputs are user-configured; name the one that failed so the
        // panic message is actionable.
        let chain = WurstsalatGeneratorPro::learn_from_files(&config.sources.markov)
            .expect("failed to load the markov training data from the configured sources");
        let words = GargleBargle::load_words(&config.sources.words)
            .expect("failed to load the word list from the configured source");
        tracing::info!("Training data loaded");

        Self {
            config: config.clone(),
            engine: Self::make_engine(config, chain, words),
        }
    }

    /// Renders `template` for a request and returns `(content_type, body)`.
    ///
    /// The render context carries the request host, path and parameters along
    /// with a static seed string of the form
    /// `{host}/{path}#{initial_seed}{params}`, where `params` is every
    /// `key=value` pair joined with `-`.
    ///
    /// The content type starts out as `text/html`; after rendering, whatever
    /// `content_type` resolves to in the template state is returned.
    ///
    /// # Errors
    ///
    /// Returns an error when the template cannot be found or fails to render.
    pub fn generate(
        &self,
        host: &str,
        path: &str,
        params: &BTreeMap<String, String>,
        template: &str,
    ) -> Result<(String, String)> {
        let mut pairs = Vec::with_capacity(params.len());
        for (key, value) in params {
            pairs.push(format!("{key}={value}"));
        }
        let static_seed = format!(
            "{host}/{path}#{}{}",
            self.config.generator.initial_seed,
            pairs.join("-"),
        );

        let page = self.engine.get_template(template)?;
        let (body, state) = page.render_and_return_state(PageContext {
            content_type: "text/html",
            static_seed,
            request_host: host,
            request_uri: path,
            params: params.clone(),
        })?;

        let content_type = match state.lookup("content_type") {
            Some(value) => value.to_string(),
            None => "text/html".to_string(),
        };

        Ok((content_type, body))
    }
}