llm_client 0.0.7

llm_client: The easiest Rust interface for local LLMs
Documentation
mod hierarchical_tagger;
mod tag;
mod tag_collection;
mod tag_describer;

use super::*;
pub use hierarchical_tagger::{Critera, HierarchicalEntityTagger};
pub use tag::Tag;
pub use tag_collection::TagCollection;

#[cfg(test)]
mod test {
    use crate::LlmClient;
    use llm_interface::requests::CompletionRequest;
    use llm_models::GgufPresetTrait;
    use workflows::classify::subject_of_text::ClassifySubjectOfText;

    use super::*;
    use crate::*;
    const INSTRUCTIONS: &str = "Apply classfication labels to the source of a microbial sample. The source is a place or a thing that the microbe was collected from. Our goal is to apply labels using specifically mentioned details from the input text. A category should apply to details specified about what or where the collection source. Classify the source of the microbial sample.";

    const ENTITY_DEFINITION: &str = "The sample microbe was collected from";

    #[tokio::test]
    #[ignore]
    pub async fn test_one() -> crate::Result<()> {
        // let llm_client: LlmClient = LlmClient::llama_cpp().llama3_1_8b_instruct().init().await?;
        let llm_client: LlmClient = LlmClient::llama_cpp()
            .mistral_nemo_instruct2407()
            .init()
            .await?;

        let mut tag_collection = TagCollection::default()
            .from_text_file_path("/workspaces/test/bacdive_hierarchy.txt")
            .tag_path_seperator(":")
            .load()
            .unwrap();

        tag_collection
            .populate_descriptions(&llm_client, INSTRUCTIONS)
            .await?;

        let critera = Critera {
            entity_definition: ENTITY_DEFINITION.to_owned(),
            instructions: INSTRUCTIONS.to_owned(),
        };

        let mut req = HierarchicalEntityTagger::new(
            &llm_client,
            "Gryllus bimaculatus",
            "Edible insect Gryllus bimaculatus (Pet Feed Store)",
            &critera,
            tag_collection.get_root_tag()?,
        );
        req.run().await?;
        println!("{}", req);

        Ok(())
    }

    #[tokio::test]
    #[ignore]
    pub async fn test_full_workflow() -> crate::Result<()> {
        let llm_client: LlmClient = LlmClient::llama_cpp()
            .llama3_1_70b_nemotron_instruct()
            .init()
            .await?;

        let input_text = "Edible insect Gryllus bimaculatus (Pet Feed Store)";
        let mut tag_collection = TagCollection::default()
            .from_text_file_path("/workspaces/test/bacdive_hierarchy.txt")
            .tag_path_seperator(":")
            .load()
            .unwrap();

        tag_collection
            .populate_descriptions(&llm_client, INSTRUCTIONS)
            .await?;

        let critera = Critera {
            entity_definition: ENTITY_DEFINITION.to_owned(),
            instructions: INSTRUCTIONS.to_owned(),
        };

        let entity = ClassifySubjectOfText::new(
            CompletionRequest::new(llm_client.backend.clone()),
            input_text,
        )
        .run()
        .await?
        .subject
        .ok_or_else(|| anyhow::anyhow!("Entity not classified from the text."))?;

        let mut req = HierarchicalEntityTagger::new(
            &llm_client,
            &entity,
            input_text,
            &critera,
            tag_collection.get_root_tag()?,
        );
        req.run().await?;

        println!("{}", req);

        Ok(())
    }

    const CASES: &[(&str, &str)] = &[
        ("Ciliate: Metopus sp. strain SALT15A", "ciliate"),
        ("Coastal soil sample", "soil"),
        (
            "Edible insect Gryllus bimaculatus (Pet Feed Store)",
            "insect",
        ),
        ("Public spring water", "water"),
        ("River snow from South Saskatchewan River", "snow"),
        ("A green turtle on a log in a mountain lake.", "turtle"),
        (
            "Green turtle on log\nSunlight warms her emerald shell\nStillness all around",
            "turtle",
        ),
    ];

    #[tokio::test]
    #[ignore]
    pub async fn test_cases() -> crate::Result<()> {
        let llm_client: LlmClient = LlmClient::llama_cpp().llama3_1_8b_instruct().init().await?;

        let mut tag_collection = TagCollection::default()
            .from_text_file_path("/workspaces/test/bacdive_hierarchy.txt")
            .tag_path_seperator(":")
            .load()
            .unwrap();

        tag_collection
            .populate_descriptions(&llm_client, INSTRUCTIONS)
            .await?;

        let critera = Critera {
            entity_definition: ENTITY_DEFINITION.to_owned(),
            instructions: INSTRUCTIONS.to_owned(),
        };
        let mut results = vec![];
        for (input_text, _) in CASES {
            let entity = ClassifySubjectOfText::new(
                CompletionRequest::new(llm_client.backend.clone()),
                input_text,
            )
            .run()
            .await?
            .subject
            .ok_or_else(|| anyhow::anyhow!("Entity not classified from the text."))?;

            let mut req = HierarchicalEntityTagger::new(
                &llm_client,
                &entity,
                input_text,
                &critera,
                tag_collection.get_root_tag()?,
            );
            req.run().await?;
            results.push(req);
        }
        for res in results {
            println!("{}", res);
        }

        Ok(())
    }
}