skilltest_core/
lib.rs

1//! `skilltest-core` — the library that powers the `skilltest` CLI and, through
2//! it, the language SDKs and test-framework packages.
3//!
4//! The flow is: load a [`Config`] and one or more [`TestCase`]s, build a
5//! [`Provider`] (the boundary to `oneharness` / a model), and hand both to a
6//! [`Runner`], which drives each case into a conversation, scores the transcript
7//! with natural-language [`Eval`]s, and returns a [`Report`]. The report's JSON
8//! form is the stable contract the language SDKs consume.
9//!
10//! Everything that crosses a trust boundary — config files, test-case YAML,
11//! skill frontmatter, and every provider response — is parsed into a typed model
12//! before use.
13
14#![forbid(unsafe_code)]
15
16pub mod config;
17pub mod conversation;
18pub mod error;
19pub mod eval;
20pub mod exit;
21pub mod provider;
22pub mod report;
23pub mod runner;
24pub mod skill;
25pub mod testcase;
26
27pub use config::{CommandConfig, Config, OneharnessConfig, Overrides, ProviderConfig};
28pub use conversation::{Message, Role, Transcript};
29pub use error::{Error, Result};
30pub use eval::{Comparator, Eval, EvalDetail, EvalOutcome, JudgeValue};
31pub use exit::ExitCode;
32pub use provider::{
33    supports_resume, AssistantTurn, CommandProvider, JudgeKind, JudgeQuery, JudgeVerdict,
34    OneharnessProvider, Provider, SkillRef, Usage, UserTurn,
35};
36pub use report::{CaseRun, Report, Summary, ValidationFinding, ValidationReport};
37pub use runner::Runner;
38pub use skill::{load_skill, validate_path, validate_skill, Finding, SkillDefinition};
39pub use testcase::{discover_cases, SimulatedUser, TestCase};
skilltest_core/lib.rs

skilltest_core/
lib.rs