Skip to main content

skilltest_core/
lib.rs

1//! `skilltest-core` — the library that powers the `skilltest` CLI and, through
2//! it, the language SDKs and test-framework packages.
3//!
4//! The flow is: load a [`Config`] and one or more [`TestCase`]s, build a
5//! [`Provider`] (the boundary to `oneharness` / a model), and hand both to a
6//! [`Runner`], which drives each case into a conversation, scores the transcript
7//! with natural-language [`Eval`]s, and returns a [`Report`]. The report's JSON
8//! form is the stable contract the language SDKs consume.
9//!
10//! Everything that crosses a trust boundary — config files, test-case YAML,
11//! skill frontmatter, and every provider response — is parsed into a typed model
12//! before use.
13
// Crate-wide lint level: any use of `unsafe` in this crate is a compile error.
// `forbid` is stricter than `deny`: a nested `#[allow(unsafe_code)]` cannot
// relax it.
14#![forbid(unsafe_code)]
15
// Public modules of the crate. The notes below list only the names that the
// `pub use` lines in this file lift from each module.

// `Config`, `Overrides`, and the provider/judge configuration types
// (`ProviderConfig`, `OneharnessConfig`, `CommandConfig`, `JudgeConfig`,
// `ApiJudgeConfig`, `ApiVendor`).
16pub mod config;
// `Message`, `Role`, `Transcript`.
17pub mod conversation;
// `Error` and the crate's `Result` alias/type.
18pub mod error;
// `Eval`, `EvalDetail`, `EvalOutcome`, `Comparator`, `JudgeValue`.
19pub mod eval;
// `ExitCode`.
20pub mod exit;
// `Provider` plus `OneharnessProvider`, `CommandProvider`, `ApiJudgeProvider`,
// `SplitProvider`, the turn/judge/usage types, and `supports_resume`.
21pub mod provider;
// `Report`, `Summary`, `CaseRun`, `ValidationReport`, `ValidationFinding`.
22pub mod report;
// `Runner`.
23pub mod runner;
// `load_skill`, `validate_path`, `validate_skill`, `Finding`, `SkillDefinition`.
24pub mod skill;
// `discover_cases`, `SimulatedUser`, `TestCase`.
25pub mod testcase;
26
// Crate-root re-exports: each `pub use` below lifts selected names out of the
// module declared above so callers can import them directly from the crate
// root instead of spelling out the module path.
27pub use config::{
28    ApiJudgeConfig, ApiVendor, CommandConfig, Config, JudgeConfig, OneharnessConfig, Overrides,
29    ProviderConfig,
30};
31pub use conversation::{Message, Role, Transcript};
32pub use error::{Error, Result};
33pub use eval::{Comparator, Eval, EvalDetail, EvalOutcome, JudgeValue};
34pub use exit::ExitCode;
35pub use provider::{
36    supports_resume, ApiJudgeProvider, AssistantTurn, CommandProvider, JudgeKind, JudgeQuery,
37    JudgeVerdict, OneharnessProvider, Provider, SkillRef, SplitProvider, Usage, UserTurn,
38};
39pub use report::{CaseRun, Report, Summary, ValidationFinding, ValidationReport};
40pub use runner::Runner;
41pub use skill::{load_skill, validate_path, validate_skill, Finding, SkillDefinition};
42pub use testcase::{discover_cases, SimulatedUser, TestCase};