use std::ffi::OsStr;
use std::fs;
use std::path::{Path, PathBuf};
use gukhanmun::{
Builder, ContextWindow, MapDictionary, MatchMark, Preset, Recovery, RenderMode, RubyBase,
};
use serde::Deserialize;
/// Document format of a fixture, determined by the input file's extension.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FixtureFormat {
    /// Files ending in `.html`.
    Html,
    /// Files ending in `.md`.
    Markdown,
    /// Files ending in `.txt`.
    Text,
}

impl FixtureFormat {
    /// Returns the format whose extension is exactly `ext` (case-sensitive,
    /// without the leading dot), or `None` for any other extension.
    fn from_ext(ext: &str) -> Option<Self> {
        // Search through `ext_str` so the two mappings cannot drift apart.
        [Self::Html, Self::Markdown, Self::Text]
            .iter()
            .copied()
            .find(|format| format.ext_str() == ext)
    }

    /// Returns the file extension (without the leading dot) for this format.
    fn ext_str(self) -> &'static str {
        match self {
            Self::Html => "html",
            Self::Markdown => "md",
            Self::Text => "txt",
        }
    }
}
/// One discovered fixture: an input file together with the paths of its
/// companion files.
#[derive(Debug)]
pub struct Fixture {
    /// Test name of the form `<category>::<stem>`, with every `-` in the stem
    /// replaced by `_`.
    pub name: String,
    /// Format derived from the input file's extension.
    pub format: FixtureFormat,
    /// Path of the `<stem>.input.<ext>` file.
    pub input_path: PathBuf,
    /// Path of the sibling `<stem>.expected.<ext>` file. Discovery only
    /// computes this path; it does not check that the file exists.
    pub expected_path: PathBuf,
    /// Path of the sibling `<stem>.toml` sidecar, when one was found.
    pub sidecar_path: Option<PathBuf>,
}
/// Optional per-fixture configuration, deserialized from `<stem>.toml`.
///
/// Every key is optional; a missing key takes the value it has in
/// `Sidecar::default()`. Unknown keys are rejected.
#[derive(Debug, Default, Deserialize)]
#[serde(default, deny_unknown_fields)]
pub struct Sidecar {
    /// Free-form text; `run_fixture` prefixes it (trimmed) to failure messages.
    pub description: Option<String>,
    /// When set, the builder is created with `Builder::with_preset` instead of
    /// `Builder::new`.
    pub preset: Option<PresetName>,
    /// When set, forwarded to `Builder::rendering`.
    pub rendering: Option<RenderingName>,
    /// `true` selects `Builder::bundled_stdict`; `false` or absent selects
    /// `Builder::no_bundled_dictionaries`.
    pub use_bundled_stdict: Option<bool>,
    /// The `[options]` table.
    pub options: SidecarOptions,
    /// The `[engine]` table.
    pub engine: SidecarEngine,
    /// The `[assertion]` table: how the converter output is checked.
    pub assertion: SidecarAssertion,
    /// The `[dictionary]` table: extra records pushed onto the builder.
    pub dictionary: SidecarDictionary,
    /// When set, forwarded to `Builder::recovery`.
    pub recovery: Option<RecoveryName>,
    /// The `[markdown]` table.
    pub markdown: SidecarMarkdown,
}
/// Sidecar spelling of [`Recovery`]; deserialized from `"strict"` or
/// `"lenient"`.
#[derive(Clone, Copy, Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum RecoveryName {
    /// Maps to `Recovery::Strict`.
    Strict,
    /// Maps to `Recovery::Lenient`.
    Lenient,
}

impl RecoveryName {
    /// Converts the sidecar name into the corresponding [`Recovery`] value.
    fn into_recovery(self) -> Recovery {
        match self {
            Self::Strict => Recovery::Strict,
            Self::Lenient => Recovery::Lenient,
        }
    }
}
/// Sidecar spelling of [`Preset`]; deserialized from `"ko-kr"` or `"ko-kp"`.
#[derive(Clone, Copy, Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum PresetName {
    /// Maps to `Preset::KoKr`.
    KoKr,
    /// Maps to `Preset::KoKp`.
    KoKp,
}

impl PresetName {
    /// Converts the sidecar name into the corresponding [`Preset`] value.
    fn into_preset(self) -> Preset {
        match self {
            Self::KoKr => Preset::KoKr,
            Self::KoKp => Preset::KoKp,
        }
    }
}
/// Sidecar spelling of [`RenderMode`], deserialized from a kebab-case string
/// such as `"hangul-only"` or `"ruby-on-hanja"`.
#[derive(Clone, Copy, Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum RenderingName {
    /// Maps to `RenderMode::HangulOnly`.
    HangulOnly,
    /// Maps to `RenderMode::HangulHanjaParens`.
    HangulHanjaParens,
    /// Maps to `RenderMode::HanjaHangulParens`.
    HanjaHangulParens,
    /// Maps to `RenderMode::Ruby(RubyBase::OnHangul)`.
    RubyOnHangul,
    /// Maps to `RenderMode::Ruby(RubyBase::OnHanja)`.
    RubyOnHanja,
    /// Maps to `RenderMode::Original`.
    Original,
}

impl RenderingName {
    /// Converts the sidecar name into the corresponding [`RenderMode`]; the two
    /// ruby variants share `RenderMode::Ruby` and differ only in [`RubyBase`].
    fn into_render_mode(self) -> RenderMode {
        match self {
            Self::HangulOnly => RenderMode::HangulOnly,
            Self::HangulHanjaParens => RenderMode::HangulHanjaParens,
            Self::HanjaHangulParens => RenderMode::HanjaHangulParens,
            Self::RubyOnHangul => RenderMode::Ruby(RubyBase::OnHangul),
            Self::RubyOnHanja => RenderMode::Ruby(RubyBase::OnHanja),
            Self::Original => RenderMode::Original,
        }
    }
}
/// The `[options]` table of a sidecar. Both keys are optional; unknown keys
/// are rejected.
#[derive(Debug, Default, Deserialize)]
#[serde(default, deny_unknown_fields)]
pub struct SidecarOptions {
    /// When set, forwarded to `Builder::initial_sound_law`.
    pub initial_sound_law: Option<bool>,
    /// When set, forwarded to `Builder::collapse_redundant_parens`.
    pub collapse_redundant_parens: Option<bool>,
}
/// The `[engine]` table of a sidecar. Unknown keys are rejected.
#[derive(Debug, Default, Deserialize)]
#[serde(default, deny_unknown_fields)]
pub struct SidecarEngine {
    /// When set, forwarded to `Builder::homophone_window`.
    pub disambiguation: Option<DisambiguationWindow>,
}
/// Sidecar spelling of [`ContextWindow`]; deserialized from `"per-block"`,
/// `"per-document"` or `"off"`.
#[derive(Clone, Copy, Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum DisambiguationWindow {
    /// Maps to `ContextWindow::PerBlock`.
    PerBlock,
    /// Maps to `ContextWindow::PerDocument`.
    PerDocument,
    /// Maps to `ContextWindow::Off`.
    Off,
}

impl DisambiguationWindow {
    /// Converts the sidecar name into the corresponding [`ContextWindow`].
    fn into_window(self) -> ContextWindow {
        match self {
            Self::PerBlock => ContextWindow::PerBlock,
            Self::PerDocument => ContextWindow::PerDocument,
            Self::Off => ContextWindow::Off,
        }
    }
}
/// The `[assertion]` table of a sidecar: how `run_fixture` checks the
/// converter output. Unknown keys are rejected.
#[derive(Debug, Default, Deserialize)]
#[serde(default, deny_unknown_fields)]
pub struct SidecarAssertion {
    /// Comparison strategy; defaults to [`AssertionKind::Exact`].
    pub kind: AssertionKind,
    /// Substrings that must all appear in the output when `kind` is
    /// `contains`. `run_fixture` rejects an empty list for that kind.
    pub needles: Vec<String>,
}
/// How the converter output is compared; deserialized from `"exact"` or
/// `"contains"`.
#[derive(Clone, Copy, Debug, Default, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum AssertionKind {
    /// Compare the output with the expected file, ignoring at most one
    /// trailing newline on each side. This is the default.
    #[default]
    Exact,
    /// Require every string in `assertion.needles` to occur in the output.
    Contains,
}
/// The `[dictionary]` table of a sidecar. Unknown keys are rejected.
#[derive(Debug, Default, Deserialize)]
#[serde(default, deny_unknown_fields)]
pub struct SidecarDictionary {
    /// Records inserted into a `MapDictionary` that `run_fixture` pushes onto
    /// the builder; when the list is empty no dictionary is pushed.
    pub records: Vec<DictionaryRecord>,
}
/// One entry of `dictionary.records`. `hanja` and `reading` are required;
/// unknown keys are rejected.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct DictionaryRecord {
    /// First argument passed to `MapDictionary::insert_marked`.
    pub hanja: String,
    /// Second argument passed to `MapDictionary::insert_marked`.
    pub reading: String,
    /// Copied into `MatchMark::require_hanja`; defaults to `false`.
    #[serde(default)]
    pub require_hanja: bool,
    /// Copied into `MatchMark::require_hangul`; defaults to `false`.
    #[serde(default)]
    pub require_hangul: bool,
}
/// The `[markdown]` table of a sidecar. Unknown keys are rejected.
#[derive(Debug, Default, Deserialize)]
#[serde(default, deny_unknown_fields)]
pub struct SidecarMarkdown {
    /// Markdown variant used for Markdown fixtures; `run_fixture` falls back
    /// to CommonMark when this is absent.
    pub variant: Option<MarkdownVariantName>,
}
/// Sidecar spelling of the Markdown variant.
///
/// With `rename_all = "kebab-case"` the accepted strings are `"common-mark"`
/// (note the hyphen) and `"gfm"`.
#[derive(Clone, Copy, Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum MarkdownVariantName {
    /// Maps to `MarkdownVariant::CommonMark`.
    CommonMark,
    /// Maps to `MarkdownVariant::Gfm`.
    Gfm,
}

#[cfg(feature = "markdown")]
impl MarkdownVariantName {
    /// Converts the sidecar name into the engine's Markdown variant. Only
    /// compiled when the `markdown` feature is enabled.
    fn into_variant(self) -> gukhanmun::markdown::MarkdownVariant {
        use gukhanmun::markdown::MarkdownVariant;

        match self {
            Self::CommonMark => MarkdownVariant::CommonMark,
            Self::Gfm => MarkdownVariant::Gfm,
        }
    }
}
/// Returns `<CARGO_MANIFEST_DIR>/../../tests/fixtures`.
///
/// The manifest directory is baked in at compile time and the `..` components
/// are left unresolved in the returned path.
pub fn fixtures_root() -> PathBuf {
    let mut root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    root.extend(["..", "..", "tests", "fixtures"]);
    root
}
/// Collects every fixture found one level below `root`.
///
/// Each immediate subdirectory of `root` is a category; each regular file in a
/// category is offered to `try_discover_fixture`, and files it rejects are
/// skipped. Categories and files are visited in sorted order.
///
/// # Panics
///
/// Panics if `root` does not exist, cannot be stat'ed, is a symlink, has a
/// category whose name is not valid UTF-8, or yields no fixtures at all.
/// Panics from the directory-reading helpers propagate as well.
pub fn discover(root: &Path) -> Vec<Fixture> {
    if !root.exists() {
        panic!("fixtures root does not exist: {}", root.display());
    }
    let root_is_symlink = match fs::symlink_metadata(root) {
        Ok(meta) => meta.file_type().is_symlink(),
        Err(e) => panic!("stat fixtures root {}: {e}", root.display()),
    };
    if root_is_symlink {
        panic!("fixtures root must not be a symlink: {}", root.display());
    }

    let mut fixtures = Vec::new();
    for category in read_subdirs(root) {
        let Some(category_name) = category.file_name().and_then(OsStr::to_str) else {
            panic!("non-utf8 category name: {}", category.display());
        };
        let files = read_files(&category);
        fixtures.extend(
            files
                .iter()
                .filter_map(|path| try_discover_fixture(category_name, path)),
        );
    }

    // An empty result almost certainly means a wrong root, so fail loudly
    // instead of letting a test run pass with zero fixtures.
    if fixtures.is_empty() {
        panic!(
            "fixtures root {} contains no fixtures; expected `*.input.<ext>` files \
             under category subdirectories",
            root.display()
        );
    }
    fixtures
}
/// Returns the immediate subdirectories of `root`, sorted by path.
///
/// # Panics
///
/// Panics if `root` cannot be read, an entry cannot be enumerated or stat'ed,
/// or any entry (of any kind) is a symlink.
fn read_subdirs(root: &Path) -> Vec<PathBuf> {
    read_sorted_entries(root, "fixtures root", fs::FileType::is_dir)
}

/// Returns the regular files directly inside `category`, sorted by path.
///
/// # Panics
///
/// Panics if `category` cannot be read, an entry cannot be enumerated or
/// stat'ed, or any entry (of any kind) is a symlink.
fn read_files(category: &Path) -> Vec<PathBuf> {
    read_sorted_entries(category, "fixture category", fs::FileType::is_file)
}

/// Shared directory walk behind `read_subdirs` and `read_files`.
///
/// Lists `dir` non-recursively, panics on the first symlink, keeps the entries
/// whose file type satisfies `keep`, and returns their paths sorted. `what`
/// names the kind of directory in panic messages.
fn read_sorted_entries(
    dir: &Path,
    what: &str,
    keep: impl Fn(&fs::FileType) -> bool,
) -> Vec<PathBuf> {
    let entries =
        fs::read_dir(dir).unwrap_or_else(|e| panic!("read {what} {}: {e}", dir.display()));
    let mut out = Vec::new();
    for entry in entries {
        let entry =
            entry.unwrap_or_else(|e| panic!("enumerate {what} {}: {e}", dir.display()));
        let path = entry.path();
        // `DirEntry::file_type` does not follow symlinks, so a link is seen as
        // a link here rather than as whatever it points to.
        let file_type = entry
            .file_type()
            .unwrap_or_else(|e| panic!("stat fixture entry {}: {e}", path.display()));
        if file_type.is_symlink() {
            panic!(
                "symlinks are not permitted in the fixtures tree: {}",
                path.display()
            );
        }
        if keep(&file_type) {
            out.push(path);
        }
    }
    // `read_dir` order is platform-dependent; sort for deterministic discovery.
    out.sort();
    out
}
fn try_discover_fixture(category: &str, input_path: &Path) -> Option<Fixture> {
let file_name = input_path.file_name()?.to_str()?;
let ext = input_path.extension()?.to_str()?;
let format = FixtureFormat::from_ext(ext)?;
let stem = file_name.strip_suffix(&format!(".input.{ext}"))?.to_owned();
let expected_path =
input_path.with_file_name(format!("{}.expected.{}", stem, format.ext_str()));
let sidecar_candidate = input_path.with_file_name(format!("{stem}.toml"));
let sidecar_path = match fs::symlink_metadata(&sidecar_candidate) {
Ok(meta) => {
if meta.file_type().is_symlink() {
panic!(
"symlinks are not permitted in the fixtures tree: {}",
sidecar_candidate.display()
);
}
Some(sidecar_candidate)
}
Err(e) if e.kind() == std::io::ErrorKind::NotFound => None,
Err(e) => panic!("stat fixture sidecar {}: {e}", sidecar_candidate.display()),
};
let name = format!("{category}::{}", stem.replace('-', "_"));
Some(Fixture {
name,
format,
input_path: input_path.to_owned(),
expected_path,
sidecar_path,
})
}
/// Outcome of running one fixture: `Ok(())` on success, otherwise a
/// human-readable failure message.
pub type RunResult = Result<(), String>;
pub fn run_fixture(fixture: &Fixture) -> RunResult {
let bless = std::env::var("GUKHANMUN_BLESS_FIXTURES").as_deref() == Ok("1");
let input = fs::read_to_string(&fixture.input_path).map_err(|e| {
format!(
"input file read failed at {}: {e}",
fixture.input_path.display()
)
})?;
let expected = match fs::read_to_string(&fixture.expected_path) {
Ok(text) => text,
Err(e) if bless && e.kind() == std::io::ErrorKind::NotFound => String::new(),
Err(e) => {
return Err(format!(
"expected file read failed at {}: {e}",
fixture.expected_path.display()
));
}
};
let sidecar = match &fixture.sidecar_path {
Some(path) => {
let text = fs::read_to_string(path)
.map_err(|e| format!("sidecar read failed at {}: {e}", path.display()))?;
Some(
toml::from_str::<Sidecar>(&text)
.map_err(|e| format!("sidecar parse failed at {}: {e}", path.display()))?,
)
}
None => None,
};
let sidecar = sidecar.as_ref();
let mut builder = match sidecar.and_then(|s| s.preset) {
Some(preset) => Builder::with_preset(preset.into_preset()),
None => Builder::new(),
};
let use_bundled = sidecar.and_then(|s| s.use_bundled_stdict).unwrap_or(false);
if !use_bundled {
builder = builder.no_bundled_dictionaries();
} else {
builder = builder.bundled_stdict();
}
if let Some(opts) = sidecar.map(|s| &s.options)
&& let Some(law) = opts.initial_sound_law
{
builder = builder.initial_sound_law(law);
}
if let Some(opts) = sidecar.map(|s| &s.options)
&& let Some(collapse) = opts.collapse_redundant_parens
{
builder = builder.collapse_redundant_parens(collapse);
}
if let Some(rendering) = sidecar.and_then(|s| s.rendering) {
builder = builder.rendering(rendering.into_render_mode());
}
if let Some(engine) = sidecar.map(|s| &s.engine)
&& let Some(window) = engine.disambiguation
{
builder = builder.homophone_window(window.into_window());
}
if let Some(recovery) = sidecar.and_then(|s| s.recovery) {
builder = builder.recovery(recovery.into_recovery());
}
if let Some(dict) = sidecar.map(|s| &s.dictionary)
&& !dict.records.is_empty()
{
let mut map = MapDictionary::new();
for record in &dict.records {
let mark = MatchMark {
require_hanja: record.require_hanja,
require_hangul: record.require_hangul,
};
map.insert_marked(record.hanja.clone(), record.reading.clone(), mark);
}
builder = builder.push_dictionary(map);
}
let converter = builder
.build()
.map_err(|e| format!("builder failed: {e}"))?;
let actual = match fixture.format {
FixtureFormat::Text => converter
.convert_text_to_string(&input)
.map_err(|e| format!("plain-text conversion failed: {e}"))?,
FixtureFormat::Html => {
#[cfg(feature = "html")]
{
converter
.convert_html_fragment_to_string(&input)
.map_err(|e| format!("HTML conversion failed: {e}"))?
}
#[cfg(not(feature = "html"))]
{
return Err("HTML fixture requires the `html` feature".into());
}
}
FixtureFormat::Markdown => {
#[cfg(feature = "markdown")]
{
let variant = sidecar
.and_then(|s| s.markdown.variant)
.map(MarkdownVariantName::into_variant)
.unwrap_or(gukhanmun::markdown::MarkdownVariant::CommonMark);
converter
.convert_markdown_to_string(&input, variant)
.map_err(|e| format!("Markdown conversion failed: {e}"))?
}
#[cfg(not(feature = "markdown"))]
{
return Err("Markdown fixture requires the `markdown` feature".into());
}
}
};
let assertion = sidecar.map(|s| &s.assertion).cloned().unwrap_or_default();
let description = sidecar.and_then(|s| s.description.as_deref());
if bless && matches!(assertion.kind, AssertionKind::Exact) {
fs::write(&fixture.expected_path, &actual).map_err(|e| {
format!(
"could not bless expected file {}: {e}",
fixture.expected_path.display()
)
})?;
return Ok(());
}
match assertion.kind {
AssertionKind::Exact => {
if strip_one_trailing_newline(&actual) == strip_one_trailing_newline(&expected) {
Ok(())
} else {
Err(annotate(description, diff_message(&expected, &actual)))
}
}
AssertionKind::Contains => {
if assertion.needles.is_empty() {
return Err(annotate(
description,
"assertion.kind = \"contains\" requires a non-empty \
`assertion.needles` array; an empty needles list would \
accept any converter output unconditionally"
.to_owned(),
));
}
let missing: Vec<&str> = assertion
.needles
.iter()
.filter(|needle| !actual.contains(needle.as_str()))
.map(String::as_str)
.collect();
if missing.is_empty() {
Ok(())
} else {
Err(annotate(
description,
format!(
"expected substrings missing from converter output: {missing:?}\n\
actual output:\n{actual}",
),
))
}
}
}
}
/// Returns `s` without its final `\n`, if it has one. At most one newline is
/// removed, and a preceding `\r` is left in place.
fn strip_one_trailing_newline(s: &str) -> &str {
    match s.strip_suffix('\n') {
        Some(trimmed) => trimmed,
        None => s,
    }
}
/// Prefixes `body` with the trimmed `description` on its own line; returns
/// `body` unchanged when there is no description.
fn annotate(description: Option<&str>, body: String) -> String {
    let Some(text) = description else {
        return body;
    };
    format!("{}\n{body}", text.trim())
}
impl Clone for SidecarAssertion {
    /// Field-by-field copy.
    fn clone(&self) -> Self {
        // Destructure exhaustively so that adding a field to the struct forces
        // this impl to be updated.
        let Self { kind, needles } = self;
        Self {
            kind: *kind,
            needles: needles.clone(),
        }
    }
}
/// Formats a mismatch report: a headline followed by the full expected and
/// actual texts, each under its own marker line and each ending in a newline
/// (one is appended when the text lacks it).
fn diff_message(expected: &str, actual: &str) -> String {
    let mut msg = String::from("converter output diverged from expected fixture\n");
    for (header, text) in [("--- expected ---\n", expected), ("--- actual ---\n", actual)] {
        msg.push_str(header);
        msg.push_str(text);
        if !text.ends_with('\n') {
            msg.push('\n');
        }
    }
    msg
}