mod attribute;
mod detect;
mod fingerprint;
mod pipeline;
mod store;
mod verbs;
use crate::prose::ProseLanguage;
pub(crate) use attribute::{AttributionWindows, attribute_spans};
pub(crate) use detect::detect_spans;
pub(crate) use pipeline::{character_names, refresh_book};
pub(crate) use store::DialogueStore;
pub(crate) use verbs::{DialogueLexicon, classify_tag_verb, lexicon_for_with};
/// Dialogue punctuation convention assigned to a prose language.
///
/// `dialogue_convention` in this module picks one of these per
/// `ProseLanguage`. The variant names suggest which delimiters mark speech;
/// how each convention is actually detected is not visible in this file.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum DialogueConvention {
    /// Returned for `En`, `De`, and any `Other(_)` language.
    QuotePair,
    /// Returned for `Fr` and `Ru`.
    GuillemetsAndDash,
    /// Returned for `Es`.
    Hybrid,
}
/// Typographic form of a single dialogue span.
///
/// Each variant has a fixed string code (see [`SpanForm::as_code`]).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum SpanForm {
    /// Encoded as `"quote_pair"`; also the fallback when decoding.
    QuotePair,
    /// Encoded as `"guillemet"`.
    Guillemet,
    /// Encoded as `"em_dash"`.
    EmDash,
}

impl SpanForm {
    /// Returns the fixed string code for this form.
    pub(crate) fn as_code(&self) -> &'static str {
        match *self {
            Self::QuotePair => "quote_pair",
            Self::Guillemet => "guillemet",
            Self::EmDash => "em_dash",
        }
    }

    /// Decodes a string code produced by [`SpanForm::as_code`].
    ///
    /// Decoding is lenient: any string that is not exactly `"guillemet"` or
    /// `"em_dash"` (including `"quote_pair"` itself, the empty string, and
    /// unknown codes) yields [`SpanForm::QuotePair`].
    pub(crate) fn from_code(s: &str) -> SpanForm {
        // Only the non-default forms need an explicit lookup; everything else
        // falls through to the default.
        [Self::Guillemet, Self::EmDash]
            .iter()
            .copied()
            .find(|form| form.as_code() == s)
            .unwrap_or(Self::QuotePair)
    }
}
/// Confidence level attached to a span's speaker attribution.
///
/// Note that `None` here is this enum's own variant, not `Option::None`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum AttributionConfidence {
    /// Encoded as `"certain"`.
    Certain,
    /// Encoded as `"inferred"`.
    Inferred,
    /// Encoded as `"none"`; also the fallback when decoding.
    None,
}

impl AttributionConfidence {
    /// Returns the fixed string code for this confidence level.
    pub(crate) fn as_code(&self) -> &'static str {
        match *self {
            Self::Certain => "certain",
            Self::Inferred => "inferred",
            Self::None => "none",
        }
    }

    /// Decodes a string code produced by [`AttributionConfidence::as_code`].
    ///
    /// Decoding is lenient: anything other than exactly `"certain"` or
    /// `"inferred"` (including `"none"` and unknown codes) yields
    /// [`AttributionConfidence::None`].
    pub(crate) fn from_code(s: &str) -> AttributionConfidence {
        // Look up only the levels that carry an attribution; the rest default.
        [Self::Certain, Self::Inferred]
            .iter()
            .copied()
            .find(|level| level.as_code() == s)
            .unwrap_or(Self::None)
    }
}
/// Classification assigned to a dialogue tag verb.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum TagVerbClass {
    /// Encoded as `"neutral"`.
    Neutral,
    /// Encoded as `"said_bookism"`.
    SaidBookism,
}

impl TagVerbClass {
    /// Returns the fixed string code for this class.
    pub(crate) fn as_code(&self) -> &'static str {
        match *self {
            Self::Neutral => "neutral",
            Self::SaidBookism => "said_bookism",
        }
    }

    /// Decodes a string code produced by [`TagVerbClass::as_code`].
    ///
    /// Decoding is strict: returns `None` for any string that is not exactly
    /// one of the known codes.
    pub(crate) fn from_code(s: &str) -> Option<TagVerbClass> {
        [Self::Neutral, Self::SaidBookism]
            .iter()
            .copied()
            .find(|class| class.as_code() == s)
    }
}
/// One dialogue span together with its attribution and tag-verb data.
///
/// NOTE(review): the per-field descriptions below are inferred from field
/// names and types; nothing in this file populates or reads them. Confirm
/// against the `detect` and `attribute` submodules before relying on them.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct DialogueSpan {
    /// Identifier of the paragraph this span belongs to (presumably).
    pub para_id: String,
    /// Index of the span; presumably its position within the paragraph.
    pub span_index: u32,
    /// Typographic form of the span.
    pub form: SpanForm,
    /// Start offset of the span. TODO confirm: chars vs. bytes, and what
    /// text the offset is relative to.
    pub char_start: usize,
    /// End offset of the span. TODO confirm: inclusive or exclusive.
    pub char_end: usize,
    /// The spoken text carried by the span.
    pub speech_text: String,
    /// Word count; presumably of `speech_text`.
    pub word_count: u32,
    /// Attributed name, when there is one.
    pub attribution_name: Option<String>,
    /// Confidence attached to the attribution.
    pub attribution_conf: AttributionConfidence,
    /// Whether an attribution signal is present for this span.
    pub has_attribution_signal: bool,
    /// Tag verb associated with the span, when there is one.
    pub tag_verb: Option<String>,
    /// Classification of the tag verb, when there is one.
    pub tag_verb_class: Option<TagVerbClass>,
    /// Presumably true when the speech ends with a question mark.
    pub ends_question: bool,
    /// Presumably true when the speech ends with an exclamation mark.
    pub ends_exclamation: bool,
}
/// Category of a dialogue finding.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum DialogueFindingKind {
    /// Encoded as `"zero_attribution"`.
    ZeroAttribution,
    /// Encoded as `"said_bookism"`.
    SaidBookism,
    /// Encoded as `"talking_heads"` (plural, unlike the variant name).
    TalkingHead,
}

impl DialogueFindingKind {
    /// Returns the fixed string code for this finding kind.
    ///
    /// The code for [`DialogueFindingKind::TalkingHead`] is the plural
    /// `"talking_heads"`; it is kept exactly as is.
    pub(crate) fn as_code(&self) -> &'static str {
        match *self {
            Self::ZeroAttribution => "zero_attribution",
            Self::SaidBookism => "said_bookism",
            Self::TalkingHead => "talking_heads",
        }
    }
}
/// A single dialogue finding.
///
/// NOTE(review): field descriptions are inferred from names and types;
/// this file does not show where findings are produced.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct DialogueFinding {
    /// Category of the finding.
    pub kind: DialogueFindingKind,
    /// Chapter ordinal the finding belongs to (presumably).
    pub chapter_ord: u32,
    /// Paragraph identifier, when the finding is tied to one paragraph
    /// (presumably `None` for chapter-level findings; confirm).
    pub para_id: Option<String>,
    /// Free-form detail text.
    pub detail: String,
}
/// Aggregate dialogue statistics for one chapter.
///
/// NOTE(review): field descriptions are inferred from names and types; the
/// code that computes them (including how the ratios are defined) is not
/// visible in this file.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct ChapterDialogueStats {
    /// Chapter ordinal these statistics describe (presumably).
    pub chapter_ord: u32,
    /// Number of dialogue spans.
    pub total_spans: u32,
    /// Number of spans counted as having zero attribution.
    pub zero_attribution_count: u32,
    /// Number of said-bookism tags.
    pub said_bookism_count: u32,
    /// Number of neutral tags.
    pub neutral_tag_count: u32,
    /// Said-bookism density. TODO confirm the denominator.
    pub said_bookism_density: f32,
    /// Number of words inside dialogue.
    pub dialogue_word_count: u32,
    /// Total number of words.
    pub total_word_count: u32,
    /// Dialogue density ratio; presumably dialogue words over total words.
    pub dialogue_density_ratio: f32,
    /// Number of talking-head sequences.
    pub talking_head_sequences: u32,
}
/// Per-character summary of dialogue style metrics.
///
/// NOTE(review): field descriptions are inferred from names and types; the
/// computation lives outside this file (presumably in `fingerprint`).
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct CharacterDialogueFingerprint {
    /// Name of the character.
    pub character_name: String,
    /// Number of utterances.
    pub utterance_count: u32,
    /// Mean number of words per utterance.
    pub mean_utterance_words: f32,
    /// MATTR value for the utterances (presumably moving-average type-token
    /// ratio; confirm the window size with the implementation).
    pub utterance_mattr: f32,
    /// Ratio of questions. TODO confirm the denominator.
    pub question_ratio: f32,
    /// Ratio of exclamations. TODO confirm the denominator.
    pub exclamation_ratio: f32,
    /// Hedge density. TODO confirm the unit.
    pub hedge_density: f32,
}
/// Maps a prose language to the dialogue convention used for it.
///
/// * `Fr`, `Ru` map to [`DialogueConvention::GuillemetsAndDash`].
/// * `Es` maps to [`DialogueConvention::Hybrid`].
/// * `En`, `De`, and every `Other(_)` language map to
///   [`DialogueConvention::QuotePair`].
pub(crate) fn dialogue_convention(lang: &ProseLanguage) -> DialogueConvention {
    // Arms are grouped by result; there is deliberately no wildcard, so a new
    // `ProseLanguage` variant forces a decision here at compile time.
    match lang {
        ProseLanguage::Fr | ProseLanguage::Ru => DialogueConvention::GuillemetsAndDash,
        ProseLanguage::Es => DialogueConvention::Hybrid,
        ProseLanguage::En | ProseLanguage::De | ProseLanguage::Other(_) => {
            DialogueConvention::QuotePair
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the language-to-convention mapping of `dialogue_convention`,
    /// one row per checked language.
    #[test]
    fn convention_dispatch_matches_rfc_table() {
        let table = [
            (ProseLanguage::En, DialogueConvention::QuotePair),
            (ProseLanguage::De, DialogueConvention::QuotePair),
            (ProseLanguage::Fr, DialogueConvention::GuillemetsAndDash),
            (ProseLanguage::Ru, DialogueConvention::GuillemetsAndDash),
            (ProseLanguage::Es, DialogueConvention::Hybrid),
            // An unlisted language code goes through `Other`.
            (ProseLanguage::Other("pl".into()), DialogueConvention::QuotePair),
        ];

        for (lang, expected) in &table {
            assert_eq!(dialogue_convention(lang), *expected);
        }
    }
}