// osf 0.1.0
//
// Parser for Open Screenplay Format (OSF) files used by Fade In Pro screenwriting software.
// Documentation
use serde::Serialize;

/// A fully parsed OSF screenplay document.
///
/// Contains all extracted metadata, scenes, and the reconstructed raw text.
/// The parser normalizes differences between OSF versions (v1.2, v2.x, v4.x)
/// into this unified structure.
///
/// The type derives [`Serialize`], so a parsed document can be passed to any
/// serde serializer. It also derives `Debug` and `Clone`.
///
/// # Example
///
/// ```
/// let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
/// <document type="Open Screenplay Format document" version="40">
///   <info uuid="abc-123" pagecount="5"/>
///   <settings/><styles/>
///   <paragraphs>
///     <para><style basestyle="Scene Heading"/><text>INT. LAB - NIGHT</text></para>
///     <para><style basestyle="Action"/><text>Beakers bubble.</text></para>
///   </paragraphs>
///   <titlepage>
///     <para bookmark="Title"><style basestyle="Action"/><text>The Formula</text></para>
///   </titlepage>
///   <lists>
///     <characters><character name="DR. CHEN"/></characters>
///     <locations><location name="Lab"/></locations>
///   </lists>
/// </document>"#;
///
/// let doc = osf::parse(xml.as_bytes()).unwrap();
/// assert_eq!(doc.version, osf::OsfVersion::V4);
/// assert_eq!(doc.title_page.title.as_deref(), Some("The Formula"));
/// assert_eq!(doc.characters, vec!["DR. CHEN"]);
/// ```
#[derive(Debug, Clone, Serialize)]
pub struct OsfDocument {
    /// The OSF format version detected during parsing.
    ///
    /// See [`OsfVersion::from_raw`] for how the raw `version` attribute is
    /// mapped onto a variant.
    pub version: OsfVersion,
    /// Document UUID from the `<info>` element, if one was found.
    pub uuid: Option<String>,
    /// Page count reported by the document, if one was found.
    pub page_count: Option<u32>,
    /// Title page metadata (title, authors, draft, contact, copyright).
    pub title_page: TitlePage,
    /// Scenes extracted from the screenplay paragraphs.
    ///
    /// Each scene starts at a `Scene Heading` paragraph and includes all
    /// subsequent paragraphs until the next heading or end of document.
    pub scenes: Vec<Scene>,
    /// Known character names from the `<lists>` section.
    pub characters: Vec<String>,
    /// Known location names from the `<lists>` section.
    pub locations: Vec<String>,
    /// Full reconstructed screenplay text with standard indentation.
    pub raw_text: String,
}

/// OSF format version.
///
/// The parser normalizes version numbers from the XML `version` attribute
/// into these variants. All versions produce the same [`OsfDocument`] output.
///
/// The mapping from the raw integer to a variant lives in
/// [`OsfVersion::from_raw`]; the ranges quoted on each variant below mirror it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum OsfVersion {
    /// Version 1.2 — metadata in `<info>` attributes, `basestylename` for styles.
    ///
    /// Selected for raw version values `10..=19`.
    V1,
    /// Version 2.x — camelCase attributes (`baseStyleName`, `pageNumber`).
    ///
    /// Selected for raw version values `20..=29`.
    V2,
    /// Version 4.0 — snake_case attributes (`basestyle`, `page_number`).
    ///
    /// Selected for raw version values `30..=49`, so 3x values land here too.
    V4,
    /// Unrecognized version number (still parsed with best-effort).
    ///
    /// The payload is the raw version integer exactly as it was supplied.
    Unknown(u32),
}

impl OsfVersion {
    /// Create from the raw version integer in the XML document element.
    ///
    /// The raw value is bucketed by range:
    ///
    /// | raw value  | result                    |
    /// |------------|---------------------------|
    /// | `10..=19`  | [`OsfVersion::V1`]        |
    /// | `20..=29`  | [`OsfVersion::V2`]        |
    /// | `30..=49`  | [`OsfVersion::V4`]        |
    /// | otherwise  | [`OsfVersion::Unknown`]   |
    ///
    /// This function never fails: any value outside the known ranges
    /// (including `0`) is returned as `Unknown` carrying the raw integer.
    ///
    /// ```
    /// use osf::OsfVersion;
    /// assert_eq!(OsfVersion::from_raw(12), OsfVersion::V1);
    /// assert_eq!(OsfVersion::from_raw(21), OsfVersion::V2);
    /// assert_eq!(OsfVersion::from_raw(40), OsfVersion::V4);
    /// assert_eq!(OsfVersion::from_raw(0), OsfVersion::Unknown(0));
    /// assert_eq!(OsfVersion::from_raw(50), OsfVersion::Unknown(50));
    /// ```
    pub fn from_raw(v: u32) -> Self {
        match v {
            10..=19 => Self::V1,
            20..=29 => Self::V2,
            30..=49 => Self::V4,
            // Single fallback: a dedicated `0` arm would only repeat this one.
            other => Self::Unknown(other),
        }
    }
}

/// Title page metadata extracted from an OSF document.
///
/// In v1.2, these are read from `<info>` element attributes.
/// In v2.x/v4.x, these are read from `<titlepage>` paragraphs identified by
/// `bookmark` attributes (`"Title"`, `"Author"`, `"Draft"`, `"Contact"`, `"Copyright"`).
///
/// The type derives `Default`: a default `TitlePage` has every optional field
/// set to `None` and an empty `authors` list.
#[derive(Debug, Clone, Default, Serialize)]
pub struct TitlePage {
    /// The screenplay title.
    pub title: Option<String>,
    /// Author name(s).
    ///
    /// Empty when no author was recorded.
    pub authors: Vec<String>,
    /// Draft information (e.g., "First Draft", "Rev. 2 - Jan 2026").
    pub draft: Option<String>,
    /// Contact information (address, email, phone, agent).
    pub contact: Option<String>,
    /// Copyright notice.
    pub copyright: Option<String>,
}

/// A single scene in the screenplay.
///
/// Scenes are delimited by `Scene Heading` paragraphs. Everything between
/// two consecutive headings belongs to the scene opened by the earlier one.
///
/// # Example
///
/// ```
/// let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
/// <document type="Open Screenplay Format document" version="40">
///   <info/><settings/><styles/>
///   <paragraphs>
///     <para page_number="3" number="5">
///       <style basestyle="Scene Heading"/>
///       <text>EXT. ROOFTOP - NIGHT</text>
///     </para>
///     <para><style basestyle="Action"/><text>Wind howls.</text></para>
///     <para><style basestyle="Character"/><text>MAYA</text></para>
///     <para><style basestyle="Dialogue"/><text>We have to jump.</text></para>
///   </paragraphs>
///   <titlepage/><lists/>
/// </document>"#;
///
/// let doc = osf::parse(xml.as_bytes()).unwrap();
/// let scene = &doc.scenes[0];
/// assert_eq!(scene.number, 5);
/// assert_eq!(scene.heading, "EXT. ROOFTOP - NIGHT");
/// assert_eq!(scene.page, Some(3));
/// assert!(scene.body.contains("MAYA"));
/// ```
#[derive(Debug, Clone, Serialize)]
pub struct Scene {
    /// Scene number. Uses the explicit number from the XML if present,
    /// otherwise falls back to sequential numbering.
    ///
    /// NOTE(review): [`Paragraph::scene_number`] is stored as a string while
    /// this field is a `usize`; how a non-numeric explicit number is handled
    /// is decided by the parser and is not visible from this type — confirm
    /// there before relying on it.
    pub number: usize,
    /// The scene heading text (e.g., `"INT. COFFEE SHOP - DAY"`).
    pub heading: String,
    /// Page number where the scene starts, if available.
    pub page: Option<u32>,
    /// The scene body — all paragraphs formatted as screenplay text.
    pub body: String,
}

/// A single paragraph from the OSF XML.
///
/// This is an intermediate representation used during parsing.
/// Paragraphs are grouped into [`Scene`]s based on their style.
///
/// Unlike [`OsfDocument`], [`TitlePage`] and [`Scene`], this type does not
/// derive `Serialize`.
#[derive(Debug, Clone)]
pub struct Paragraph {
    /// The screenplay element type of this paragraph.
    pub style: ParaStyle,
    /// Concatenated text content across all `<text>` runs.
    pub text: String,
    /// Page number from the `page_number`/`pageNumber` attribute.
    pub page: Option<u32>,
    /// Explicit scene number from `number`/`sceneNumber` attribute.
    ///
    /// Kept as a string here; [`Scene::number`] is the numeric form exposed
    /// to callers.
    pub scene_number: Option<String>,
    /// Bookmark name (used on title page paragraphs).
    pub bookmark: Option<String>,
    /// Whether this paragraph starts a dual dialogue block.
    pub dual_dialogue: bool,
}

/// Screenplay paragraph element types.
///
/// Seven built-in OSF style names have a dedicated variant; any other style
/// name (custom or otherwise unrecognized) is carried verbatim in
/// [`ParaStyle::Other`]. Use [`ParaStyle::from_name`] to convert a style name.
///
/// ```
/// use osf::ParaStyle;
///
/// assert_eq!(ParaStyle::from_name("Scene Heading"), ParaStyle::SceneHeading);
/// assert_eq!(ParaStyle::from_name("Custom Style"), ParaStyle::Other("Custom Style".into()));
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub enum ParaStyle {
    /// Scene heading / slug line (e.g., `INT. OFFICE - DAY`).
    SceneHeading,
    /// Action / description paragraph.
    Action,
    /// Character name (typically above dialogue).
    Character,
    /// Parenthetical direction (e.g., `(whispering)`).
    Parenthetical,
    /// Spoken dialogue.
    Dialogue,
    /// Transition (e.g., `CUT TO:`, `FADE IN:`).
    Transition,
    /// Camera shot direction.
    Shot,
    /// Any custom or unrecognized style.
    ///
    /// The payload is the style name exactly as it was given.
    Other(String),
}

impl ParaStyle {
    /// Map an OSF style name onto its [`ParaStyle`] variant.
    ///
    /// Matching is exact and case-sensitive. A name without a dedicated
    /// variant is preserved verbatim inside [`ParaStyle::Other`], so this
    /// conversion never fails.
    pub fn from_name(s: &str) -> Self {
        // First try the fixed set of built-in style names.
        let built_in = match s {
            "Scene Heading" => Some(Self::SceneHeading),
            "Action" => Some(Self::Action),
            "Character" => Some(Self::Character),
            "Parenthetical" => Some(Self::Parenthetical),
            "Dialogue" => Some(Self::Dialogue),
            "Transition" => Some(Self::Transition),
            "Shot" => Some(Self::Shot),
            _ => None,
        };

        // Anything else keeps its original name as a custom style.
        built_in.unwrap_or_else(|| Self::Other(s.to_owned()))
    }
}