agent-book-translate 0.1.7

A powerful LLM-driven agentic EPUB book translator with robust progress monitoring and recovery
Documentation
use agent_book_translate::core::parser::{
    RenderedChunk, extract_and_flatten_text, extract_text_chunks, render_bilingual_node,
    render_file_from_chunks,
};
use scraper::Html;

#[test]
fn ruby_text_flattening() {
    let raw_xhtml = r#"<div class="chapter"><p>これは<ruby>漢<rt>かん</rt>字<rt>じ</rt></ruby>です。</p></div>"#;
    let document = Html::parse_document(raw_xhtml);
    assert_eq!(extract_and_flatten_text(&document), "これは漢字です。");
}

#[test]
fn bypass_images_extraction() {
    let raw_xhtml = r#"<p>Text before <img src="image.jpg" id="fig1"/> text after.</p>"#;
    let document = Html::parse_document(raw_xhtml);
    let text_chunks = extract_text_chunks(&document);
    assert_eq!(text_chunks.len(), 1);
    assert_eq!(text_chunks[0].text, "Text before  text after.");
    assert_eq!(text_chunks[0].node_id.as_deref(), Some("p-0"));
}

#[test]
fn bilingual_injection_preserves_images() {
    let original_html =
        r#"<p id="p1">Text before <img src="image.jpg" id="fig1"/> text after.</p>"#;
    let output = render_bilingual_node(original_html, "翻译");
    assert!(output.contains(r#"<img src="image.jpg" id="fig1""#));
    assert!(output.contains(r#"class="translation""#));
}

#[test]
fn drop_cap_splicing() {
    let raw_xhtml = r#"<p><span class="dropcap">O</span>nce upon a time...</p>"#;
    let document = Html::parse_document(raw_xhtml);
    assert_eq!(extract_and_flatten_text(&document), "Once upon a time...");
}

#[test]
fn duplicate_chunk_text_renders_by_chunk_index() {
    let raw_xhtml = r#"<div><p>Repeat me.</p><p>Repeat me.</p></div>"#;
    let rendered = render_file_from_chunks(
        raw_xhtml,
        &[
            RenderedChunk {
                file_name: "chapter.xhtml".to_string(),
                chunk_index: 0,
                original: "Repeat me.".to_string(),
                translated: "第一次".to_string(),
            },
            RenderedChunk {
                file_name: "chapter.xhtml".to_string(),
                chunk_index: 1,
                original: "Repeat me.".to_string(),
                translated: "第二次".to_string(),
            },
        ],
    );

    assert!(rendered.contains("第一次"));
    assert!(rendered.contains("第二次"));
    assert_eq!(rendered.matches("第一次").count(), 1);
    assert_eq!(rendered.matches("第二次").count(), 1);
}

#[test]
fn multiline_block_with_breaks_renders_by_node_index() {
    let raw_xhtml = r#"<div><p class="poem">Line one<br/>Line two</p><p>Tail</p></div>"#;
    let rendered = render_file_from_chunks(
        raw_xhtml,
        &[RenderedChunk {
            file_name: "chapter.xhtml".to_string(),
            chunk_index: 0,
            original: "Line oneLine two".to_string(),
            translated: "第一行\n第二行".to_string(),
        }],
    );

    assert!(rendered.contains(r#"<p class="poem">第一行"#));
    assert!(rendered.contains("第二行</p>"));
    assert!(rendered.contains("<p>Tail</p>"));
    assert!(!rendered.contains("Line one"));
}