use crate::{EbookError, Metadata, Result};
use crate::traits::{EbookReader, EbookWriter, EbookOperator, TocEntry, ImageData};
use std::fs::File;
use std::io::Read;
use std::path::Path;
/// Handler for AZW e-book files.
///
/// Keeps the raw bytes of a file that was read together with everything parsed
/// from them, and doubles as the in-memory buffer used when writing a file.
#[derive(Default)]
pub struct AzwHandler {
/// Book metadata; `title`, `language` and `format` are set by the header parsers.
metadata: Metadata,
/// Book text, either decoded from `raw_data` or supplied through the writer methods.
content: String,
/// Images registered through `add_image`; nothing in this file fills it while reading.
images: Vec<ImageData>,
/// Bytes loaded from disk by `read_from_file`.
raw_data: Vec<u8>,
/// Parsed header; `None` until a recognised magic value has been found.
azw_header: Option<AzwHeader>,
/// Table of contents derived heuristically from `content`.
toc: Vec<TocEntry>,
}
/// Fields decoded from the AZW/MOBI header.
///
/// All offsets below are relative to the position of the magic bytes and all
/// multi-byte integers are read big-endian.
#[derive(Debug, Clone, Default)]
struct AzwHeader {
/// The four magic bytes that identified the header.
magic: [u8; 4],
/// `u32` at offset 4; also used to locate the start of the text.
header_length: u32,
/// `u32` at offset 8.
mobi_type: u32,
/// `u32` at offset 16. Stored but not consulted when decoding text.
text_encoding: u32,
/// Never assigned by the parser in this file; stays at its default of 0.
_id: u32,
/// Never assigned by the parser in this file; stays at its default of 0.
_gen_version: u32,
/// `u32` at offset 76.
first_image_index: u32,
/// `true` when the byte at offset 208 is non-zero.
has_drm: bool,
}
impl AzwHandler {
pub fn new() -> Self {
Self::default()
}
/// Inspects the fixed-size file header and fills in basic metadata.
///
/// If one of the known magic values (`MOBI`, `AZW6`, `AZW3`) is found at byte
/// offset 60, parsing is delegated to `parse_full_azw_header`. Otherwise the
/// first 32 bytes are treated as a NUL-terminated book name and used as the
/// title.
///
/// # Errors
///
/// Returns `EbookError::InvalidStructure` when the file is shorter than
/// 78 bytes, or whatever error `parse_full_azw_header` reports.
fn parse_azw_header(&mut self) -> Result<()> {
    const MIN_FILE_LEN: usize = 78;
    const MAGIC_OFFSET: usize = 60;
    const NAME_FIELD_LEN: usize = 32;

    if self.raw_data.len() < MIN_FILE_LEN {
        return Err(EbookError::InvalidStructure("File too small".to_string()));
    }

    // NOTE(review): PalmDB containers normally carry the type/creator pair
    // "BOOK" + "MOBI" at offsets 60..68, which would put "MOBI" at 64 rather
    // than 60 - confirm this offset against real sample files.
    // The length guard above already guarantees these four bytes exist.
    let magic = &self.raw_data[MAGIC_OFFSET..MAGIC_OFFSET + 4];
    if magic == b"MOBI" || magic == b"AZW6" || magic == b"AZW3" {
        return self.parse_full_azw_header(MAGIC_OFFSET);
    }

    // The name is a C-style string: stop at the first NUL so that padding or
    // leftover bytes after the terminator neither leak into the title nor
    // make the UTF-8 check fail.
    let name_field = &self.raw_data[..NAME_FIELD_LEN];
    let name_len = name_field
        .iter()
        .position(|&byte| byte == 0)
        .unwrap_or(NAME_FIELD_LEN);
    let name = std::str::from_utf8(&name_field[..name_len]).unwrap_or("Unknown");
    if !name.is_empty() {
        self.metadata.title = Some(name.to_string());
    }

    self.metadata.format = Some("AZW".to_string());
    Ok(())
}
fn parse_full_azw_header(&mut self, pos: usize) -> Result<()> {
if self.raw_data.len() < pos + 232 {
return Err(EbookError::InvalidStructure("AZW header too small".to_string()));
}
let mut header = AzwHeader::default();
header.magic.copy_from_slice(&self.raw_data[pos..pos + 4]);
header.header_length = u32::from_be_bytes([
self.raw_data[pos + 4],
self.raw_data[pos + 5],
self.raw_data[pos + 6],
self.raw_data[pos + 7],
]);
header.mobi_type = u32::from_be_bytes([
self.raw_data[pos + 8],
self.raw_data[pos + 9],
self.raw_data[pos + 10],
self.raw_data[pos + 11],
]);
header.text_encoding = u32::from_be_bytes([
self.raw_data[pos + 16],
self.raw_data[pos + 17],
self.raw_data[pos + 18],
self.raw_data[pos + 19],
]);
if self.raw_data.len() > pos + 80 {
header.first_image_index = u32::from_be_bytes([
self.raw_data[pos + 76],
self.raw_data[pos + 77],
self.raw_data[pos + 78],
self.raw_data[pos + 79],
]);
}
if self.raw_data.len() > pos + 208 {
header.has_drm = self.raw_data[pos + 208] != 0;
}
self.azw_header = Some(header);
if self.raw_data.len() > pos + 88 {
let name_length = self.raw_data[pos + 88] as usize;
if self.raw_data.len() > pos + 92 + name_length {
let name_bytes = &self.raw_data[pos + 92..pos + 92 + name_length];
if let Ok(name) = std::str::from_utf8(name_bytes) {
self.metadata.title = Some(name.to_string());
}
}
}
if self.raw_data.len() > pos + 110 {
let lang_id = u16::from_be_bytes([
self.raw_data[pos + 108],
self.raw_data[pos + 109],
]);
self.metadata.language = Some(self.language_id_to_code(lang_id));
}
self.metadata.format = Some("AZW".to_string());
Ok(())
}
/// Maps a numeric language id from the header to a two-letter language code.
///
/// Ids 0 through 13 index the table below; any other id yields `"en"`.
fn language_id_to_code(&self, id: u16) -> String {
    const CODES: [&str; 14] = [
        "en", "fr", "de", "it", "es", "nl", "sv", "nb", "da", "fi", "ja", "zh", "ko", "ar",
    ];

    CODES
        .get(usize::from(id))
        .copied()
        .unwrap_or("en")
        .to_string()
}
/// Decodes the book text from `raw_data` into `self.content`.
///
/// The text is taken to start at `header_length + 60` when a header was
/// parsed, and at byte 78 otherwise. A leading UTF-16 byte-order mark selects
/// big- or little-endian UTF-16 decoding; anything else is decoded as UTF-8,
/// falling back to a lossy decode for invalid input. Afterwards page-break
/// tags are turned into `---` separators and the basic XML character
/// entities are decoded.
///
/// # Errors
///
/// Returns `EbookError::NotSupported` when the parsed header is flagged as
/// DRM-protected.
fn extract_text(&mut self) -> Result<()> {
    /// Decodes `payload` as UTF-16 in the given byte order, ignoring a
    /// trailing odd byte. Returns `None` for invalid UTF-16.
    fn decode_utf16(payload: &[u8], little_endian: bool) -> Option<String> {
        let units: Vec<u16> = payload
            .chunks_exact(2)
            .map(|pair| {
                let bytes = [pair[0], pair[1]];
                if little_endian {
                    u16::from_le_bytes(bytes)
                } else {
                    u16::from_be_bytes(bytes)
                }
            })
            .collect();
        String::from_utf16(&units).ok()
    }

    // Entity names (without the surrounding ampersand and semicolon) and
    // their replacements. The ampersand entity must stay last so that an
    // escaped entity in the text is not decoded twice. The full entity text
    // is assembled at runtime so the literals survive tools that
    // HTML-unescape source files.
    const ENTITIES: [(&str, &str); 6] = [
        ("lt", "<"),
        ("gt", ">"),
        ("quot", "\""),
        ("apos", "'"),
        ("#39", "'"),
        ("amp", "&"),
    ];

    let (has_drm, text_start) = match &self.azw_header {
        // Saturate so a hostile header length cannot overflow the offset.
        Some(header) => (
            header.has_drm,
            (header.header_length as usize).saturating_add(60),
        ),
        None => (false, 78),
    };
    if has_drm {
        return Err(EbookError::NotSupported(
            "DRM-protected AZW files are not supported. Please use a DRM-free version.".to_string()
        ));
    }

    let decoded = match self.raw_data.get(text_start..) {
        Some(text_data) if !text_data.is_empty() => {
            // FE FF marks big-endian UTF-16, FF FE little-endian.
            let utf16 = match text_data {
                [0xFE, 0xFF, rest @ ..] => decode_utf16(rest, false),
                [0xFF, 0xFE, rest @ ..] => decode_utf16(rest, true),
                _ => None,
            };
            Some(utf16.unwrap_or_else(|| match std::str::from_utf8(text_data) {
                Ok(text) => text.to_string(),
                Err(_) => encoding_rs::UTF_8.decode(text_data).0.into_owned(),
            }))
        }
        // No text after the header: keep whatever content is already there.
        _ => None,
    };
    if let Some(text) = decoded {
        self.content = text;
    }

    // The clean-up applies to every decoding path, UTF-16 included.
    let mut text = std::mem::take(&mut self.content)
        .replace("<mbp:pagebreak/>", "\n\n---\n\n")
        .replace("<mbp:pagebreak>", "\n\n---\n\n")
        .replace("</mbp:pagebreak>", "");
    for &(name, replacement) in ENTITIES.iter() {
        text = text.replace(&format!("&{name};"), replacement);
    }
    self.content = text;
    Ok(())
}
fn extract_toc(&mut self) -> Result<()> {
let mut toc = Vec::new();
let lines: Vec<&str> = self.content.lines().collect();
for (idx, line) in lines.iter().enumerate() {
let trimmed = line.trim();
if trimmed.starts_with("Chapter ")
|| trimmed.starts_with("CHAPTER ")
|| trimmed.starts_with("# ")
|| (trimmed.len() < 100 && trimmed.chars().all(|c| c.is_uppercase() || c == ' '))
{
toc.push(TocEntry {
id: idx as u32,
level: 0,
title: trimmed.to_string(),
href: None,
children: Vec::new(),
});
}
}
self.toc = toc;
Ok(())
}
}
/// Read side of the handler: loads an AZW file and exposes what was parsed.
impl EbookReader for AzwHandler {
    /// Reads the file at `path`, then parses its header, text and table of
    /// contents.
    ///
    /// State derived from a previously read file (raw bytes, parsed header,
    /// text and table of contents) is discarded first, so a handler can be
    /// reused. Metadata and images are left as they are; the header parser
    /// overwrites the metadata fields it recognises.
    ///
    /// # Errors
    ///
    /// Propagates I/O errors from opening or reading the file and any error
    /// from the parsing steps. On an I/O error the handler is left untouched.
    fn read_from_file(&mut self, path: &Path) -> Result<()> {
        log::info!("Reading AZW file: {path:?}");

        // Read into a fresh buffer: `read_to_end` appends, so reading straight
        // into `self.raw_data` would concatenate successive files.
        let mut raw = Vec::new();
        File::open(path)?.read_to_end(&mut raw)?;

        self.raw_data = raw;
        // A stale header could wrongly flag the new file as DRM-protected or
        // shift the text offset.
        self.azw_header = None;
        self.content.clear();
        self.toc.clear();

        self.parse_azw_header()?;
        self.extract_text()?;
        self.extract_toc()?;
        Ok(())
    }

    /// Returns a copy of the current metadata.
    fn get_metadata(&self) -> Result<Metadata> {
        Ok(self.metadata.clone())
    }

    /// Returns a copy of the current book text.
    fn get_content(&self) -> Result<String> {
        Ok(self.content.clone())
    }

    /// Returns a copy of the current table of contents.
    fn get_toc(&self) -> Result<Vec<TocEntry>> {
        Ok(self.toc.clone())
    }

    /// Returns a copy of the images held by the handler.
    fn extract_images(&self) -> Result<Vec<ImageData>> {
        Ok(self.images.clone())
    }
}
/// Write side of the handler: collects metadata, text and images in memory and
/// writes a minimal file.
impl EbookWriter for AzwHandler {
    /// Replaces the metadata wholesale.
    fn set_metadata(&mut self, metadata: Metadata) -> Result<()> {
        self.metadata = metadata;
        Ok(())
    }

    /// Replaces the book text.
    fn set_content(&mut self, content: &str) -> Result<()> {
        self.content = content.to_string();
        Ok(())
    }

    /// Appends `content` followed by a newline to the book text. The chapter
    /// title is currently not used.
    fn add_chapter(&mut self, _title: &str, content: &str) -> Result<()> {
        self.content.push_str(content);
        self.content.push('\n');
        Ok(())
    }

    /// Stores an image, with its MIME type guessed from `name`.
    fn add_image(&mut self, name: &str, data: Vec<u8>) -> Result<()> {
        let mime_type = crate::utils::guess_mime_type(name);
        self.images.push(ImageData::new(name.to_string(), mime_type, data));
        Ok(())
    }

    /// Writes a 78-byte header followed by the book text as UTF-8.
    ///
    /// The header is zero-filled except for the title (or `Untitled`) in its
    /// first 32 bytes. Missing parent directories are created. Images are not
    /// written.
    ///
    /// # Errors
    ///
    /// Propagates any I/O error from creating directories, creating the file
    /// or writing to it.
    fn write_to_file(&self, path: &Path) -> Result<()> {
        use std::io::Write;

        const HEADER_LEN: usize = 78;
        const NAME_FIELD_LEN: usize = 32;

        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent)?;
        }
        let mut file = File::create(path)?;

        let title = self.metadata.title.as_deref().unwrap_or("Untitled");
        // Back off to a character boundary so a long title is never cut in
        // the middle of a multi-byte character, which would leave invalid
        // UTF-8 in the name field. Index 0 is always a boundary, so the loop
        // terminates.
        let mut copy_len = title.len().min(NAME_FIELD_LEN);
        while !title.is_char_boundary(copy_len) {
            copy_len -= 1;
        }

        let mut header = [0u8; HEADER_LEN];
        header[..copy_len].copy_from_slice(&title.as_bytes()[..copy_len]);

        file.write_all(&header)?;
        file.write_all(self.content.as_bytes())?;
        Ok(())
    }
}
/// Maintenance operations: conversion, validation and repair.
impl EbookOperator for AzwHandler {
    /// Always fails: conversion to other formats is not available.
    fn convert_to(&self, _target_format: &str, _output_path: &Path) -> Result<()> {
        let message = "Conversion not yet implemented".to_string();
        Err(EbookError::NotSupported(message))
    }

    /// Reports whether a usable file is loaded: `false` when the parsed
    /// header is flagged as DRM-protected or when no bytes have been read.
    fn validate(&self) -> Result<bool> {
        let drm_locked = self
            .azw_header
            .as_ref()
            .map_or(false, |header| header.has_drm);
        Ok(!drm_locked && !self.raw_data.is_empty())
    }

    /// Gives the book the title `Untitled` when it has none.
    fn repair(&mut self) -> Result<()> {
        let _ = self
            .metadata
            .title
            .get_or_insert_with(|| "Untitled".to_string());
        Ok(())
    }
}