use super::fkp::ChpxFkp;
use super::chp::CharacterProperties;
use super::piece_table::PieceTable;
use crate::ole::binary::read_u32_le;
use crate::ole::sprm::parse_sprms;
/// A span of text that shares one set of character properties.
///
/// The bounds are character positions (CPs). `ChpBinTable::runs_in_range`
/// treats them as the half-open interval `[start_cp, end_cp)`.
#[derive(Debug, Clone)]
pub struct CharacterRun {
/// First character position covered by this run.
pub start_cp: u32,
/// Character position at which this run stops (exclusive in range queries).
pub end_cp: u32,
/// Character formatting decoded from the run's property modifiers.
pub properties: CharacterProperties,
}
/// Collection of the character runs decoded from a document's CHPX bin table.
///
/// Built with `ChpBinTable::parse`; read through `runs` and `runs_in_range`.
#[derive(Debug)]
pub struct ChpBinTable {
// Runs in the order they were discovered: bin-table entry by entry, then
// FKP entry by entry. No sorting or de-duplication is applied.
runs: Vec<CharacterRun>,
}
impl ChpBinTable {
    /// Size in bytes of one FKP page inside the WordDocument stream.
    const FKP_PAGE_SIZE: usize = 512;

    /// Mask that keeps the low 22 bits (the page number) of a raw bin-table value.
    const PN_MASK: u32 = 0x003F_FFFF;

    /// FC span assumed for the last entry of an FKP page, which has no
    /// following entry in the same page to bound it.
    const OPEN_RUN_FC_SPAN: u32 = 1_000_000;

    /// Parses the CHPX bin table and every FKP page it points to.
    ///
    /// # Parameters
    /// * `plcf_bte_chpx_data` - raw bytes of the bin table. With
    ///   `n = (len - 4) / 8`, the first `(n + 1) * 4` bytes are skipped and the
    ///   `n` little-endian 32-bit values that follow are read as page numbers.
    /// * `word_document` - the WordDocument stream; FKP pages are the 512-byte
    ///   slices at `page_number * 512`.
    /// * `piece_table` - used to convert file offsets (FCs) to character
    ///   positions (CPs).
    ///
    /// # Returns
    /// `None` when `plcf_bte_chpx_data` is shorter than 8 bytes. Otherwise
    /// `Some`, holding every run that could be decoded (possibly none).
    /// Bin-table entries with a page number of zero or all ones, pages that
    /// lie outside `word_document`, and pages that `ChpxFkp::parse` rejects are
    /// skipped silently.
    pub fn parse(
        plcf_bte_chpx_data: &[u8],
        word_document: &[u8],
        piece_table: &PieceTable,
    ) -> Option<Self> {
        if plcf_bte_chpx_data.len() < 8 {
            return None;
        }

        let n = (plcf_bte_chpx_data.len() - 4) / 8;
        // The page-number array starts right after the (n + 1) leading 4-byte
        // values. By construction (n + 1) * 4 + n * 4 <= len, so every read
        // below stays inside the buffer.
        let pn_base = (n + 1) * 4;

        let mut runs = Vec::new();
        for i in 0..n {
            // A failed read degrades to 0, which is rejected as an invalid page
            // number just below.
            let pn_raw = read_u32_le(plcf_bte_chpx_data, pn_base + i * 4).unwrap_or(0);
            let pn = pn_raw & Self::PN_MASK;
            if pn == 0 || pn == Self::PN_MASK {
                continue;
            }

            // `pn` is at most 22 bits wide, so the multiplication and the
            // range end cannot overflow a 32-bit or wider `usize`.
            let page_offset = (pn as usize) * Self::FKP_PAGE_SIZE;
            let fkp_page =
                match word_document.get(page_offset..page_offset + Self::FKP_PAGE_SIZE) {
                    Some(page) => page,
                    None => continue,
                };
            let fkp = match ChpxFkp::parse(fkp_page, word_document) {
                Some(fkp) => fkp,
                None => continue,
            };

            let count = fkp.count();
            for j in 0..count {
                let entry = match fkp.entry(j) {
                    Some(entry) => entry,
                    None => continue,
                };

                // A run ends where the next entry of the same page begins. The
                // final entry has no successor, so it gets a generous fixed
                // span; saturating keeps a hostile FC near u32::MAX from
                // overflowing.
                // NOTE(review): the bin table's own FC array presumably gives a
                // tighter bound for the final entry - confirm against the
                // format specification before switching to it.
                let end_fc = if j + 1 < count {
                    fkp.entry(j + 1).map(|next| next.fc).unwrap_or(entry.fc)
                } else {
                    entry.fc.saturating_add(Self::OPEN_RUN_FC_SPAN)
                };

                // When an FC cannot be mapped, the raw FC value is used as the CP.
                let start_cp = piece_table.fc_to_cp(entry.fc).unwrap_or(entry.fc);
                let end_cp = piece_table.fc_to_cp(end_fc).unwrap_or(end_fc);

                runs.push(CharacterRun {
                    start_cp,
                    end_cp,
                    properties: Self::parse_chpx(&entry.grpprl),
                });
            }
        }

        Some(Self { runs })
    }

    /// Decodes a `grpprl` byte sequence into character properties.
    ///
    /// Only the opcodes listed in the `match` below are interpreted; any other
    /// property modifier is ignored. An empty `grpprl` yields the default
    /// properties.
    fn parse_chpx(grpprl: &[u8]) -> CharacterProperties {
        let mut props = CharacterProperties::default();
        if grpprl.is_empty() {
            return props;
        }

        let sprms = parse_sprms(grpprl);
        for sprm in &sprms {
            match sprm.opcode {
                // Bold. A missing operand is read as 0 (off).
                // NOTE(review): any non-zero operand counts as "on"; operands
                // that mean "same as / opposite of style" are not distinguished
                // here - confirm this is intended.
                0x0835 | 0x0085 => {
                    props.is_bold = Some(sprm.operand_byte().unwrap_or(0) != 0);
                }
                // Italic, same operand handling as bold.
                0x0836 | 0x0086 => {
                    props.is_italic = Some(sprm.operand_byte().unwrap_or(0) != 0);
                }
                // Font size, stored exactly as the operand word provides it.
                0x4A43 | 0x0043 => {
                    props.font_size = sprm.operand_word();
                }
                // OLE2 flag (the original diagnostics called this SPRM_FOLE2).
                0x080A => {
                    props.is_ole2 = sprm.operand_byte().unwrap_or(0) != 0;
                }
                // Picture location (0x6A03) and object location (0x680E) both
                // feed `pic_offset`; whichever appears later in the grpprl wins.
                0x6A03 | 0x680E => {
                    props.pic_offset = sprm.operand_dword();
                }
                _ => {}
            }
        }
        props
    }

    /// Returns every parsed run, in the order they were discovered.
    #[inline]
    pub fn runs(&self) -> &[CharacterRun] {
        &self.runs
    }

    /// Returns the runs that overlap the half-open range `[start_cp, end_cp)`.
    ///
    /// A run overlaps when it ends after `start_cp` and starts before
    /// `end_cp`; runs that merely touch either boundary are excluded.
    pub fn runs_in_range(&self, start_cp: u32, end_cp: u32) -> Vec<&CharacterRun> {
        self.runs
            .iter()
            .filter(|run| run.end_cp > start_cp && run.start_cp < end_cp)
            .collect()
    }
}