use crate::ole::binary::{PlcfParser, read_u32_le, read_u16_le};
/// One contiguous run of text, described both in character positions (CPs)
/// and by the byte offset (FC) at which its first character is stored.
#[derive(Debug, Clone)]
pub struct TextPiece {
    /// First character position covered by this piece.
    pub cp_start: u32,
    /// Character position one past the last character of this piece.
    pub cp_end: u32,
    /// Byte offset of the character at `cp_start`.
    pub fc: u32,
    /// `true` when each character occupies two bytes, `false` for one byte.
    pub is_unicode: bool,
}

impl TextPiece {
    /// Number of character positions spanned by this piece.
    ///
    /// Returns 0 instead of underflowing when the piece is malformed
    /// (`cp_end < cp_start`).
    #[inline]
    pub fn length(&self) -> u32 {
        self.cp_end.saturating_sub(self.cp_start)
    }

    /// Converts a character position into the byte offset of that character.
    ///
    /// `cp` may be anywhere in `cp_start..=cp_end`; the end position itself is
    /// accepted so that the boundary just past the piece can be mapped too.
    ///
    /// Returns `None` when `cp` lies outside that range or when the resulting
    /// byte offset does not fit in a `u32`.
    pub fn cp_to_fc(&self, cp: u32) -> Option<u32> {
        if cp > self.cp_end {
            return None;
        }
        // `checked_sub` doubles as the `cp < cp_start` range check.
        let char_offset = cp.checked_sub(self.cp_start)?;
        let byte_offset = if self.is_unicode {
            char_offset.checked_mul(2)?
        } else {
            char_offset
        };
        self.fc.checked_add(byte_offset)
    }

    /// Converts a byte offset into the character position stored there.
    ///
    /// For two-byte pieces an odd byte offset is rounded down to the character
    /// that contains it. The position `cp_end` itself is accepted, mirroring
    /// [`TextPiece::cp_to_fc`].
    ///
    /// Returns `None` when `fc` precedes this piece, maps past `cp_end`, or
    /// the character position would not fit in a `u32`.
    pub fn fc_to_cp(&self, fc: u32) -> Option<u32> {
        let byte_offset = fc.checked_sub(self.fc)?;
        let char_offset = if self.is_unicode {
            byte_offset / 2
        } else {
            byte_offset
        };
        self.cp_start
            .checked_add(char_offset)
            .filter(|&cp| cp <= self.cp_end)
    }
}
/// The list of [`TextPiece`]s recovered from a CLX byte blob.
///
/// Pieces are stored in ascending `cp_start` order, as arranged by
/// [`PieceTable::parse`].
#[derive(Debug, Clone)]
pub struct PieceTable {
    /// Parsed pieces, sorted by `cp_start`.
    pieces: Vec<TextPiece>,
}

impl PieceTable {
    /// Tag byte of the length-prefixed records that are skipped ahead of the
    /// piece table (named `Prc` in the MS-DOC `Clx` layout).
    const CLXT_PRC: u8 = 0x01;
    /// Tag byte that introduces the piece table itself (`Pcdt` in MS-DOC).
    const CLXT_PCDT: u8 = 0x02;
    /// Bit of the raw FC word that, when set, marks one-byte-per-character text.
    const FC_COMPRESSED: u32 = 0x4000_0000;
    /// Low 30 bits of the raw FC word: the stored offset value.
    const FC_OFFSET_MASK: u32 = 0x3FFF_FFFF;

    /// Parses a piece table out of raw CLX bytes.
    ///
    /// Layout consumed, in order:
    /// 1. zero or more records of the form `0x01`, little-endian `u16` size,
    ///    `size` payload bytes - these are skipped;
    /// 2. one record of the form `0x02`, little-endian `u32` length `lcb`,
    ///    followed by `lcb` bytes that are handed to [`PlcfParser`] with
    ///    8-byte properties.
    ///
    /// For every property, the `u32` at byte offset 2 is decoded: when bit
    /// `0x4000_0000` is clear the piece is two-byte text located at the low
    /// 30 bits; when set it is one-byte text located at half that value.
    ///
    /// Returns `None` when the input is empty or truncated, when the `0x02`
    /// tag is missing, or when the PLC cannot be parsed. All bounds arithmetic
    /// is checked, so hostile length fields cannot wrap around on targets
    /// with a 32-bit `usize`.
    pub fn parse(clx_data: &[u8]) -> Option<Self> {
        let total = clx_data.len();
        let mut offset = 0usize;

        // Step over every leading 0x01 record; only its size field matters.
        while clx_data.get(offset) == Some(&Self::CLXT_PRC) {
            let size_at = offset + 1;
            let payload_at = size_at.checked_add(2).filter(|&end| end <= total)?;
            let size = read_u16_le(clx_data, size_at).unwrap_or(0) as usize;
            offset = payload_at.checked_add(size).filter(|&end| end <= total)?;
        }

        // The piece table record must come next.
        if clx_data.get(offset) != Some(&Self::CLXT_PCDT) {
            return None;
        }
        let lcb_at = offset + 1;
        let plc_start = lcb_at.checked_add(4).filter(|&end| end <= total)?;
        let lcb = read_u32_le(clx_data, lcb_at).unwrap_or(0) as usize;
        let plc_end = plc_start.checked_add(lcb).filter(|&end| end <= total)?;

        let plcf = PlcfParser::parse(&clx_data[plc_start..plc_end], 8)?;

        let mut pieces = Vec::new();
        for i in 0..plcf.count() {
            let (cp_start, cp_end) = plcf.range(i)?;
            let pcd_data = plcf.property(i)?;
            // A property shorter than 8 bytes cannot hold the FC word; skip it.
            if pcd_data.len() < 8 {
                continue;
            }
            let fc_raw = read_u32_le(pcd_data, 2).unwrap_or(0);
            let is_unicode = (fc_raw & Self::FC_COMPRESSED) == 0;
            let fc_bits = fc_raw & Self::FC_OFFSET_MASK;
            // One-byte text stores twice its real byte offset.
            let fc = if is_unicode { fc_bits } else { fc_bits / 2 };
            pieces.push(TextPiece {
                cp_start,
                cp_end,
                fc,
                is_unicode,
            });
        }

        // Stable sort: pieces with equal `cp_start` keep their PLC order.
        pieces.sort_by_key(|piece| piece.cp_start);
        Some(Self { pieces })
    }

    /// All pieces, in ascending `cp_start` order.
    #[inline]
    pub fn pieces(&self) -> &[TextPiece] {
        &self.pieces
    }

    /// Finds the first piece whose half-open range `cp_start..cp_end`
    /// contains `cp`.
    pub fn piece_for_cp(&self, cp: u32) -> Option<&TextPiece> {
        self.pieces
            .iter()
            .find(|piece| (piece.cp_start..piece.cp_end).contains(&cp))
    }

    /// Converts a character position into a byte offset via the piece that
    /// contains it; `None` when no piece contains `cp`.
    pub fn cp_to_fc(&self, cp: u32) -> Option<u32> {
        self.piece_for_cp(cp)
            .and_then(|piece| piece.cp_to_fc(cp))
    }

    /// Converts a byte offset into a character position.
    ///
    /// [`TextPiece::fc_to_cp`] also accepts the position one past a piece's
    /// last character. A byte offset that is merely the end boundary of one
    /// piece may be the real start of another, so a match strictly inside a
    /// piece wins; the first boundary-only match is used only when no piece
    /// actually contains the offset.
    pub fn fc_to_cp(&self, fc: u32) -> Option<u32> {
        let mut boundary_match = None;
        for piece in &self.pieces {
            match piece.fc_to_cp(fc) {
                Some(cp) if cp < piece.cp_end => return Some(cp),
                Some(cp) => {
                    if boundary_match.is_none() {
                        boundary_match = Some(cp);
                    }
                }
                None => {}
            }
        }
        boundary_match
    }

    /// End character position of the last piece (by `cp_start` order), or 0
    /// when the table has no pieces.
    pub fn total_cps(&self) -> u32 {
        self.pieces.last().map_or(0, |piece| piece.cp_end)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the piece shared by both tests: CPs 100..200 stored from byte
    /// offset 500, in the requested character width.
    fn sample_piece(is_unicode: bool) -> TextPiece {
        TextPiece {
            cp_start: 100,
            cp_end: 200,
            fc: 500,
            is_unicode,
        }
    }

    /// Two-byte piece: each CP step advances the byte offset by two, the end
    /// position is accepted, and positions outside the piece map to `None`.
    #[test]
    fn test_text_piece_cp_to_fc() {
        let piece = sample_piece(true);
        let cases: [(u32, Option<u32>); 5] = [
            (100, Some(500)),
            (150, Some(600)),
            (200, Some(700)),
            (50, None),
            (250, None),
        ];
        for &(cp, expected) in cases.iter() {
            // Pairing the input with the result makes a failure name the case.
            assert_eq!((cp, piece.cp_to_fc(cp)), (cp, expected));
        }
    }

    /// One-byte piece: byte offsets map 1:1 onto CPs, the end position is
    /// accepted, and offsets outside the piece map to `None`.
    #[test]
    fn test_text_piece_fc_to_cp() {
        let piece = sample_piece(false);
        let cases: [(u32, Option<u32>); 5] = [
            (500, Some(100)),
            (550, Some(150)),
            (600, Some(200)),
            (400, None),
            (700, None),
        ];
        for &(fc, expected) in cases.iter() {
            assert_eq!((fc, piece.fc_to_cp(fc)), (fc, expected));
        }
    }
}