dvi_to_text/
lib.rs

1use dvi::Instruction;
2use std::collections::HashMap;
3
4// Treat the entire input as a single batch
5pub fn text(dvi_bytes: &[u8]) -> Vec<u8> {
6    let mut bytes_remaining = dvi_bytes;
7    let mut machine = Machine::new();
8    while bytes_remaining.len() > 0 {
9        let (b, inst) =
10            Instruction::parse(bytes_remaining).expect("Bytes should be a valid DVI file");
11        machine.apply_inst(&inst);
12        bytes_remaining = b;
13    }
14    machine.text
15}
16
/// The six position registers of the DVI machine, all in DVI units.
///
/// `h` and `v` locate the current point on the page; `w` and `x` are amounts
/// that get added to `h`, and `y` and `z` are amounts that get added to `v`.
#[derive(Clone)]
struct Position {
    /// Current horizontal coordinate on the page, in DVI units
    h: i32,
    /// Current vertical coordinate on the page, in DVI units
    v: i32,
    /// Horizontal spacing amount *w*, in DVI units
    w: i32,
    /// Horizontal spacing amount *x*, in DVI units
    x: i32,
    /// Vertical spacing amount *y*, in DVI units
    y: i32,
    /// Vertical spacing amount *z*, in DVI units
    z: i32,
}

impl Position {
    /// Returns the position in which every register is zero.
    pub fn zero() -> Position {
        Self {
            h: 0,
            v: 0,
            w: 0,
            x: 0,
            y: 0,
            z: 0,
        }
    }
}
45
/// A single character together with the place on the page where it was
/// typeset and the font that was selected at that moment.
struct CharPos {
    /// Horizontal coordinate of the character on the page, in DVI units
    h: i32,
    /// Vertical coordinate of the character on the page, in DVI units
    v: i32,
    /// Number of the font the character is drawn in
    font_index: u32,
    /// Character code to draw
    code: u8,
}
56
57struct Machine {
58    position: Position,
59    position_stack: Vec<Position>,
60    font_index: u32,
61    fonts: HashMap<u32, dvi::FontDef>,
62    /// characters collected so far on the current page
63    chars: Vec<CharPos>,
64    text: Vec<u8>,
65}
66
67impl Machine {
68    pub fn new() -> Machine {
69        Machine {
70            position: Position::zero(),
71            position_stack: Vec::new(),
72            font_index: 0,
73            fonts: HashMap::new(),
74            chars: Vec::new(),
75            text: Vec::new(),
76        }
77    }
78    fn put_char(&mut self, char: u8) {
79        self.chars.push(CharPos {
80            h: self.position.h,
81            v: self.position.v,
82            font_index: self.font_index,
83            code: char,
84        })
85    }
86    fn get_font(&self, font_index: u32) -> &dvi::FontDef {
87        if let Some(font) = self.fonts.get(&font_index) {
88            font
89        } else {
90            // catches all plain-ascii files
91            eprintln!("Error in reading file. Are you sure you loaded a DVI file?");
92            panic!("Font should be defined");
93        }
94    }
95    fn char_width(&self, font_index: u32, _char: u8) -> i32 {
96        // not using TFM files; just assume width is the design size.
97        return self
98            .get_font(font_index)
99            .design_size
100            .try_into()
101            .expect("Design size should fit inside i32");
102    }
103    pub fn apply_inst(&mut self, inst: &Instruction) {
104        use Instruction::*;
105        match inst {
106            Set(charcode) | Put(charcode) => {
107                let char = u8::try_from(*charcode).expect("Char to set should fit in u8");
108                self.put_char(char);
109                if let Set(_) = inst {
110                    self.position.h += self.char_width(self.font_index, char);
111                }
112            }
113            PutRule(_, _) => {}
114            SetRule(_, b) => {
115                self.position.h += b;
116            }
117            Nop => {}
118            Bop(_, _) => {
119                self.font_index = 0;
120                self.position = Position::zero();
121                self.position_stack = Vec::new();
122                // Currently just assuming the pages are in order in the DVI file; in particular, the ten
123                // c_i parameters are unused, and the p parameter is unused
124            }
125            Eop => {
126                assert!(
127                    self.position_stack.len() == 0,
128                    "Stack should be empty at end-of-page"
129                );
130                // Print what you have read since the previous bop
131                // Sort by v coordinate first, then by h coordinate
132                self.chars.sort_by(|a, b| (a.v, a.h).cmp(&(b.v, b.h)));
133                // horizontal coordinate of the right edge of the previous character
134                let mut prev_h = 0;
135                // vertical coordinate of the previous character
136                let mut prev_v = 0;
137                for char_pos in &self.chars {
138                    // insert newlines as necessary
139                    // ignore the first newline of every page
140                    let dy = char_pos.v - prev_v;
141                    if dy > 0 && prev_v > 0 {
142                        let design_size = self.get_font(char_pos.font_index).design_size;
143                        // assume baseline skip averages 1.2*design_size
144                        // division is equivalent to pseudocode:
145                        // let baseline_skip = (design_size*1.2)
146                        // let num_spaces = round(float_div(x / baseline_skip))
147                        let num_newlines = ((dy as u32) * 5 + design_size / 2) / 6 / design_size;
148                        for _ in 0..num_newlines {
149                            self.text.push(b'\n');
150                        }
151                        prev_h = 0;
152                    }
153                    // insert spaces as necessary
154                    let dx = char_pos.h - prev_h;
155                    if dx > 0 {
156                        let design_size = self.get_font(char_pos.font_index).design_size;
157                        // assume width of a space averages (1/3)*design_size
158                        // division is equivalent to pseudocode:
159                        // let space_width = (design_size/3)
160                        // let num_spaces = round(float_div(x / space_width))
161                        let num_spaces = ((dx as u32) * 3 + design_size / 2) / design_size;
162                        for _ in 0..num_spaces {
163                            self.text.push(b' ');
164                        }
165                    }
166                    // finally insert the character itself
167                    self.text.push(char_pos.code);
168                    // update coordinates
169                    prev_v = char_pos.v;
170                    prev_h = char_pos.h + self.char_width(char_pos.font_index, char_pos.code)
171                }
172                // Always trailing newline for each page
173                self.text.push(b'\n');
174                self.chars = Vec::new();
175            }
176            Push => self.position_stack.push(self.position.clone()),
177            Pop => {
178                self.position = self
179                    .position_stack
180                    .pop()
181                    .expect("Stack should be non-empty on pop")
182            }
183            Right(b) => self.position.h += b,
184            W(b) => {
185                if let Some(w) = b {
186                    self.position.w = *w;
187                }
188                self.position.h += self.position.w;
189            }
190            X(b) => {
191                if let Some(x) = b {
192                    self.position.x = *x;
193                }
194                self.position.h += self.position.x;
195            }
196            Down(b) => self.position.v += b,
197            Y(b) => {
198                if let Some(y) = b {
199                    self.position.y = *y;
200                }
201                self.position.v += self.position.y;
202            }
203            Font(k) => self.font_index = *k,
204            Xxx(_) => {}
205            FontDef(font_def) => {
206                self.fonts.insert(font_def.number, font_def.clone());
207            }
208            Z(b) => {
209                if let Some(z) = b {
210                    self.position.z = *z;
211                }
212                self.position.v += self.position.z;
213            }
214            Pre { .. } => {}
215            Post { .. } => {}
216            PostPost { .. } => {}
217        }
218    }
219}