1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
/// One entry of the PDB record-info list.
/// Each entry occupies 8 bytes in the file.
#[derive(Default, Debug, Clone)]
pub(crate) struct PDBRecordInfo {
/// the offset of record n from the start of the PDB of this record
pub(crate) offset: u32,
/// bit field. The least significant four bits are used to represent the category values. These are the categories used to split the databases for viewing on the screen. A few of the 16 categories are pre-defined but the user can add their own. There is an undefined category for use if the user or programmer hasn't set this.
/// 0x10 (16 decimal) Secret record bit.
/// 0x20 (32 decimal) Record in use (busy bit).
/// 0x40 (64 decimal) Dirty record bit.
/// 0x80 (128, unsigned decimal) Delete record on next HotSync.
pub(crate) attribute: u8,
/// The unique ID for this record. Often just a sequential count from 0
/// Only 3 bytes are actually stored; the most significant byte is unused.
pub(crate) unique_id: u32,
}
/// Header of a PDB container file, together with its record-info list.
#[derive(Default, Debug)]
pub(crate) struct PDBHeader {
// name(32): database name stored as 32 raw bytes
pub(crate) name: [u8; 32],
// attribute(2)
/// Attribute bit field:
/// 0x0002 Read-Only
/// 0x0004 Dirty AppInfoArea
/// 0x0008 Backup this database (i.e. no conduit exists)
/// 0x0010 (16 decimal) Okay to install newer over existing copy, if present on PalmPilot
/// 0x0020 (32 decimal) Force the PalmPilot to reset after this database is installed
/// 0x0040 (64 decimal) Don't allow copy of file to be beamed to other Pilot.
pub(crate) attribute: u16,
/// file version
pub(crate) version: u16,
/// No. of seconds since start of January 1, 1904.
///
/// [https://wiki.mobileread.com/wiki/PDB#PDB%20Times] describes an additional rule for timestamps:
///
/// If the time has the top bit set, it's an unsigned 32-bit number counting from 1st Jan 1904
///
/// If the time has the top bit clear, it's a signed 32-bit number counting from 1st Jan 1970.
pub(crate) createion_date: u32,
/// No. of seconds since start of January 1, 1904 (same encoding rule as `createion_date`).
pub(crate) modify_date: u32,
/// No. of seconds since start of January 1, 1904 (same encoding rule as `createion_date`).
pub(crate) last_backup_date: u32,
/// Modification number of the database.
/// NOTE(review): presumably a change counter rather than a timestamp (it is printed raw,
/// not through `do_time_format`) - confirm against the PDB format documentation.
pub(crate) modification_number: u32,
/// offset to start of Application Info (if present) or null
pub(crate) app_info_id: u32,
/// offset to start of Sort Info (if present) or null
pub(crate) sort_info_id: u32,
/// Four-byte database type code, as listed in the PDB format documentation. (For Applications this data will be 'appl')
pub(crate) _type: [u8; 4],
/// Four-byte creator code, as listed in the PDB format documentation. This program will be launched if the file is tapped
pub(crate) creator: [u8; 4],
/// used internally to identify record
pub(crate) unique_id_seed: u32,
/// Only used when in-memory on Palm OS. Always set to zero in stored files.
pub(crate) next_record_list_id: u32,
/// number of records in the file - N
pub(crate) number_of_records: u16,
/// Record-info entries, 8 bytes each. After the whole list there is a two-byte gap that carries no meaning.
pub(crate) record_info_list: Vec<PDBRecordInfo>,
}
/// Header fields describing how the book text is stored: compression, length and record layout.
#[derive(Default, Debug)]
pub(crate) struct MOBIDOCHeader {
/// 1 == no compression, 2 = PalmDOC compression, 17480 = HUFF/CDIC compression
/// The 2 bytes that follow this field are unused and are skipped.
pub(crate) compression: u16,
/// Uncompressed length of the entire text of the book
pub(crate) length: u32,
/// Number of PDB records used for the text of the book.
pub(crate) record_count: u16,
/// Maximum size of each record containing text, always 4096
pub(crate) record_size: u16,
/// Current reading position, as an offset into the uncompressed text
/// When compression = 17480 this field is split into separate parts.
pub(crate) position: u32,
/// This field only exists when compression = 17480.
/// 0 == no encryption, 1 = Old Mobipocket Encryption, 2 = Mobipocket Encryption
pub(crate) encrypt_type: u16,
}
/// Parsed fields of the MOBI header (the section introduced by the characters `MOBI`).
/// Fields that are commented out below are unknown/unused parts of the layout, kept as notes.
#[derive(Default, Debug)]
pub(crate) struct MOBIHeader {
// the characters M O B I
/// the length of the MOBI header, including the previous 4 bytes
pub(crate) header_len: u32,
/// The kind of Mobipocket file this is
/// 2 Mobipocket Book
/// 3 PalmDoc Book
/// 4 Audio
/// 232 mobipocket? generated by kindlegen1.2
/// 248 KF8: generated by kindlegen2
/// 257 News
/// 258 News_Feed
/// 259 News_Magazine
/// 513 PICS
/// 514 WORD
/// 515 XLS
/// 516 PPT
/// 517 TEXT
/// 518 HTML
pub(crate) mobi_type: u32,
/// 1252 = CP1252 (WinLatin1); 65001 = UTF-8
pub(crate) text_encoding: u32,
/// Some kind of unique ID number (random?)
pub(crate) unique_id: u32,
/// Version of the Mobipocket format used in this file.
pub(crate) file_version: u32,
/// Section number of orthographic meta index. 0xFFFFFFFF if index is not available.
pub(crate) ortographic_index: u32,
/// Section number of inflection meta index. 0xFFFFFFFF if index is not available.
pub(crate) inflection_index: u32,
/// 0xFFFFFFFF if index is not available.
pub(crate) index_names: u32,
/// 0xFFFFFFFF if index is not available.
pub(crate) index_keys: u32,
/// Section number of extra N meta index. 0xFFFFFFFF if index is not available.
pub(crate) extra_index: [u32; 6],
/// First record number (starting with 0) that's not the book's text
pub(crate) first_non_book_index: u32,
/// Offset in record 0 (not from start of file) of the full name of the book
pub(crate) full_name_offset: u32,
/// Length in bytes of the full name of the book
pub(crate) full_name_length: u32,
/// Book locale code. Low byte is main language 09= English, next byte is dialect, 08 = British, 04 = US. Thus US English is 1033, UK English is 2057.
pub(crate) locale: u32,
/// Input language for a dictionary
pub(crate) input_language: u32,
/// Output language for a dictionary
pub(crate) output_language: u32,
/// Minimum mobipocket version support needed to read this file.
pub(crate) min_version: u32,
/// First record number (starting with 0) that contains an image. Image records should be sequential.
pub(crate) first_image_index: u32,
/// The record number of the first huffman compression record.
pub(crate) huffman_record_offset: u32,
/// The number of huffman compression records.
pub(crate) huffman_record_count: u32,
/// offset
pub(crate) huffman_table_offset: u32,
/// len
pub(crate) huffman_table_length: u32,
/// bitfield. if bit 6 (0x40) is set, then there's an EXTH record
/// When the sixth bit counting from the low end is 1 an EXTH header is present, regardless of the other bits.
/// Bit 12 set means embedded fonts are present.
pub(crate) exth_flags: u32,
// 32 unknown bytes, if MOBI is long enough
// unknown_0: [u8; 8],
// /// Use 0xFFFFFFFF
// unknown_1: u32,
/// Offset to DRM key info in DRMed files. 0xFFFFFFFF if no DRM
/// In practice this may be 0 when there is no DRM? Still to be tested.
pub(crate) drm_offset: u32,
/// Number of entries in DRM info. 0xFFFFFFFF if no DRM
pub(crate) drm_count: u32,
/// Number of bytes in DRM info.
pub(crate) drm_size: u32,
/// Some flags concerning the DRM info.
pub(crate) drm_flags: u32,
// Bytes to the end of the MOBI header, including the following if the header length >= 228 (244 from start of record).Use 0x0000000000000000.
// unknown_2: u64,
/// Number of first text record. Normally 1.
pub(crate) first_content_record_number: u16,
/// Number of last image record or number of last text record if it contains no images. Includes Image, DATP, HUFF, DRM.
pub(crate) last_content_record_number: u16,
// FCIS record count? Use 0x00000001.
// unknown_3: u32,
pub(crate) fcis_record_number: u32,
// Use 0x00000001.
// unknown_4: u32,
pub(crate) flis_record_number: u32,
// Use 0x00000001.flis record count?
// unknown_5: u32,
// Use 0x0000000000000000.
// unknown_6: u64,
// Use 0xFFFFFFFF.
// unknown_7: u32,
/// Use 0x00000000.
pub(crate) first_compilation_data_section_count: u32,
/// Use 0xFFFFFFFF.
pub(crate) number_of_compilation_data_sections: u32,
// Use 0xFFFFFFFF.
// unknown_8: u32,
/// A set of binary flags, some of which indicate extra data at the end of each text block. This only seems to be valid for Mobipocket format version 5 and 6 (and higher?), when the header length is 228 (0xE4) or 232 (0xE8).
/// bit 1 (0x1) : <extra multibyte bytes><size>
/// bit 2 (0x2) : <TBS indexing description of this HTML record><size>
/// bit 3 (0x4) : <uncrossable breaks><size>
/// Setting bit 2 (0x2) disables <guide><reference type="start"> functionality.
pub(crate) extra_record_data_flags: u32,
/// (If not 0xFFFFFFFF)The record number of the first INDX record created from an ncx file.
pub(crate) indx_record_offset: u32,
}
/// Kind of an EXTH record; each variant's discriminant is the numeric record
/// type code stored in the file.
#[derive(Debug, Clone, Default)]
pub(crate) enum EXTHRecordType {
    DrmServerId = 1,
    DrmCommerceId = 2,
    DrmEbookbaseBookId = 3,
    Author = 100,
    Publisher = 101,
    Imprint = 102,
    Description = 103,
    Isbn = 104,
    /// Could appear multiple times
    Subject = 105,
    PublishingDate = 106,
    Review = 107,
    Contributor = 108,
    Rights = 109,
    SubjectCode = 110,
    Type = 111,
    Source = 112,
    /// Kindle Paperwhite labels books with "Personal" if they don't have this record.
    Asin = 113,
    VersionNumber = 114,
    /// 0x0001 if the book content is only a sample of the full book
    Sample = 115,
    /// Position (4-byte offset) in file at which to open when first opened
    StartReading = 116,
    /// Mobipocket Creator adds this if Adult only is checked on its GUI; contents: "yes"
    Adult = 117,
    /// As text, e.g. "4.99"
    RetailPrice = 118,
    /// As text, e.g. "USD"
    RetailPriceCurrency = 119,
    Kf8BoundaryOffset = 121,
    /// "true"
    FixedLayout = 122,
    /// "comic"
    BookType = 123,
    /// "none", "portrait", "landscape"
    OrientationLock = 124,
    CountOfResources = 125,
    /// "1072x1448"
    OriginalResolution = 126,
    /// "true"
    ZeroGutter = 127,
    /// "true"
    ZeroMargin = 128,
    MetadataResourceUri = 129,
    /// Always 0?
    Kf8UnknownCount = 131,
    /// "true"
    ///
    /// This is also the variant produced by `Default`.
    #[default]
    Unknown1 = 132,
    /// As text
    DictionaryShortName = 200,
    /// Add to first image field in Mobi Header to find PDB record containing the cover image
    CoverOffset = 201,
    /// Add to first image field in Mobi Header to find PDB record containing the thumbnail cover image
    ThumbOffset = 202,
    HasFakeCover = 203,
    /// Known Values: 1=mobigen, 2=Mobipocket Creator, 200=kindlegen (Windows), 201=kindlegen (Linux), 202=kindlegen (Mac).
    /// Warning: Calibre creates fake creator entries, pretending to be a Linux kindlegen 1.2 (201, 1, 2, 33307) for normal ebooks and a non-public Linux kindlegen 2.0 (201, 2, 0, 101) for periodicals
    CreatorSoftware = 204,
    CreatorMajorVersion = 205,
    CreatorMinorVersion = 206,
    CreatorBuildNumber = 207,
    Watermark = 208,
    /// Used by the Kindle (and Android app) for generating book-specific PIDs.
    TamperProofKeys = 209,
    Fontsignature = 300,
    /// Integer percentage of the text allowed to be clipped. Usually 10.
    ClippingLimit = 401,
    PublisherLimit = 402,
    Unknown2 = 403,
    /// 1 - Text to Speech disabled; 0 - Text to Speech enabled
    Ttsflag = 404,
    /// 1 in this field seems to indicate a rental book
    UnknownRentBorrowFlag = 405,
    /// If this field is removed from a rental, the book says it expired in 1969
    RentOrBorrowExpirationDate = 406,
    Unknown3 = 407,
    Unknown4 = 450,
    Unknown5 = 451,
    Unknown6 = 452,
    Unknown7 = 453,
    /// PDOC - Personal Doc; EBOK - ebook; EBSP - ebook sample;
    Cdetype = 501,
    LastupdateTime = 502,
    UpdatedTitle = 503,
    Language = 524,
    /// I found horizontal-lr in this record.
    WritingMode = 525,
    OverrideKindleFonts = 528,
    Unknown8 = 536,
    /// Some Unix timestamp.
    Unknown9 = 542,
    /// String 'I\x00n\x00M\x00e\x00m\x00o\x00r\x00y\x00' found in this record, for KindleGen V2.9 build 1029-0897292
    InMemory = 547,
}
impl EXTHRecordType {
    /// Returns the numeric EXTH record type code, i.e. this variant's discriminant.
    pub(crate) fn code(&self) -> u32 {
        // The numeric cast needs an owned value, so clone out of the reference first.
        let kind = Clone::clone(self);
        kind as u32
    }
}
impl From<u32> for EXTHRecordType {
fn from(value: u32) -> Self {
match value {
1 => EXTHRecordType::DrmServerId,
2 => EXTHRecordType::DrmCommerceId,
3 => EXTHRecordType::DrmEbookbaseBookId,
100 => EXTHRecordType::Author,
101 => EXTHRecordType::Publisher,
102 => EXTHRecordType::Imprint,
103 => EXTHRecordType::Description,
104 => EXTHRecordType::Isbn,
105 => EXTHRecordType::Subject,
106 => EXTHRecordType::PublishingDate,
107 => EXTHRecordType::Review,
108 => EXTHRecordType::Contributor,
109 => EXTHRecordType::Rights,
110 => EXTHRecordType::SubjectCode,
111 => EXTHRecordType::Type,
112 => EXTHRecordType::Source,
113 => EXTHRecordType::Asin,
114 => EXTHRecordType::VersionNumber,
115 => EXTHRecordType::Sample,
116 => EXTHRecordType::StartReading,
117 => EXTHRecordType::Adult,
118 => EXTHRecordType::RetailPrice,
119 => EXTHRecordType::RetailPriceCurrency,
121 => EXTHRecordType::Kf8BoundaryOffset,
122 => EXTHRecordType::FixedLayout,
123 => EXTHRecordType::BookType,
124 => EXTHRecordType::OrientationLock,
125 => EXTHRecordType::CountOfResources,
126 => EXTHRecordType::OriginalResolution,
127 => EXTHRecordType::ZeroGutter,
128 => EXTHRecordType::ZeroMargin,
129 => EXTHRecordType::MetadataResourceUri,
131 => EXTHRecordType::Kf8UnknownCount,
132 => EXTHRecordType::Unknown1,
200 => EXTHRecordType::DictionaryShortName,
201 => EXTHRecordType::CoverOffset,
202 => EXTHRecordType::ThumbOffset,
203 => EXTHRecordType::HasFakeCover,
204 => EXTHRecordType::CreatorSoftware,
205 => EXTHRecordType::CreatorMajorVersion,
206 => EXTHRecordType::CreatorMinorVersion,
207 => EXTHRecordType::CreatorBuildNumber,
208 => EXTHRecordType::Watermark,
209 => EXTHRecordType::TamperProofKeys,
300 => EXTHRecordType::Fontsignature,
401 => EXTHRecordType::ClippingLimit,
402 => EXTHRecordType::PublisherLimit,
403 => EXTHRecordType::Unknown2,
404 => EXTHRecordType::Ttsflag,
405 => EXTHRecordType::UnknownRentBorrowFlag,
406 => EXTHRecordType::RentOrBorrowExpirationDate,
407 => EXTHRecordType::Unknown3,
450 => EXTHRecordType::Unknown4,
451 => EXTHRecordType::Unknown5,
452 => EXTHRecordType::Unknown6,
453 => EXTHRecordType::Unknown7,
501 => EXTHRecordType::Cdetype,
502 => EXTHRecordType::LastupdateTime,
503 => EXTHRecordType::UpdatedTitle,
524 => EXTHRecordType::Language,
505 => EXTHRecordType::WritingMode,
528 => EXTHRecordType::OverrideKindleFonts,
536 => EXTHRecordType::Unknown8,
542 => EXTHRecordType::Unknown9,
547 => EXTHRecordType::InMemory,
_ => EXTHRecordType::Unknown1,
}
}
}
/// A single EXTH record: its type, its declared length and its raw payload bytes.
#[derive(Default, Debug)]
pub(crate) struct EXTHRecord {
/// Exth Record type. Just a number identifying what's stored in the record
pub(crate) _type: EXTHRecordType,
/// length of EXTH record = L , including the 8 bytes in the type and length fields
pub(crate) len: u32,
/// Record payload; L - 8 bytes long
pub(crate) data: Vec<u8>,
}
/// The EXTH header and its records.
/// See [https://wiki.mobileread.com/wiki/MOBI#EXTH_Header]
#[derive(Default, Debug)]
pub(crate) struct EXTHHeader {
// the characters E X T H
// identifier: [u8; 4],
/// the length of the EXTH header, including the previous 4 bytes - but not including the final padding.
pub(crate) len: u32,
/// The number of records in the EXTH header. the rest of the EXTH header consists of repeated EXTH records to the end of the EXTH length.
pub(crate) record_count: u32,
/// The variable-length records of this header.
pub(crate) record_list: Vec<EXTHRecord>, // any remaining bytes are meaningless padding and can simply be skipped
}
/// Formats a PDB header timestamp as a human-readable string.
///
/// PDB timestamps come in two encodings, told apart by the most significant
/// bit (see <https://wiki.mobileread.com/wiki/PDB#PDB%20Times>):
///
/// * top bit set: an unsigned 32-bit count of seconds since 1904-01-01;
/// * top bit clear: a count of seconds since 1970-01-01.
///
/// The result is whatever `DateTimeFormater::default_format` produces for the
/// decoded instant.
pub(crate) fn do_time_format(value: u32) -> String {
    const TOP_BIT: u32 = 0x8000_0000;

    let seconds = u64::from(value);
    if value & TOP_BIT != 0 {
        // The top bit is part of the unsigned count, not a separate flag, so the
        // full value is passed on. Masking it off would move the date back by
        // 2^31 seconds (roughly 68 years).
        crate::common::DateTimeFormater::custom_start(seconds, 1904).default_format()
    } else {
        crate::common::DateTimeFormater::new(seconds).default_format()
    }
}
/// Converts a fixed-size, possibly NUL-padded byte field into a `String`.
///
/// The text is taken up to (not including) the first NUL byte, so the zero
/// padding of fixed-width header fields does not leak into the result. Bytes
/// that are not valid UTF-8 are replaced with U+FFFD instead of discarding the
/// whole value.
fn u8_to_string<const N: usize>(v: [u8; N]) -> String {
    let end = v.iter().position(|&byte| byte == 0).unwrap_or(N);
    String::from_utf8_lossy(&v[..end]).into_owned()
}
impl std::fmt::Display for PDBHeader {
    /// Writes the header as a single `PDBHeader { ... }` line followed by a newline.
    ///
    /// The byte-array fields (`name`, `_type`, `creator`) are rendered as text via
    /// `u8_to_string`, the three date fields via `do_time_format`, the record-info
    /// list with its `Debug` representation, and every other field as a plain number.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Derived values are computed up front, in the order they appear in the output.
        let name = u8_to_string(self.name);
        let created = do_time_format(self.createion_date);
        let modified = do_time_format(self.modify_date);
        let backed_up = do_time_format(self.last_backup_date);
        let type_code = u8_to_string(self._type);
        let creator_code = u8_to_string(self.creator);

        writeln!(
            f,
            "PDBHeader {{ name: '{}', attribute: {}, version: {}, createion_date: {}, modify_date: {}, last_backup_date: {}, modification_number: {}, app_info_id: {}, sort_info_id: {}, _type: {}, creator: {}, unique_id_seed: {}, next_record_list_id: {}, number_of_records: {}, record_info_list: {:?}, record_list: [] }}",
            name,
            self.attribute,
            self.version,
            created,
            modified,
            backed_up,
            self.modification_number,
            self.app_info_id,
            self.sort_info_id,
            type_code,
            creator_code,
            self.unique_id_seed,
            self.next_record_list_id,
            self.number_of_records,
            self.record_info_list
        )
    }
}