1use super::{GgmlDType, QTensor};
6use crate::{Context, Device, Result};
7use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
8use std::collections::HashMap;
9
/// Alignment, in bytes, used for the tensor-data section when the file metadata does not
/// provide a usable `general.alignment` entry.
pub const DEFAULT_ALIGNMENT: u64 = 32;
11
12#[derive(Debug, Clone, Copy, PartialEq, Eq)]
13enum Magic {
14 Gguf,
15}
16
17impl TryFrom<u32> for Magic {
18 type Error = crate::Error;
19 fn try_from(value: u32) -> Result<Self> {
20 let magic = match value {
21 0x46554747 | 0x47475546 => Self::Gguf,
22 _ => crate::bail!("unknown magic 0x{value:08x}"),
23 };
24 Ok(magic)
25 }
26}
27
28#[derive(Debug, Clone, Copy, PartialEq, Eq)]
29pub enum VersionedMagic {
30 GgufV1,
31 GgufV2,
32 GgufV3,
33}
34
35impl VersionedMagic {
36 fn read<R: std::io::Read>(reader: &mut R) -> Result<Self> {
37 let magic = reader.read_u32::<LittleEndian>()?;
38 let magic = Magic::try_from(magic)?;
39 let version = reader.read_u32::<LittleEndian>()?;
40 let versioned_magic = match (magic, version) {
41 (Magic::Gguf, 1) => Self::GgufV1,
42 (Magic::Gguf, 2) => Self::GgufV2,
43 (Magic::Gguf, 3) => Self::GgufV3,
44 _ => crate::bail!("gguf: unsupported magic/version {magic:?}/{version}"),
45 };
46 Ok(versioned_magic)
47 }
48}
49
50#[derive(Debug)]
51pub struct TensorInfo {
52 pub ggml_dtype: GgmlDType,
53 pub shape: crate::Shape,
54 pub offset: u64,
55}
56
57impl TensorInfo {
58 pub fn read<R: std::io::Seek + std::io::Read>(
59 &self,
60 reader: &mut R,
61 tensor_data_offset: u64,
62 device: &Device,
63 ) -> Result<QTensor> {
64 let tensor_elems = self.shape.elem_count();
65 let block_size = self.ggml_dtype.block_size();
66 if !tensor_elems.is_multiple_of(block_size) {
67 crate::bail!(
68 "the number of elements {tensor_elems} is not divisible by the block size {block_size}"
69 )
70 }
71 let size_in_bytes = tensor_elems / block_size * self.ggml_dtype.type_size();
72 let mut raw_data = vec![0u8; size_in_bytes];
73 reader.seek(std::io::SeekFrom::Start(tensor_data_offset + self.offset))?;
74 reader.read_exact(&mut raw_data)?;
75 super::ggml_file::qtensor_from_ggml(
76 self.ggml_dtype,
77 &raw_data,
78 self.shape.dims().to_vec(),
79 device,
80 )
81 }
82}
83
84#[derive(Debug)]
85pub struct Content {
86 pub magic: VersionedMagic,
87 pub metadata: HashMap<String, Value>,
88 pub tensor_infos: HashMap<String, TensorInfo>,
89 pub tensor_data_offset: u64,
90}
91
92fn read_string<R: std::io::Read>(reader: &mut R, magic: &VersionedMagic) -> Result<String> {
93 let len = match magic {
94 VersionedMagic::GgufV1 => reader.read_u32::<LittleEndian>()? as usize,
95 VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => {
96 reader.read_u64::<LittleEndian>()? as usize
97 }
98 };
99 let mut v = vec![0u8; len];
100 reader.read_exact(&mut v)?;
101 while let Some(0) = v.last() {
103 v.pop();
104 }
105 Ok(String::from_utf8_lossy(&v).into_owned())
107}
108
/// Type tag of a metadata value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ValueType {
    /// 8-bit unsigned integer.
    U8,
    /// 8-bit signed integer.
    I8,
    /// 16-bit unsigned integer.
    U16,
    /// 16-bit signed integer.
    I16,
    /// 32-bit unsigned integer.
    U32,
    /// 32-bit signed integer.
    I32,
    /// 64-bit unsigned integer.
    U64,
    /// 64-bit signed integer.
    I64,
    /// 32-bit floating point number.
    F32,
    /// 64-bit floating point number.
    F64,
    /// Boolean.
    Bool,
    /// Length-prefixed string.
    String,
    /// Array of values.
    Array,
}
141
142#[derive(Debug, Clone)]
143pub enum Value {
144 U8(u8),
145 I8(i8),
146 U16(u16),
147 I16(i16),
148 U32(u32),
149 I32(i32),
150 U64(u64),
151 I64(i64),
152 F32(f32),
153 F64(f64),
154 Bool(bool),
155 String(String),
156 Array(Vec<Value>),
157}
158
159impl Value {
160 pub fn value_type(&self) -> ValueType {
161 match self {
162 Self::U8(_) => ValueType::U8,
163 Self::I8(_) => ValueType::I8,
164 Self::U16(_) => ValueType::U16,
165 Self::I16(_) => ValueType::I16,
166 Self::U32(_) => ValueType::U32,
167 Self::I32(_) => ValueType::I32,
168 Self::U64(_) => ValueType::U64,
169 Self::I64(_) => ValueType::I64,
170 Self::F32(_) => ValueType::F32,
171 Self::F64(_) => ValueType::F64,
172 Self::Bool(_) => ValueType::Bool,
173 Self::String(_) => ValueType::String,
174 Self::Array(_) => ValueType::Array,
175 }
176 }
177
178 pub fn to_u8(&self) -> Result<u8> {
179 match self {
180 Self::U8(v) => Ok(*v),
181 v => crate::bail!("not a u8 {v:?}"),
182 }
183 }
184
185 pub fn to_i8(&self) -> Result<i8> {
186 match self {
187 Self::I8(v) => Ok(*v),
188 v => crate::bail!("not a i8 {v:?}"),
189 }
190 }
191
192 pub fn to_u16(&self) -> Result<u16> {
193 match self {
194 Self::U16(v) => Ok(*v),
195 v => crate::bail!("not a u16 {v:?}"),
196 }
197 }
198
199 pub fn to_i16(&self) -> Result<i16> {
200 match self {
201 Self::I16(v) => Ok(*v),
202 v => crate::bail!("not a i16 {v:?}"),
203 }
204 }
205
206 pub fn to_u32(&self) -> Result<u32> {
207 match self {
208 Self::U32(v) => Ok(*v),
209 v => crate::bail!("not a u32 {v:?}"),
210 }
211 }
212
213 pub fn to_i32(&self) -> Result<i32> {
214 match self {
215 Self::I32(v) => Ok(*v),
216 v => crate::bail!("not a i32 {v:?}"),
217 }
218 }
219
220 pub fn to_u64(&self) -> Result<u64> {
222 match self {
223 Self::U64(v) => Ok(*v),
224 Self::U8(v) => Ok(*v as u64),
226 Self::U16(v) => Ok(*v as u64),
227 Self::U32(v) => Ok(*v as u64),
228 Self::Bool(v) => Ok(*v as u64),
229 v => crate::bail!("not a u64 or upcastable to u64 {v:?}"),
230 }
231 }
232
233 pub fn to_i64(&self) -> Result<i64> {
234 match self {
235 Self::I64(v) => Ok(*v),
236 v => crate::bail!("not a i64 {v:?}"),
237 }
238 }
239
240 pub fn to_f32(&self) -> Result<f32> {
241 match self {
242 Self::F32(v) => Ok(*v),
243 v => crate::bail!("not a f32 {v:?}"),
244 }
245 }
246
247 pub fn to_f64(&self) -> Result<f64> {
248 match self {
249 Self::F64(v) => Ok(*v),
250 v => crate::bail!("not a f64 {v:?}"),
251 }
252 }
253
254 pub fn to_bool(&self) -> Result<bool> {
255 match self {
256 Self::Bool(v) => Ok(*v),
257 v => crate::bail!("not a bool {v:?}"),
258 }
259 }
260
261 pub fn to_vec(&self) -> Result<&Vec<Value>> {
262 match self {
263 Self::Array(v) => Ok(v),
264 v => crate::bail!("not a vec {v:?}"),
265 }
266 }
267
268 pub fn to_string(&self) -> Result<&String> {
269 match self {
270 Self::String(v) => Ok(v),
271 v => crate::bail!("not a string {v:?}"),
272 }
273 }
274
275 fn read<R: std::io::Read>(
276 reader: &mut R,
277 value_type: ValueType,
278 magic: &VersionedMagic,
279 ) -> Result<Self> {
280 let v = match value_type {
281 ValueType::U8 => Self::U8(reader.read_u8()?),
282 ValueType::I8 => Self::I8(reader.read_i8()?),
283 ValueType::U16 => Self::U16(reader.read_u16::<LittleEndian>()?),
284 ValueType::I16 => Self::I16(reader.read_i16::<LittleEndian>()?),
285 ValueType::U32 => Self::U32(reader.read_u32::<LittleEndian>()?),
286 ValueType::I32 => Self::I32(reader.read_i32::<LittleEndian>()?),
287 ValueType::U64 => Self::U64(reader.read_u64::<LittleEndian>()?),
288 ValueType::I64 => Self::I64(reader.read_i64::<LittleEndian>()?),
289 ValueType::F32 => Self::F32(reader.read_f32::<LittleEndian>()?),
290 ValueType::F64 => Self::F64(reader.read_f64::<LittleEndian>()?),
291 ValueType::Bool => match reader.read_u8()? {
292 0 => Self::Bool(false),
293 1 => Self::Bool(true),
294 b => crate::bail!("unexpected bool value {b}"),
295 },
296 ValueType::String => Self::String(read_string(reader, magic)?),
297 ValueType::Array => {
298 let value_type = reader.read_u32::<LittleEndian>()?;
299 let value_type = ValueType::from_u32(value_type)?;
300 let len = match magic {
301 VersionedMagic::GgufV1 => reader.read_u32::<LittleEndian>()? as usize,
302 VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => {
303 reader.read_u64::<LittleEndian>()? as usize
304 }
305 };
306 let mut vs = Vec::with_capacity(len);
307 for _ in 0..len {
308 vs.push(Value::read(reader, value_type, magic)?)
309 }
310 Self::Array(vs)
311 }
312 };
313 Ok(v)
314 }
315
316 fn write<W: std::io::Write>(&self, w: &mut W) -> Result<()> {
317 match self {
318 &Self::U8(v) => w.write_u8(v)?,
319 &Self::I8(v) => w.write_i8(v)?,
320 &Self::U16(v) => w.write_u16::<LittleEndian>(v)?,
321 &Self::I16(v) => w.write_i16::<LittleEndian>(v)?,
322 &Self::U32(v) => w.write_u32::<LittleEndian>(v)?,
323 &Self::I32(v) => w.write_i32::<LittleEndian>(v)?,
324 &Self::U64(v) => w.write_u64::<LittleEndian>(v)?,
325 &Self::I64(v) => w.write_i64::<LittleEndian>(v)?,
326 &Self::F32(v) => w.write_f32::<LittleEndian>(v)?,
327 &Self::F64(v) => w.write_f64::<LittleEndian>(v)?,
328 &Self::Bool(v) => w.write_u8(u8::from(v))?,
329 Self::String(v) => write_string(w, v.as_str())?,
330 Self::Array(v) => {
331 let value_type = if v.is_empty() {
334 ValueType::U32
336 } else {
337 let value_type: std::collections::HashSet<_> =
338 v.iter().map(|elem| elem.value_type()).collect();
339 if value_type.len() != 1 {
340 crate::bail!("multiple value-types in the same array {value_type:?}")
341 }
342 value_type.into_iter().next().context("empty value_type")?
343 };
344 w.write_u32::<LittleEndian>(value_type.to_u32())?;
345 w.write_u64::<LittleEndian>(v.len() as u64)?;
346 for elem in v.iter() {
347 elem.write(w)?
348 }
349 }
350 }
351 Ok(())
352 }
353}
354
355impl ValueType {
356 fn from_u32(v: u32) -> Result<Self> {
357 let v = match v {
358 0 => Self::U8,
359 1 => Self::I8,
360 2 => Self::U16,
361 3 => Self::I16,
362 4 => Self::U32,
363 5 => Self::I32,
364 6 => Self::F32,
365 7 => Self::Bool,
366 8 => Self::String,
367 9 => Self::Array,
368 10 => Self::U64,
369 11 => Self::I64,
370 12 => Self::F64,
371 v => crate::bail!("unrecognized value-type {v:#08x}"),
372 };
373 Ok(v)
374 }
375
376 fn to_u32(self) -> u32 {
377 match self {
378 Self::U8 => 0,
379 Self::I8 => 1,
380 Self::U16 => 2,
381 Self::I16 => 3,
382 Self::U32 => 4,
383 Self::I32 => 5,
384 Self::F32 => 6,
385 Self::Bool => 7,
386 Self::String => 8,
387 Self::Array => 9,
388 Self::U64 => 10,
389 Self::I64 => 11,
390 Self::F64 => 12,
391 }
392 }
393}
394
395impl Content {
396 pub fn read<R: std::io::Seek + std::io::Read>(reader: &mut R) -> Result<Self> {
397 let magic = VersionedMagic::read(reader)?;
398
399 let tensor_count = match magic {
400 VersionedMagic::GgufV1 => reader.read_u32::<LittleEndian>()? as usize,
401 VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => {
402 reader.read_u64::<LittleEndian>()? as usize
403 }
404 };
405 let metadata_kv_count = match magic {
406 VersionedMagic::GgufV1 => reader.read_u32::<LittleEndian>()? as usize,
407 VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => {
408 reader.read_u64::<LittleEndian>()? as usize
409 }
410 };
411
412 let mut metadata = HashMap::new();
413 for _idx in 0..metadata_kv_count {
414 let key = read_string(reader, &magic)?;
415 let value_type = reader.read_u32::<LittleEndian>()?;
416 let value_type = ValueType::from_u32(value_type)?;
417 let value = Value::read(reader, value_type, &magic)?;
418 metadata.insert(key, value);
419 }
420 let mut tensor_infos = HashMap::new();
421 for _idx in 0..tensor_count {
422 let tensor_name = read_string(reader, &magic)?;
423 let n_dimensions = reader.read_u32::<LittleEndian>()?;
424
425 let mut dimensions: Vec<usize> = match magic {
426 VersionedMagic::GgufV1 => {
427 let mut dimensions = vec![0; n_dimensions as usize];
428 reader.read_u32_into::<LittleEndian>(&mut dimensions)?;
429 dimensions.into_iter().map(|c| c as usize).collect()
430 }
431 VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => {
432 let mut dimensions = vec![0; n_dimensions as usize];
433 reader.read_u64_into::<LittleEndian>(&mut dimensions)?;
434 dimensions.into_iter().map(|c| c as usize).collect()
435 }
436 };
437
438 dimensions.reverse();
439 let ggml_dtype = reader.read_u32::<LittleEndian>()?;
440 let ggml_dtype = GgmlDType::from_u32(ggml_dtype)?;
441 let offset = reader.read_u64::<LittleEndian>()?;
442 tensor_infos.insert(
443 tensor_name,
444 TensorInfo {
445 shape: crate::Shape::from(dimensions),
446 offset,
447 ggml_dtype,
448 },
449 );
450 }
451 let position = reader.stream_position()?;
452 let alignment = match metadata.get("general.alignment") {
453 Some(Value::U8(v)) => *v as u64,
454 Some(Value::U16(v)) => *v as u64,
455 Some(Value::U32(v)) => *v as u64,
456 Some(Value::I8(v)) if *v >= 0 => *v as u64,
457 Some(Value::I16(v)) if *v >= 0 => *v as u64,
458 Some(Value::I32(v)) if *v >= 0 => *v as u64,
459 _ => DEFAULT_ALIGNMENT,
460 };
461 let tensor_data_offset = position.div_ceil(alignment) * alignment;
462 Ok(Self {
463 magic,
464 metadata,
465 tensor_infos,
466 tensor_data_offset,
467 })
468 }
469
470 pub fn tensor<R: std::io::Seek + std::io::Read>(
471 &self,
472 reader: &mut R,
473 name: &str,
474 device: &Device,
475 ) -> Result<QTensor> {
476 let tensor_info = match self.tensor_infos.get(name) {
477 Some(tensor_info) => tensor_info,
478 None => crate::bail!("cannot find tensor info for {name}"),
479 };
480 tensor_info.read(reader, self.tensor_data_offset, device)
481 }
482}
483
484fn write_string<W: std::io::Write>(w: &mut W, str: &str) -> Result<()> {
485 let bytes = str.as_bytes();
486 w.write_u64::<LittleEndian>(bytes.len() as u64)?;
487 w.write_all(bytes)?;
488 Ok(())
489}
490
491pub fn write<W: std::io::Seek + std::io::Write>(
492 w: &mut W,
493 metadata: &[(&str, &Value)],
494 tensors: &[(&str, &QTensor)],
495) -> Result<()> {
496 w.write_u32::<LittleEndian>(0x46554747)?;
497 w.write_u32::<LittleEndian>(2)?; w.write_u64::<LittleEndian>(tensors.len() as u64)?;
499 w.write_u64::<LittleEndian>(metadata.len() as u64)?;
500 for (name, value) in metadata.iter() {
501 write_string(w, name)?;
502 w.write_u32::<LittleEndian>(value.value_type().to_u32())?;
503 value.write(w)?;
504 }
505 let mut offset = 0usize;
506 let mut offsets = Vec::with_capacity(tensors.len());
507 for (name, tensor) in tensors.iter() {
508 write_string(w, name)?;
509 let dims = tensor.shape().dims();
510 w.write_u32::<LittleEndian>(dims.len() as u32)?;
511 for &dim in dims.iter().rev() {
512 w.write_u64::<LittleEndian>(dim as u64)?;
513 }
514 w.write_u32::<LittleEndian>(tensor.dtype().to_u32())?;
515 w.write_u64::<LittleEndian>(offset as u64)?;
516 offsets.push(offset);
517 let size_in_bytes = tensor.storage_size_in_bytes();
518 let padding = 31 - (31 + size_in_bytes) % 32;
519 offset += size_in_bytes + padding;
520 }
521 let pos = w.stream_position()? as usize;
522 let padding = 31 - (31 + pos) % 32;
523 w.write_all(&vec![0u8; padding])?;
524 let tensor_start_pos = w.stream_position()? as usize;
525 for (offset, (_name, tensor)) in offsets.iter().zip(tensors.iter()) {
526 let pos = w.stream_position()? as usize;
527 if tensor_start_pos + offset != pos {
528 crate::bail!(
529 "internal error, unexpected current position {tensor_start_pos} {offset} {pos}"
530 )
531 }
532 let data = tensor.data()?;
533 let size_in_bytes = data.len();
534 w.write_all(&data)?;
535 let padding = 31 - (31 + size_in_bytes) % 32;
536 w.write_all(&vec![0u8; padding])?;
537 }
538 Ok(())
539}