osu_db/
lib.rs

1//! Representation and parsing for osu! binary formats: beatmap listing, collections, replays and
2//! scores.
3//!
4//! # A note on strings
5//!
6//! The osu `.db` file format allows for strings to be absent.
7//! This notably happens with the unicode versions of titles and authors.
8//! For this reason all of the parsed strings are expressed as `Option<String>` instead of a
9//! simple `String`.
10//! You can default to an empty string by using `string.unwrap_or_default()`, which does no
11//! allocations and is very cheap.
12//!
13//! # A note on features and replays
14//!
15//! By default, replay data will be decompressed and parsed, using the `xz2` dependency.
16//! To disable this behaviour and remove the dependency on `xz2`, disable the `compression` feature:
17//!
18//! ```toml
19//! osu-db = { version = "*", default-features = false }
20//! ```
21//!
22//! When `compression` is disabled, the
23//! [`Replay::replay_data`](replay/struct.Replay.html#structfield.replay_data) field will always be
24//! `None`, and will be ignored when writing.
25//! In any case, the
26//! [`Replay::raw_replay_data`](replay/struct.Replay.html#structfield.raw_replay_data) field is
27//! always available.
28//!
29//! # A note on future-proofness
30//!
31//! Osu `.db` formats are used internally by osu!, and are not intended to be shared.
32//! There does not seem to be any public contract on breaking changes, and breaking changes
//! already occurred twice (in 2014 and 2019), so this library might not work with future versions
34//! of osu!.
35//!
36//! It is currently guaranteed to work on osu! `.db` versions up to at least `20211103`.
37//! The current implementation might work for a long time, or break tomorrow.
38
39//Because otherwise compiling the large beatmap nom combinator fails
40#![recursion_limit = "128"]
41
42use crate::prelude::*;
43
44pub use crate::{collection::CollectionList, listing::Listing, replay::Replay, score::ScoreList};
45
46//Writer generator macro
/// A value that can be serialized to a byte sink, optionally steered by an extra argument.
trait Writable {
    /// Extra context handed to `wr_args`; `()` when the value needs none.
    type Args;

    /// Serialize `self` into `out`, using `args` as additional context.
    fn wr_args<W: Write>(&self, out: &mut W, args: Self::Args) -> io::Result<()>;
}

/// Shorthand for writables whose `Args` is `()`.
trait SimpleWritable: Writable {
    /// Serialize `self` into `out`.
    fn wr<W: Write>(&self, out: &mut W) -> io::Result<()>;
}

// Every argument-less `Writable` gets `wr` for free.
impl<T: Writable<Args = ()>> SimpleWritable for T {
    fn wr<W: Write>(&self, out: &mut W) -> io::Result<()> {
        Writable::wr_args(self, out, ())
    }
}
// Generates a `Writable` implementation for a type from a short body expression.
//
// `$this` names the value being written and `$out` the output sink inside the body.
// The body must evaluate to `()`; use `?` inside it to propagate I/O errors.
macro_rules! writer {
    // Full form: the writer receives an extra argument `$args` of type `$args_ty`.
    ($type:ty [$this:ident, $out:ident, $args:ident : $args_ty:ty] $code:expr) => {
        impl crate::Writable for $type {
            type Args = $args_ty;
            fn wr_args<W: Write>(&self, $out: &mut W, $args: Self::Args) -> io::Result<()> {
                let $this = self;
                // Forces the body to have unit type.
                let _: () = $code;
                Ok(())
            }
        }
    };
    // Short form: no extra argument, so `Args` is `()`.
    ($type:ty [$this:ident, $out:ident] $code:expr) => {
        writer!($type [$this, $out, _unused: ()] $code);
    };
}
80
81mod prelude {
82    pub(crate) use crate::{
83        boolean, byte, datetime, double, int, long, opt_string, short, single, Bit, Error, ModSet,
84        Mode, PrefixedList, SimpleWritable, Writable,
85    };
86    pub(crate) use chrono::{DateTime, Duration, TimeZone, Utc};
87    pub(crate) use nom::{
88        bytes::complete::{tag, take, take_while, take_while1},
89        combinator::{cond, map, map_opt, map_res, opt},
90        error::{Error as NomError, ErrorKind as NomErrorKind},
91        multi::{length_count, length_data, many0},
92        Err as NomErr, IResult, Needed,
93    };
94    #[cfg(feature = "ser-de")]
95    pub use serde_derive::{Deserialize, Serialize};
96    pub(crate) use std::{
97        fmt,
98        fs::{self, File},
99        io::{self, BufWriter, Write},
100        ops,
101        path::Path,
102    };
103    #[cfg(feature = "compression")]
104    pub use xz2::stream::Error as LzmaError;
105}
106
107pub mod collection;
108pub mod listing;
109pub mod replay;
110pub mod score;
111
112#[derive(Debug)]
113pub enum Error {
114    /// Only available with the `compression` feature enabled.
115    #[cfg(feature = "compression")]
116    Compression(LzmaError),
117    Io(io::Error),
118    ParseError(NomErrorKind),
119    ParseIncomplete(Needed),
120}
121impl fmt::Display for Error {
122    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
123        match self {
124            #[cfg(feature = "compression")]
125            Error::Compression(_err) => f.write_str("failed to compress/decompress replay data"),
126            Error::Io(_err) => f.write_str("failed to read osu .db file"),
127            Error::ParseError(kind) => {
128                write!(f, "failed to parse osu file: {}", kind.description())
129            }
130            Error::ParseIncomplete(Needed::Size(u)) => write!(
131                f,
132                "failed to parse osu file: parsing requires {} bytes/chars",
133                u
134            ),
135            Error::ParseIncomplete(Needed::Unknown) => {
136                f.write_str("failed to parse osu file: parsing requires more data")
137            }
138        }
139    }
140}
141impl std::error::Error for Error {
142    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
143        match self {
144            #[cfg(feature = "compression")]
145            Error::Compression(err) => Some(err as &dyn std::error::Error),
146            Error::Io(err) => Some(err as &dyn std::error::Error),
147            Error::ParseError(_kind) => None,
148            Error::ParseIncomplete(_needed) => None,
149        }
150    }
151}
152impl From<io::Error> for Error {
153    fn from(err: io::Error) -> Self {
154        Error::Io(err)
155    }
156}
157impl From<NomErr<NomError<&[u8]>>> for Error {
158    fn from(err: NomErr<NomError<&[u8]>>) -> Self {
159        match err {
160            NomErr::Incomplete(needed) => Self::ParseIncomplete(needed),
161            NomErr::Error(err) | NomErr::Failure(err) => Self::ParseError(err.code),
162        }
163    }
164}
165
#[cfg(feature = "compression")]
impl From<LzmaError> for Error {
    /// Wrap an LZMA stream error.
    fn from(source: LzmaError) -> Self {
        Self::Compression(source)
    }
}
172
/// Bit-level helpers for unsigned integers.
///
/// Bit positions count from the least significant bit (position 0).
/// Ranges are half-open (`start..end`) and may extend up to the full width of the integer.
trait Bit {
    /// Whether the bit at `pos` is set.
    fn bit(&self, pos: u32) -> bool;
    /// Extract the bits in `pos`, shifted down so that bit `pos.start` ends up at position 0.
    fn bit_range(&self, pos: ops::Range<u32>) -> Self;
    /// Set or clear the bit at `pos`.
    fn set_bit(&mut self, pos: u32, val: bool);
    /// Overwrite the bits in `pos` with the low bits of `val`; bits of `val` that do not fit in
    /// the range are ignored.
    fn set_bit_range(&mut self, pos: ops::Range<u32>, val: Self);
}
macro_rules! impl_bit {
    (@ $ty:ty) => {
        impl Bit for $ty {
            fn bit(&self, pos: u32) -> bool {
                (*self & 1 << pos) != 0
            }
            fn bit_range(&self, pos: ops::Range<u32>) -> Self {
                let one: Self = 1;
                // `1 << end` overflows when `end` is the full width; every bit is kept then.
                let below_end = one.checked_shl(pos.end).map_or(!0, |top| top - 1);
                (*self & below_end).checked_shr(pos.start).unwrap_or(0)
            }
            fn set_bit(&mut self, pos: u32, val: bool) {
                *self = (*self & !(1<<pos)) | ((val as Self)<<pos);
            }
            fn set_bit_range(&mut self, pos: ops::Range<u32>, val: Self) {
                let one: Self = 1;
                // An inverted range selects nothing.
                let width = pos.end.saturating_sub(pos.start);
                // A full-width range would overflow `1 << width`; select every bit instead.
                let low = one.checked_shl(width).map_or(!0, |top| top - 1);
                let mask = low.checked_shl(pos.start).unwrap_or(0);
                let shifted = val.checked_shl(pos.start).unwrap_or(0);
                *self = (*self & !mask) | (shifted & mask);
            }
        }
    };
    ($($ty:ty),*) => {
        $(
            impl_bit!(@ $ty);
        )*
    }
}
impl_bit!(u8, u16, u32, u64);
204
205//Common fixed-size osu `.db` primitives.
206use nom::number::complete::le_f32 as single;
207use nom::number::complete::le_f64 as double;
208use nom::number::complete::le_u16 as short;
209use nom::number::complete::le_u32 as int;
210use nom::number::complete::le_u64 as long;
211use nom::number::complete::le_u8 as byte;
212
213fn boolean(bytes: &[u8]) -> IResult<&[u8], bool> {
214    map(byte, |byte: u8| byte != 0)(bytes)
215}
216
217writer!(u8 [this,out] out.write_all(&this.to_le_bytes())?);
218writer!(u16 [this,out] out.write_all(&this.to_le_bytes())?);
219writer!(u32 [this,out] out.write_all(&this.to_le_bytes())?);
220writer!(u64 [this,out] out.write_all(&this.to_le_bytes())?);
221writer!(f32 [this,out] this.to_bits().wr(out)?);
222writer!(f64 [this,out] this.to_bits().wr(out)?);
223writer!(bool [this,out] (if *this {1_u8} else {0_u8}).wr(out)?);
224
225//Writer for a list of items preceded by its length as an int
226struct PrefixedList<'a, T>(&'a [T]);
227impl<T> Writable for PrefixedList<'_, T>
228where
229    T: Writable,
230    T::Args: Clone,
231{
232    type Args = T::Args;
233    fn wr_args<W: Write>(&self, out: &mut W, args: T::Args) -> io::Result<()> {
234        (self.0.len() as u32).wr(out)?;
235        for item in self.0 {
236            item.wr_args(out, args.clone())?;
237        }
238        Ok(())
239    }
240}
241
242/// Get a datetime from an amount of "windows ticks":
243/// The amount of 100-nanosecond units since midnight of the date 0001/01/01.
244fn windows_ticks_to_datetime(ticks: u64) -> DateTime<Utc> {
245    let epoch = Utc.ymd(1, 1, 1).and_hms(0, 0, 0);
246    epoch
247        + Duration::microseconds((ticks / 10) as i64)
248        + Duration::nanoseconds((ticks % 10 * 100) as i64)
249}
250
251fn datetime(bytes: &[u8]) -> IResult<&[u8], DateTime<Utc>> {
252    map(long, windows_ticks_to_datetime)(bytes)
253}
254
255fn datetime_to_windows_ticks(datetime: &DateTime<Utc>) -> u64 {
256    let epoch = Utc.ymd(1, 1, 1).and_hms(0, 0, 0);
257    let duration = datetime.signed_duration_since(epoch);
258    let ticks_since: i64 = (duration * 10).num_microseconds().unwrap_or(0);
259    ticks_since.max(0) as u64
260}
261writer!(DateTime<Utc> [this,out] datetime_to_windows_ticks(this).wr(out)?);
262
263// The variable-length ULEB128 encoding used mainly for string lengths.
264fn uleb(bytes: &[u8]) -> IResult<&[u8], usize> {
265    let (rem, prelude) = take_while(|b: u8| b.bit(7))(bytes)?;
266    let (rem, finalizer) = byte(rem)?;
267
268    let mut out = 0;
269    let mut offset = 0;
270
271    for byte in prelude {
272        out |= (byte.bit_range(0..7) as usize) << offset;
273        offset += 7;
274    }
275
276    out |= (finalizer as usize) << offset;
277
278    Ok((rem, out))
279}
280
281writer!(usize [this,out] {
282    let mut this=*this;
283    loop {
284        let mut byte=this as u8;
285        this>>=7;
286        let continues={this!=0};
287        byte.set_bit(7, continues);
288        byte.wr(out)?;
289        if !continues {break}
290    }
291});
292
293// An optional string.
294fn opt_string(bytes: &[u8]) -> IResult<&[u8], Option<String>> {
295    let (rem, first_byte) = byte(bytes)?;
296
297    match first_byte {
298        0x00 => Ok((rem, None)),
299        0x0b => {
300            let (rem, len) = uleb(rem)?;
301            let (rem, string) = map_res(take(len), std::str::from_utf8)(rem)?;
302
303            Ok((rem, Some(string.to_owned())))
304        }
305        _ => Err(NomErr::Error(NomError::new(bytes, NomErrorKind::Switch))),
306    }
307}
308
309writer!(Option<String> [this,out] {
310    match this {
311        Some(string) => {
312            0x0b_u8.wr(out)?;
313            string.len().wr(out)?;
314            out.write_all(string.as_bytes())?;
315        },
316        None => 0x00_u8.wr(out)?,
317    }
318});
319
/// An osu! gamemode.
#[cfg_attr(feature = "ser-de", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum Mode {
    Standard,
    Taiko,
    CatchTheBeat,
    Mania,
}
impl Mode {
    /// The integer that represents this gamemode, between 0 and 3 inclusive.
    pub fn raw(self) -> u8 {
        self as u8
    }

    /// Build a gamemode from its integer.
    /// Returns `None` if the integer is out-of-range (>3).
    pub fn from_raw(raw: u8) -> Option<Mode> {
        // Listed in declaration order, so the index equals the discriminant.
        const MODES: [Mode; 4] = [
            Mode::Standard,
            Mode::Taiko,
            Mode::CatchTheBeat,
            Mode::Mania,
        ];
        MODES.get(usize::from(raw)).copied()
    }
}
346
/// A single osu! mod.
#[cfg_attr(feature = "ser-de", derive(Serialize, Deserialize))]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum Mod {
    NoFail,
    Easy,
    TouchDevice,
    Hidden,
    HardRock,
    SuddenDeath,
    DoubleTime,
    Relax,
    HalfTime,
    /// Always goes with `DoubleTime`.
    Nightcore,
    Flashlight,
    Autoplay,
    SpunOut,
    /// Also called "Relax2".
    Autopilot,
    Perfect,
    Key4,
    Key5,
    Key6,
    Key7,
    Key8,
    FadeIn,
    Random,
    /// Cinema.
    LastMod,
    /// Only on osu!cuttingedge it seems.
    TargetPractice,
    Key9,
    Coop,
    Key1,
    Key3,
    Key2,
}
impl Mod {
    /// Each of the 29 mods has a corresponding integer in the range [0,28], inclusive.
    /// This method retrieves that integer.
    pub fn raw(&self) -> u8 {
        *self as u8
    }

    /// Build a mod from its corresponding integer.
    /// Returns `None` if the integer is out-of-range (>28).
    pub fn from_raw(bit_offset: u8) -> Option<Mod> {
        use self::Mod::*;
        // Listed in declaration order, so the index equals the discriminant.
        const BY_OFFSET: [Mod; 29] = [
            NoFail,
            Easy,
            TouchDevice,
            Hidden,
            HardRock,
            SuddenDeath,
            DoubleTime,
            Relax,
            HalfTime,
            Nightcore,
            Flashlight,
            Autoplay,
            SpunOut,
            Autopilot,
            Perfect,
            Key4,
            Key5,
            Key6,
            Key7,
            Key8,
            FadeIn,
            Random,
            LastMod,
            TargetPractice,
            Key9,
            Coop,
            Key1,
            Key3,
            Key2,
        ];
        BY_OFFSET.get(usize::from(bit_offset)).copied()
    }
}
431
432/// A combination of `Mod`s.
433///
434/// Very cheap to copy around, as it is a just a wrapped 32-bit integer.
435#[cfg_attr(feature = "ser-de", derive(Serialize, Deserialize))]
436#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
437pub struct ModSet(pub u32);
438impl ModSet {
439    pub fn bits(&self) -> u32 {
440        self.0
441    }
442    pub fn from_bits(bits: u32) -> ModSet {
443        ModSet(bits)
444    }
445
446    /// Create a `ModSet` with no mods included.
447    pub fn empty() -> ModSet {
448        ModSet::from_bits(0)
449    }
450
451    /// Check whether the set contains the given mod.
452    pub fn contains(&self, m: Mod) -> bool {
453        self.bits().bit(m.raw() as u32)
454    }
455
456    /// Make a new set of mods with the given mod included or not included.
457    pub fn set(&self, m: Mod, include: bool) -> ModSet {
458        let mut bits = self.bits();
459        bits.set_bit(m.raw() as u32, include);
460        ModSet::from_bits(bits)
461    }
462
463    /// Make a new set of mods with the given mod included.
464    pub fn with(&self, m: Mod) -> ModSet {
465        self.set(m, true)
466    }
467
468    /// Make a new set of mods with the given mod removed.
469    pub fn without(&self, m: Mod) -> ModSet {
470        self.set(m, false)
471    }
472}
473
#[cfg(test)]
mod test {
    use super::*;

    /// Input with nothing left in it.
    const EMPTY: &[u8] = &[];

    /// The error expected when a parser runs out of bytes while looking at `input`.
    fn eof(input: &[u8]) -> NomErr<NomError<&[u8]>> {
        NomErr::Error(NomError::new(input, NomErrorKind::Eof))
    }

    #[test]
    fn basic() {
        // Fixed-size values that parse successfully.
        assert_eq!(byte::<_, NomError<&[u8]>>(&b" "[..]), Ok((EMPTY, 32)));
        assert_eq!(short::<_, NomError<&[u8]>>(&[10, 2][..]), Ok((EMPTY, 522)));
        assert_eq!(
            int::<_, NomError<&[u8]>>(&[10, 10, 0, 0, 3][..]),
            Ok((&[3][..], 2570))
        );
        assert_eq!(
            long::<_, NomError<&[u8]>>(&[0, 0, 1, 0, 2, 0, 3, 0][..]),
            Ok((EMPTY, 844_433_520_132_096))
        );
        assert_eq!(
            single::<_, NomError<&[u8]>>(&[0, 0, 0b00100000, 0b00111110, 4][..]),
            Ok((&[4][..], 0.15625))
        );
        assert_eq!(
            double::<_, NomError<&[u8]>>(&[0b00000010, 0, 0, 0, 0, 0, 0b11110000, 0b00111111][..]),
            Ok((EMPTY, 1.0000000000000004))
        );
        assert_eq!(boolean(&[34, 4, 0][..]), Ok((&[4, 0][..], true)));

        // Inputs that are too short for the requested value.
        assert_eq!(
            int::<_, NomError<&[u8]>>(&[3, 5, 4][..]),
            Err(eof(&[3, 5, 4]))
        );
        assert_eq!(boolean(EMPTY), Err(eof(EMPTY)));
        assert_eq!(
            double::<_, NomError<&[u8]>>(&[14, 25, 15, 24, 3][..]),
            Err(eof(&[14, 25, 15, 24, 3]))
        );
    }

    #[test]
    fn uleb128() {
        assert_eq!(uleb(&[70]), Ok((EMPTY, 70)));
        assert_eq!(uleb(&[]), Err(eof(EMPTY)));
        assert_eq!(uleb(&[129, 2]), Ok((EMPTY, 257)));
        assert_eq!(uleb(&[124, 2]), Ok((&[2][..], 124)));
    }

    #[test]
    fn strings() {
        assert_eq!(opt_string(b"\x00sf"), Ok((&b"sf"[..], None)));
        assert_eq!(
            opt_string(b"\x0b\x02ghf"),
            Ok((&b"f"[..], Some(String::from("gh"))))
        );
        //Invalid string header
        assert!(opt_string(b"\x01ww").is_err());
        //Invalid utf-8
        assert!(opt_string(b"\x0b\x01\xff").is_err());
        //Missing string length
        assert_eq!(opt_string(b"\x0b"), Err(eof(EMPTY)));
        //Long strings
        let long_str = "w".repeat(129);
        let raw = [&b"\x0b\x81\x01"[..], long_str.as_bytes(), &b"afaf"[..]].concat();
        assert_eq!(opt_string(&raw), Ok((&b"afaf"[..], Some(long_str))));
    }
}
560}