1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
use std::cmp::{Ord, Ordering};
use std::fmt;
use uuid::Uuid;

#[cfg(test)]
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use std::error::Error;
use std::str::FromStr;
#[cfg(test)]
use std::sync::atomic;

/// Uuid identifying a segment.
///
/// Tantivy's segment are identified
/// by a UUID which is used to prefix the filenames
/// of all of the file associated with the segment.
///
/// In unit test, for reproducability, the `SegmentId` are
/// simply generated in an autoincrement fashion.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct SegmentId(Uuid);

#[cfg(test)]
static AUTO_INC_COUNTER: Lazy<atomic::AtomicUsize> = Lazy::new(|| atomic::AtomicUsize::default());

#[cfg(test)]
const ZERO_ARRAY: [u8; 8] = [0u8; 8];

// During tests, we generate the segment id in a autoincrement manner
// for consistency of segment id between run.
//
// The order of the test execution is not guaranteed, but the order
// of segments within a single test is guaranteed.
#[cfg(test)]
fn create_uuid() -> Uuid {
    let new_auto_inc_id = (*AUTO_INC_COUNTER).fetch_add(1, atomic::Ordering::SeqCst);
    Uuid::from_fields(new_auto_inc_id as u32, 0, 0, &ZERO_ARRAY).unwrap()
}

#[cfg(not(test))]
fn create_uuid() -> Uuid {
    Uuid::new_v4()
}

impl SegmentId {
    #[doc(hidden)]
    pub fn generate_random() -> SegmentId {
        SegmentId(create_uuid())
    }

    /// Returns a shorter identifier of the segment.
    ///
    /// We are using UUID4, so only 6 bits are fixed,
    /// and the rest is random.
    ///
    /// Picking the first 8 chars is ok to identify
    /// segments in a display message (e.g. a5c4dfcb).
    pub fn short_uuid_string(&self) -> String {
        (&self.0.to_simple_ref().to_string()[..8]).to_string()
    }

    /// Returns a segment uuid string.
    ///
    /// It consists in 32 lowercase hexadecimal chars
    /// (e.g. a5c4dfcbdfe645089129e308e26d5523)
    pub fn uuid_string(&self) -> String {
        self.0.to_simple_ref().to_string()
    }

    /// Build a `SegmentId` string from the full uuid string.
    ///
    /// E.g. "a5c4dfcbdfe645089129e308e26d5523"
    pub fn from_uuid_string(uuid_string: &str) -> Result<SegmentId, SegmentIdParseError> {
        FromStr::from_str(uuid_string)
    }
}

/// Error type used when parsing a `SegmentId` from a string fails.
pub struct SegmentIdParseError(uuid::Error);

impl Error for SegmentIdParseError {}

impl fmt::Debug for SegmentIdParseError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.0.fmt(f)
    }
}

impl fmt::Display for SegmentIdParseError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.0.fmt(f)
    }
}

impl FromStr for SegmentId {
    type Err = SegmentIdParseError;

    fn from_str(uuid_string: &str) -> Result<Self, SegmentIdParseError> {
        let uuid = Uuid::parse_str(uuid_string).map_err(SegmentIdParseError)?;
        Ok(SegmentId(uuid))
    }
}

impl fmt::Debug for SegmentId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Seg({:?})", self.short_uuid_string())
    }
}

impl PartialOrd for SegmentId {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for SegmentId {
    fn cmp(&self, other: &Self) -> Ordering {
        self.0.as_bytes().cmp(other.0.as_bytes())
    }
}

#[cfg(test)]
mod tests {
    use super::SegmentId;

    #[test]
    fn test_to_uuid_string() {
        let full_uuid = "a5c4dfcbdfe645089129e308e26d5523";
        let segment_id = SegmentId::from_uuid_string(full_uuid).unwrap();
        assert_eq!(segment_id.uuid_string(), full_uuid);
        assert_eq!(segment_id.short_uuid_string(), "a5c4dfcb");
        // one extra char
        assert!(SegmentId::from_uuid_string("a5c4dfcbdfe645089129e308e26d5523b").is_err());
    }
}