git_internal/internal/
index.rs

1use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
2use sha1::{Digest, Sha1};
3use std::collections::BTreeMap;
4use std::fmt::{Display, Formatter};
5use std::fs::{self, File};
6use std::io;
7use std::io::{BufReader, Read, Write};
8#[cfg(unix)]
9use std::os::unix::fs::MetadataExt;
10use std::path::{Path, PathBuf};
11use std::time::{SystemTime, UNIX_EPOCH};
12
13use crate::errors::GitError;
14use crate::hash::SHA1;
15use crate::internal::pack::wrapper::Wrapper;
16use crate::utils;
17
18#[derive(PartialEq, Eq, Debug, Clone)]
19pub struct Time {
20    seconds: u32,
21    nanos: u32,
22}
23impl Time {
24    pub fn from_stream(stream: &mut impl Read) -> Result<Self, GitError> {
25        let seconds = stream.read_u32::<BigEndian>()?;
26        let nanos = stream.read_u32::<BigEndian>()?;
27        Ok(Time { seconds, nanos })
28    }
29
30    #[allow(dead_code)]
31    fn to_system_time(&self) -> SystemTime {
32        UNIX_EPOCH + std::time::Duration::new(self.seconds.into(), self.nanos)
33    }
34
35    pub fn from_system_time(system_time: SystemTime) -> Self {
36        match system_time.duration_since(UNIX_EPOCH) {
37            Ok(duration) => {
38                let seconds = duration
39                    .as_secs()
40                    .try_into()
41                    .expect("Time is too far in the future");
42                let nanos = duration.subsec_nanos();
43                Time { seconds, nanos }
44            }
45            Err(_) => panic!("Time is before the UNIX epoch"),
46        }
47    }
48}
49impl Display for Time {
50    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
51        write!(f, "{}:{}", self.seconds, self.nanos)
52    }
53}
54
/// Flag word attached to every index entry (16 bits on disk).
///
/// Bit layout, from the most significant bit down: assume-valid (1 bit),
/// extended (1 bit), stage (2 bits), name length (12 bits).
#[derive(Debug)]
pub struct Flags {
    pub assume_valid: bool,
    pub extended: bool,   // must be 0 in v2
    pub stage: u8,        // 2-bit during merge
    pub name_length: u16, // 12-bit
}

impl Flags {
    const ASSUME_VALID: u16 = 0x8000;
    const EXTENDED: u16 = 0x4000;
    const STAGE_MASK: u16 = 0x3000;
    const STAGE_SHIFT: u32 = 12;
    /// Mask of the name-length field, which is also its largest value.
    const NAME_MASK: u16 = 0x0FFF;

    /// Builds the flags for a fresh stage-0 entry whose path is `name_len`
    /// bytes long.
    ///
    /// The index format stores `0xFFF` in the length field for any name that
    /// is `0xFFF` bytes or longer, so larger values are clamped rather than
    /// kept verbatim (an unclamped value cannot be serialized).
    pub fn new(name_len: u16) -> Self {
        Flags {
            // NOTE(review): new entries are created with the assume-valid bit
            // set; confirm with callers that this is intended.
            assume_valid: true,
            extended: false,
            stage: 0,
            name_length: name_len.min(Self::NAME_MASK),
        }
    }
}

impl From<u16> for Flags {
    /// Splits a raw 16-bit flag word into its individual fields.
    fn from(flags: u16) -> Self {
        Flags {
            assume_valid: flags & Flags::ASSUME_VALID != 0,
            extended: flags & Flags::EXTENDED != 0,
            stage: ((flags & Flags::STAGE_MASK) >> Flags::STAGE_SHIFT) as u8,
            name_length: flags & Flags::NAME_MASK,
        }
    }
}

impl TryFrom<&Flags> for u16 {
    type Error = &'static str;

    /// Packs the fields back into the raw 16-bit flag word.
    ///
    /// Callers may keep using `(&flags).try_into()`; the standard blanket
    /// implementation provides `TryInto<u16>` for `&Flags`.
    ///
    /// # Errors
    /// - `"Stage is out of range"` if `stage` does not fit in 2 bits (it
    ///   would otherwise spill into the extended / assume-valid bits).
    /// - `"Name length is too long"` if `name_length` does not fit in 12 bits.
    fn try_from(flags: &Flags) -> Result<Self, Self::Error> {
        if flags.stage > 3 {
            return Err("Stage is out of range");
        }
        if flags.name_length > Flags::NAME_MASK {
            return Err("Name length is too long");
        }

        let mut word = flags.name_length; // bits 0-11
        if flags.assume_valid {
            word |= Flags::ASSUME_VALID; // bit 15
        }
        if flags.extended {
            word |= Flags::EXTENDED; // bit 14
        }
        word |= u16::from(flags.stage) << Flags::STAGE_SHIFT; // bits 12-13
        Ok(word)
    }
}
104
105pub struct IndexEntry {
106    pub ctime: Time,
107    pub mtime: Time,
108    pub dev: u32,  // 0 for windows
109    pub ino: u32,  // 0 for windows
110    pub mode: u32, // 0o100644 // 4-bit object type + 3-bit unused + 9-bit unix permission
111    pub uid: u32,  // 0 for windows
112    pub gid: u32,  // 0 for windows
113    pub size: u32,
114    pub hash: SHA1,
115    pub flags: Flags,
116    pub name: String,
117}
118impl Display for IndexEntry {
119    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
120        write!(
121            f,
122            "IndexEntry {{ ctime: {}, mtime: {}, dev: {}, ino: {}, mode: {:o}, uid: {}, gid: {}, size: {}, hash: {}, flags: {:?}, name: {} }}",
123            self.ctime,
124            self.mtime,
125            self.dev,
126            self.ino,
127            self.mode,
128            self.uid,
129            self.gid,
130            self.size,
131            self.hash,
132            self.flags,
133            self.name
134        )
135    }
136}
137
138impl IndexEntry {
139    /** Metadata must be got by [fs::symlink_metadata] to avoid following symlink */
140    pub fn new(meta: &fs::Metadata, hash: SHA1, name: String) -> Self {
141        let mut entry = IndexEntry {
142            ctime: Time::from_system_time(meta.created().unwrap()),
143            mtime: Time::from_system_time(meta.modified().unwrap()),
144            dev: 0,
145            ino: 0,
146            uid: 0,
147            gid: 0,
148            size: meta.len() as u32,
149            hash,
150            flags: Flags::new(name.len() as u16),
151            name,
152            mode: 0o100644,
153        };
154        #[cfg(unix)]
155        {
156            entry.dev = meta.dev() as u32;
157            entry.ino = meta.ino() as u32;
158            entry.uid = meta.uid();
159            entry.gid = meta.gid();
160
161            entry.mode = match meta.mode() & 0o170000/* file mode */ {
162                0o100000 => {
163                    match meta.mode() & 0o111 {
164                        0 => 0o100644, // no execute permission
165                        _ => 0o100755, // with execute permission
166                    }
167                }
168                0o120000 => 0o120000, // symlink
169                _ =>  entry.mode, // keep the original mode
170            }
171        }
172        #[cfg(windows)]
173        {
174            if meta.is_symlink() {
175                entry.mode = 0o120000;
176            }
177        }
178        entry
179    }
180
181    /// - `file`: **to workdir path**
182    /// - `workdir`: absolute or relative path
183    pub fn new_from_file(file: &Path, hash: SHA1, workdir: &Path) -> io::Result<Self> {
184        let name = file.to_str().unwrap().to_string();
185        let file_abs = workdir.join(file);
186        let meta = fs::symlink_metadata(file_abs)?; // without following symlink
187        let index = IndexEntry::new(&meta, hash, name);
188        Ok(index)
189    }
190
191    pub fn new_from_blob(name: String, hash: SHA1, size: u32) -> Self {
192        IndexEntry {
193            ctime: Time {
194                seconds: 0,
195                nanos: 0,
196            },
197            mtime: Time {
198                seconds: 0,
199                nanos: 0,
200            },
201            dev: 0,
202            ino: 0,
203            mode: 0o100644,
204            uid: 0,
205            gid: 0,
206            size,
207            hash,
208            flags: Flags::new(name.len() as u16),
209            name,
210        }
211    }
212}
213
214/// see [index-format](https://git-scm.com/docs/index-format)
215/// <br> to Working Dir relative path
216pub struct Index {
217    entries: BTreeMap<(String, u8), IndexEntry>,
218}
219
220impl Index {
221    pub fn new() -> Self {
222        Index {
223            entries: BTreeMap::new(),
224        }
225    }
226
227    fn check_header(file: &mut impl Read) -> Result<u32, GitError> {
228        let mut magic = [0; 4];
229        file.read_exact(&mut magic)?;
230        if magic != *b"DIRC" {
231            return Err(GitError::InvalidIndexHeader(
232                String::from_utf8_lossy(&magic).to_string(),
233            ));
234        }
235
236        let version = file.read_u32::<BigEndian>()?;
237        // only support v2 now
238        if version != 2 {
239            return Err(GitError::InvalidIndexHeader(version.to_string()));
240        }
241
242        let entries = file.read_u32::<BigEndian>()?;
243        Ok(entries)
244    }
245
246    pub fn size(&self) -> usize {
247        self.entries.len()
248    }
249
250    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, GitError> {
251        let file = File::open(path.as_ref())?; // read-only
252        let total_size = file.metadata()?.len();
253        let file = &mut Wrapper::new(BufReader::new(file)); // TODO move Wrapper & utils to a common module
254
255        let num = Index::check_header(file)?;
256        let mut index = Index::new();
257
258        for _ in 0..num {
259            let mut entry = IndexEntry {
260                ctime: Time::from_stream(file)?,
261                mtime: Time::from_stream(file)?,
262                dev: file.read_u32::<BigEndian>()?, //utils::read_u32_be(file)?,
263                ino: file.read_u32::<BigEndian>()?,
264                mode: file.read_u32::<BigEndian>()?,
265                uid: file.read_u32::<BigEndian>()?,
266                gid: file.read_u32::<BigEndian>()?,
267                size: file.read_u32::<BigEndian>()?,
268                hash: utils::read_sha1(file)?,
269                flags: Flags::from(file.read_u16::<BigEndian>()?),
270                name: String::new(),
271            };
272            let name_len = entry.flags.name_length as usize;
273            let mut name = vec![0; name_len];
274            file.read_exact(&mut name)?;
275            // The exact encoding is undefined, but the '.' and '/' characters are encoded in 7-bit ASCII
276            entry.name =
277                String::from_utf8(name).map_err(|e| GitError::ConversionError(e.to_string()))?; // TODO check the encoding
278            index
279                .entries
280                .insert((entry.name.clone(), entry.flags.stage), entry);
281
282            // 1-8 nul bytes as necessary to pad the entry to a multiple of eight bytes
283            // while keeping the name NUL-terminated. // so at least 1 byte nul
284            let padding = 8 - ((22 + name_len) % 8); // 22 = sha1 + flags, others are 40 % 8 == 0
285            utils::read_bytes(file, padding)?;
286        }
287
288        // Extensions
289        while file.bytes_read() + SHA1::SIZE < total_size as usize {
290            // The remaining 20 bytes must be checksum
291            let sign = utils::read_bytes(file, 4)?;
292            println!(
293                "{:?}",
294                String::from_utf8(sign.clone())
295                    .map_err(|e| GitError::ConversionError(e.to_string()))?
296            );
297            // If the first byte is 'A'...'Z' the extension is optional and can be ignored.
298            if sign[0] >= b'A' && sign[0] <= b'Z' {
299                // Optional extension
300                let size = file.read_u32::<BigEndian>()?;
301                utils::read_bytes(file, size as usize)?; // Ignore the extension
302            } else {
303                // 'link' or 'sdir' extension
304                return Err(GitError::InvalidIndexFile(
305                    "Unsupported extension".to_string(),
306                ));
307            }
308        }
309
310        // check sum
311        let file_hash = file.final_hash();
312        let check_sum = utils::read_sha1(file)?;
313        if file_hash != check_sum {
314            return Err(GitError::InvalidIndexFile("Check sum failed".to_string()));
315        }
316        assert_eq!(index.size(), num as usize);
317        Ok(index)
318    }
319
320    pub fn to_file(&self, path: impl AsRef<Path>) -> Result<(), GitError> {
321        let mut file = File::create(path)?;
322        let mut hash = Sha1::new();
323
324        let mut header = Vec::new();
325        header.write_all(b"DIRC")?;
326        header.write_u32::<BigEndian>(2u32)?; // version 2
327        header.write_u32::<BigEndian>(self.entries.len() as u32)?;
328        file.write_all(&header)?;
329        hash.update(&header);
330
331        for (_, entry) in self.entries.iter() {
332            let mut entry_bytes = Vec::new();
333            entry_bytes.write_u32::<BigEndian>(entry.ctime.seconds)?;
334            entry_bytes.write_u32::<BigEndian>(entry.ctime.nanos)?;
335            entry_bytes.write_u32::<BigEndian>(entry.mtime.seconds)?;
336            entry_bytes.write_u32::<BigEndian>(entry.mtime.nanos)?;
337            entry_bytes.write_u32::<BigEndian>(entry.dev)?;
338            entry_bytes.write_u32::<BigEndian>(entry.ino)?;
339            entry_bytes.write_u32::<BigEndian>(entry.mode)?;
340            entry_bytes.write_u32::<BigEndian>(entry.uid)?;
341            entry_bytes.write_u32::<BigEndian>(entry.gid)?;
342            entry_bytes.write_u32::<BigEndian>(entry.size)?;
343            entry_bytes.write_all(&entry.hash.0)?;
344            entry_bytes.write_u16::<BigEndian>((&entry.flags).try_into().unwrap())?;
345            entry_bytes.write_all(entry.name.as_bytes())?;
346            let padding = 8 - ((22 + entry.name.len()) % 8);
347            entry_bytes.write_all(&vec![0; padding])?;
348
349            file.write_all(&entry_bytes)?;
350            hash.update(&entry_bytes);
351        }
352
353        // Extensions
354
355        // check sum
356        let file_hash: [u8; 20] = hash.finalize().into();
357        file.write_all(&file_hash)?;
358        Ok(())
359    }
360
361    pub fn refresh(&mut self, file: impl AsRef<Path>, workdir: &Path) -> Result<bool, GitError> {
362        let path = file.as_ref();
363        let name = path
364            .to_str()
365            .ok_or(GitError::InvalidPathError(format!("{path:?}")))?;
366
367        if let Some(entry) = self.entries.get_mut(&(name.to_string(), 0)) {
368            let abs_path = workdir.join(path);
369            let meta = fs::symlink_metadata(&abs_path)?;
370            // Try creation time; on error, warn and use modification time (or now)
371            let new_ctime = Time::from_system_time(Self::time_or_now(
372                "creation time",
373                &abs_path,
374                meta.created(),
375            ));
376            let new_mtime = Time::from_system_time(Self::time_or_now(
377                "modification time",
378                &abs_path,
379                meta.modified(),
380            ));
381            let new_size = meta.len() as u32;
382
383            // re-calculate SHA1
384            let mut file = File::open(&abs_path)?;
385            let mut hasher = Sha1::new();
386            io::copy(&mut file, &mut hasher)?;
387            let new_hash = SHA1::from_bytes(&hasher.finalize());
388
389            // refresh index
390            if entry.ctime != new_ctime
391                || entry.mtime != new_mtime
392                || entry.size != new_size
393                || entry.hash != new_hash
394            {
395                entry.ctime = new_ctime;
396                entry.mtime = new_mtime;
397                entry.size = new_size;
398                entry.hash = new_hash;
399                return Ok(true);
400            }
401        }
402        Ok(false)
403    }
404
405    /// Try to get a timestamp, logging on error, and finally falling back to now.
406    fn time_or_now(what: &str, path: &Path, res: io::Result<SystemTime>) -> SystemTime {
407        match res {
408            Ok(ts) => ts,
409            Err(e) => {
410                eprintln!(
411                    "warning: failed to get {what} for {path:?}: {e}; using SystemTime::now()",
412                    what = what,
413                    path = path.display()
414                );
415                SystemTime::now()
416            }
417        }
418    }
419}
420
421impl Default for Index {
422    fn default() -> Self {
423        Self::new()
424    }
425}
426
427impl Index {
428    /// Load index. If it does not exist, return an empty index.
429    pub fn load(index_file: impl AsRef<Path>) -> Result<Self, GitError> {
430        let path = index_file.as_ref();
431        if !path.exists() {
432            return Ok(Index::new());
433        }
434        Index::from_file(path)
435    }
436
437    pub fn update(&mut self, entry: IndexEntry) {
438        self.add(entry)
439    }
440
441    pub fn add(&mut self, entry: IndexEntry) {
442        self.entries
443            .insert((entry.name.clone(), entry.flags.stage), entry);
444    }
445
446    pub fn remove(&mut self, name: &str, stage: u8) -> Option<IndexEntry> {
447        self.entries.remove(&(name.to_string(), stage))
448    }
449
450    pub fn get(&self, name: &str, stage: u8) -> Option<&IndexEntry> {
451        self.entries.get(&(name.to_string(), stage))
452    }
453
454    pub fn tracked(&self, name: &str, stage: u8) -> bool {
455        self.entries.contains_key(&(name.to_string(), stage))
456    }
457
458    pub fn get_hash(&self, file: &str, stage: u8) -> Option<SHA1> {
459        self.get(file, stage).map(|entry| entry.hash)
460    }
461
462    pub fn verify_hash(&self, file: &str, stage: u8, hash: &SHA1) -> bool {
463        let inner_hash = self.get_hash(file, stage);
464        if let Some(inner_hash) = inner_hash {
465            &inner_hash == hash
466        } else {
467            false
468        }
469    }
470    /// is file modified after last `add` (need hash to confirm content change)
471    /// - `workdir` is used to rebuild absolute file path
472    pub fn is_modified(&self, file: &str, stage: u8, workdir: &Path) -> bool {
473        if let Some(entry) = self.get(file, stage) {
474            let path_abs = workdir.join(file);
475            let meta = path_abs.symlink_metadata().unwrap();
476            // TODO more fields
477            let same = entry.ctime
478                == Time::from_system_time(meta.created().unwrap_or(SystemTime::now()))
479                && entry.mtime
480                    == Time::from_system_time(meta.modified().unwrap_or(SystemTime::now()))
481                && entry.size == meta.len() as u32;
482
483            !same
484        } else {
485            panic!("File not found in index");
486        }
487    }
488
489    /// Get all entries with the same stage
490    pub fn tracked_entries(&self, stage: u8) -> Vec<&IndexEntry> {
491        // ? should use stage or not
492        self.entries
493            .iter()
494            .filter(|(_, entry)| entry.flags.stage == stage)
495            .map(|(_, entry)| entry)
496            .collect()
497    }
498
499    /// Get all tracked files(stage = 0)
500    pub fn tracked_files(&self) -> Vec<PathBuf> {
501        self.tracked_entries(0)
502            .iter()
503            .map(|entry| PathBuf::from(&entry.name))
504            .collect()
505    }
506
507    /// Judge if the file(s) of `dir` is in the index
508    /// - false if `dir` is a file
509    pub fn contains_dir_file(&self, dir: &str) -> bool {
510        let dir = Path::new(dir);
511        self.entries.iter().any(|((name, _), _)| {
512            let path = Path::new(name);
513            path.starts_with(dir) && path != dir // TODO change to is_sub_path!
514        })
515    }
516
517    /// remove all files in `dir` from index
518    /// - do nothing if `dir` is a file
519    pub fn remove_dir_files(&mut self, dir: &str) -> Vec<String> {
520        let dir = Path::new(dir);
521        let mut removed = Vec::new();
522        self.entries.retain(|(name, _), _| {
523            let path = Path::new(name);
524            if path.starts_with(dir) && path != dir {
525                removed.push(name.clone());
526                false
527            } else {
528                true
529            }
530        });
531        removed
532    }
533
534    /// saved to index file
535    pub fn save(&self, index_file: impl AsRef<Path>) -> Result<(), GitError> {
536        self.to_file(index_file)
537    }
538}
539
#[cfg(test)]
mod tests {
    use super::*;

    /// The epoch survives a round trip through `SystemTime`.
    #[test]
    fn test_time() {
        let time = Time {
            seconds: 0,
            nanos: 0,
        };
        let system_time = time.to_system_time();
        let new_time = Time::from_system_time(system_time);
        assert_eq!(time, new_time);
    }

    /// The header of the two-entry fixture reports two entries.
    #[test]
    fn test_check_header() {
        let file = File::open("../tests/data/index/index-2").unwrap();
        let entries = Index::check_header(&mut BufReader::new(file)).unwrap();
        assert_eq!(entries, 2);
    }

    /// The 760-entry fixture parses completely.
    #[test]
    fn test_index() {
        let index = Index::from_file("../tests/data/index/index-760").unwrap();
        assert_eq!(index.size(), 760);
        for entry in index.entries.values() {
            println!("{entry}");
        }
    }

    /// Writing an index and reading it back keeps the entry count.
    #[test]
    fn test_index_to_file() {
        let index = Index::from_file("../tests/data/index/index-760").unwrap();

        // Use the platform temp dir (no hard-coded /tmp) and a per-process
        // file name so parallel runs do not overwrite each other's output.
        let out_path = std::env::temp_dir().join(format!("index-760-{}", std::process::id()));
        index.to_file(&out_path).unwrap();
        let reloaded = Index::from_file(&out_path);
        // Best-effort cleanup before asserting, so a failure leaves no file behind.
        let _ = std::fs::remove_file(&out_path);

        let new_index = reloaded.unwrap();
        assert_eq!(index.size(), new_index.size());
    }

    /// An entry can be built from a real file in the working directory.
    #[test]
    fn test_index_entry_create() {
        let file = Path::new("Cargo.toml"); // use as a normal file
        let hash = SHA1::from_bytes(&[0; 20]);
        let workdir = Path::new("../");
        let entry = IndexEntry::new_from_file(file, hash, workdir).unwrap();
        println!("{entry}");
    }
}