git_internal/internal/
index.rs

1use std::collections::BTreeMap;
2use std::fmt::{Display, Formatter};
3use std::fs::{self, File};
4use std::io;
5use std::io::{BufReader, Read, Write};
6#[cfg(unix)]
7use std::os::unix::fs::MetadataExt;
8use std::path::{Path, PathBuf};
9use std::time::{SystemTime, UNIX_EPOCH};
10
11use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
12
13use crate::errors::GitError;
14use crate::hash::{ObjectHash, get_hash_kind};
15use crate::internal::pack::wrapper::Wrapper;
16use crate::utils::{self, HashAlgorithm};
17
/// Timestamp stored in an index entry as two unsigned 32-bit fields.
///
/// Both fields are read from the index file as big-endian `u32` values
/// (see `Time::from_stream`).
#[derive(PartialEq, Eq, Debug, Clone)]
pub struct Time {
    /// Whole seconds since the UNIX epoch.
    seconds: u32,
    /// Sub-second part, in nanoseconds.
    nanos: u32,
}
23impl Time {
24    pub fn from_stream(stream: &mut impl Read) -> Result<Self, GitError> {
25        let seconds = stream.read_u32::<BigEndian>()?;
26        let nanos = stream.read_u32::<BigEndian>()?;
27        Ok(Time { seconds, nanos })
28    }
29
30    #[allow(dead_code)]
31    fn to_system_time(&self) -> SystemTime {
32        UNIX_EPOCH + std::time::Duration::new(self.seconds.into(), self.nanos)
33    }
34
35    pub fn from_system_time(system_time: SystemTime) -> Self {
36        match system_time.duration_since(UNIX_EPOCH) {
37            Ok(duration) => {
38                let seconds = duration
39                    .as_secs()
40                    .try_into()
41                    .expect("Time is too far in the future");
42                let nanos = duration.subsec_nanos();
43                Time { seconds, nanos }
44            }
45            Err(_) => panic!("Time is before the UNIX epoch"),
46        }
47    }
48}
49impl Display for Time {
50    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
51        write!(f, "{}:{}", self.seconds, self.nanos)
52    }
53}
54
/// The 16-bit flags field of an index entry.
///
/// Bit layout, most significant bit first: 1-bit assume-valid, 1-bit
/// extended, 2-bit stage, 12-bit name length.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Flags {
    pub assume_valid: bool,
    pub extended: bool,   // must be 0 in v2
    pub stage: u8,        // 2-bit during merge
    pub name_length: u16, // 12-bit
}

impl From<u16> for Flags {
    /// Splits a raw 16-bit flag word into its four components.
    fn from(flags: u16) -> Self {
        Flags {
            assume_valid: flags & 0x8000 != 0,
            extended: flags & 0x4000 != 0,
            stage: ((flags & 0x3000) >> 12) as u8,
            name_length: flags & 0xFFF,
        }
    }
}

impl TryFrom<&Flags> for u16 {
    type Error = &'static str;

    /// Packs the flags back into a raw 16-bit word.
    ///
    /// Implemented as `TryFrom` so that `(&flags).try_into()` keeps working
    /// through the standard blanket `TryInto` implementation.
    ///
    /// # Errors
    /// Fails if `stage` does not fit in 2 bits or `name_length` does not fit
    /// in 12 bits. An unchecked stage would otherwise spill into the
    /// `extended` / `assume_valid` bits.
    fn try_from(flags: &Flags) -> Result<u16, Self::Error> {
        if flags.stage > 0b11 {
            return Err("Stage does not fit in 2 bits");
        }
        if flags.name_length > 0xFFF {
            return Err("Name length is too long");
        }

        let mut raw = 0u16;
        if flags.assume_valid {
            raw |= 0x8000; // bit 16
        }
        if flags.extended {
            raw |= 0x4000; // bit 15
        }
        raw |= u16::from(flags.stage) << 12; // bits 13-14
        raw |= flags.name_length; // bits 1-12
        Ok(raw)
    }
}

impl Flags {
    /// Creates the flags for a stage-0, non-extended entry whose name is
    /// `name_len` bytes long.
    ///
    /// `name_len` is stored unchecked; values above `0xFFF` are rejected
    /// later, when the flags are converted to `u16`.
    pub fn new(name_len: u16) -> Self {
        Flags {
            // NOTE(review): new entries start with `assume_valid` set; the Git
            // index format describes this bit as "assume-valid" — confirm
            // that enabling it by default is intended.
            assume_valid: true,
            extended: false,
            stage: 0,
            name_length: name_len,
        }
    }
}
104
/// A single entry of the index: file metadata, the object hash and the path
/// name of one tracked file.
///
/// The fields are written to and read from the index file in the order
/// `ctime`, `mtime`, `dev`, `ino`, `mode`, `uid`, `gid`, `size`, `hash`,
/// `flags`, `name`.
pub struct IndexEntry {
    pub ctime: Time,
    pub mtime: Time,
    pub dev: u32,  // 0 for windows
    pub ino: u32,  // 0 for windows
    pub mode: u32, // 0o100644 // 4-bit object type + 3-bit unused + 9-bit unix permission
    pub uid: u32,  // 0 for windows
    pub gid: u32,  // 0 for windows
    pub size: u32,
    pub hash: ObjectHash,
    pub flags: Flags,
    /// Entry path; used together with the stage as the key inside `Index`.
    pub name: String,
}
118impl Display for IndexEntry {
119    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
120        write!(
121            f,
122            "IndexEntry {{ ctime: {}, mtime: {}, dev: {}, ino: {}, mode: {:o}, uid: {}, gid: {}, size: {}, hash: {}, flags: {:?}, name: {} }}",
123            self.ctime,
124            self.mtime,
125            self.dev,
126            self.ino,
127            self.mode,
128            self.uid,
129            self.gid,
130            self.size,
131            self.hash,
132            self.flags,
133            self.name
134        )
135    }
136}
137
138impl IndexEntry {
139    /** Metadata must be got by [fs::symlink_metadata] to avoid following symlink */
140    pub fn new(meta: &fs::Metadata, hash: ObjectHash, name: String) -> Self {
141        let mut entry = IndexEntry {
142            ctime: Time::from_system_time(meta.created().unwrap()),
143            mtime: Time::from_system_time(meta.modified().unwrap()),
144            dev: 0,
145            ino: 0,
146            uid: 0,
147            gid: 0,
148            size: meta.len() as u32,
149            hash,
150            flags: Flags::new(name.len() as u16),
151            name,
152            mode: 0o100644,
153        };
154        #[cfg(unix)]
155        {
156            entry.dev = meta.dev() as u32;
157            entry.ino = meta.ino() as u32;
158            entry.uid = meta.uid();
159            entry.gid = meta.gid();
160
161            entry.mode = match meta.mode() & 0o170000/* file mode */ {
162                0o100000 => {
163                    match meta.mode() & 0o111 {
164                        0 => 0o100644, // no execute permission
165                        _ => 0o100755, // with execute permission
166                    }
167                }
168                0o120000 => 0o120000, // symlink
169                _ =>  entry.mode, // keep the original mode
170            }
171        }
172        #[cfg(windows)]
173        {
174            if meta.is_symlink() {
175                entry.mode = 0o120000;
176            }
177        }
178        entry
179    }
180
181    /// - `file`: **to workdir path**
182    /// - `workdir`: absolute or relative path
183    pub fn new_from_file(file: &Path, hash: ObjectHash, workdir: &Path) -> io::Result<Self> {
184        let name = file.to_str().unwrap().to_string();
185        let file_abs = workdir.join(file);
186        let meta = fs::symlink_metadata(file_abs)?; // without following symlink
187        let index = IndexEntry::new(&meta, hash, name);
188        Ok(index)
189    }
190
191    pub fn new_from_blob(name: String, hash: ObjectHash, size: u32) -> Self {
192        IndexEntry {
193            ctime: Time {
194                seconds: 0,
195                nanos: 0,
196            },
197            mtime: Time {
198                seconds: 0,
199                nanos: 0,
200            },
201            dev: 0,
202            ino: 0,
203            mode: 0o100644,
204            uid: 0,
205            gid: 0,
206            size,
207            hash,
208            flags: Flags::new(name.len() as u16),
209            name,
210        }
211    }
212}
213
/// In-memory form of a Git index file,
/// see [index-format](https://git-scm.com/docs/index-format).
///
/// Entry names are paths relative to the working directory.
pub struct Index {
    /// Entries keyed by `(name, stage)`; the map keeps them ordered by that key.
    entries: BTreeMap<(String, u8), IndexEntry>,
}
219
220impl Index {
221    pub fn new() -> Self {
222        Index {
223            entries: BTreeMap::new(),
224        }
225    }
226
227    fn check_header(file: &mut impl Read) -> Result<u32, GitError> {
228        let mut magic = [0; 4];
229        file.read_exact(&mut magic)?;
230        if magic != *b"DIRC" {
231            return Err(GitError::InvalidIndexHeader(
232                String::from_utf8_lossy(&magic).to_string(),
233            ));
234        }
235
236        let version = file.read_u32::<BigEndian>()?;
237        // only support v2 now
238        if version != 2 {
239            return Err(GitError::InvalidIndexHeader(version.to_string()));
240        }
241
242        let entries = file.read_u32::<BigEndian>()?;
243        Ok(entries)
244    }
245
246    pub fn size(&self) -> usize {
247        self.entries.len()
248    }
249
250    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, GitError> {
251        let file = File::open(path.as_ref())?; // read-only
252        let total_size = file.metadata()?.len();
253        let file = &mut Wrapper::new(BufReader::new(file)); // TODO move Wrapper & utils to a common module
254
255        let num = Index::check_header(file)?;
256        let mut index = Index::new();
257
258        for _ in 0..num {
259            let mut entry = IndexEntry {
260                ctime: Time::from_stream(file)?,
261                mtime: Time::from_stream(file)?,
262                dev: file.read_u32::<BigEndian>()?, //utils::read_u32_be(file)?,
263                ino: file.read_u32::<BigEndian>()?,
264                mode: file.read_u32::<BigEndian>()?,
265                uid: file.read_u32::<BigEndian>()?,
266                gid: file.read_u32::<BigEndian>()?,
267                size: file.read_u32::<BigEndian>()?,
268                hash: utils::read_sha(file)?,
269                flags: Flags::from(file.read_u16::<BigEndian>()?),
270                name: String::new(),
271            };
272            let name_len = entry.flags.name_length as usize;
273            let mut name = vec![0; name_len];
274            file.read_exact(&mut name)?;
275            // The exact encoding is undefined, but the '.' and '/' characters are encoded in 7-bit ASCII
276            entry.name =
277                String::from_utf8(name).map_err(|e| GitError::ConversionError(e.to_string()))?; // TODO check the encoding
278            index
279                .entries
280                .insert((entry.name.clone(), entry.flags.stage), entry);
281
282            // 1-8 nul bytes as necessary to pad the entry to a multiple of eight bytes
283            // while keeping the name NUL-terminated.
284            let hash_len = get_hash_kind().size();
285            let entry_len = hash_len + 2 + name_len;
286            let padding = 1 + ((8 - ((entry_len + 1) % 8)) % 8); // at least 1 byte nul
287            utils::read_bytes(file, padding)?;
288        }
289
290        // Extensions
291        while file.bytes_read() + get_hash_kind().size() < total_size as usize {
292            // The remaining bytes must be the pack checksum (size = get_hash_kind().size())
293            let sign = utils::read_bytes(file, 4)?;
294            println!(
295                "{:?}",
296                String::from_utf8(sign.clone())
297                    .map_err(|e| GitError::ConversionError(e.to_string()))?
298            );
299            // If the first byte is 'A'...'Z' the extension is optional and can be ignored.
300            if sign[0] >= b'A' && sign[0] <= b'Z' {
301                // Optional extension
302                let size = file.read_u32::<BigEndian>()?;
303                utils::read_bytes(file, size as usize)?; // Ignore the extension
304            } else {
305                // 'link' or 'sdir' extension
306                return Err(GitError::InvalidIndexFile(
307                    "Unsupported extension".to_string(),
308                ));
309            }
310        }
311
312        // check sum
313        let file_hash = file.final_hash();
314        let check_sum = utils::read_sha(file)?;
315        if file_hash != check_sum {
316            return Err(GitError::InvalidIndexFile("Check sum failed".to_string()));
317        }
318        assert_eq!(index.size(), num as usize);
319        Ok(index)
320    }
321
322    pub fn to_file(&self, path: impl AsRef<Path>) -> Result<(), GitError> {
323        let mut file = File::create(path)?;
324        let mut hash = HashAlgorithm::new();
325
326        let mut header = Vec::new();
327        header.write_all(b"DIRC")?;
328        header.write_u32::<BigEndian>(2u32)?; // version 2
329        header.write_u32::<BigEndian>(self.entries.len() as u32)?;
330        file.write_all(&header)?;
331        hash.update(&header);
332
333        for (_, entry) in self.entries.iter() {
334            let mut entry_bytes = Vec::new();
335            entry_bytes.write_u32::<BigEndian>(entry.ctime.seconds)?;
336            entry_bytes.write_u32::<BigEndian>(entry.ctime.nanos)?;
337            entry_bytes.write_u32::<BigEndian>(entry.mtime.seconds)?;
338            entry_bytes.write_u32::<BigEndian>(entry.mtime.nanos)?;
339            entry_bytes.write_u32::<BigEndian>(entry.dev)?;
340            entry_bytes.write_u32::<BigEndian>(entry.ino)?;
341            entry_bytes.write_u32::<BigEndian>(entry.mode)?;
342            entry_bytes.write_u32::<BigEndian>(entry.uid)?;
343            entry_bytes.write_u32::<BigEndian>(entry.gid)?;
344            entry_bytes.write_u32::<BigEndian>(entry.size)?;
345            entry_bytes.write_all(entry.hash.as_ref())?;
346            entry_bytes.write_u16::<BigEndian>((&entry.flags).try_into().unwrap())?;
347            entry_bytes.write_all(entry.name.as_bytes())?;
348            let hash_len = get_hash_kind().size();
349            let entry_len = hash_len + 2 + entry.name.len();
350            let padding = 1 + ((8 - ((entry_len + 1) % 8)) % 8); // at least 1 byte nul
351            entry_bytes.write_all(&vec![0; padding])?;
352            file.write_all(&entry_bytes)?;
353            hash.update(&entry_bytes);
354        }
355
356        // Extensions
357
358        // check sum
359        let file_hash =
360            ObjectHash::from_bytes(&hash.finalize()).map_err(GitError::InvalidIndexFile)?;
361        file.write_all(file_hash.as_ref())?;
362        Ok(())
363    }
364
365    pub fn refresh(&mut self, file: impl AsRef<Path>, workdir: &Path) -> Result<bool, GitError> {
366        let path = file.as_ref();
367        let name = path
368            .to_str()
369            .ok_or(GitError::InvalidPathError(format!("{path:?}")))?;
370
371        if let Some(entry) = self.entries.get_mut(&(name.to_string(), 0)) {
372            let abs_path = workdir.join(path);
373            let meta = fs::symlink_metadata(&abs_path)?;
374            // Try creation time; on error, warn and use modification time (or now)
375            let new_ctime = Time::from_system_time(Self::time_or_now(
376                "creation time",
377                &abs_path,
378                meta.created(),
379            ));
380            let new_mtime = Time::from_system_time(Self::time_or_now(
381                "modification time",
382                &abs_path,
383                meta.modified(),
384            ));
385            let new_size = meta.len() as u32;
386
387            // re-calculate SHA1/SHA256
388            let mut file = File::open(&abs_path)?;
389            let mut hasher = HashAlgorithm::new();
390            io::copy(&mut file, &mut hasher)?;
391            let new_hash = ObjectHash::from_bytes(&hasher.finalize()).unwrap();
392
393            // refresh index
394            if entry.ctime != new_ctime
395                || entry.mtime != new_mtime
396                || entry.size != new_size
397                || entry.hash != new_hash
398            {
399                entry.ctime = new_ctime;
400                entry.mtime = new_mtime;
401                entry.size = new_size;
402                entry.hash = new_hash;
403                return Ok(true);
404            }
405        }
406        Ok(false)
407    }
408
409    /// Try to get a timestamp, logging on error, and finally falling back to now.
410    fn time_or_now(what: &str, path: &Path, res: io::Result<SystemTime>) -> SystemTime {
411        match res {
412            Ok(ts) => ts,
413            Err(e) => {
414                eprintln!(
415                    "warning: failed to get {what} for {path:?}: {e}; using SystemTime::now()",
416                    what = what,
417                    path = path.display()
418                );
419                SystemTime::now()
420            }
421        }
422    }
423}
424
425impl Default for Index {
426    fn default() -> Self {
427        Self::new()
428    }
429}
430
431impl Index {
432    /// Load index. If it does not exist, return an empty index.
433    pub fn load(index_file: impl AsRef<Path>) -> Result<Self, GitError> {
434        let path = index_file.as_ref();
435        if !path.exists() {
436            return Ok(Index::new());
437        }
438        Index::from_file(path)
439    }
440
441    pub fn update(&mut self, entry: IndexEntry) {
442        self.add(entry)
443    }
444
445    pub fn add(&mut self, entry: IndexEntry) {
446        self.entries
447            .insert((entry.name.clone(), entry.flags.stage), entry);
448    }
449
450    pub fn remove(&mut self, name: &str, stage: u8) -> Option<IndexEntry> {
451        self.entries.remove(&(name.to_string(), stage))
452    }
453
454    pub fn get(&self, name: &str, stage: u8) -> Option<&IndexEntry> {
455        self.entries.get(&(name.to_string(), stage))
456    }
457
458    pub fn tracked(&self, name: &str, stage: u8) -> bool {
459        self.entries.contains_key(&(name.to_string(), stage))
460    }
461
462    pub fn get_hash(&self, file: &str, stage: u8) -> Option<ObjectHash> {
463        self.get(file, stage).map(|entry| entry.hash)
464    }
465
466    pub fn verify_hash(&self, file: &str, stage: u8, hash: &ObjectHash) -> bool {
467        let inner_hash = self.get_hash(file, stage);
468        if let Some(inner_hash) = inner_hash {
469            &inner_hash == hash
470        } else {
471            false
472        }
473    }
474    /// is file modified after last `add` (need hash to confirm content change)
475    /// - `workdir` is used to rebuild absolute file path
476    pub fn is_modified(&self, file: &str, stage: u8, workdir: &Path) -> bool {
477        if let Some(entry) = self.get(file, stage) {
478            let path_abs = workdir.join(file);
479            let meta = path_abs.symlink_metadata().unwrap();
480            // TODO more fields
481            let same = entry.ctime
482                == Time::from_system_time(meta.created().unwrap_or(SystemTime::now()))
483                && entry.mtime
484                    == Time::from_system_time(meta.modified().unwrap_or(SystemTime::now()))
485                && entry.size == meta.len() as u32;
486
487            !same
488        } else {
489            panic!("File not found in index");
490        }
491    }
492
493    /// Get all entries with the same stage
494    pub fn tracked_entries(&self, stage: u8) -> Vec<&IndexEntry> {
495        // ? should use stage or not
496        self.entries
497            .iter()
498            .filter(|(_, entry)| entry.flags.stage == stage)
499            .map(|(_, entry)| entry)
500            .collect()
501    }
502
503    /// Get all tracked files(stage = 0)
504    pub fn tracked_files(&self) -> Vec<PathBuf> {
505        self.tracked_entries(0)
506            .iter()
507            .map(|entry| PathBuf::from(&entry.name))
508            .collect()
509    }
510
511    /// Judge if the file(s) of `dir` is in the index
512    /// - false if `dir` is a file
513    pub fn contains_dir_file(&self, dir: &str) -> bool {
514        let dir = Path::new(dir);
515        self.entries.iter().any(|((name, _), _)| {
516            let path = Path::new(name);
517            path.starts_with(dir) && path != dir // TODO change to is_sub_path!
518        })
519    }
520
521    /// remove all files in `dir` from index
522    /// - do nothing if `dir` is a file
523    pub fn remove_dir_files(&mut self, dir: &str) -> Vec<String> {
524        let dir = Path::new(dir);
525        let mut removed = Vec::new();
526        self.entries.retain(|(name, _), _| {
527            let path = Path::new(name);
528            if path.starts_with(dir) && path != dir {
529                removed.push(name.clone());
530                false
531            } else {
532                true
533            }
534        });
535        removed
536    }
537
538    /// saved to index file
539    pub fn save(&self, index_file: impl AsRef<Path>) -> Result<(), GitError> {
540        self.to_file(index_file)
541    }
542}
543
#[cfg(test)]
mod tests {
    use super::*;
    use crate::hash::{HashKind, set_hash_kind_for_test};

    /// A zero timestamp survives the round trip through `SystemTime`.
    #[test]
    fn test_time() {
        let time = Time {
            seconds: 0,
            nanos: 0,
        };
        let system_time = time.to_system_time();
        let new_time = Time::from_system_time(system_time);
        assert_eq!(time, new_time);
    }

    /// The header of the two-entry fixture reports two entries.
    #[test]
    fn test_check_header() {
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("tests/data/index/index-2");

        let file = File::open(source).unwrap();
        let entries = Index::check_header(&mut BufReader::new(file)).unwrap();
        assert_eq!(entries, 2);
    }

    /// Parses the SHA-1 fixture with 760 entries.
    #[test]
    fn test_index() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("tests/data/index/index-760");

        let index = Index::from_file(source).unwrap();
        assert_eq!(index.size(), 760);
        for (_, entry) in index.entries.iter() {
            println!("{entry}");
        }
    }

    /// Parses the SHA-256 fixture with 9 entries.
    #[test]
    fn test_index_sha256() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("tests/data/index/index-9-256");

        let index = Index::from_file(source).unwrap();
        assert_eq!(index.size(), 9);
        for (_, entry) in index.entries.iter() {
            println!("{entry}");
        }
    }

    /// Writing an index and reading it back preserves the entry count.
    #[test]
    fn test_index_to_file() {
        // Same fixture as `test_index`, so pin the hash kind the same way.
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("tests/data/index/index-760");

        let index = Index::from_file(source).unwrap();

        // Portable, per-process scratch file instead of a hard-coded /tmp path.
        let target = std::env::temp_dir().join(format!("index-760-{}", std::process::id()));
        index.to_file(&target).unwrap();
        let new_index = Index::from_file(&target);
        let _ = fs::remove_file(&target); // clean up before asserting
        assert_eq!(index.size(), new_index.unwrap().size());
    }

    /// Builds an entry from an existing file with a SHA-1 sized hash.
    #[test]
    fn test_index_entry_create() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("Cargo.toml");

        let file = Path::new(source.as_path()); // use as a normal file
        let hash = ObjectHash::from_bytes(&[0; 20]).unwrap();
        let workdir = Path::new("../");
        let entry = IndexEntry::new_from_file(file, hash, workdir).unwrap();
        println!("{entry}");
    }

    /// Builds an entry from an existing file with a SHA-256 sized hash.
    #[test]
    fn test_index_entry_create_sha256() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("Cargo.toml");

        let file = Path::new(source.as_path());
        let hash = ObjectHash::from_bytes(&[0u8; 32]).unwrap();
        let workdir = Path::new("../");
        let entry = IndexEntry::new_from_file(file, hash, workdir).unwrap();
        println!("{entry}");
    }
}