netscape_bookmark_parser/
generate_json_from_html.rs

1use serde_json::{json, Value};
2use std::fs;
3use std::io::{self, BufRead};
4use std::path::Path;
5use chrono::{DateTime, Utc};
6
// Constants and helper functions for timestamp conversion

/// Seconds between the Windows NT epoch (1601-01-01) and the Unix epoch (1970-01-01).
const LDAP_NT_EPOCH: i64 = 11644473600;

/// Converts a Unix timestamp to a Windows NT timestamp.
///
/// The result counts 100-nanosecond intervals since 1601-01-01. The arithmetic
/// saturates: an input far outside the representable range (for example a
/// timestamp that was mistakenly supplied in milliseconds) clamps to
/// `i64::MAX` / `i64::MIN` instead of panicking in debug builds or silently
/// wrapping in release builds.
///
/// # Arguments
///
/// * `unix_timestamp` - An i64 representing the Unix timestamp, in seconds.
///
/// # Returns
///
/// * Returns an i64 representing the Windows NT timestamp.
pub fn convert_to_nt_timestamp(unix_timestamp: i64) -> i64 {
    // Number of 100-nanosecond intervals in one second.
    const INTERVALS_PER_SECOND: i64 = 10_000_000;

    unix_timestamp
        .saturating_add(LDAP_NT_EPOCH)
        .saturating_mul(INTERVALS_PER_SECOND)
}
23
24/// Parses HTML bookmarks file and converts it to a JSON value.
25///
26/// # Arguments
27///
28/// * `html_path` - A reference to a Path representing the file path of the HTML bookmarks.
29///
30/// # Returns
31///
32/// * Returns an io::Result containing a serde_json::Value representing the parsed bookmarks.
33pub fn parse_html_bookmarks(html_path: &Path) -> io::Result<Value> {
34    let file = fs::File::open(html_path)?;
35    let reader = io::BufReader::new(file);
36    
37    let mut roots = json!({
38        "name": "Bookmarks Bar",
39        "type": "folder",
40        "children": []
41    });
42    let mut stack: Vec<Value> = Vec::new();
43    
44    for line in reader.lines() {
45        let line = line?;
46        if line.contains("<DT><H3") {
47            // Parse folder
48            let name = line.split('>').nth(2).and_then(|s| s.split('<').next()).unwrap_or("");
49            let add_date = line.split("ADD_DATE=\"").nth(1).and_then(|s| s.split('"').next()).and_then(|s| s.parse::<i64>().ok()).unwrap_or(0);
50            let last_modified = line.split("LAST_MODIFIED=\"").nth(1).and_then(|s| s.split('"').next()).and_then(|s| s.parse::<i64>().ok()).unwrap_or(0);
51            
52            let folder = json!({
53                "name": name,
54                "type": "folder",
55                "date_added": convert_to_nt_timestamp(add_date).to_string(),
56                "date_modified": convert_to_nt_timestamp(last_modified).to_string(),
57                "children": []
58            });
59            
60            // Push current folder to stack and make new folder current
61            stack.push(roots.clone());
62            roots = folder;
63        } else if line.contains("<DT><A") {
64            // Parse bookmark
65            let name = line.split('>').nth(2).and_then(|s| s.split('<').next()).unwrap_or("");
66            let url = line.split("HREF=\"").nth(1).and_then(|s| s.split('"').next()).unwrap_or("");
67            let add_date = line.split("ADD_DATE=\"").nth(1).and_then(|s| s.split('"').next()).and_then(|s| s.parse::<i64>().ok()).unwrap_or(0);
68            
69            let bookmark = json!({
70                "name": name,
71                "type": "url",
72                "url": url,
73                "date_added": convert_to_nt_timestamp(add_date).to_string()
74            });
75            
76            if let Some(children) = roots.get_mut("children") {
77                if let Some(arr) = children.as_array_mut() {
78                    arr.push(bookmark);
79                }
80            }
81        } else if line.contains("</DL>") {
82            // End of current folder - pop from stack
83            if let Some(mut parent) = stack.pop() {
84                if let Some(parent_children) = parent.get_mut("children") {
85                    if let Some(mut arr) = parent_children.as_array_mut() {
86                        arr.push(roots);
87                    }
88                }
89                roots = parent;
90            }
91        }
92    }
93    
94    Ok(json!({
95        "roots": {
96            "bookmark_bar": roots,
97            "other": {
98                "name": "Other Bookmarks",
99                "type": "folder",
100                "children": []
101            },
102            "synced": {
103                "name": "Synced Bookmarks",
104                "type": "folder",
105                "children": []
106            }
107        },
108        "version": 1
109    }))
110}
111
112/// Main function to run the HTML bookmarks to JSON conversion.
113///
114/// # Arguments
115///
116/// * `input` - A string slice representing the input file path of the HTML bookmarks.
117/// * `output` - A string slice representing the output directory path for the JSON file.
118///
119/// # Returns
120///
121/// * Returns an io::Result indicating success or failure.
122pub fn run(input: &str, output: &str) -> io::Result<()> {
123    let html_file_path = Path::new(input);
124    let json_file_dir = Path::new(output);
125    // let html_file_path = Path::new(&std::env::var("USERPROFILE").unwrap())
126    //     .join("Documents")
127    //     .join("EdgeChromium-Bookmarks.backup.html");
128        
129    // let json_file_dir = Path::new(&std::env::var("LOCALAPPDATA").unwrap())
130    //     .join("Microsoft")
131    //     .join("Edge")
132    //     .join("User Data")
133    //     .join("Default");
134        
135    let exported_time = chrono::Local::now().format("%Y-%m-%d_%H-%M-%S").to_string();
136    let json_file_path = json_file_dir.join(format!("Bookmarks_{}.json", exported_time));
137    
138    if !html_file_path.exists() {
139        return Err(io::Error::new(io::ErrorKind::NotFound, 
140            format!("Source file path {:?} does not exist!", html_file_path)));
141    }
142    
143    if !json_file_dir.exists() {
144        return Err(io::Error::new(io::ErrorKind::NotFound, 
145            format!("Destination directory path {:?} does not exist!", json_file_dir)));
146    }
147    
148    let json_data = parse_html_bookmarks(&html_file_path)?;
149    fs::write(json_file_path, json_data.to_string())?;
150    
151    Ok(())
152}