#![forbid(unsafe_code)]
use anyhow::{Context, Result, anyhow};
use clap::Parser;
use reqwest::Client;
use reqwest::header::CONTENT_TYPE;
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
use tokio::io::AsyncWriteExt;
use url::Url;
#[cfg(feature = "dash")]
use roxmltree::{Document, Node};
// Command-line arguments, parsed with clap's derive API (`Args::parse()` in `main`).
// NOTE: plain `//` comments are used here on purpose: clap turns `///` doc
// comments on a `Parser` struct and its fields into help text, which would
// change the program's `--help` output.
#[derive(Parser, Debug)]
#[command(about = "Recursively mirror an HLS (.m3u8) or DASH (.mpd) stream for local hosting")]
struct Args {
// URL of the top-level manifest to start from; `main` parses it with `Url::parse`.
#[arg(short, long)]
start_url: String,
// Directory the mirrored files are written into; `main` creates it if it is missing.
#[arg(short, long)]
output_dir: PathBuf,
}
/// State shared by every download performed during one mirroring run.
struct Mirror {
/// HTTP client used for all requests.
client: Client,
/// Root directory that every local path is joined onto.
out_dir: PathBuf,
/// URLs that have already been handled; each `mirror_*` method inserts its
/// URL here first and returns early when the URL was already present.
visited: HashSet<Url>,
/// Path segments of the start URL (split on `/`); `path_for_url` strips the
/// leading segments a URL shares with these before building the local path.
master_url_path_components: Vec<String>,
/// Cache of `path_for_url` results so a URL always maps to the same local path.
url_to_path: HashMap<Url, PathBuf>,
}
impl Mirror {
/// Builds a mirror that writes below `out_dir`.
///
/// `master_url_path_components` are the path segments of the start URL; they
/// are later used to strip the common directory prefix from mirrored URLs.
///
/// # Panics
///
/// Panics if the underlying `reqwest` client cannot be constructed.
fn new(out_dir: PathBuf, master_url_path_components: Vec<String>) -> Self {
    // "<crate name>/<crate version>", assembled at compile time.
    const USER_AGENT: &str = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"));

    let builder = Client::builder().user_agent(USER_AGENT);
    let client = builder.build().expect("failed to build reqwest client");

    Self {
        visited: HashSet::new(),
        url_to_path: HashMap::new(),
        client,
        out_dir,
        master_url_path_components,
    }
}
/// Maps `url` to the local file it is (or will be) stored in.
///
/// The result is cached, so repeated calls for the same URL always return the
/// same path. The path is built from the URL's path segments after dropping
/// the leading directories it shares with the start URL. With the `hls`
/// feature enabled, a manifest without an extension gets `.m3u8`. A query
/// string, if present, is folded into the file name as `__q_<sanitized>`
/// (non-alphanumerics become `_`, at most 32 characters are kept).
// `is_manifest` is only read when the `hls` feature is enabled.
#[allow(unused_variables)]
fn path_for_url(&mut self, url: &Url, is_manifest: bool) -> PathBuf {
    if let Some(cached) = self.url_to_path.get(url) {
        return cached.clone();
    }

    let segments: Vec<&str> = url.path().trim_start_matches('/').split('/').collect();
    let master = &self.master_url_path_components;

    // Only directory segments are compared: the last segment of either side
    // (the file name) never counts as shared prefix.
    let limit = master
        .len()
        .saturating_sub(1)
        .min(segments.len().saturating_sub(1));
    let shared = master
        .iter()
        .zip(segments.iter())
        .take(limit)
        .take_while(|(m, s)| m.as_str() == **s)
        .count();

    let mut local_path = self.out_dir.join(segments[shared..].join("/"));

    #[cfg(feature = "hls")]
    {
        if is_manifest && local_path.extension().is_none() {
            local_path.set_extension("m3u8");
        }
    }

    if let Some(query) = url.query() {
        let file_name = local_path
            .file_name()
            .unwrap_or_default()
            .to_string_lossy()
            .into_owned();
        // Every kept character is ASCII, so taking 32 chars equals truncating to 32 bytes.
        let sanitized: String = query
            .chars()
            .map(|c| if c.is_ascii_alphanumeric() { c } else { '_' })
            .take(32)
            .collect();
        let renamed = match file_name.rsplit_once('.') {
            Some((stem, ext)) => format!("{stem}__q_{sanitized}.{ext}"),
            None => format!("{file_name}__q_{sanitized}"),
        };
        local_path.set_file_name(renamed);
    }

    self.url_to_path.insert(url.clone(), local_path.clone());
    local_path
}
/// Expresses `target` relative to the directory `base`, using `/` as the
/// separator regardless of platform.
///
/// Falls back to `target` itself when no relative path can be computed.
#[cfg(feature = "hls")]
fn to_posix_relative(target: &std::path::Path, base: &std::path::Path) -> String {
    let relative = match pathdiff::diff_paths(target, base) {
        Some(diff) => diff,
        None => target.to_path_buf(),
    };

    let mut joined = String::new();
    for (index, component) in relative.components().enumerate() {
        if index > 0 {
            joined.push('/');
        }
        joined.push_str(&component.as_os_str().to_string_lossy());
    }
    joined
}
/// Entry point: works out whether `url` is an HLS or a DASH manifest and
/// hands it to the matching mirroring routine.
///
/// The type is taken from the response's `Content-Type` header first and from
/// the URL's file extension (`m3u8` / `mpd`) second.
///
/// # Errors
///
/// Fails when the probe request fails, when the type cannot be determined, or
/// when the detected type's cargo feature is not compiled in.
async fn mirror_root(&mut self, url: Url) -> Result<()> {
    #[derive(Debug)]
    enum StreamKind {
        Hls,
        Dash,
    }

    const HLS_CONTENT_TYPES: [&str; 4] = [
        "application/vnd.apple.mpegurl",
        "application/x-mpegurl",
        "audio/mpegurl",
        "audio/x-mpegurl",
    ];

    // Probe request: only the headers are inspected, the body is discarded.
    let probe = self
        .client
        .get(url.clone())
        .send()
        .await
        .with_context(|| format!("GET (for type detection) {}", url))?
        .error_for_status()
        .with_context(|| format!("status error (for type detection) {}", url))?;
    let ctype = probe
        .headers()
        .get(CONTENT_TYPE)
        .and_then(|value| value.to_str().ok())
        .map(str::to_ascii_lowercase);
    drop(probe);

    let from_header = ctype.as_deref().and_then(|ct| {
        if HLS_CONTENT_TYPES.iter().any(|prefix| ct.starts_with(*prefix)) {
            Some(StreamKind::Hls)
        } else if ct.starts_with("application/dash+xml") {
            Some(StreamKind::Dash)
        } else {
            None
        }
    });

    let from_extension = || {
        let ext = url
            .path()
            .rsplit('.')
            .next()
            .unwrap_or("")
            .to_ascii_lowercase();
        match ext.as_str() {
            "m3u8" => Some(StreamKind::Hls),
            "mpd" => Some(StreamKind::Dash),
            _ => None,
        }
    };

    let kind = from_header.or_else(from_extension).ok_or_else(|| {
        anyhow!(
            "Could not determine stream type from Content-Type {:?} or extension for {}",
            ctype,
            url
        )
    })?;

    match kind {
        StreamKind::Hls => {
            #[cfg(feature = "hls")]
            {
                self.mirror_manifest(url).await
            }
            #[cfg(not(feature = "hls"))]
            {
                Err(anyhow!(
                    "Detected HLS (m3u8) stream, but `hls` feature is disabled. Build with --features hls."
                ))
            }
        }
        StreamKind::Dash => {
            #[cfg(feature = "dash")]
            {
                self.mirror_mpd(url).await
            }
            #[cfg(not(feature = "dash"))]
            {
                Err(anyhow!(
                    "Detected DASH (mpd) stream, but `dash` feature is disabled. Build with --features dash."
                ))
            }
        }
    }
}
/// Downloads `url` verbatim into its local path (see `path_for_url`).
///
/// URLs that were already visited are skipped. The response body is streamed
/// to disk chunk by chunk, so large media files are never held in memory as a
/// whole; if the transfer fails midway, a partially written file is left
/// behind and is overwritten on the next run.
///
/// # Errors
///
/// Fails when the directory or file cannot be created, when the request
/// fails or returns an error status, or when reading the body or writing
/// the file fails.
async fn mirror_binary(&mut self, url: Url) -> Result<()> {
    if !self.visited.insert(url.clone()) {
        return Ok(());
    }
    let local_path = self.path_for_url(&url, false);
    if let Some(parent) = local_path.parent() {
        tokio::fs::create_dir_all(parent)
            .await
            .with_context(|| format!("creating directory {}", parent.display()))?;
    }
    println!("[BIN ] {} -> {}", url, local_path.display());
    let mut resp = self
        .client
        .get(url.clone())
        .send()
        .await
        .with_context(|| format!("GET {}", url))?
        .error_for_status()
        .with_context(|| format!("status error for {}", url))?;
    let mut file = tokio::fs::File::create(&local_path)
        .await
        .with_context(|| format!("creating file {}", local_path.display()))?;
    while let Some(chunk) = resp
        .chunk()
        .await
        .with_context(|| format!("reading body of {}", url))?
    {
        file.write_all(&chunk)
            .await
            .with_context(|| format!("writing {}", local_path.display()))?;
    }
    // tokio's `File` performs writes on a background thread; without an
    // explicit flush a late write error would be silently dropped.
    file.flush()
        .await
        .with_context(|| format!("flushing {}", local_path.display()))?;
    Ok(())
}
/// Locates the value of the first `URI="…"` attribute in `line`.
///
/// Returns the byte range `(start, end)` of the text between the quotes, or
/// `None` when the attribute is missing or its closing quote is absent.
#[cfg(feature = "hls")]
fn find_uri_attr(line: &str) -> Option<(usize, usize)> {
    const OPENING: &str = "URI=\"";
    let value_start = line.find(OPENING).map(|at| at + OPENING.len())?;
    let value_len = line[value_start..].find('"')?;
    Some((value_start, value_start + value_len))
}
/// Mirrors an HLS playlist and, recursively, everything it references.
///
/// The untouched playlist is saved next to the local copy with an extra
/// `.orig` suffix. In the local copy every URI (both `URI="…"` tag attributes
/// and plain URI lines) is replaced by a `/`-separated path relative to the
/// playlist's own directory. Children whose path ends in `.m3u8` are mirrored
/// as playlists, all others as binary files. URIs that do not resolve to
/// `http`/`https` (for example `data:` or `skd:` key URIs) cannot be fetched
/// and are left untouched.
///
/// If the response does not start with `#EXTM3U` it is stored as a plain
/// binary file instead.
///
/// # Errors
///
/// Fails on any I/O or HTTP error of this playlist or one of its children,
/// and when a URI cannot be resolved against `url`.
#[cfg(feature = "hls")]
#[async_recursion::async_recursion]
async fn mirror_manifest(&mut self, url: Url) -> Result<()> {
    if !self.visited.insert(url.clone()) {
        return Ok(());
    }
    let local_path = self.path_for_url(&url, true);
    if let Some(parent) = local_path.parent() {
        tokio::fs::create_dir_all(parent)
            .await
            .with_context(|| format!("creating directory {}", parent.display()))?;
    }
    println!("[M3U8] {} -> {}", url, local_path.display());
    let resp = self
        .client
        .get(url.clone())
        .send()
        .await
        .with_context(|| format!("GET {}", url))?
        .error_for_status()
        .with_context(|| format!("status error for {}", url))?;
    let text = resp
        .text()
        .await
        .with_context(|| format!("reading body of {}", url))?;
    if !text.trim_start().starts_with("#EXTM3U") {
        println!(" -> not an HLS manifest, saving as binary");
        // The URL was marked as visited above; un-mark it, otherwise
        // `mirror_binary` would return immediately without saving anything.
        self.visited.remove(&url);
        return self.mirror_binary(url).await;
    }

    let mut orig_path = local_path.clone();
    if let Some(file_name) = orig_path.file_name().and_then(|f| f.to_str()) {
        orig_path.set_file_name(format!("{file_name}.orig"));
    } else {
        orig_path.set_file_name("manifest.m3u8.orig");
    }
    // `tokio::fs::write` completes (and reports errors) before returning,
    // unlike a `File` that is dropped without being flushed.
    tokio::fs::write(&orig_path, text.as_bytes())
        .await
        .with_context(|| format!("writing {}", orig_path.display()))?;

    let local_dir = local_path
        .parent()
        .ok_or_else(|| anyhow!("manifest path has no parent: {}", local_path.display()))?
        .to_path_buf();

    let mut output_lines: Vec<String> = Vec::new();
    for line in text.lines() {
        let trimmed = line.trim();

        // Split the line into (text before URI, URI, text after URI);
        // lines without a URI are copied through unchanged.
        let (prefix, uri_val, suffix) = if trimmed.starts_with('#') {
            match Self::find_uri_attr(line) {
                Some((start, end)) => (&line[..start], &line[start..end], &line[end..]),
                None => {
                    output_lines.push(line.to_string());
                    continue;
                }
            }
        } else if trimmed.is_empty() {
            output_lines.push(line.to_string());
            continue;
        } else {
            ("", trimmed, "")
        };

        let child_url = url
            .join(uri_val)
            .with_context(|| format!("resolving URI '{}' relative to {}", uri_val, url))?;
        if !matches!(child_url.scheme(), "http" | "https") {
            println!(
                " -> leaving non-HTTP URI untouched (scheme '{}')",
                child_url.scheme()
            );
            output_lines.push(line.to_string());
            continue;
        }

        let is_manifest = child_url.path().to_ascii_lowercase().ends_with(".m3u8");
        if is_manifest {
            self.mirror_manifest(child_url.clone()).await?;
        } else {
            self.mirror_binary(child_url.clone()).await?;
        }
        let target_path = self.path_for_url(&child_url, is_manifest);
        let rel = Self::to_posix_relative(&target_path, &local_dir);
        output_lines.push(format!("{prefix}{rel}{suffix}"));
    }

    let mut rewritten = output_lines.join("\n");
    rewritten.push('\n');
    tokio::fs::write(&local_path, rewritten.as_bytes())
        .await
        .with_context(|| format!("writing {}", local_path.display()))?;
    Ok(())
}
/// Mirrors a DASH manifest (MPD) and the segments it describes.
///
/// The manifest is stored unmodified, both at its local path and with an
/// extra `.orig` suffix. For every `Representation` that has an `id`, the
/// effective base URL is resolved through the `BaseURL` elements of the
/// Period, AdaptationSet and Representation. The nearest `SegmentTemplate`
/// (Representation, then AdaptationSet, then Period) is expanded via
/// `handle_segment_template`, and a Representation `BaseURL` that does not
/// end in `/` is downloaded as a single media file.
///
/// If the document's root element is not `MPD`, the URL is re-downloaded and
/// stored as a plain binary file.
///
/// # Errors
///
/// Fails on I/O or HTTP errors, when the body is not well-formed XML, or when
/// a `BaseURL` cannot be joined onto its parent URL.
#[cfg(feature = "dash")]
async fn mirror_mpd(&mut self, url: Url) -> Result<()> {
    if !self.visited.insert(url.clone()) {
        return Ok(());
    }
    let local_path = self.path_for_url(&url, true);
    if let Some(parent) = local_path.parent() {
        tokio::fs::create_dir_all(parent)
            .await
            .with_context(|| format!("creating directory {}", parent.display()))?;
    }
    println!("[MPD ] {} -> {}", url, local_path.display());
    let resp = self
        .client
        .get(url.clone())
        .send()
        .await
        .with_context(|| format!("GET {}", url))?
        .error_for_status()
        .with_context(|| format!("status error for {}", url))?;
    let text = resp
        .text()
        .await
        .with_context(|| format!("reading body of {}", url))?;

    let mut orig_path = local_path.clone();
    if let Some(file_name) = orig_path.file_name().and_then(|f| f.to_str()) {
        orig_path.set_file_name(format!("{file_name}.orig"));
    } else {
        orig_path.set_file_name("manifest.mpd.orig");
    }
    // `tokio::fs::write` completes (and reports errors) before returning,
    // unlike a `File` that is dropped without being flushed.
    tokio::fs::write(&orig_path, text.as_bytes())
        .await
        .with_context(|| format!("writing {}", orig_path.display()))?;
    tokio::fs::write(&local_path, text.as_bytes())
        .await
        .with_context(|| format!("writing {}", local_path.display()))?;

    let doc = Document::parse(&text).with_context(|| format!("parsing XML of {}", url))?;
    let root = doc.root_element();
    if root.tag_name().name() != "MPD" {
        println!(" -> not an MPD root element, treating as binary");
        // The URL was marked as visited above; un-mark it, otherwise
        // `mirror_binary` would return immediately without saving anything.
        self.visited.remove(&url);
        return self.mirror_binary(url).await;
    }
    let mpd_duration_secs = root
        .attribute("mediaPresentationDuration")
        .and_then(parse_iso8601_duration_seconds);
    let mpd_url = url.clone();

    for period in root
        .children()
        .filter(|n| n.is_element() && n.tag_name().name() == "Period")
    {
        let period_base = match first_child_text(&period, "BaseURL") {
            Some(b) => mpd_url
                .join(b.trim())
                .with_context(|| format!("joining Period BaseURL '{}' to {}", b, mpd_url))?,
            None => mpd_url.clone(),
        };
        let period_st = first_child_element(&period, "SegmentTemplate");

        for aset in period
            .children()
            .filter(|n| n.is_element() && n.tag_name().name() == "AdaptationSet")
        {
            let aset_base = match first_child_text(&aset, "BaseURL") {
                Some(b) => period_base.join(b.trim()).with_context(|| {
                    format!("joining AdaptationSet BaseURL '{}' to {}", b, period_base)
                })?,
                None => period_base.clone(),
            };
            let aset_st = first_child_element(&aset, "SegmentTemplate");

            for rep in aset
                .children()
                .filter(|n| n.is_element() && n.tag_name().name() == "Representation")
            {
                // Without an id the `$RepresentationID$` placeholder cannot be filled.
                let rep_id = match rep.attribute("id") {
                    Some(id) => id.to_string(),
                    None => continue,
                };
                let (rep_base, rep_base_is_file) = match first_child_text(&rep, "BaseURL") {
                    Some(b) => {
                        // A BaseURL that does not end in '/' names a single media file.
                        let is_file = !b.trim_end().ends_with('/');
                        let joined = aset_base.join(b.trim()).with_context(|| {
                            format!("joining Representation BaseURL '{}' to {}", b, aset_base)
                        })?;
                        (joined, is_file)
                    }
                    None => (aset_base.clone(), false),
                };

                // The most specific SegmentTemplate wins.
                let rep_st = first_child_element(&rep, "SegmentTemplate")
                    .or(aset_st)
                    .or(period_st);
                if let Some(st) = rep_st {
                    self.handle_segment_template(&rep_base, &rep_id, st, mpd_duration_secs)
                        .await?;
                }
                if rep_base_is_file {
                    self.mirror_binary(rep_base.clone()).await?;
                }
            }
        }
    }
    Ok(())
}
/// Downloads the initialization segment and all numbered media segments
/// described by one DASH `SegmentTemplate` element.
///
/// * `base_url` – URL the template paths are resolved against.
/// * `representation_id` – value substituted for `$RepresentationID$`.
/// * `st` – the `SegmentTemplate` element.
/// * `mpd_duration_secs` – total presentation duration, used to derive the
///   segment count when the template has no `endNumber`.
///
/// Supported placeholders are `$RepresentationID$`, `$Number$` and the
/// zero-padded form `$Number%0<width>d$`. When the number of segments cannot
/// be determined, or `duration`/`timescale`/the presentation duration are not
/// positive, the media segments are skipped with a message.
///
/// # Errors
///
/// Fails when a template path cannot be joined onto `base_url` or when a
/// segment download fails.
#[cfg(feature = "dash")]
async fn handle_segment_template(
    &mut self,
    base_url: &Url,
    representation_id: &str,
    st: Node<'_, '_>,
    mpd_duration_secs: Option<f64>,
) -> Result<()> {
    if let Some(tmpl) = st.attribute("initialization") {
        let path = tmpl.replace("$RepresentationID$", representation_id);
        let full = base_url
            .join(path.trim())
            .with_context(|| format!("joining init path '{}' to {}", path, base_url))?;
        self.mirror_binary(full).await?;
    }

    let media_tmpl = match st.attribute("media") {
        Some(v) if !v.is_empty() => v,
        _ => return Ok(()),
    };
    let timescale = st
        .attribute("timescale")
        .and_then(|v| v.parse::<u64>().ok())
        .unwrap_or(1);
    let duration_units = st.attribute("duration").and_then(|v| v.parse::<u64>().ok());
    let start_number = st
        .attribute("startNumber")
        .and_then(|v| v.parse::<u64>().ok())
        .unwrap_or(1);
    let end_number_attr = st
        .attribute("endNumber")
        .and_then(|v| v.parse::<u64>().ok());

    let end_number = if let Some(en) = end_number_attr {
        en
    } else if let (Some(dur_u), Some(total_secs)) = (duration_units, mpd_duration_secs) {
        // Guard against manifests that would otherwise cause a division by
        // zero or an integer under-/overflow in the segment count below.
        if dur_u == 0 || timescale == 0 || !total_secs.is_finite() || total_secs <= 0.0 {
            println!(
                " -> Skipping media segments for {} (duration, timescale and MPD duration must be positive)",
                representation_id
            );
            return Ok(());
        }
        let seg_secs = dur_u as f64 / timescale as f64;
        // Both operands are positive, so the rounded-up count is at least 1;
        // the float-to-int cast saturates instead of wrapping.
        let count = (total_secs / seg_secs).ceil() as u64;
        start_number.saturating_add(count.saturating_sub(1))
    } else {
        println!(
            " -> Skipping media segments for {} (no endNumber and no duration/MPD duration)",
            representation_id
        );
        return Ok(());
    };

    // The representation id is the same for every segment; substitute it once.
    let media_for_rep = media_tmpl.replace("$RepresentationID$", representation_id);
    for num in start_number..=end_number {
        let path = Self::expand_number_identifier(&media_for_rep, num);
        let full = base_url
            .join(path.trim())
            .with_context(|| format!("joining media path '{}' to {}", path, base_url))?;
        self.mirror_binary(full).await?;
    }
    Ok(())
}

/// Replaces every `$Number$` and `$Number%0<width>d$` in `template` with
/// `number` (zero-padded to `width` digits for the latter form). Anything
/// else that merely starts with `$Number` is copied through unchanged.
#[cfg(feature = "dash")]
fn expand_number_identifier(template: &str, number: u64) -> String {
    const IDENT: &str = "$Number";
    let mut out = String::with_capacity(template.len());
    let mut rest = template;
    while let Some(pos) = rest.find(IDENT) {
        out.push_str(&rest[..pos]);
        let after = &rest[pos + IDENT.len()..];
        if let Some(tail) = after.strip_prefix('$') {
            out.push_str(&number.to_string());
            rest = tail;
        } else if let Some((width, tail)) = Self::parse_width_format_tag(after) {
            out.push_str(&format!("{:0width$}", number, width = width));
            rest = tail;
        } else {
            out.push_str(IDENT);
            rest = after;
        }
    }
    out.push_str(rest);
    out
}

/// Parses a `%0<width>d$` format tag at the start of `s`, returning the width
/// and the text following the closing `$`. Widths above 32 are rejected so a
/// hostile manifest cannot request absurdly long file names.
#[cfg(feature = "dash")]
fn parse_width_format_tag(s: &str) -> Option<(usize, &str)> {
    const MAX_WIDTH: usize = 32;
    let spec = s.strip_prefix("%0")?;
    let digits = spec.bytes().take_while(|b| b.is_ascii_digit()).count();
    let width = spec[..digits].parse::<usize>().ok()?;
    if width > MAX_WIDTH {
        return None;
    }
    let tail = spec[digits..].strip_prefix("d$")?;
    Some((width, tail))
}
}
/// Returns the text of the first child element of `node` called `name`,
/// or `None` when there is no such child or it has no text.
#[cfg(feature = "dash")]
fn first_child_text(node: &Node, name: &str) -> Option<String> {
    let child = node
        .children()
        .find(|candidate| candidate.is_element() && candidate.tag_name().name() == name)?;
    child.text().map(str::to_owned)
}
/// Returns the first child element of `node` whose local tag name is `name`.
#[cfg(feature = "dash")]
fn first_child_element<'a>(node: &'a Node<'a, 'a>, name: &'a str) -> Option<Node<'a, 'a>> {
    for child in node.children() {
        if child.is_element() && child.tag_name().name() == name {
            return Some(child);
        }
    }
    None
}
/// Converts an ISO-8601 / `xs:duration` string such as `PT1H2M3.5S` or
/// `P0Y0M1DT0H0M30S` into seconds.
///
/// Accepted form: `P[nY][nM][nW][nD][T[nH][nM][nS]]` with non-negative decimal
/// numbers, designators in that order and at least one component. Years and
/// months have no fixed length; they are counted as 365 and 30 days.
///
/// Returns `None` for anything malformed (missing `P`, unknown or misplaced
/// designator, a number without a designator, an empty `T` part, ...).
#[cfg(feature = "dash")]
fn parse_iso8601_duration_seconds(s: &str) -> Option<f64> {
    const DATE_UNITS: [(char, f64); 4] = [
        ('Y', 31_536_000.0),
        ('M', 2_592_000.0),
        ('W', 604_800.0),
        ('D', 86_400.0),
    ];
    const TIME_UNITS: [(char, f64); 3] = [('H', 3_600.0), ('M', 60.0), ('S', 1.0)];

    /// Sums `<number><designator>` pairs of `part`; designators must appear
    /// in the order given by `allowed`, each at most once.
    fn sum_designators(mut part: &str, mut allowed: &[(char, f64)]) -> Option<f64> {
        let mut total = 0.0;
        while !part.is_empty() {
            let digits = part
                .bytes()
                .take_while(|b| b.is_ascii_digit() || *b == b'.')
                .count();
            // Only ASCII bytes were counted, so this split is on a char boundary.
            let (number, tail) = part.split_at(digits);
            let value: f64 = number.parse().ok()?;
            // Walk by `char`, not by byte, so a stray multi-byte character
            // is rejected instead of causing a slicing panic.
            let mut rest = tail.chars();
            let designator = rest.next()?;
            let pos = allowed.iter().position(|(unit, _)| *unit == designator)?;
            total += value * allowed[pos].1;
            allowed = &allowed[pos + 1..];
            part = rest.as_str();
        }
        Some(total)
    }

    let body = s.trim().strip_prefix('P')?;
    let (date_part, time_part) = match body.split_once('T') {
        Some((date, time)) => (date, Some(time)),
        None => (body, None),
    };
    // A bare "P", or a "T" with nothing after it, carries no component.
    if time_part == Some("") || (date_part.is_empty() && time_part.is_none()) {
        return None;
    }
    let date_secs = sum_designators(date_part, &DATE_UNITS)?;
    let time_secs = sum_designators(time_part.unwrap_or(""), &TIME_UNITS)?;
    Some(date_secs + time_secs)
}
#[tokio::main]
async fn main() -> Result<()> {
let args = Args::parse();
let start_url = Url::parse(&args.start_url)
.with_context(|| format!("parsing start URL '{}'", args.start_url))?;
let out_dir = args.output_dir;
tokio::fs::create_dir_all(&out_dir)
.await
.with_context(|| format!("creating output dir {}", out_dir.display()))?;
let master_components = start_url
.path()
.trim_start_matches('/')
.split('/')
.map(|s| s.to_string())
.collect::<Vec<_>>();
let mut mirror = Mirror::new(out_dir, master_components);
mirror.mirror_root(start_url).await?;
println!("Done.");
Ok(())
}