use percent_encoding::percent_decode_str;
use std::borrow::Cow;
use super::{FileSpec, LinkAction, PdfAction, PdfDestination};
use crate::destination::not_nan;
use crate::pdf::{PdfDocument, PdfObject};
use crate::{DestinationKind, Error, Rect};
/// Classifies a link URI string into a [`PdfAction`].
///
/// The input is trimmed and split at the first `#` into a head and a fragment.
///
/// * Empty input yields `None`.
/// * A fragment-only link (empty head) yields `PdfAction::GoTo`, or `None`
///   when the fragment does not describe any destination.
/// * A head that ends in `.pdf` (after stripping a case-insensitive `file://`
///   or `file:` prefix) yields `PdfAction::GoToR`; if its fragment contains
///   unrecognized keys the whole URI is returned as `PdfAction::Uri` instead.
/// * A `file:` prefix with nothing after it yields `None`.
/// * Anything else becomes `PdfAction::Launch` (explicit `file:` links and
///   strings that `is_external_link` rejects) or `PdfAction::Uri`.
pub(crate) fn parse_external_link(uri: &str) -> Option<PdfAction> {
let uri = uri.trim();
if uri.is_empty() {
return None;
}
// Everything after the first `#` is the fragment; no `#` means an empty fragment.
let (head, params) = uri
.split_once('#')
.map(|(head, params)| (head.trim(), params.trim()))
.unwrap_or((uri, ""));
// Fragment-only link: a destination inside the current document.
if head.is_empty() {
let dest = match parse_params(params) {
ParsedFragment::Explicit(page, kind) => PdfDestination::Page { page, kind },
ParsedFragment::Named(name) => PdfDestination::Named(name),
ParsedFragment::ContainsUnknownKeys => {
// Unknown keys: fall back to a named destination. Use the value that
// follows a leading `nameddest=` (cut at the next `&` or `#`), otherwise
// the whole fragment.
let name = strip_prefix_icase(params, "nameddest=")
.map(|raw| raw.split_once(['&', '#']).map_or(raw, |(head, _)| head))
.unwrap_or(params);
PdfDestination::Named(decode_uri_component(name).into_owned())
}
ParsedFragment::Empty => return None,
};
return Some(PdfAction::GoTo(dest));
}
// `file://` is tried before `file:` so the longer prefix is removed when present.
let (link, is_explicit_file) = strip_prefix_icase(head, "file://")
.or_else(|| strip_prefix_icase(head, "file:"))
.map(|path| (path, true))
.unwrap_or((head, false));
// Link to another PDF file, optionally with a destination in the fragment.
if is_pdf_path(link) {
let dest = match parse_params(params) {
ParsedFragment::Empty => PdfDestination::default(),
ParsedFragment::Explicit(page, kind) => PdfDestination::Page { page, kind },
ParsedFragment::Named(name) => PdfDestination::Named(name),
ParsedFragment::ContainsUnknownKeys => return Some(PdfAction::Uri(uri.to_owned())),
};
// An explicit `file:` prefix always means a local path, even if the rest
// would pass the scheme check.
let is_url = !is_explicit_file && is_external_link(link);
let file = if is_url {
FileSpec::Url(link.to_string())
} else {
FileSpec::Path(decode_and_clean_path(link))
};
return Some(PdfAction::GoToR { file, dest });
}
// `file:` / `file://` with no path after it carries no target.
if link.is_empty() && is_explicit_file {
return None;
}
let action = if is_explicit_file {
PdfAction::Launch(FileSpec::Path(decode_and_clean_path(link)))
} else if !is_external_link(uri) {
// No usable scheme: treat the whole trimmed input (fragment included) as a path.
PdfAction::Launch(FileSpec::Path(decode_and_clean_path(uri)))
} else {
PdfAction::Uri(uri.to_string())
};
Some(action)
}
/// Reports whether `uri` begins with something that looks like a URI scheme.
///
/// The text before the first `:` must be at least three bytes long, start
/// with an ASCII letter, and otherwise consist only of ASCII letters, digits,
/// `+`, `-` or `.`. Strings without a `:` are never external.
pub(super) fn is_external_link(uri: &str) -> bool {
    match uri.split_once(':') {
        Some((scheme, _)) if scheme.len() >= 3 => {
            let mut bytes = scheme.bytes();
            let starts_with_letter = bytes.next().is_some_and(|b| b.is_ascii_alphabetic());
            // `bytes` now covers everything after the first byte.
            starts_with_letter
                && bytes.all(|b| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'-' | b'.'))
        }
        _ => false,
    }
}
/// Reports whether `file_name` ends with `.pdf`, ignoring ASCII case.
///
/// Only the last four bytes are inspected; if that offset does not fall on a
/// character boundary the name is not considered a PDF path.
pub(super) fn is_pdf_path(file_name: &str) -> bool {
    let tail_start = file_name.len().saturating_sub(4);
    match file_name.get(tail_start..) {
        Some(tail) => tail.eq_ignore_ascii_case(".pdf"),
        None => false,
    }
}
/// Outcome of parsing the fragment part of a link URI (see `parse_params`).
#[derive(Debug, PartialEq)]
enum ParsedFragment {
/// The fragment was empty, or it named an empty destination.
Empty,
/// An explicit zero-based page index together with the view to apply to it.
Explicit(u32, DestinationKind),
/// A named destination, already percent-decoded.
Named(String),
/// The fragment contained a `key=value` pair whose key is not recognized.
ContainsUnknownKeys,
}
/// Parses a link fragment (the text after `#`) into a [`ParsedFragment`].
///
/// Recognized keys, matched case-insensitively, are `page`, `nameddest`,
/// `viewrect`, `zoom` and `view`. Any other key ends parsing immediately with
/// `ContainsUnknownKeys`. Pieces without an `=` are skipped by
/// `fragment_kv_pairs` and never reach this loop.
///
/// A successfully parsed `page` discards any earlier `nameddest`, and a
/// `nameddest` discards any earlier page and view, so the later of the two
/// decides the result.
///
/// When no key produced a page or a view, the entire fragment is
/// percent-decoded and returned as a named destination.
fn parse_params(params: &str) -> ParsedFragment {
if params.is_empty() {
return ParsedFragment::Empty;
}
let mut page = None;
let mut kind = None;
let mut named_dest = None;
for (k, v) in fragment_kv_pairs(params) {
if k.eq_ignore_ascii_case("page") {
// An unparsable page value is ignored and leaves the current state untouched.
if let Some(new_page) = parse_page_1based_to_0based(v) {
page = Some(new_page);
kind = Some(DestinationKind::default());
named_dest = None; }
continue;
}
if k.eq_ignore_ascii_case("nameddest") {
named_dest = Some(v);
page = None; kind = None;
continue;
}
let new_kind = if k.eq_ignore_ascii_case("viewrect") {
parse_viewrect(v)
} else if k.eq_ignore_ascii_case("zoom") {
Some(parse_zoom(v))
} else if k.eq_ignore_ascii_case("view") {
parse_view(v)
} else {
return ParsedFragment::ContainsUnknownKeys;
};
// A view key whose value could not be parsed keeps the previous view.
if let Some(k) = new_kind {
kind = Some(k);
}
}
// A pending named destination takes precedence over any view set after it.
if let Some(name) = named_dest {
return if !name.is_empty() {
ParsedFragment::Named(decode_uri_component(name).into())
} else {
ParsedFragment::Empty
};
}
match (page, kind) {
// Nothing recognized was set: the fragment itself is the destination name.
(None, None) => ParsedFragment::Named(decode_uri_component(params).into()),
// A missing page or view falls back to that type's default value.
(p, k) => ParsedFragment::Explicit(p.unwrap_or_default(), k.unwrap_or_default()),
}
}
/// Iterates over the `key=value` pairs of a fragment.
///
/// Pairs are separated by `&` or `#`; each piece is split at its first `=`.
/// Pieces that contain no `=` are dropped.
fn fragment_kv_pairs(fragment: &str) -> impl Iterator<Item = (&str, &str)> {
    let is_separator = |c: char| matches!(c, '&' | '#');
    fragment
        .split(is_separator)
        .flat_map(|piece| piece.split_once('='))
}
/// Converts a one-based page number written as text into a zero-based index.
///
/// Returns `None` when `s` is not a valid `i32`. Values of 1 or less
/// (including negatives) map to index 0.
fn parse_page_1based_to_0based(s: &str) -> Option<u32> {
    let page_number = s.parse::<i32>().ok()?;
    let index = match page_number {
        i32::MIN..=1 => 0,
        n => n as u32 - 1,
    };
    Some(index)
}
/// Parses a `viewrect` value of four comma-separated numbers into a
/// `DestinationKind::FitR`.
///
/// The first two numbers become `left` and `bottom`; the third and fourth are
/// added to them to produce `right` and `top`. Returns `None` unless all four
/// numbers are present and finite.
fn parse_viewrect(s: &str) -> Option<DestinationKind> {
    let mut fields = FloatParser::new(s);
    let left = fields.next_finite()?;
    let bottom = fields.next_finite()?;
    let width = fields.next_finite()?;
    let height = fields.next_finite()?;
    let right = left + width;
    let top = bottom + height;
    Some(DestinationKind::FitR {
        left,
        bottom,
        right,
        top,
    })
}
/// Parses a `zoom` value (`scale[,left[,top]]`) into a `DestinationKind::XYZ`.
///
/// A scale that is zero, negative or infinite is replaced by `100.0`; a
/// missing, unparsable or NaN scale leaves `zoom` as `None`. `left` and `top`
/// are kept only when they parse to finite numbers.
fn parse_zoom(s: &str) -> DestinationKind {
    let mut fields = FloatParser::new(s);
    let zoom = match fields.next_not_nan() {
        Some(scale) if scale > 0.0 && scale.is_finite() => Some(scale),
        Some(_) => Some(100.0),
        None => None,
    };
    let left = fields.next_finite();
    let top = fields.next_finite();
    DestinationKind::XYZ { left, top, zoom }
}
/// Parses a `view` value such as `Fit` or `FitH,120` into a [`DestinationKind`].
///
/// The mode name is matched case-insensitively after trimming. `Fit` and
/// `FitB` take no argument; `FitH`, `FitBH`, `FitV` and `FitBV` take one
/// optional coordinate, which is kept only when it parses to a finite number.
/// Returns `None` for an empty value or an unknown mode.
fn parse_view(s: &str) -> Option<DestinationKind> {
    if s.is_empty() {
        return None;
    }
    let mut fields = s.split(',').map(str::trim);
    let mode = fields.next()?;
    let mode_is = |name: &str| mode.eq_ignore_ascii_case(name);

    if mode_is("Fit") {
        return Some(DestinationKind::Fit);
    }
    if mode_is("FitB") {
        return Some(DestinationKind::FitB);
    }

    let coord = fields
        .next()
        .and_then(|raw| raw.parse::<f32>().ok())
        .filter(|value| value.is_finite());

    if mode_is("FitH") {
        Some(DestinationKind::FitH { top: coord })
    } else if mode_is("FitBH") {
        Some(DestinationKind::FitBH { top: coord })
    } else if mode_is("FitV") {
        Some(DestinationKind::FitV { left: coord })
    } else if mode_is("FitBV") {
        Some(DestinationKind::FitBV { left: coord })
    } else {
        None
    }
}
/// Cursor over the comma-separated fields of a string, read as `f32` values.
struct FloatParser<'a>(std::str::Split<'a, char>);

impl<'a> FloatParser<'a> {
    /// Starts reading the comma-separated fields of `s`.
    fn new(s: &'a str) -> Self {
        Self(s.split(','))
    }

    /// Consumes the next field and returns it parsed as `f32`.
    ///
    /// Returns `None` when the fields are exhausted or the trimmed field is
    /// not a valid float; the field is consumed either way.
    fn next(&mut self) -> Option<f32> {
        let field = self.0.next()?;
        field.trim().parse().ok()
    }

    /// Like [`Self::next`], but rejects infinities and NaN.
    fn next_finite(&mut self) -> Option<f32> {
        match self.next() {
            Some(value) if value.is_finite() => Some(value),
            _ => None,
        }
    }

    /// Like [`Self::next`], but rejects NaN only.
    fn next_not_nan(&mut self) -> Option<f32> {
        match self.next() {
            Some(value) if !value.is_nan() => Some(value),
            _ => None,
        }
    }
}
/// Removes `pat` from the front of `s`, comparing ASCII letters
/// case-insensitively.
///
/// Returns the remainder of `s` on a match, and `None` when `s` is shorter
/// than `pat`, the split point is not a character boundary, or the prefix
/// differs.
fn strip_prefix_icase<'a>(s: &'a str, pat: &str) -> Option<&'a str> {
    let split_at = pat.len();
    let candidate = s.get(..split_at)?;
    if candidate.eq_ignore_ascii_case(pat) {
        s.get(split_at..)
    } else {
        None
    }
}
/// Percent-decodes `path` and then normalizes it with `cleanname`.
fn decode_and_clean_path(path: &str) -> String {
    cleanname(decode_uri_component(path).as_ref())
}
/// Normalizes a `/`-separated path lexically.
///
/// Empty components and `.` are dropped, and `..` removes the preceding
/// component when there is one to remove. In a relative path, `..` components
/// that cannot be resolved are kept at the front; in a rooted path they are
/// discarded. An empty result is written as `/` for rooted paths and `.`
/// otherwise.
fn cleanname(name: &str) -> String {
    let is_rooted = name.starts_with('/');
    let mut kept: Vec<&str> = Vec::with_capacity(8);
    // Number of leading `..` entries in `kept`; these can never be popped.
    let mut pinned: usize = 0;

    for segment in name.split('/') {
        if segment.is_empty() || segment == "." {
            continue;
        }
        if segment != ".." {
            kept.push(segment);
            continue;
        }
        if kept.len() > pinned {
            kept.pop();
        } else if !is_rooted {
            kept.push("..");
            pinned += 1;
        }
    }

    if kept.is_empty() {
        return String::from(if is_rooted { "/" } else { "." });
    }

    let mut cleaned = String::with_capacity(name.len());
    if is_rooted {
        cleaned.push('/');
    }
    for (position, segment) in kept.iter().enumerate() {
        if position > 0 {
            cleaned.push('/');
        }
        cleaned.push_str(segment);
    }
    cleaned
}
/// Percent-decodes `s`.
///
/// If the decoded bytes are not valid UTF-8, the input is returned unchanged.
pub(super) fn decode_uri_component(s: &str) -> Cow<'_, str> {
    match percent_decode_str(s).decode_utf8() {
        Ok(decoded) => decoded,
        Err(_) => Cow::Borrowed(s),
    }
}
/// Extracts the link target from an annotation dictionary.
///
/// Entries are consulted in this order, and the first one present decides the
/// result: `Dest`, then `A`, then `D` and `U` inside `AA`. Returns `Ok(None)`
/// when none of them is present or the chosen entry does not describe a
/// supported target.
///
/// # Errors
///
/// Propagates any error raised while reading or parsing the entries.
pub(crate) fn parse_link_action_from_annot_dict(
    obj: &PdfObject,
    doc: &PdfDocument,
    page_no: Option<i32>,
) -> Result<Option<LinkAction>, Error> {
    if let Some(dest_obj) = obj.get_dict("Dest")? {
        let dest = parse_dest_value(&dest_obj, doc)?;
        return Ok(dest.map(LinkAction::Dest));
    }

    // Pick the first action dictionary available: `A`, then `AA/D`, then `AA/U`.
    let mut action_obj = obj.get_dict("A")?;
    if action_obj.is_none() {
        if let Some(additional) = obj.get_dict("AA")? {
            action_obj = additional.get_dict("D")?;
            if action_obj.is_none() {
                action_obj = additional.get_dict("U")?;
            }
        }
    }

    let Some(action_obj) = action_obj else {
        return Ok(None);
    };
    let parsed = parse_action_dict(&action_obj, doc, page_no)?;
    Ok(parsed.map(LinkAction::Action))
}
fn parse_dest_value(dest: &PdfObject, doc: &PdfDocument) -> Result<Option<PdfDestination>, Error> {
if dest.is_array()? && dest.len()? > 0 {
parse_dest_array(dest, doc).map(Some)
} else if dest.is_name()? {
let name = std::str::from_utf8(dest.as_name()?).map_err(|_| Error::InvalidUtf8)?;
Ok(Some(PdfDestination::Named(name.to_owned())))
} else if dest.is_string()? {
Ok(Some(PdfDestination::Named(dest.as_string()?.to_owned())))
} else {
Ok(None)
}
}
/// Decodes an explicit destination array into a `PdfDestination::Page`.
///
/// Element 0 is either an integer page index or a page object whose number is
/// looked up in `doc`. The resulting index is clamped to the document's page
/// range (0 when the document reports no pages). The view is decoded from the
/// array and, when the page object is a dictionary, its coordinates are
/// transformed with that page's CTM.
///
/// # Errors
///
/// Returns `Error::InvalidDestination` when element 0 is missing, and
/// propagates errors from page lookup and view decoding.
fn parse_dest_array(array: &PdfObject, doc: &PdfDocument) -> Result<PdfDestination, Error> {
let page_obj = array
.get_array(0)?
.ok_or_else(|| Error::InvalidDestination("missing page reference in dest array".into()))?;
// Resolve both the page index and the page object, whichever form was given.
let (page_idx, page_obj) = if page_obj.is_int()? {
let idx = page_obj.as_int()?;
(idx, doc.find_page(idx)?)
} else {
(doc.lookup_page_number(&page_obj)?, page_obj)
};
let page_count = doc.page_count()?;
// Guard the clamp: with no pages the upper bound would be below the lower bound.
let page = if page_count > 0 {
page_idx.clamp(0, page_count - 1)
} else {
0
};
let mut kind = DestinationKind::decode_from(array)?;
if page_obj.is_dict()? {
let ctm = page_obj.page_ctm()?;
kind = match kind {
DestinationKind::FitH { top } => DestinationKind::FitH {
top: top.map(|t| ctm.transform_xy(0.0, t).1),
},
DestinationKind::FitBH { top } => DestinationKind::FitBH {
top: top.map(|t| ctm.transform_xy(0.0, t).1),
},
DestinationKind::FitV { left } => DestinationKind::FitV {
left: left.map(|l| ctm.transform_xy(l, 0.0).0),
},
DestinationKind::FitBV { left } => DestinationKind::FitBV {
left: left.map(|l| ctm.transform_xy(l, 0.0).0),
},
DestinationKind::XYZ { left, top, zoom } => {
// Missing coordinates are transformed as 0.0, but `Option::and` below
// keeps them `None` in the result.
let (tx, ty) = ctm.transform_xy(left.unwrap_or(0.0), top.unwrap_or(0.0));
DestinationKind::XYZ {
left: left.and(not_nan(tx)),
top: top.and(not_nan(ty)),
zoom,
}
}
DestinationKind::FitR {
left,
bottom,
right,
top,
} => {
// Re-order the transformed corners so that left <= right and bottom <= top.
let tr = Rect::new(left, bottom, right, top).transform(&ctm);
DestinationKind::FitR {
left: tr.x0.min(tr.x1),
bottom: tr.y0.min(tr.y1),
right: tr.x0.max(tr.x1),
top: tr.y0.max(tr.y1),
}
}
// Remaining kinds are passed through unchanged.
kind => kind,
}
}
Ok(PdfDestination::Page {
page: page as u32,
kind,
})
}
fn parse_action_dict(
action: &PdfObject,
doc: &PdfDocument,
page_no: Option<i32>,
) -> Result<Option<PdfAction>, Error> {
let Some(type_obj) = action.get_dict("S")? else {
return Ok(None);
};
match type_obj.as_name()? {
b"GoTo" => {
let Some(dest_obj) = action.get_dict("D")? else {
return Ok(None);
};
let dest = parse_dest_value(&dest_obj, doc)?;
Ok(dest.map(PdfAction::GoTo))
}
b"URI" => {
let Some(uri_obj) = action.get_dict("URI")? else {
return Ok(None);
};
let uri = uri_obj.as_string()?.to_owned();
if is_external_link(&uri) {
Ok(Some(PdfAction::Uri(uri)))
} else {
let base = doc
.trailer()?
.get_dict("Root")?
.and_then(|root| root.get_dict("URI").ok().flatten())
.and_then(|uri_dict| uri_dict.get_dict("Base").ok().flatten())
.and_then(|base_obj| base_obj.as_string().ok().map(|s| s.to_owned()));
let mut full_uri = base.unwrap_or_else(|| "file://".to_owned());
full_uri.reserve(uri.len());
full_uri.push_str(&uri);
Ok(Some(PdfAction::Uri(full_uri)))
}
}
b"GoToR" => {
let file = match action.get_dict("F")? {
Some(f) => parse_filespec(&f)?,
None => return Ok(None),
};
let dest = match action.get_dict("D")? {
Some(dest) => {
if dest.is_array()? && dest.len()? > 0 {
let page_obj = dest.get_array(0)?.ok_or_else(|| {
Error::InvalidDestination("missing page in GoToR dest".into())
})?;
let page = page_obj.as_int()?.max(0) as u32;
let kind = DestinationKind::decode_from(&dest)?;
PdfDestination::Page { page, kind }
} else if dest.is_name()? {
let name =
std::str::from_utf8(dest.as_name()?).map_err(|_| Error::InvalidUtf8)?;
PdfDestination::Named(name.to_owned())
} else if dest.is_string()? {
PdfDestination::Named(dest.as_string()?.to_owned())
} else {
PdfDestination::default()
}
}
None => PdfDestination::default(),
};
Ok(Some(PdfAction::GoToR { file, dest }))
}
b"Launch" => match action.get_dict("F")? {
Some(f) => Ok(Some(PdfAction::Launch(parse_filespec(&f)?))),
None => Ok(None),
},
b"Named" => {
let Some(dest_obj) = action.get_dict("N")? else {
return Ok(None);
};
let total = doc.page_count()?;
let target = match dest_obj.as_name()? {
b"FirstPage" => Some(0),
b"LastPage" => Some((total - 1).max(0)),
b"PrevPage" => page_no.map(|p| (p - 1).max(0)),
b"NextPage" => page_no.map(|p| (p + 1).min(total - 1)),
_ => return Ok(None),
};
let Some(page) = target else {
return Ok(None);
};
Ok(Some(PdfAction::GoTo(PdfDestination::Page {
page: page as u32,
kind: DestinationKind::default(),
})))
}
_ => Ok(None),
}
}
/// Converts a file specification object into a [`FileSpec`].
///
/// A plain string is a path. For a dictionary, an `FS` entry equal to the
/// name `URL` together with an `F` entry yields a URL; otherwise the first
/// file-name entry found by `get_file_name` is used as a path if it is a
/// string.
///
/// # Errors
///
/// Returns `Error::InvalidDestination` when no usable file name is found, and
/// propagates errors from the underlying object accessors.
fn parse_filespec(obj: &PdfObject) -> Result<FileSpec, Error> {
    if obj.is_string()? {
        let path = obj.as_string()?;
        return Ok(FileSpec::Path(path.to_owned()));
    }

    if obj.is_dict()? {
        let is_url_spec = match obj.get_dict("FS")? {
            Some(fs) => fs.is_name()? && fs.as_name()? == b"URL",
            None => false,
        };
        if is_url_spec {
            if let Some(target) = obj.get_dict("F")? {
                return Ok(FileSpec::Url(target.as_string()?.to_owned()));
            }
        }

        if let Some(name) = get_file_name(obj)? {
            if name.is_string()? {
                return Ok(FileSpec::Path(name.as_string()?.to_owned()));
            }
        }
    }

    Err(Error::InvalidDestination(
        "invalid file specification object".into(),
    ))
}
/// Returns the first file-name entry present in a file specification
/// dictionary, trying `UF`, `F`, `Unix`, `DOS` and `Mac` in that order.
///
/// Yields `Ok(None)` when none of the keys is present.
fn get_file_name(fs: &PdfObject) -> Result<Option<PdfObject>, Error> {
    const CANDIDATE_KEYS: [&str; 5] = ["UF", "F", "Unix", "DOS", "Mac"];
    for key in CANDIDATE_KEYS {
        let entry = fs.get_dict(key)?;
        if entry.is_some() {
            return Ok(entry);
        }
    }
    Ok(None)
}