-- vfox 2026.1.4

-- Interface to vfox plugins
-- Documentation
-- vim: ft=lua ts=2 sw=2

-- Syntactic Sugar {{{
-- Return (val) If it's Not Empty (non-zero-length); otherwise a falsy value.
-- Length is taken with `#`, so a table that holds only named (non-sequence)
-- keys counts as empty here.
local function rine(val)
	return (val and #val>0) and val
end
-- Return (a) If it's Table; otherwise false.
local function rit(a)
	return (type(a) == "table") and a
end
local noop = function() end
-- Escape every Lua-pattern magic character in (s) with "%".
-- The outer parentheses drop gsub's second result (the substitution count), so the
-- call can safely be used in the middle or at the end of an argument list.
local esc = function(s) return (string.gsub(s, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%" .. "%1")) end
local str = tostring
local char = string.char
local opts = rit(htmlparser_opts) or {} -- needed for silent/noerr/noout/nonl directives, also needed to be defined before `require` in such case
-- Low-level logger: writes "[HTMLParser] [<LEVEL>] <formatted message>" to stdout
-- for level "i" and to stderr for every other level.
--   l   - one-letter level tag ("i", "e", "d")
--   f   - string.format template
--   ... - values for the template
local prn = opts.silent and noop or function(l,f,...)
	local fd = (l=="i") and "stdout" or "stderr"
	local t = (" [%s] "):format(l:upper())
	-- Line terminator: a string `nonl` is used verbatim (previous behaviour); any other
	-- truthy value (e.g. `true`) just suppresses the newline instead of raising
	-- "attempt to concatenate a boolean value".
	local nl = opts.nonl
	if type(nl) ~= "string" then
		nl = nl and "" or "\n"
	end
	io[fd]:write('[HTMLParser]'..t..f:format(...)..nl)
end
local err = opts.noerr and noop or function(f,...) prn("e",f,...) end -- error messages (stderr)
local out = opts.noout and noop or function(f,...) prn("i",f,...) end -- informational messages (stdout)
-- Current line number at stack level (lvl), default 2; a no-op when the debug library is unavailable
local line = debug and function(lvl) return debug.getinfo(lvl or 2).currentline end or noop
-- Debug messages, enabled by opts.debug; "#LINE#" in the template is replaced with the line number reported by line(3)
local dbg = opts.debug and function(f,...) prn("d",f:gsub("#LINE#",str(line(3))),...) end or noop
-- }}}
-- Requires {{{
local ElementNode = require"htmlparser.ElementNode"
local voidelements = require"htmlparser.voidelements"
--}}}
-- Module table: collects the public API and is returned at the end of the file.
local HtmlParser = {}
--- Parse HTML source into a tree of ElementNode objects.
-- Options are taken from the module-level `opts` table or, failing that, from the
-- global `htmlparser_opts`. Options read here: `looplimit`, `keep_comments`,
-- `keep_danger_placeholders`, `tpl_skip_pattern`, `tpl_marker_pattern`.
-- Note: the selected options table gets its `looplimit` field assigned in place.
-- @param text   HTML source; passed through tostring() before use
-- @param limit  optional iteration cap, applied separately to the main loop, the
--               attribute loop and the tag-closing loop; defaults to opts.looplimit,
--               then the global `htmlparser_looplimit`, then 1000
-- @return the root ElementNode (index 0) created over the (possibly comment-stripped) text
local function parse(text,limit) -- {{{
	-- NOTE(review): `rine` measures length with `#`, so an opts table holding only named
	-- keys looks empty here and the lookup falls through to the global — confirm intended.
	local opts = rine(opts) -- use top-level opts-table (the one, defined before requiring the module), if exists
		or rit(htmlparser_opts) -- or defined after requiring (but before calling `parse`)
		or {} -- fallback otherwise
	opts.looplimit = opts.looplimit or htmlparser_looplimit

	local text = str(text)
	local limit = limit or opts.looplimit or 1000
	local tpl = false -- becomes true once any "<"/">" has been swapped for a placeholder byte

	if not opts.keep_comments then -- Strip (or not) comments {{{
		text = text:gsub("<!%-%-.-%-%->","") -- Many chances commented code will have syntax errors, that'll lead to parser failures
	end -- }}}

	local tpr={} -- placeholder map: tpr["<"] / tpr[">"] = single byte that does not occur in `text`

	if not opts.keep_danger_placeholders then -- {{{ little speedup by cost of potential parsing breakages
		-- search unused "invalid" bytes {{{
		local busy,i={},0;
		repeat -- {{{
			local cc = char(i)
			local c -- bracket that received `cc` on this pass (nil if none); used by the debug output only
			if not(text:match(cc)) then -- {{{
				if not(tpr["<"]) or not(tpr[">"]) then -- {{{
					if not(busy[i]) then -- {{{
						if not(tpr["<"]) then -- {{{
							c = "<"
							tpr["<"] = cc;
						elseif not(tpr[">"]) then
							c = ">"
							tpr[">"] = cc;
						end -- }}}
						busy[i] = true
						dbg("c:{%s}||cc:{%d}||tpr[c]:{%s}",str(c),cc:byte(),str(c and tpr[c]))
						dbg("busy[i]:{%s},i:{%d}",str(busy[i]),i)
						dbg("[FindPH]:#LINE# Success! || i=%d",i)
					else -- if !busy
						dbg("[FindPH]:#LINE# Busy! || i=%d",i)
					end -- if !busy -- }}}
					-- `c` is stringified before formatting: a raw nil is rejected by "%s" on Lua 5.1
					dbg("c:{%s}||cc:{%d}||tpr[c]:{%s}",str(c),cc:byte(),str(c and tpr[c]))
					dbg("%s",str(busy[i]))
				else -- if < or >
					dbg("[FindPH]:#LINE# Done!",i)
					break
				end -- if < or > -- }}}
			else -- text!match(cc)
				dbg("[FindPH]:#LINE# Text contains this byte! || i=%d",i)
			end -- text!match(cc) -- }}}
			-- Candidates are bytes 0..31 and 127..254: after 31 the counter jumps to 127
			local skip=1
			if i==31 then
				skip=96 -- ASCII
			end
			i=i+skip
		until (i==255) -- }}}
		i=nil
		--- }}}

		if not(tpr["<"]) or not(tpr[">"]) then
			err("Impossible to find at least two unused byte codes in this HTML-code. We need it to escape bracket-contained placeholders inside tags.")
			err("Consider enabling 'keep_danger_placeholders' option (to silence this error, if parser wasn't failed with current HTML-code) or manually replace few random bytes, to free up the codes.")
		else
			dbg("[FindPH]:#LINE# Found! || '<'=%d, '>'=%d",tpr["<"]:byte(),tpr[">"]:byte())
		end

--	dbg("tpr[>] || tpr[] || #busy%d")

		-- g {{{
		-- gsub callback helper: receives all captures of a match, replaces "<"/">" inside
		-- capture number `id` with their placeholder bytes (via `tpr`), raises the `tpl`
		-- flag if anything changed, and returns the captures joined back together.
		local function g(id,...)
			local arg={...}
			local orig=arg[id]
			arg[id]=arg[id]:gsub("(.)",tpr)
			if arg[id] ~= orig then
				tpl=true
				dbg("[g]:#LINE# orig: %s", str(orig))
				dbg("[g]:#LINE# replaced: %s",str(arg[id]))
			end
			dbg("[g]:#LINE# called, id: %s, arg[id]: %s, args { "..(("{%s}, "):rep(#arg):gsub(", $","")).." }",id,arg[id],...)
			dbg("[g]:#LINE# concat(arg): %s",table.concat(arg))
			return table.concat(arg)
		end
		-- g }}}

		-- tpl-placeholders and attributes {{{
		text=text
			:gsub(
				"(=[%s]-)".. -- only match attr.values, and not random strings between two random apostrophs
				"(%b'')",
				function(...)return g(2,...)end
			)
			:gsub(
				"(=[%s]-)".. -- same for "
				'(%b"")',
				function(...)return g(2,...)end
			) -- Escape "<"/">" inside attr.values (see issue #50)
			:gsub(
				"(<".. -- Match "<",
				(opts.tpl_skip_pattern or "[^!]").. -- with exclusion pattern (for example, to ignore comments, which aren't template placeholders, but can legally contain "<"/">" inside.
				")([^>]+)".. -- If matched, we want to escape '<'s if we meet them inside tag
				"(>)",
				function(...)return g(2,...)end
			)
			:gsub(
				"("..
				(tpr["<"] or "__FAILED__").. -- Here we search for "<", we escaped in previous gsub (and don't break things if we have no escaping replacement)
				")("..
				(opts.tpl_marker_pattern or "[^%w%s]").. -- Capture templating symbol
				")([%g%s]-)".. -- match placeholder's content
				"(%2)(>)".. -- placeholder's tail
				"([^>]*>)", -- remainings
				function(...)return g(5,...)end
			)
		-- }}}
	end -- }}}

	local index = 0
	local root = ElementNode:new(index, str(text))
	-- node: element the next tag is attached to; descend: flag handed to ElementNode:new;
	-- tpos: search position in root._text; opentags: per-name stacks of not-yet-closed tags
	local node, descend, tpos, opentags = root, true, 1, {}

	while true do -- MainLoop {{{
		if index == limit then -- {{{
			err("Main loop reached loop limit (%d). Consider either increasing it or checking HTML-code for syntax errors", limit)
			break
		end -- }}}
		-- openstart/tpos Definitions {{{
		local openstart, name
		openstart, tpos, name = root._text:find(
			"<" ..        -- an uncaptured starting "<"
			"([%w-]+)" .. -- name = the first word, directly following the "<"
			"[^>]*>",     -- include, but not capture everything up to the next ">"
		tpos)
		dbg("[MainLoop]:#LINE# openstart=%s || tpos=%s || name=%s",str(openstart),str(tpos),str(name))
		-- }}}
		if not name then break end -- no further opening tag: parsing is finished
		-- Some more vars {{{
		index = index + 1
		local tag = ElementNode:new(index, str(name), (node or {}), descend, openstart, tpos)
		node = tag
		local tagloop -- number of attributes read so far (nil until the first one)
		local tagst, apos = tag:gettext(), 1
		-- }}}
		while true do -- TagLoop {{{
			dbg("[TagLoop]:#LINE# tag.name=%s, tagloop=%s",str(tag.name),str(tagloop))
			if tagloop == limit then -- {{{
				err("Tag parsing loop reached loop limit (%d). Consider either increasing it or checking HTML-code for syntax errors", limit)
				break
			end -- }}}
			-- Attrs {{{
			local start, k, eq, quote, v, zsp
			start, apos, k, zsp, eq, zsp, quote = tagst:find(
				"%s+" ..         -- some uncaptured space
				"([^%s=/>]+)" .. -- k = an unspaced string up to an optional "=" or the "/" or ">"
				"([%s]-)"..      -- zero or more spaces
				"(=?)" ..        -- eq = the optional; "=", else ""
				"([%s]-)"..      -- zero or more spaces
				[=[(['"]?)]=],      -- quote = an optional "'" or '"' following the "=", or ""
			apos)
			dbg("[TagLoop]:#LINE# start=%s || apos=%s || k=%s || zsp='%s' || eq='%s', quote=[%s]",str(start),str(apos),str(k),str(zsp),str(eq),str(quote))
			-- }}}
			if not k or k == "/>" or k == ">" then break end
			-- Pattern {{{
			if eq == "=" then
				local pattern = "=([^%s>]*)" -- unquoted value: everything up to whitespace or ">"
				if quote ~= "" then
					pattern = quote .. "([^" .. quote .. "]*)" .. quote -- quoted value: everything up to the matching quote
				end
				start, apos, v = tagst:find(pattern, apos)
				dbg("[TagLoop]:#LINE# start=%s || apos=%s || v=%s || pattern=%s",str(start),str(apos),str(v),str(pattern))
			end
			-- }}}
			v=v or ""
			if tpl then -- {{{
				-- put the real "<"/">" back into the attribute value
				for rk,rv in pairs(tpr) do
					v = v:gsub(rv,rk)
					dbg("[TagLoop]:#LINE# rv=%s || rk=%s",str(rv),str(rk))
				end
			end -- }}}

			dbg("[TagLoop]:#LINE# k=%s || v=%s",str(k),str(v))
			tag:addattribute(k, v)
			tagloop = (tagloop or 0) + 1
		end
		-- }}}
		if voidelements[tag.name:lower()] then -- {{{
			descend = false
			tag:close()
		else
			descend = true
			opentags[tag.name] = opentags[tag.name] or {}
			table.insert(opentags[tag.name], tag)
		end
		-- }}}
		local closeend = tpos
		local closingloop -- number of closing tags consumed after this opening tag (nil until the first one)
		while true do -- TagCloseLoop {{{
			-- Can't remember why did I add that, so comment it for now (and not remove), in case it will be needed again
			-- (although, it causes #59 and #60, so it will anyway be needed to rework)
			-- if voidelements[tag.name:lower()] then break end -- already closed
			if closingloop == limit then
				err("Tag closing loop reached loop limit (%d). Consider either increasing it or checking HTML-code for syntax errors", limit)
				break
			end

			local closestart, closing, closename
			closestart, closeend, closing, closename = root._text:find("[^<]*<(/?)([%w-]+)", closeend)
			dbg("[TagCloseLoop]:#LINE# closestart=%s || closeend=%s || closing=%s || closename=%s",str(closestart),str(closeend),str(closing),str(closename))

			-- stop when nothing matched or when the next tag is an opening one (no "/")
			if not closing or closing == "" then break end

			tag = table.remove(opentags[closename] or {}) or tag -- kludges for the cases of closing void or non-opened tags
			closestart = root._text:find("<", closestart)
			dbg("[TagCloseLoop]:#LINE# closestart=%s",str(closestart))
			tag:close(closestart, closeend + 1)
			node = tag.parent
			descend = true
			closingloop = (closingloop or 0) + 1
		end -- }}}
	end -- }}}
	if tpl then -- {{{
		-- restore the real "<"/">" in the stored source text
		dbg("tpl")
		for k,v in pairs(tpr) do
			root._text = root._text:gsub(v,k)
		end
	end -- }}}
	return root
end -- }}}
-- Expose `parse` as the module's only public function and hand the module table to `require`.
HtmlParser.parse = parse
return HtmlParser