--- Insert one hyphenation pattern into a hyphenator's trie.
-- The non-digit characters of the pattern form the path through the trie
-- (missing nodes are created on the way). The node reached at the end of the
-- path receives a fresh list of weights under the key "_": each digit
-- contributes its numeric value, and each non-digit character that does not
-- directly follow a digit contributes a 0.
-- @tparam table hyphenator Hyphenator whose `trie` table is extended in place.
-- @tparam string pattern Pattern text; it is split with SU.splitUtf8 and
--   every digit character is treated as its own weight.
local function addPattern (hyphenator, pattern)
   local chars = SU.splitUtf8(pattern)

   -- Walk (and grow) the trie along the non-digit characters.
   local node = hyphenator.trie
   for _, char in ipairs(chars) do
      if not char:find("%d") then
         local child = node[char]
         if not child then
            child = {}
            node[char] = child
         end
         node = child
      end
   end

   -- Collect the weights; any list already stored on this node is replaced.
   local weights = {}
   node["_"] = weights
   local afterDigit = false
   for _, char in ipairs(chars) do
      if char:find("%d") then
         afterDigit = true
         weights[#weights + 1] = tonumber(char)
      elseif afterDigit then
         -- The slot in front of this character was already filled by the digit.
         afterDigit = false
      else
         weights[#weights + 1] = 0
      end
   end
end
--- Register a hyphenation exception on a hyphenator.
-- The exception is written with "-" at every allowed break point (for
-- example "ta-ble"). It is stored in `hyphenator.exceptions` under the word
-- with the hyphens removed, as a table of flags in which index i + 1 holds 1
-- when a break is allowed after the i-th character of the word and 0
-- otherwise (index 1 is only set when the exception starts with a hyphen).
--
-- The key is lowercased because SILE._hyphenate looks exceptions up by the
-- lowercased word; without this an exception containing an uppercase letter
-- could never match.
-- @tparam table hyphenator Hyphenator whose `exceptions` table is updated.
-- @tparam string exception The word with "-" marking the break points.
local function registerException (hyphenator, exception)
   -- gsub also returns a match count; the single local drops it.
   local stripped = exception:gsub("%-", "")
   local text = luautf8.lower(stripped)
   local points = {}
   hyphenator.exceptions[text] = points
   local j = 1
   for _, bit in ipairs(SU.splitUtf8(exception)) do
      if bit == "-" then
         -- A hyphen overwrites the flag of the character just before it.
         points[j] = 1
      else
         j = j + 1
         points[j] = 0
      end
   end
end
--- Fill a hyphenator with the patterns and exceptions of a language.
-- Asks SILE.languageSupport to load the language, then reads its entry from
-- SILE.hyphenator.languages. When there is no entry a message is printed and
-- nothing else happens. Otherwise every pattern is added to the trie and
-- every exception is registered; an entry without an `exceptions` field gets
-- an empty table assigned to it.
-- @tparam table hyphenator Hyphenator to populate.
-- @tparam string language Key of the language in SILE.hyphenator.languages.
local function loadPatterns (hyphenator, language)
   SILE.languageSupport.loadLanguage(language)
   local languageset = SILE.hyphenator.languages[language]
   if not languageset then
      print("No patterns for language " .. language)
      return
   end

   for _, pattern in ipairs(languageset.patterns) do
      addPattern(hyphenator, pattern)
   end

   local exceptions = languageset.exceptions
   if not exceptions then
      exceptions = {}
      languageset.exceptions = exceptions
   end
   for _, exception in ipairs(exceptions) do
      registerException(hyphenator, exception)
   end
end
--- Split a word into hyphenation segments.
-- Words with fewer than `self.minWord` characters are returned as a single
-- segment. If the lowercased word has an entry in `self.exceptions`, that
-- entry supplies the break points as-is. Otherwise the lowercased word,
-- wrapped in "." markers, is matched against every pattern in `self.trie`,
-- keeping the highest weight seen for each position; then the first
-- `self.leftmin` and the last `self.rightmin + 1` entries of the weight list
-- are reset to 0. A break is placed after character i when entry i + 1 is odd.
-- @tparam table self Hyphenator with `minWord`, `leftmin`, `rightmin`,
--   `trie` and `exceptions` fields.
-- @tparam string text The word to hyphenate.
-- @treturn table List of segments which concatenate back to `text`.
SILE._hyphenate = function (self, text)
   if luautf8.len(text) < self.minWord then
      return { text }
   end

   local lowered = luautf8.lower(text)
   local points = self.exceptions[lowered]
   local word = SU.splitUtf8(text)

   if not points then
      points = SU.map(function ()
         return 0
      end, word)
      local work = SU.map(luautf8.lower, word)
      table.insert(work, ".")
      table.insert(work, 1, ".")
      -- Extra leading entry: points[m] now describes the gap before word[m].
      table.insert(points, 1, 0)

      -- Try to match a pattern starting at every position of the marked word.
      for start = 1, #work do
         local node = self.trie
         for pos = start, #work do
            node = node[work[pos]]
            if not node then
               break
            end
            local weights = node["_"]
            if weights then
               for k = 1, #weights do
                  local slot = start + k - 2
                  local current = points[slot]
                  -- Slots outside the weight list are nil and are skipped.
                  if current and current < weights[k] then
                     points[slot] = weights[k]
                  end
               end
            end
         end
      end

      -- Suppress breaks too close to either end of the word.
      for i = 1, self.leftmin do
         points[i] = 0
      end
      local last = #points
      for i = last - self.rightmin, last do
         points[i] = 0
      end
   end

   -- Cut the original (not lowercased) word after every odd-valued gap.
   local pieces = {}
   local current = ""
   for i = 1, #word do
      current = current .. word[i]
      local weight = points[i + 1]
      if weight and weight % 2 == 1 then
         pieces[#pieces + 1] = current
         current = ""
      end
   end
   pieces[#pieces + 1] = current
   return pieces
end
-- Namespace table for the hyphenation data below.
SILE.hyphenator = {}
-- Per-language entries, indexed by the language value passed around in this
-- file. An entry is either a table (read here for `patterns`, `exceptions`
-- and `hyphenateSegments`) or a function that is called with the node in
-- place of the pattern-based hyphenation.
SILE.hyphenator.languages = {}
-- Hyphenator state per language (minWord, leftmin, rightmin, trie,
-- exceptions), created on first use by initHyphenator.
SILE._hyphenators = {}
--- Default `hyphenateSegments` callback for a language.
-- Shapes the "font.hyphenchar" setting with the node's options and wraps the
-- result as the prebreak of a discretionary node. The segment list is handed
-- back untouched.
-- @tparam table node Node being hyphenated; only `node.options` is read.
-- @tparam table segments Segments of the word.
-- @param _ Unused third argument (callers pass the segment index).
-- @return The discretionary node, followed by `segments`.
local function defaultHyphenateSegments (node, segments, _)
   local hyphenchar = SILE.settings:get("font.hyphenchar")
   local prebreak = SILE.shaper:createNnodes(hyphenchar, node.options)
   local discretionary = SILE.types.node.discretionary({ prebreak = prebreak })
   return discretionary, segments
end
--- Make sure a hyphenator exists for a language.
-- On first use a hyphenator with the default limits (minWord 5, leftmin 2,
-- rightmin 2) is stored in SILE._hyphenators and then filled by loadPatterns.
-- On every call, if the language has an entry in SILE.hyphenator.languages
-- that lacks a `hyphenateSegments` callback, the default one is installed.
-- @tparam string lang Key of the language.
local initHyphenator = function (lang)
   if not SILE._hyphenators[lang] then
      local hyphenator = { minWord = 5, leftmin = 2, rightmin = 2, trie = {}, exceptions = {} }
      -- Stored before loading, so the entry exists even if loading stops early.
      SILE._hyphenators[lang] = hyphenator
      loadPatterns(hyphenator, lang)
   end

   local languageset = SILE.hyphenator.languages[lang]
   if languageset and not languageset.hyphenateSegments then
      languageset.hyphenateSegments = defaultHyphenateSegments
   end
end
--- Replace a node by its hyphenated pieces where possible.
-- A node without a language, that is not an nnode, or that has no text is
-- returned unchanged as a one-element list. When the language's entry in
-- SILE.hyphenator.languages is a function, that function is called with the
-- node and its results are returned. Otherwise the node text is split with
-- SILE._hyphenate; a word with more than one segment is turned into shaped
-- nnodes separated by the nodes the language's `hyphenateSegments` callback
-- produces.
-- @tparam table node Node to hyphenate.
-- @treturn table List of nodes taking the place of `node`.
local hyphenateNode = function (node)
   local language = node.language
   if not language or not node.is_nnode or not node.text then
      return { node }
   end

   local custom = SILE.hyphenator.languages[language]
   if type(custom) == "function" then
      return custom(node)
   end

   initHyphenator(language)
   local segments = SILE._hyphenate(SILE._hyphenators[language], node.text)
   if #segments <= 1 then
      return { node }
   end

   -- Looked up only now: initHyphenator may have just installed the callback.
   local hyphenateSegments = SILE.hyphenator.languages[language].hyphenateSegments
   local pieces = {}
   local hyphen
   -- ipairs keeps walking the table returned by SILE._hyphenate even though
   -- `segments` is rebound to the callback's result inside the loop.
   for index, segment in ipairs(segments) do
      if segment == "" then
         SU.dump({ index, segments })
         SU.error("No hyphenation segment should ever be empty", true)
      end
      hyphen, segments = hyphenateSegments(node, segments, index)
      local shaped = SILE.shaper:createNnodes(segments[index], node.options)
      for _, shapedNode in ipairs(shaped) do
         if shapedNode.is_nnode then
            shapedNode.parent = node
            pieces[#pieces + 1] = shapedNode
         end
      end
      -- No break node after the final segment.
      if index < #segments then
         hyphen.parent = node
         pieces[#pieces + 1] = hyphen
      end
   end

   node.children = pieces
   node.hyphenated = false
   node.done = false
   return pieces
end
--- Return a word with its hyphenation points made visible.
-- The word is split by the hyphenator of the given language and the segments
-- are joined with the "font.hyphenchar" setting via SU.concat.
-- @tparam string word Word to hyphenate.
-- @tparam[opt="en"] string language Key of the language.
-- @return Whatever SU.concat returns for the segments and the hyphen character.
SILE.showHyphenationPoints = function (word, language)
   local lang = language or "en"
   initHyphenator(lang)
   local segments = SILE._hyphenate(SILE._hyphenators[lang], word)
   return SU.concat(segments, SILE.settings:get("font.hyphenchar"))
end
--- Hyphenate every node of a list.
-- Each node is passed to hyphenateNode and the nodes it returns are appended,
-- in order, to a new list. A node for which nothing is returned contributes
-- nothing.
-- @tparam table nodelist Sequence of nodes.
-- @treturn table New flat sequence of nodes.
SILE.hyphenate = function (nodelist)
   local hyphenated = {}
   for _, node in ipairs(nodelist) do
      -- `or {}` turns a missing result into an empty contribution.
      for _, piece in ipairs(hyphenateNode(node) or {}) do
         hyphenated[#hyphenated + 1] = piece
      end
   end
   return hyphenated
end
-- Command "hyphenator:add-exceptions": registers every string token found in
-- the first content item as a hyphenation exception. The language is taken
-- from the `lang` option, else from the "document.language" setting, else
-- "und".
SILE.registerCommand("hyphenator:add-exceptions", function (options, content)
   local language = options.lang
   if not language then
      language = SILE.settings:get("document.language") or "und"
   end
   SILE.languageSupport.loadLanguage(language)
   initHyphenator(language)

   local hyphenator = SILE._hyphenators[language]
   for token in SU.gtoke(content[1]) do
      local exception = token.string
      -- Tokens without a `string` field are skipped.
      if exception then
         registerException(hyphenator, exception)
      end
   end
end, nil, nil, true)