-- Luau lexer module: converts source text into a stream of classified tokens.
local lexer = {}
-- Prefix anchors each pattern at the current scan position and absorbs
-- leading whitespace/control characters; Suffix absorbs trailing ones;
-- Cleaner strips that padding off a matched token for bare-word lookups.
local Prefix, Suffix, Cleaner = "^[%c%s]*", "[%c%s]*", "[%c%s]+"
-- A multi-byte UTF-8 sequence: a first byte followed by continuation bytes.
local UNICODE = "[%z\x01-\x7F\xC2-\xF4][\x80-\xBF]+"
-- Number literal forms (underscore digit separators are Luau syntax):
local NUMBER_A = "0[xX][%da-fA-F_]+" -- hexadecimal
local NUMBER_B = "0[bB][01_]+" -- binary
local NUMBER_C = "%d+%.?%d*[eE][%+%-]?%d+" -- scientific notation
local NUMBER_D = "%d+[%._]?[%d_eE]*" -- plain decimal / float
-- Runs of operator characters; square brackets are matched separately so
-- they do not swallow long-bracket string/comment openers.
local OPERATORS = "[:;<>/~%*%(%)%-={},%.#%^%+%%]+"
local BRACKETS = "[%[%]]+" local IDEN = "[%a_][%w_]*"
-- Quoted strings: empty (""/''), single-line with escaped-quote handling,
-- and `...` backtick interpolation strings.
local STRING_EMPTY = "(['\"])%1" local STRING_PLAIN = "(['\"])[^\n]-([^\\]%1)" local STRING_INTER = "`[^\n]-`"
-- Unterminated strings, long-bracket [[...]] strings/comments (the (=*)
-- capture enforces matching '=' levels), and line/unterminated comments.
local STRING_INCOMP_A = "(['\"]).-\n" local STRING_INCOMP_B = "(['\"])[^\n]*" local STRING_MULTI = "%[(=*)%[.-%]%1%]" local STRING_MULTI_INCOMP = "%[=*%[.-.*" local COMMENT_MULTI = "%-%-%[(=*)%[.-%]%1%]" local COMMENT_MULTI_INCOMP = "%-%-%[=*%[.-.*" local COMMENT_PLAIN = "%-%-.-\n" local COMMENT_INCOMP = "%-%-.*"
-- Language data (keywords, builtin globals, library member tables) comes
-- from the sibling "language" ModuleScript.
local lang = require(script.language)
local lua_keyword = lang.keyword
local lua_builtin = lang.builtin
local lua_libraries = lang.libraries
-- Re-export the language tables for consumers of this module.
lexer.language = lang
-- Ordered (pattern, token) priority list: the scanner tries these in
-- sequence at each position and takes the first match, so more specific
-- forms must precede more general ones, with "^." as the final catch-all.
local lua_matches = {
{ Prefix .. IDEN .. Suffix, "var" }, -- identifier; refined later to keyword/builtin/iden
{ Prefix .. NUMBER_A .. Suffix, "number" }, -- hexadecimal
{ Prefix .. NUMBER_B .. Suffix, "number" }, -- binary
{ Prefix .. NUMBER_C .. Suffix, "number" }, -- scientific notation
{ Prefix .. NUMBER_D .. Suffix, "number" }, -- plain decimal / float
{ Prefix .. STRING_EMPTY .. Suffix, "string" },
{ Prefix .. STRING_PLAIN .. Suffix, "string" },
{ Prefix .. STRING_INCOMP_A .. Suffix, "string" }, -- unterminated, runs to newline
{ Prefix .. STRING_INCOMP_B .. Suffix, "string" }, -- unterminated, runs to end of input
{ Prefix .. STRING_MULTI .. Suffix, "string" }, -- long-bracket string
{ Prefix .. STRING_MULTI_INCOMP .. Suffix, "string" }, -- unterminated long-bracket string
{ Prefix .. STRING_INTER .. Suffix, "string_inter" }, -- backtick interpolation string
{ Prefix .. COMMENT_MULTI .. Suffix, "comment" }, -- long-bracket comment
{ Prefix .. COMMENT_MULTI_INCOMP .. Suffix, "comment" }, -- unterminated long-bracket comment
{ Prefix .. COMMENT_PLAIN .. Suffix, "comment" }, -- line comment ending in newline
{ Prefix .. COMMENT_INCOMP .. Suffix, "comment" }, -- line comment at end of input
{ Prefix .. OPERATORS .. Suffix, "operator" },
{ Prefix .. BRACKETS .. Suffix, "operator" },
{ Prefix .. UNICODE .. Suffix, "iden" }, -- multi-byte UTF-8 sequence
{ "^.", "iden" }, -- catch-all: consume a single character
}
-- Split the ordered match list into two parallel arrays so the scanner can
-- look up a pattern and its token name by the same position.
local PATTERNS, TOKENS = {}, {}
for position, match in ipairs(lua_matches) do
	PATTERNS[position] = match[1]
	TOKENS[position] = match[2]
end
-- Scans the source string `s` and returns an iterator function that yields
-- one (token, content) pair per call until the source is exhausted. Token
-- names produced: "keyword", "builtin", "iden", "number", "string",
-- "comment", "operator". Implemented on a coroutine so a single match
-- (an interpolated string) can yield several tokens.
function lexer.scan(s: string)
local index = 1
local size = #s
-- Sliding window of the last three raw matches plus the last token type,
-- used to recognize `library.member` chains (e.g. math.floor -> builtin).
local previousContent1, previousContent2, previousContent3, previousToken = "", "", "", ""
local thread = coroutine.create(function()
while index <= size do
local matched = false
-- Try patterns in priority order; the first hit at `index` wins.
for tokenType, pattern in ipairs(PATTERNS) do
local start, finish = string.find(s, pattern, index)
if start == nil then
continue
end
index = finish + 1
matched = true
local content = string.sub(s, start, finish)
local rawToken = TOKENS[tokenType]
local processedToken = rawToken
-- "var" is provisional: decide whether the identifier is a keyword,
-- a builtin global, a known library member, or a plain identifier.
if rawToken == "var" then
local cleanContent = string.gsub(content, Cleaner, "")
if lua_keyword[cleanContent] then
processedToken = "keyword"
elseif lua_builtin[cleanContent] then
processedToken = "builtin"
elseif string.find(previousContent1, "%.[%s%c]*$") and previousToken ~= "comment" then
-- Preceded by a dot: check whether the token before the dot names a
-- known library (e.g. `math` in `math.floor`). The previousContent3
-- check rejects deeper chains such as `foo.math.floor`.
local parent = string.gsub(previousContent2, Cleaner, "")
local lib = lua_libraries[parent]
if lib and lib[cleanContent] and not string.find(previousContent3, "%.[%s%c]*$") then
processedToken = "builtin"
else
processedToken = "iden"
end
else
processedToken = "iden"
end
elseif rawToken == "string_inter" then
-- Backtick string: with no unescaped "{" it is an ordinary string;
-- otherwise split into literal chunks and embedded expressions.
if not string.find(content, "[^\\]{") then
processedToken = "string"
else
-- nil suppresses the single yield at the bottom of the loop;
-- the pieces are yielded right here instead.
processedToken = nil
local isString = true
local subIndex = 1
local subSize = #content
while subIndex <= subSize do
-- Find the next unescaped "{" or "}" boundary.
local subStart, subFinish = string.find(content, "^.-[^\\][{}]", subIndex)
if subStart == nil then
-- No more braces: the remainder is literal string text.
coroutine.yield("string", string.sub(content, subIndex))
break
end
if isString then
subIndex = subFinish + 1
coroutine.yield("string", string.sub(content, subStart, subFinish))
isString = false
else
-- Deliberately subFinish (not +1): the closing "}" is re-scanned
-- as the start of the following literal chunk.
subIndex = subFinish
local subContent = string.sub(content, subStart, subFinish - 1)
-- Recursively lex the embedded expression, forwarding its tokens.
for innerToken, innerContent in lexer.scan(subContent) do
coroutine.yield(innerToken, innerContent)
end
isString = true
end
end
end
end
-- Shift the context window before yielding control.
previousContent3 = previousContent2
previousContent2 = previousContent1
previousContent1 = content
previousToken = processedToken or rawToken
if processedToken then
coroutine.yield(processedToken, content)
end
break
end
if not matched then
-- Defensive: the "^." fallback should always match; bail out rather
-- than loop forever if nothing consumed input.
return
end
end
return
end)
-- Iterator: resume the scanner; return nothing once it is dead or errors.
return function()
if coroutine.status(thread) == "dead" then
return
end
local success, token, content = coroutine.resume(thread)
if success and token then
return token, content
end
return
end
end
-- Builds a stateful navigator over lexer.scan results with caching and
-- arbitrary lookahead/lookbehind. Returned object:
--   nav:SetSource(src) -- (re)binds the navigator to a source string
--   nav.Next()         -- advances one token; returns (token, content) or nothing
--   nav.Peek(n)        -- looks n tokens ahead (or behind, n < 0) without
--                      -- advancing; returns (token, content) or nothing
--   nav:Destroy()      -- releases all state; nav is unusable afterwards
function lexer.navigator()
	local nav = {
		Source = "",
		TokenCache = table.create(50), -- [i] = { token, content }, 1-based
		_RealIndex = 0, -- number of tokens the scan thread has produced so far
		_UserIndex = 0, -- position of the consumer (last token served by Next)
		_ScanThread = nil,
	}

	-- Drops every field so the navigator cannot be reused accidentally.
	function nav:Destroy()
		self.Source = nil
		self._RealIndex = nil
		self._UserIndex = nil
		self.TokenCache = nil
		self._ScanThread = nil
	end

	-- Resets the navigator onto a new source string, discarding the cache
	-- and starting a fresh scan coroutine that records tokens as it yields.
	function nav:SetSource(SourceString)
		self.Source = SourceString
		self._RealIndex = 0
		self._UserIndex = 0
		table.clear(self.TokenCache)
		self._ScanThread = coroutine.create(function()
			for Token, Src in lexer.scan(self.Source) do
				self._RealIndex += 1
				self.TokenCache[self._RealIndex] = { Token, Src }
				coroutine.yield(Token, Src)
			end
		end)
	end

	-- Advances the user position by one and returns that token, serving from
	-- the cache when possible and resuming the scan thread otherwise.
	function nav.Next()
		nav._UserIndex += 1
		if nav._RealIndex >= nav._UserIndex then
			-- Already scanned past this point (e.g. by Peek): serve cached.
			return table.unpack(nav.TokenCache[nav._UserIndex])
		end
		if coroutine.status(nav._ScanThread) == "dead" then
			return
		end
		local success, token, src = coroutine.resume(nav._ScanThread)
		if success and token then
			return token, src
		end
		return
	end

	-- Returns the token PeekAmount positions away from the user position
	-- without moving it. Returns nothing when the target position is before
	-- the start of the stream or past its end.
	function nav.Peek(PeekAmount)
		local GoalIndex = nav._UserIndex + PeekAmount
		if nav._RealIndex >= GoalIndex then
			if GoalIndex > 0 then
				return table.unpack(nav.TokenCache[GoalIndex])
			end
			return
		end
		if coroutine.status(nav._ScanThread) == "dead" then
			return
		end
		-- Drive the scan thread forward until GoalIndex tokens exist.
		local token, src = nil, nil
		for _ = 1, GoalIndex - nav._RealIndex do
			local success
			success, token, src = coroutine.resume(nav._ScanThread)
			-- BUGFIX: the original tested `not (success or token)`, which never
			-- fired on a failed resume (the error message bound to `token` is
			-- truthy), so the loop kept resuming a dead coroutine and could
			-- return the error string as a token. Stop as soon as the thread
			-- errors or runs out of tokens, reporting nothing.
			if not success or token == nil then
				return
			end
		end
		return token, src
	end

	return nav
end
return lexer