using System;
using System.Text.RegularExpressions;
using System.Collections.Generic;
/// <summary>
/// A small regular-expression driven lexer. Input text is cut into pieces with
/// <see cref="operators_re"/>, and <see cref="next"/> classifies each non-blank
/// piece as a Keyword, StringLiteral, Alphanumeric, Integer, Float or Symbol
/// token. Input comes either from a single string or from the lines of a file.
/// </summary>
public class simpleLexer : absLexer
{
    /// <summary>
    /// Pattern handed to Regex.Split. Its single capturing group matches the
    /// one-character operators, a whitespace character, brackets, braces, '='
    /// and double-quoted strings. Because the group is capturing, the matched
    /// delimiters are returned as elements of the split result.
    /// </summary>
    public static string operators_re =
        "([()\\+\\-\\*/:;%^<>!,.$~]|\\s|\\[|\\]|{|}|=|\"[^\"]*\")";

    /// <summary>
    /// Optional marker appended (after a space) to every line loaded by
    /// newline(); ignored when null or empty.
    /// </summary>
    public string endofline = null;

    /// <summary>Lines whose first non-blank character is this are skipped by newline().</summary>
    public char commentchar = '#';

    // Defaults registered by every constructor.
    static readonly string[] keywords = {"if","else","while","let","for","lambda"};
    static readonly string[] multichar_syms = {"==","<=",">=","!=", "++","--","&&","||"};

    // Registered keywords.
    HashSet<string> kwhash = new HashSet<string>();

    // First character of a multi-character symbol -> every registered remainder.
    // A set is kept per character so that symbols sharing a first character
    // (for example "<=" and "<<") do not overwrite one another.
    Dictionary<char,HashSet<string>> mshash = new Dictionary<char,HashSet<string>>();

    /// <summary>Registers <paramref name="kw"/> so that next() reports it as a Keyword token.</summary>
    public void addKeyword(string kw)
    {
        kwhash.Add(kw);
    }

    /// <summary>
    /// Registers a multi-character symbol. Strings shorter than two characters
    /// are ignored. The symbol is recognised by next() when a one-character
    /// symbol equal to ms[0] is followed by a piece equal to the rest of ms.
    /// </summary>
    public void addMultichar(string ms)
    {
        if (ms.Length <= 1)
        {
            return;
        }
        HashSet<string> tails;
        if (!mshash.TryGetValue(ms[0], out tails))
        {
            tails = new HashSet<string>();
            mshash[ms[0]] = tails;
        }
        tails.Add(ms.Substring(1));
    }

    /// <summary>Input lines; a single element when lexing a string.</summary>
    protected string[] lines;

    /// <summary>Pieces of the current line as produced by Regex.Split (delimiters included).</summary>
    protected string[] ssplit;

    /// <summary>Index into ssplit of the next piece to examine.</summary>
    protected int ti = 0;

    /// <summary>Number of entries of lines consumed so far (1-based number of the current line).</summary>
    protected int linenumber = 0;

    /// <summary>Creates a lexer over the single string <paramref name="s"/>.</summary>
    public simpleLexer(string s)
    {
        loadDefaults();
        lines = new string[] { s };
        linenumber = 1;
        ssplit = Regex.Split(s, operators_re);
    }

    /// <summary>
    /// Creates a lexer over the lines of the file <paramref name="filename"/>.
    /// <paramref name="eof"/> is stored in <see cref="endofline"/>. The first
    /// usable line, if any, is loaded immediately.
    /// </summary>
    public simpleLexer(string filename, string eof)
    {
        loadDefaults();
        endofline = eof;
        lines = System.IO.File.ReadAllLines(filename);
        linenumber = 0;
        newline();
    }

    // Registers the built-in keywords and multi-character symbols.
    void loadDefaults()
    {
        foreach (string kw in keywords)
        {
            addKeyword(kw);
        }
        foreach (string ms in multichar_syms)
        {
            addMultichar(ms);
        }
    }

    // Advances to the next line that is neither blank nor a comment, splits it
    // into ssplit and resets ti. Returns false when no such line remains.
    bool newline()
    {
        string s = "";
        while (linenumber < lines.Length
               && (s == null || s.Length < 1 || s[0] == commentchar))
        {
            s = lines[linenumber++];
            if (s != null)
            {
                s = s.Trim();
            }
        }
        if (string.IsNullOrEmpty(s) || s[0] == commentchar)
        {
            return false;
        }
        // Any non-empty marker is appended, including one-character markers.
        if (!string.IsNullOrEmpty(endofline))
        {
            s = s + " " + endofline;
        }
        ssplit = Regex.Split(s, operators_re);
        ti = 0;
        return true;
    }

    // True for the ASCII letters A-Z and a-z.
    static bool alphabetical(char x) => (x >= 'A' && x <= 'Z') || (x >= 'a' && x <= 'z');

    /// <summary>
    /// Returns the next token, or null when the input is exhausted (or when no
    /// line was ever loaded). Blank pieces are skipped and further lines are
    /// loaded as needed.
    /// </summary>
    public virtual LexToken next()
    {
        if (ssplit == null)
        {
            return null;
        }
        string tok = "";
        while (tok == null || tok.Length < 1)
        {
            if (ti >= ssplit.Length)
            {
                if (!newline())
                {
                    return null;
                }
            }
            else
            {
                tok = ssplit[ti++].Trim();
            }
        }
        return classify(tok);
    }

    // Decides the token category of the non-empty piece tok. May advance ti
    // when tok turns out to be the first character of a multi-character symbol.
    LexToken classify(string tok)
    {
        if (kwhash.Contains(tok))
        {
            return new LexToken("Keyword", tok);
        }
        // Needs both an opening and a closing quote, so a lone '"' is not a literal.
        if (tok.Length >= 2 && tok[0] == '\"' && tok[tok.Length - 1] == '\"')
        {
            return new LexToken("StringLiteral", tok);
        }
        if (alphabetical(tok[0]))
        {
            return new LexToken("Alphanumeric", tok);
        }
        if (tok[0] == '.' || (tok[0] >= '0' && tok[0] <= '9'))
        {
            return number(tok);
        }

        HashSet<string> tails;
        if (tok.Length == 1 && ti < ssplit.Length && mshash.TryGetValue(tok[0], out tails))
        {
            // Regex.Split leaves empty strings between adjacent delimiters;
            // step over them to reach the piece that really follows tok.
            string nexttok = ssplit[ti];
            while ((nexttok == null || nexttok.Length < 1) && ti < ssplit.Length - 1)
            {
                nexttok = ssplit[++ti];
            }
            if (nexttok != null && tails.Contains(nexttok))
            {
                ti++;
                return new LexToken("Symbol", tok + nexttok);
            }
        }
        return new LexToken("Symbol", tok);
    }

    // Builds an Integer token when tok parses as an int, otherwise a Float
    // token when it parses as a double, otherwise a Symbol token. Parsing uses
    // the invariant culture so results do not depend on the machine's locale.
    LexToken number(string tok)
    {
        System.Globalization.CultureInfo inv = System.Globalization.CultureInfo.InvariantCulture;
        int n;
        if (int.TryParse(tok, System.Globalization.NumberStyles.Integer, inv, out n))
        {
            return new LexToken("Integer", n);
        }
        double d;
        if (double.TryParse(tok,
                            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                            inv, out d))
        {
            return new LexToken("Float", d);
        }
        return new LexToken("Symbol", tok);
    }

    /// <summary>Returns the current value of linenumber.</summary>
    public virtual int linenum() { return linenumber; }

    /// <summary>Returns <paramref name="t"/> unchanged; virtual so subclasses can remap tokens.</summary>
    public virtual LexToken translate_token(LexToken t) { return t; }

    /// <summary>
    /// Demo driver: lexes argv[0] (or a built-in sample string) and prints each
    /// token, then does the same for the file lexertest.txt when it exists.
    /// </summary>
    public static void Main(string[] argv)
    {
        string ax = "while (1) fork();";
        if (argv.Length > 0)
        {
            ax = argv[0];
        }
        absLexer scanner = new simpleLexer(ax);
        LexToken token;
        do
        {
            token = scanner.next();
            if (token != null)
            {
                Console.WriteLine("LexToken: " + token);
            }
        }
        while (token != null);

        Console.WriteLine("\ntesting file input from lexertest.txt..");
        // Skip the file demo instead of crashing when the sample file is absent.
        if (!System.IO.File.Exists("lexertest.txt"))
        {
            Console.WriteLine("lexertest.txt not found, skipping file test");
            return;
        }
        scanner = new simpleLexer("lexertest.txt", null);
        do
        {
            token = scanner.next();
            if (token != null)
            {
                Console.WriteLine("Token from file: " + token);
            }
        }
        while (token != null);
        Console.WriteLine("line number at " + scanner.linenum());
    }
}