#ifndef JSONNET_LEXER_H
#define JSONNET_LEXER_H
#include <cassert>
#include <cstdlib>
#include <iostream>
#include <list>
#include <sstream>
#include <string>
#include <vector>
#include "static_error.h"
#include "unicode.h"
/** One element of "fodder": a comment and/or line-break record with layout counters.
 *
 * The constructor checks (via assert, so only in builds without NDEBUG) that the
 * combination of kind, counters and comment lines is well-formed.
 */
struct FodderElement {
    /** The shape of this element; each kind has its own constraints (see constructor). */
    enum Kind {
        LINE_END,      // Carries zero or one comment line.
        INTERSTITIAL,  // Carries exactly one comment; blanks and indent must both be 0.
        PARAGRAPH,     // Carries one or more comment lines.
    };

    Kind kind;

    /** Blank-line counter. NOTE(review): presumably blank lines after the element -- confirm. */
    unsigned blanks;

    /** Indentation counter. NOTE(review): presumably the indent of what follows -- confirm. */
    unsigned indent;

    /** Comment text, one entry per line; the allowed number of entries depends on kind. */
    std::vector<std::string> comment;

    /** Stores the four fields and asserts the per-kind constraints. */
    FodderElement(Kind kind, unsigned blanks, unsigned indent,
                  const std::vector<std::string> &comment)
        : kind(kind), blanks(blanks), indent(indent), comment(comment)
    {
        // Inside this body the names refer to the constructor parameters, which
        // hold the same values as the freshly initialised members.
        switch (kind) {
            case LINE_END:
                assert(comment.size() <= 1);
                break;
            case INTERSTITIAL:
                assert(blanks == 0 && indent == 0 && comment.size() == 1);
                break;
            case PARAGRAPH:
                assert(comment.size() >= 1);
                break;
        }
    }
};
/** Writes a compact debug rendering of a fodder element to `o` and returns `o`.
 *
 * Formats: END(blanks, indent[, comment]), INT(blanks, indent, comment) and
 * PAR(blanks, indent, first-comment...).  Only the first comment line is printed.
 * A kind outside the three known values prints nothing.
 */
static inline std::ostream &operator<<(std::ostream &o, const FodderElement &f)
{
    if (f.kind == FodderElement::LINE_END) {
        o << "END(" << f.blanks << ", " << f.indent;
        // A line end may have no comment at all, so guard the access.
        if (f.comment.size() > 0) {
            o << ", " << f.comment[0];
        }
        o << ")";
    } else if (f.kind == FodderElement::INTERSTITIAL) {
        // comment[0] is read unconditionally here.
        o << "INT(" << f.blanks << ", " << f.indent << ", " << f.comment[0] << ")";
    } else if (f.kind == FodderElement::PARAGRAPH) {
        // comment[0] is read unconditionally here.
        o << "PAR(" << f.blanks << ", " << f.indent << ", " << f.comment[0] << "...)";
    }
    return o;
}
/** An ordered sequence of fodder elements. */
using Fodder = std::vector<FodderElement>;
/** Reports whether `fodder` is non-empty and its last element is not INTERSTITIAL.
 *
 * An empty fodder is never considered to have a clean end of line.
 */
static inline bool fodder_has_clean_endline(const Fodder &fodder)
{
    if (fodder.empty()) {
        return false;
    }
    return fodder.back().kind != FodderElement::INTERSTITIAL;
}
/** Appends `elem` to `a`, adjusting the result around line ends.
 *
 * - If `a` already has a clean end of line (see fodder_has_clean_endline) and
 *   `elem` is a LINE_END:
 *     - with a comment, it is appended as a PARAGRAPH carrying the same
 *       blanks, indent and comment;
 *     - without a comment, it is folded into the last element of `a`: the
 *       indent is overwritten and the blanks are added.
 * - Otherwise `elem` is appended unchanged.  If `elem` is a PARAGRAPH and `a`
 *   has no clean end of line, an empty LINE_END (0 blanks, elem's indent) is
 *   inserted first.
 */
static inline void fodder_push_back(Fodder &a, const FodderElement &elem)
{
    // `a` is not modified before both uses of this flag, so one query suffices.
    const bool ends_cleanly = fodder_has_clean_endline(a);

    if (ends_cleanly && elem.kind == FodderElement::LINE_END) {
        if (elem.comment.empty()) {
            // Nothing to keep but the layout counters: merge into the previous element.
            a.back().indent = elem.indent;
            a.back().blanks += elem.blanks;
        } else {
            // The line end carries a comment: store it as its own paragraph.
            a.emplace_back(FodderElement::PARAGRAPH, elem.blanks, elem.indent, elem.comment);
        }
        return;
    }

    if (!ends_cleanly && elem.kind == FodderElement::PARAGRAPH) {
        // Put an empty line end in front of the paragraph.
        a.emplace_back(FodderElement::LINE_END, 0, elem.indent, std::vector<std::string>());
    }
    a.push_back(elem);
}
/** Returns the concatenation of `a` followed by `b` as a new Fodder.
 *
 * If either input is empty, a copy of the other is returned.  Otherwise the
 * first element of `b` is joined on through fodder_push_back (so it may be
 * merged or converted), and the remaining elements of `b` are appended as is.
 */
static inline Fodder concat_fodder(const Fodder &a, const Fodder &b)
{
    if (a.empty()) {
        return b;
    }
    if (b.empty()) {
        return a;
    }

    Fodder joined(a);
    Fodder::const_iterator it = b.begin();
    // Only the seam between the two sequences needs the careful push.
    fodder_push_back(joined, *it);
    for (++it; it != b.end(); ++it) {
        joined.push_back(*it);
    }
    return joined;
}
/** Prepends the contents of `b` to `a` (via concat_fodder) and leaves `b` empty. */
static inline void fodder_move_front(Fodder &a, Fodder &b)
{
    Fodder combined = concat_fodder(b, a);
    a.swap(combined);
    b.clear();
}
/** Builds a Fodder by pushing `elem` onto an empty sequence with fodder_push_back.
 *
 * Because fodder_push_back is used, the result is not necessarily a single
 * element equal to `elem`.
 */
static inline Fodder make_fodder(const FodderElement &elem)
{
    Fodder result;
    fodder_push_back(result, elem);
    return result;
}
/** Makes sure `fodder` has a clean end of line.
 *
 * If fodder_has_clean_endline(fodder) is already true, nothing happens.
 * Otherwise an empty LINE_END (0 blanks, 0 indent, no comment) is pushed with
 * fodder_push_back.
 */
static inline void ensureCleanNewline(Fodder &fodder)
{
    if (fodder_has_clean_endline(fodder)) {
        return;
    }
    const FodderElement line_end(FodderElement::LINE_END, 0, 0, std::vector<std::string>());
    fodder_push_back(fodder, line_end);
}
/** Returns the number of newlines attributed to a single fodder element.
 *
 * INTERSTITIAL counts 0, LINE_END counts 1, and PARAGRAPH counts its number of
 * comment lines plus its `blanks`.  Any other kind value prints a message to
 * stderr and aborts the process.
 */
static inline int countNewlines(const FodderElement &elem)
{
    if (elem.kind == FodderElement::INTERSTITIAL) {
        return 0;
    }
    if (elem.kind == FodderElement::LINE_END) {
        return 1;
    }
    if (elem.kind == FodderElement::PARAGRAPH) {
        // The sum is computed in the unsigned size type and then converted to int.
        return static_cast<int>(elem.comment.size() + elem.blanks);
    }
    std::cerr << "Unknown FodderElement kind" << std::endl;
    abort();
}
static inline int countNewlines(const Fodder &fodder)
{
int sum = 0;
for (const auto &elem : fodder) {
sum += countNewlines(elem);
}
return sum;
}
static inline std::ostream &operator<<(std::ostream &o, const Fodder &fodder)
{
bool first = true;
for (const auto &f : fodder) {
o << (first ? "[" : ", ");
first = false;
o << f;
}
o << (first ? "[]" : "]");
return o;
}
/** A lexical token: its kind, the fodder attached to it, its text and its source location. */
struct Token {
    /** Every kind of token. */
    enum Kind {
        // Symbols
        BRACE_L,
        BRACE_R,
        BRACKET_L,
        BRACKET_R,
        COMMA,
        DOLLAR,
        DOT,
        PAREN_L,
        PAREN_R,
        SEMICOLON,

        // Tokens whose text varies
        IDENTIFIER,
        NUMBER,
        OPERATOR,
        STRING_DOUBLE,
        STRING_SINGLE,
        STRING_BLOCK,
        VERBATIM_STRING_SINGLE,
        VERBATIM_STRING_DOUBLE,

        // Keywords
        ASSERT,
        ELSE,
        ERROR,
        FALSE,
        FOR,
        FUNCTION,
        IF,
        IMPORT,
        IMPORTSTR,
        IN,
        LOCAL,
        NULL_LIT,
        TAILSTRICT,
        THEN,
        SELF,
        SUPER,
        TRUE,

        // Marks the end of the input
        END_OF_FILE
    };

    Kind kind;

    /** Fodder attached to this token. NOTE(review): presumably the fodder preceding it -- confirm. */
    Fodder fodder;

    /** Text carried by the token; may be empty. */
    std::string data;

    /** NOTE(review): presumably only meaningful for STRING_BLOCK tokens -- confirm with the lexer. */
    std::string stringBlockIndent;

    /** NOTE(review): presumably only meaningful for STRING_BLOCK tokens -- confirm with the lexer. */
    std::string stringBlockTermIndent;

    LocationRange location;

    /** Returns `data` passed through decode_utf8. */
    UString data32() const
    {
        return decode_utf8(data);
    }

    /** Builds a token with every field given explicitly. */
    Token(Kind kind, const Fodder &fodder, const std::string &data,
          const std::string &string_block_indent, const std::string &string_block_term_indent,
          const LocationRange &location)
        : kind(kind),
          fodder(fodder),
          data(data),
          stringBlockIndent(string_block_indent),
          stringBlockTermIndent(string_block_term_indent),
          location(location)
    {
    }

    /** Builds a token from a kind and optional text; all other members are default-initialised. */
    Token(Kind kind, const std::string &data = "") : kind(kind), data(data) {}

    /** Returns a human-readable name for a token kind.
     *
     * Symbols are returned wrapped in double quotes, keywords as their spelling,
     * and the remaining kinds as their enumerator name.  A value that matches no
     * enumerator is reported on stderr and the process is aborted.
     */
    static const char *toString(Kind v)
    {
        switch (v) {
            case BRACE_L: return "\"{\"";
            case BRACE_R: return "\"}\"";
            case BRACKET_L: return "\"[\"";
            case BRACKET_R: return "\"]\"";
            case COMMA: return "\",\"";
            case DOLLAR: return "\"$\"";
            case DOT: return "\".\"";
            case PAREN_L: return "\"(\"";
            case PAREN_R: return "\")\"";
            case SEMICOLON: return "\";\"";

            case IDENTIFIER: return "IDENTIFIER";
            case NUMBER: return "NUMBER";
            case OPERATOR: return "OPERATOR";
            case STRING_DOUBLE: return "STRING_DOUBLE";
            case STRING_SINGLE: return "STRING_SINGLE";
            case STRING_BLOCK: return "STRING_BLOCK";
            case VERBATIM_STRING_SINGLE: return "VERBATIM_STRING_SINGLE";
            case VERBATIM_STRING_DOUBLE: return "VERBATIM_STRING_DOUBLE";

            case ASSERT: return "assert";
            case ELSE: return "else";
            case ERROR: return "error";
            case FALSE: return "false";
            case FOR: return "for";
            case FUNCTION: return "function";
            case IF: return "if";
            case IMPORT: return "import";
            case IMPORTSTR: return "importstr";
            case IN: return "in";
            case LOCAL: return "local";
            case NULL_LIT: return "null";
            case TAILSTRICT: return "tailstrict";
            case THEN: return "then";
            case SELF: return "self";
            case SUPER: return "super";
            case TRUE: return "true";

            case END_OF_FILE: return "end of file";
        }
        // Reached only when v matches none of the enumerators above.
        std::cerr << "INTERNAL ERROR: Unknown token kind: " << v << std::endl;
        std::abort();
    }
};
/** A sequence of tokens, stored as a linked list. */
using Tokens = std::list<Token>;
/** Two tokens compare equal when both their kind and their data match.
 *
 * Fodder, string-block indents and location take no part in the comparison.
 */
static inline bool operator==(const Token &a, const Token &b)
{
    return a.kind == b.kind && a.data == b.data;
}
/** Writes the Token::toString name of a token kind to `o` and returns `o`. */
static inline std::ostream &operator<<(std::ostream &o, Token::Kind v)
{
    return o << Token::toString(v);
}
/** Writes a token to `o` and returns `o`.
 *
 * - Empty data: just the kind name from Token::toString.
 * - OPERATOR with data: the data wrapped in double quotes.
 * - Anything else: (kind-name, "data").
 */
static inline std::ostream &operator<<(std::ostream &o, const Token &v)
{
    if (v.data.empty()) {
        return o << Token::toString(v.kind);
    }
    if (v.kind == Token::OPERATOR) {
        return o << "\"" << v.data << "\"";
    }
    return o << "(" << Token::toString(v.kind) << ", \"" << v.data << "\")";
}
// The three functions below are only declared in this header; their definitions
// are not visible here, so the notes describe the signatures and flag assumptions.

// Takes an identifier string and returns a Token::Kind.
// NOTE(review): presumably returns the keyword kind for reserved words and
// IDENTIFIER otherwise -- confirm against the implementation.
Token::Kind lex_get_keyword_kind(const std::string &identifier);
// Takes a file name and a C string of input and returns a list of tokens.
// NOTE(review): presumably `input` is NUL-terminated source text and `filename`
// is used for locations in error reports -- confirm against the implementation.
Tokens jsonnet_lex(const std::string &filename, const char *input);
// Takes a list of tokens and returns a string.
// NOTE(review): presumably a textual rendering of the token stream for
// debugging/testing -- confirm against the implementation.
std::string jsonnet_unlex(const Tokens &tokens);
#endif