module.exports = config;
const fs = require('fs');
const path = require('path');
const UnionFind = require('union-find');
function config(lang, singletons, advanced) {
singletons = !!singletons;
advanced = !!advanced;
if (lang && (typeof lang !== 'string' || lang.length != 2)) throw Error('optional lang param must be string containing 2 letter ISO 639-1 Code');
if (lang) {
if (!fs.statSync(path.resolve(__dirname, `./tokens/${lang}.json`))) {
if (!fs.statSync(path.resolve(__dirname, `./tokens/${lang}.json`))) {
return [];
} else {
let tokenjs = require(`./tokens/${lang}`);
return prepare(tokenjs(), singletons, advanced);
}
} else {
let tokenjson = require(`./tokens/${lang}.json`);
return prepare(tokenjson, singletons, advanced);
}
}
const tokens = {};
fs.readdirSync(path.resolve(__dirname, './tokens/')).forEach((token) => {
if (token.match(/\.json$/)) {
let json = require(`./tokens/${token}`);
tokens[token.replace(/\.json/, '')] = prepare(json, singletons, advanced);
} else if (token.match(/\.js$/)) {
let js = require(`./tokens/${token.replace('\.js$', '')}`);
tokens[token.replace(/\.js/, '')] = prepare(js(), singletons, advanced);
} else {
return;
}
});
return tokens;
}
/**
 * Apply the optional post-processing steps to freshly loaded token data.
 *
 * @param {*} data - Token data as loaded from a token file.
 * @param {boolean} singletons - When falsy, the data is passed through `removeSingletons`.
 * @param {boolean} advanced - When falsy, the data is passed through `simplify`.
 * @returns {*} The data after the requested steps (the input itself if both flags are truthy).
 */
function prepare(data, singletons, advanced) {
    const filtered = singletons ? data : removeSingletons(data);
    return advanced ? filtered : simplify(filtered);
}
/**
 * Collapse token groups of the form `{ tokens: [...] }` into plain arrays of
 * equivalent tokens.
 *
 * Groups that share any token are merged into one array. Inside each array the
 * tokens are ordered by ascending length. Every entry except the first is
 * replaced by `{ text, ...props }` when its source group carried any of
 * `skipBoundaries`, `skipDiacriticStripping` or `regex`; the first entry is
 * always left as a plain string.
 *
 * @param {Array} data - Token groups. Returned unchanged when empty or when the
 *   first element has no `tokens` property.
 * @returns {Array} Array of merged token arrays, or `data` itself when nothing was simplified.
 */
function simplify(data) {
    if (!data.length || !data[0].tokens) return data;

    const seen = new Set();
    const props = new Map();
    const positions = new Map();
    for (const group of data) {
        const groupProps = {};
        for (const toKeep of ['skipBoundaries', 'skipDiacriticStripping', 'regex']) {
            if (group[toKeep]) groupProps[toKeep] = group[toKeep];
        }
        const hasProps = Object.keys(groupProps).length > 0;

        for (const token of group.tokens) {
            seen.add(token);
            if (hasProps) props.set(token, groupProps);
            // Ordering hint used for the final sort. A token that shows up
            // again is re-stamped with the current map size.
            // NOTE(review): confirm whether first-appearance order was intended.
            positions.set(token, positions.size);
        }
    }

    // Give every distinct token a stable integer id for the union-find.
    const tokens = Array.from(seen).sort();
    const invTokens = new Map(tokens.map((token, i) => [token, i]));

    const uf = new UnionFind(tokens.length);
    for (const group of data) {
        const first = invTokens.get(group.tokens[0]);
        for (const token of group.tokens.slice(1)) {
            uf.link(first, invTokens.get(token));
        }
    }

    // Resolve each token through find(): `uf.roots` stores parent pointers,
    // which are not guaranteed to be final representatives after later links,
    // so reading it directly can split one equivalence class into several.
    const rootOf = tokens.map((_, i) => uf.find(i));
    const groups = Array.from(new Set(rootOf)).sort((a, b) => a - b);
    const invGroups = new Map(groups.map((root, g) => [root, g]));

    const out = groups.map(() => []);
    tokens.forEach((token, i) => {
        out[invGroups.get(rootOf[i])].push(token);
    });

    out.forEach((arr) => {
        arr.sort((a, b) => a.length - b.length);
        // Index 0 is skipped on purpose so the leading entry stays a string,
        // which the position lookup below relies on.
        for (let i = 1; i < arr.length; i++) {
            const tokenProps = props.get(arr[i]);
            if (tokenProps) {
                arr[i] = Object.assign({}, tokenProps, { text: arr[i] });
            }
        }
    });

    out.sort((a, b) => (positions.get(a[0]) || 0) - (positions.get(b[0]) || 0));
    return out;
}
/**
 * Drop token groups that hold fewer than two tokens.
 *
 * A group is kept when it is itself an array with more than one element, or
 * when its `tokens` property is such an array.
 *
 * @param {*} tokens - Token groups; anything that is not an array is returned as-is.
 * @returns {*} A filtered copy of `tokens`, or `tokens` itself when it is not an array.
 */
function removeSingletons(tokens) {
    const hasSeveral = (list) => list instanceof Array && list.length > 1;

    return tokens instanceof Array
        ? tokens.filter((entry) => hasSeveral(entry) || hasSeveral(entry.tokens))
        : tokens;
}