"use strict";
const fs = require("fs/promises");
const path = require("path");
const matter = require("gray-matter");
const minimatch = require("minimatch");
/**
 * Converts every backslash in a path to a forward slash.
 *
 * @param {string} value - Path that may contain Windows-style separators.
 * @returns {string} The same path using only "/" separators.
 */
function normalizePath(value) {
  const backslash = /\\/g;
  const posixStyle = value.replace(backslash, "/");
  return posixStyle;
}
/**
 * Removes a trailing ".md" or ".mdx" extension (case-insensitive).
 *
 * @param {string} filePath - File path or name.
 * @returns {string} The path without its Markdown extension; other
 *   extensions are left untouched.
 */
function stripExtension(filePath) {
  const markdownExtension = /\.mdx?$/i;
  const withoutExtension = filePath.replace(markdownExtension, "");
  return withoutExtension;
}
/**
 * Removes a trailing "index" path segment (case-insensitive) so that an index
 * document maps onto its directory.
 *
 * The segment is matched both after a "/" and at the very start of the path,
 * so a root-level "index" collapses to the empty string (which callers treat
 * as the "/" route) instead of staying "index". Names that merely end in
 * "index", such as "reindex", are left untouched.
 *
 * @param {string} filePath - Extension-less path using "/" separators.
 * @returns {string} The path without its trailing "index" segment.
 */
function stripIndex(filePath) {
  return filePath.replace(/(^|\/)index$/i, "");
}
/**
 * Strips all leading and trailing "/" characters from a string.
 * Slashes in the middle of the value are preserved.
 *
 * @param {string} value - Path or URL fragment.
 * @returns {string} The value without surrounding slashes.
 */
function trimSlashes(value) {
  const edgeSlashes = /^\/+|\/+$/g;
  const trimmed = value.replace(edgeSlashes, "");
  return trimmed;
}
/**
 * Returns the first blank-line-separated paragraph that does not start with
 * "#", "import " or "export ", with its line breaks collapsed into spaces.
 *
 * @param {string} content - Markdown/MDX body text.
 * @returns {string} The flattened paragraph, or "" when none qualifies.
 */
function firstParagraph(content) {
  const isProse = (block) =>
    !(
      block.startsWith("#") ||
      block.startsWith("import ") ||
      block.startsWith("export ")
    );

  const candidate = content
    .split(/\n\s*\n/)
    .map((block) => block.trim())
    .filter(Boolean)
    .find(isProse);

  return candidate === undefined ? "" : candidate.replace(/\n+/g, " ").trim();
}
/**
 * Determines a document title, trying in order:
 *   1. a non-blank string `title` in the front matter,
 *   2. the first level-1 ATX heading ("# ...") in the body,
 *   3. the file name, with "-"/"_" turned into spaces and words capitalised.
 *
 * The heading search walks the body line by line and skips fenced code blocks
 * (``` or ~~~), so a "# comment" line inside a shell snippet is not mistaken
 * for the title. The heading text must be on the same line as the "#" and
 * contain a non-space character, so an empty "#" cannot swallow the following
 * paragraph or produce an empty title.
 *
 * @param {string} filePath - Path of the source file (used for the fallback).
 * @param {object} data - Parsed front matter.
 * @param {string} content - Document body.
 * @returns {string} The resolved title.
 */
function extractTitle(filePath, data, content) {
  if (typeof data?.title === "string" && data.title.trim()) {
    return data.title.trim();
  }

  // Marker of the currently open fence, or null when outside a code block.
  let openFence = null;
  for (const line of content.split(/\r?\n/)) {
    const fenceMatch = line.match(/^ {0,3}(`{3,}|~{3,})/);
    if (fenceMatch) {
      const marker = fenceMatch[1];
      if (openFence === null) {
        openFence = marker;
      } else if (
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        line.trim() === marker
      ) {
        // A closing fence uses the same character, is at least as long as the
        // opener, and carries no info string.
        openFence = null;
      }
      continue;
    }
    if (openFence !== null) {
      continue;
    }
    const headingMatch = line.match(/^#[ \t]+(\S.*)$/);
    if (headingMatch) {
      return headingMatch[1].trim();
    }
  }

  return path
    .basename(filePath, path.extname(filePath))
    .replace(/[-_]+/g, " ")
    .replace(/\b\w/g, (char) => char.toUpperCase());
}
/**
 * Picks a description for a document: the front matter `description` when it
 * is a non-blank string (trimmed, with newlines collapsed to spaces),
 * otherwise the first prose paragraph of the body.
 *
 * @param {object} data - Parsed front matter.
 * @param {string} content - Document body.
 * @returns {string} The description, possibly "".
 */
function extractDescription(data, content) {
  const { description } = data;
  const hasFrontMatterDescription =
    typeof description === "string" && description.trim();

  if (!hasFrontMatterDescription) {
    return firstParagraph(content);
  }
  return description.trim().replace(/\n+/g, " ");
}
/**
 * Normalises a document body: CRLF line endings become LF, lines matching the
 * import pattern are removed when `options.excludeImports` is truthy, and the
 * result is trimmed.
 *
 * The import pattern is line-based: it removes any line that starts with
 * "import" followed by whitespace, wherever that line appears in the body.
 *
 * @param {string} content - Raw document body.
 * @param {object} options - Plugin options; only `excludeImports` is read.
 * @returns {string} The cleaned body.
 */
function cleanContent(content, options) {
  const normalized = content.replace(/\r\n/g, "\n");
  const withoutImports = options.excludeImports
    ? normalized.replace(/^\s*import\s+.*?;?\s*$/gm, "")
    : normalized;
  return withoutImports.trim();
}
/**
 * Drops the first line of `content` when it is a "# ..." heading whose text
 * equals `title` (both compared after trimming), so the title is not repeated
 * when the caller emits its own heading.
 *
 * @param {string} content - Document body.
 * @param {string} title - Title already chosen for the document.
 * @returns {string} The trimmed body, without the duplicate heading if any.
 */
function withoutLeadingTitleHeading(content, title) {
  const body = content.trim();
  const wantedTitle = title.trim();
  const [head, ...rest] = body.split("\n");
  const heading = /^#\s+(.+)$/.exec(head.trim());

  if (heading !== null && heading[1].trim() === wantedTitle) {
    return rest.join("\n").trim();
  }
  return body;
}
/**
 * Recursively collects every ".md"/".mdx" file below `dir`.
 *
 * Entries whose name starts with "_" (files and directories alike) are
 * skipped. Directory entries are visited in ascending name order (plain
 * code-unit comparison) so the result does not depend on the enumeration
 * order of the underlying filesystem and repeated builds produce the same
 * document order.
 *
 * @param {string} dir - Directory to scan.
 * @returns {Promise<string[]>} Paths of the Markdown files found, each built
 *   by joining `dir` with the entry names.
 */
async function collectMarkdownFiles(dir) {
  const entries = await fs.readdir(dir, { withFileTypes: true });
  const sortedEntries = [...entries].sort((a, b) => {
    if (a.name < b.name) {
      return -1;
    }
    return a.name > b.name ? 1 : 0;
  });

  const files = [];
  for (const entry of sortedEntries) {
    if (entry.name.startsWith("_")) {
      continue;
    }
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      files.push(...(await collectMarkdownFiles(fullPath)));
      continue;
    }
    if (entry.isFile() && /\.mdx?$/i.test(entry.name)) {
      files.push(fullPath);
    }
  }
  return files;
}
/**
 * Tests whether `file` matches at least one glob pattern.
 *
 * Each pattern is tried against two forms of the path, both with "/"
 * separators: relative to `siteDir`, and relative to the docs directory
 * (`siteDir`/`docsDir`). Matching uses minimatch with `matchBase: true`.
 *
 * @param {string} file - Path of the file to test.
 * @param {string[]|undefined|null} patterns - Glob patterns.
 * @param {string} siteDir - Site root directory.
 * @param {string} docsDir - Docs directory, relative to `siteDir`.
 * @returns {boolean} True when any pattern matches either relative path;
 *   false when `patterns` is empty or missing.
 */
function matchesAnyPattern(file, patterns, siteDir, docsDir) {
  if (!patterns?.length) {
    return false;
  }

  const docsRoot = path.join(siteDir, docsDir);
  const relativeForms = [
    normalizePath(path.relative(siteDir, file)),
    normalizePath(path.relative(docsRoot, file)),
  ];
  const matchOptions = { matchBase: true };

  return patterns.some((pattern) =>
    relativeForms.some((relative) => minimatch(relative, pattern, matchOptions)),
  );
}
/**
 * Orders files according to a list of glob patterns.
 *
 * Patterns are processed in order; for each one, the not-yet-placed files
 * that match it are appended in their original relative order. Files matching
 * no pattern are appended at the end when `includeUnmatchedLast` is truthy
 * and dropped otherwise. With no patterns, a shallow copy of `files` is
 * returned.
 *
 * @param {string[]} files - Candidate file paths.
 * @param {string[]|undefined|null} orderPatterns - Glob patterns giving the order.
 * @param {boolean} includeUnmatchedLast - Whether to keep unmatched files.
 * @param {string} siteDir - Site root directory.
 * @param {string} docsDir - Docs directory, relative to `siteDir`.
 * @returns {string[]} A new array with the ordered files.
 */
function orderFiles(files, orderPatterns, includeUnmatchedLast, siteDir, docsDir) {
  if (!orderPatterns?.length) {
    return [...files];
  }

  const placed = new Set();
  const result = [];

  for (const pattern of orderPatterns) {
    for (const file of files) {
      const isNewMatch =
        !placed.has(file) && matchesAnyPattern(file, [pattern], siteDir, docsDir);
      if (isNewMatch) {
        result.push(file);
        placed.add(file);
      }
    }
  }

  if (!includeUnmatchedLast) {
    return result;
  }
  return result.concat(files.filter((file) => !placed.has(file)));
}
/**
 * Builds the site's public base URL from `siteConfig.url` and
 * `siteConfig.baseUrl`, without a trailing slash.
 *
 * @param {object} siteConfig - Object with optional `url` and `baseUrl` strings.
 * @returns {string} The origin, followed by "/<baseUrl>" when the base URL is
 *   not empty or "/".
 */
function buildSiteUrl(siteConfig) {
  const { url, baseUrl } = siteConfig;
  const origin = (url || "").replace(/\/+$/, "");

  let basePath = "";
  if (baseUrl !== "/") {
    basePath = trimSlashes(baseUrl || "/");
  }

  if (!basePath) {
    return origin;
  }
  return `${origin}/${basePath}`;
}
/**
 * Lists the site routes a document might be served under, without duplicates
 * and in this order:
 *   1. the file path with its extension and trailing "/index" removed,
 *   2. the front matter `slug`, when it is a non-blank string,
 *   3. the file's parent directory joined with the front matter `id`, when
 *      `id` is a non-blank string.
 *
 * Every candidate starts with "/"; an empty path becomes "/".
 *
 * @param {string} relativeFilePath - File path relative to the docs directory.
 * @param {object} frontMatter - Parsed front matter.
 * @returns {string[]} Candidate routes.
 */
function routeCandidates(relativeFilePath, frontMatter) {
  const toRoute = (segment) => (segment ? `/${segment}` : "/");
  const isFilled = (value) => typeof value === "string" && Boolean(value.trim());

  const baseRelative = stripIndex(stripExtension(normalizePath(relativeFilePath)));
  const routes = new Set([toRoute(trimSlashes(baseRelative))]);

  if (isFilled(frontMatter.slug)) {
    routes.add(toRoute(trimSlashes(frontMatter.slug.trim())));
  }

  if (isFilled(frontMatter.id)) {
    const parent = path.dirname(baseRelative);
    const id = trimSlashes(frontMatter.id.trim());
    const hasParent = Boolean(parent) && parent !== ".";
    routes.add(hasParent ? `/${trimSlashes(`${parent}/${id}`)}` : `/${id}`);
  }

  return Array.from(routes);
}
/**
 * Resolves the public HTML URL of a document.
 *
 * The candidate routes for the file are compared, ignoring trailing slashes,
 * against the known routes; the first candidate that is a known route wins.
 * When none matches, the first candidate is used as is.
 *
 * @param {string} siteUrl - Site base URL without a trailing slash.
 * @param {string} relativeFilePath - File path relative to the docs directory.
 * @param {object} frontMatter - Parsed front matter.
 * @param {string[]|undefined|null} routesPaths - Known site routes.
 * @returns {string} `siteUrl` followed by the chosen route.
 */
function resolveHtmlUrl(siteUrl, relativeFilePath, frontMatter, routesPaths) {
  const withoutTrailingSlash = (route) => route.replace(/\/+$/, "") || "/";

  const knownRoutes = new Set((routesPaths || []).map(withoutTrailingSlash));
  const candidates = routeCandidates(relativeFilePath, frontMatter);

  const matched = candidates
    .map(withoutTrailingSlash)
    .find((candidate) => knownRoutes.has(candidate));
  if (matched) {
    return `${siteUrl}${matched}`;
  }

  const fallback = candidates[0] || "/";
  return `${siteUrl}${fallback}`;
}
/**
 * Computes the relative path of the generated Markdown copy of a document:
 * the same path with "/" separators and a ".md" extension in place of
 * ".md"/".mdx" (case-insensitive).
 *
 * @param {string} relativeFilePath - Source path relative to the docs directory.
 * @returns {string} The relative output path.
 */
function generatedMarkdownRelativePath(relativeFilePath) {
  const posixPath = normalizePath(relativeFilePath);
  const markdownPath = posixPath.replace(/\.mdx?$/i, ".md");
  return markdownPath;
}
/**
 * Reads one Markdown/MDX file and builds its document record.
 *
 * The file is parsed with gray-matter; the body is cleaned with
 * `cleanContent` and used to derive the title, description and URLs.
 *
 * @param {string} filePath - Path of the file to read.
 * @param {string} siteDir - Site root directory.
 * @param {string} docsDir - Docs directory, relative to `siteDir`.
 * @param {string} siteUrl - Site base URL without a trailing slash.
 * @param {string[]} routesPaths - Known site routes.
 * @param {object} options - Plugin options, forwarded to `cleanContent`.
 * @returns {Promise<object>} Record with `title`, `description`, `content`,
 *   `relativeFilePath`, `markdownRelativePath` and `htmlUrl`.
 */
async function parseDoc(filePath, siteDir, docsDir, siteUrl, routesPaths, options) {
  const source = await fs.readFile(filePath, "utf8");
  const { data: frontMatter, content: body } = matter(source);

  const content = cleanContent(body, options);
  const docsRoot = path.join(siteDir, docsDir);
  const relativeFilePath = normalizePath(path.relative(docsRoot, filePath));

  const title = extractTitle(filePath, frontMatter, content);
  const description = extractDescription(frontMatter, content);
  const markdownRelativePath = generatedMarkdownRelativePath(relativeFilePath);
  const htmlUrl = resolveHtmlUrl(siteUrl, relativeFilePath, frontMatter, routesPaths);

  return {
    title,
    description,
    content,
    relativeFilePath,
    markdownRelativePath,
    htmlUrl,
  };
}
/**
 * Writes one Markdown file per document under `outDir` and returns copies of
 * the documents extended with the public URL of the written file.
 *
 * Each file consists of a "# title" heading, an optional "> description"
 * quote, and the body with a duplicate leading title heading removed.
 * Documents are written one after another; missing directories are created.
 *
 * @param {string} outDir - Build output directory.
 * @param {string} siteUrl - Site base URL without a trailing slash.
 * @param {object[]} docs - Document records as produced by `parseDoc`.
 * @returns {Promise<object[]>} New records, each with an added `markdownUrl`.
 */
async function writeMarkdownFiles(outDir, siteUrl, docs) {
  const written = [];

  for (const doc of docs) {
    const target = path.join(outDir, doc.markdownRelativePath);
    await fs.mkdir(path.dirname(target), { recursive: true });

    const body = withoutLeadingTitleHeading(doc.content, doc.title);
    const quote = doc.description ? `\n\n> ${doc.description}` : "";
    await fs.writeFile(target, `# ${doc.title}${quote}\n\n${body}\n`, "utf8");

    const markdownUrl = `${siteUrl}/${trimSlashes(doc.markdownRelativePath)}`;
    written.push({ ...doc, markdownUrl });
  }

  return written;
}
/**
 * Turns a page URL into the URL of its Markdown counterpart.
 *
 * Trailing slashes are removed first. A URL that points at the site root
 * (nothing left, or only "scheme://host") becomes "<root>/index.md" so the
 * suffix is never glued onto the host name. A URL that already ends in ".md"
 * is returned as is. Anything else gets ".md" appended.
 *
 * @param {string} url - Absolute or root-relative page URL.
 * @returns {string} URL ending in ".md".
 */
function applyMdExtension(url) {
  const trimmed = url.replace(/\/+$/, "");

  // Checked before the ".md" test so an origin such as "https://example.md"
  // is not mistaken for an already-suffixed page URL.
  const isSiteRoot = trimmed === "" || /^[a-z][a-z0-9+.-]*:\/\/[^/]+$/i.test(trimmed);
  if (isSiteRoot) {
    return `${trimmed}/index.md`;
  }

  return trimmed.endsWith(".md") ? trimmed : `${trimmed}.md`;
}
/**
 * Formats one table-of-contents bullet: "- [title](url)" followed by
 * ": description" when the document has a description.
 *
 * The link target is `doc.markdownUrl` when set; otherwise `doc.htmlUrl`,
 * passed through `applyMdExtension` when `addMdExtension` is truthy.
 *
 * @param {object} doc - Document record.
 * @param {boolean} addMdExtension - Whether to link to the ".md" variant.
 * @returns {string} The Markdown list item.
 */
function tocLine(doc, addMdExtension) {
  let target = doc.markdownUrl;
  if (!target) {
    target = addMdExtension ? applyMdExtension(doc.htmlUrl) : doc.htmlUrl;
  }

  const suffix = doc.description ? `: ${doc.description}` : "";
  return `- [${doc.title}](${target})${suffix}`;
}
/**
 * Renders the llms.txt index: a title heading, an optional description quote,
 * optional introductory content, and a "Table of Contents" list with one link
 * per document.
 *
 * Sections are separated by blank lines. Without the blank line after the
 * "> description" quote, Markdown parsers treat the following text as a
 * continuation of the quote. The quote and the introduction are omitted when
 * empty, so no bare ">" line is emitted.
 *
 * @param {string} siteTitle - Title for the top-level heading.
 * @param {string} siteDescription - Short description; may be "".
 * @param {string} rootContent - Introductory text placed before the list.
 * @param {object[]} docs - Document records to list.
 * @param {boolean} addMdExtension - Forwarded to `tocLine`.
 * @returns {string} The file content, ending with a newline.
 */
function renderLlmsTxt(siteTitle, siteDescription, rootContent, docs, addMdExtension) {
  const sections = [`# ${siteTitle}`];
  if (siteDescription) {
    sections.push(`> ${siteDescription}`);
  }
  if (rootContent) {
    sections.push(rootContent);
  }

  const links = docs.map((doc) => tocLine(doc, addMdExtension)).join("\n");
  sections.push(links ? `## Table of Contents\n\n${links}` : "## Table of Contents");

  return `${sections.join("\n\n")}\n`;
}
/**
 * Renders the llms-full.txt document: a title heading, an optional
 * description quote, optional introductory content, then every document as a
 * "## title" section separated by horizontal rules.
 *
 * Sections are separated by blank lines. Without the blank line after the
 * "> description" quote, Markdown parsers treat the following text as a
 * continuation of the quote. The quote and the introduction are omitted when
 * empty. A leading heading that repeats the document title is removed from
 * each body.
 *
 * @param {string} siteTitle - Title for the top-level heading.
 * @param {string} siteDescription - Short description; may be "".
 * @param {string} rootContent - Introductory text placed before the documents.
 * @param {object[]} docs - Document records to include.
 * @returns {string} The file content, ending with a newline.
 */
function renderLlmsFull(siteTitle, siteDescription, rootContent, docs) {
  const sections = [`# ${siteTitle}`];
  if (siteDescription) {
    sections.push(`> ${siteDescription}`);
  }
  if (rootContent) {
    sections.push(rootContent);
  }

  const body = docs
    .map((doc) => `## ${doc.title}\n\n${withoutLeadingTitleHeading(doc.content, doc.title)}`)
    .join("\n\n---\n\n");
  if (body) {
    sections.push(body);
  }

  return `${sections.join("\n\n")}\n`;
}
/**
 * Writes the two bundle files into `outDir`:
 *   - the link index (default name "llms.txt"), unless
 *     `options.generateLLMsTxt` is exactly `false`;
 *   - the full-content file (default name "llms-full.txt"), unless
 *     `options.generateLLMsFullTxt` is exactly `false`.
 *
 * @param {string} outDir - Build output directory.
 * @param {string} siteTitle - Title used in both files.
 * @param {string} siteDescription - Description used in both files.
 * @param {string} rootContent - Introduction for the link index.
 * @param {string} fullRootContent - Introduction for the full-content file.
 * @param {object[]} docs - Document records.
 * @param {object} options - Plugin options (file names, toggles, `addMdExtension`).
 * @returns {Promise<void>}
 */
async function generateBundle(outDir, siteTitle, siteDescription, rootContent, fullRootContent, docs, options) {
  const wantsIndex = options.generateLLMsTxt !== false;
  const wantsFull = options.generateLLMsFullTxt !== false;

  if (wantsIndex) {
    const indexPath = path.join(outDir, options.llmsTxtFilename || "llms.txt");
    const indexBody = renderLlmsTxt(
      siteTitle,
      siteDescription,
      rootContent,
      docs,
      options.addMdExtension !== false,
    );
    await fs.writeFile(indexPath, indexBody, "utf8");
  }

  if (wantsFull) {
    const fullPath = path.join(outDir, options.llmsFullTxtFilename || "llms-full.txt");
    const fullBody = renderLlmsFull(siteTitle, siteDescription, fullRootContent, docs);
    await fs.writeFile(fullPath, fullBody, "utf8");
  }
}
/**
 * Discovers, filters, orders and parses the site's documents.
 *
 * Files under `siteDir`/`docsDir` are collected, those matching
 * `options.ignoreFiles` are dropped, the rest are ordered by
 * `options.includeOrder` (unmatched files are kept last unless
 * `options.includeUnmatchedLast` is exactly `false`), and each file is parsed
 * in turn.
 *
 * @param {string} siteDir - Site root directory.
 * @param {string} docsDir - Docs directory, relative to `siteDir`.
 * @param {string} siteUrl - Site base URL without a trailing slash.
 * @param {string[]} routesPaths - Known site routes.
 * @param {object} options - Plugin options.
 * @returns {Promise<object[]>} Document records in output order.
 */
async function getDocs(siteDir, docsDir, siteUrl, routesPaths, options) {
  const discovered = await collectMarkdownFiles(path.join(siteDir, docsDir));

  const ignorePatterns = options.ignoreFiles;
  const kept = ignorePatterns?.length
    ? discovered.filter((file) => !matchesAnyPattern(file, ignorePatterns, siteDir, docsDir))
    : discovered;

  const keepUnmatched = options.includeUnmatchedLast !== false;
  const ordered = orderFiles(kept, options.includeOrder || [], keepUnmatched, siteDir, docsDir);

  // Parsed one at a time, in output order.
  const docs = [];
  for (const file of ordered) {
    const doc = await parseDoc(file, siteDir, docsDir, siteUrl, routesPaths, options);
    docs.push(doc);
  }
  return docs;
}
module.exports = function docusaurusPluginLlms(context, options = {}) {
const siteTitle = options.title || context.siteConfig.title;
const siteDescription = options.description || context.siteConfig.tagline || "";
const docsDir = options.docsDir || "docs";
const siteUrl = buildSiteUrl(context.siteConfig);
return {
name: "docusaurus-plugin-llms",
async postBuild(props) {
const routesPaths = props?.routesPaths || [];
let docs = await getDocs(context.siteDir, docsDir, siteUrl, routesPaths, options);
if (options.generateMarkdownFiles) {
docs = await writeMarkdownFiles(context.outDir, siteUrl, docs);
}
await generateBundle(
context.outDir,
siteTitle,
siteDescription,
options.rootContent ||
"This file contains links to documentation sections following the llmstxt.org standard.",
options.fullRootContent ||
"This file contains all documentation content in a single document following the llmstxt.org standard.",
docs,
options,
);
for (const customFile of options.customLLMFiles || []) {
let customDocs = docs.filter((doc) =>
matchesAnyPattern(
path.join(context.siteDir, docsDir, doc.relativeFilePath),
customFile.includePatterns || [],
context.siteDir,
docsDir,
),
);
if (customFile.ignorePatterns?.length) {
customDocs = customDocs.filter(
(doc) =>
!matchesAnyPattern(
path.join(context.siteDir, docsDir, doc.relativeFilePath),
customFile.ignorePatterns,
context.siteDir,
docsDir,
),
);
}
const orderedCustomDocs = orderFiles(
customDocs.map((doc) => path.join(context.siteDir, docsDir, doc.relativeFilePath)),
customFile.orderPatterns || [],
customFile.includeUnmatchedLast !== false,
context.siteDir,
docsDir,
);
customDocs = orderedCustomDocs
.map((filePath) =>
customDocs.find(
(doc) => path.join(context.siteDir, docsDir, doc.relativeFilePath) === filePath,
),
)
.filter(Boolean);
const outputPath = path.join(context.outDir, customFile.filename);
const content = customFile.fullContent
? renderLlmsFull(
customFile.title || siteTitle,
customFile.description || siteDescription,
customFile.rootContent ||
"This file contains all documentation content in a single document following the llmstxt.org standard.",
customDocs,
)
: renderLlmsTxt(
customFile.title || siteTitle,
customFile.description || siteDescription,
customFile.rootContent ||
"This file contains links to documentation sections following the llmstxt.org standard.",
customDocs,
options.addMdExtension !== false,
);
await fs.writeFile(outputPath, content, "utf8");
}
},
};
};