unpdf 0.7.0

High-performance PDF content extraction to Markdown, text, and JSON
Documentation
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>unpdf — WASM Playground</title>
  <style>
    /* Global reset: border-box sizing and zero margin/padding on every element and pseudo-element. */
    *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }

    /* Design tokens (colors, corner radius, monospace font stack) consumed via var() below. */
    :root {
      --bg: #0f1117;
      --surface: #1a1d27;
      --surface2: #242736;
      --border: #2e3347;
      --accent: #6c8cff;
      --accent-dim: #3d4f99;
      --text: #e2e4ee;
      --text-dim: #8890a8;
      --err: #ff6b6b;
      --ok: #6bffb3;
      --radius: 10px;
      --mono: 'Fira Code', 'Cascadia Code', 'JetBrains Mono', Consolas, monospace;
    }

    /* Page layout: a single horizontally centered flex column with a gap between direct children. */
    body {
      background: var(--bg);
      color: var(--text);
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
      min-height: 100vh;
      display: flex;
      flex-direction: column;
      align-items: center;
      padding: 2rem 1rem;
      gap: 1.5rem;
    }

    /* Page header: title with an accent-colored span and a dimmed tagline. */
    header {
      text-align: center;
    }
    header h1 { font-size: 1.75rem; letter-spacing: -0.5px; }
    header h1 span { color: var(--accent); }
    header p { color: var(--text-dim); margin-top: 0.35rem; font-size: 0.9rem; }

    /* Drop zone: dashed, clickable panel capped at 720px wide. */
    .drop-zone {
      width: 100%;
      max-width: 720px;
      border: 2px dashed var(--border);
      border-radius: var(--radius);
      padding: 2.5rem 2rem;
      text-align: center;
      cursor: pointer;
      transition: border-color 0.15s, background 0.15s;
      background: var(--surface);
      user-select: none;
    }
    /* Same highlight for pointer hover and for the script-toggled "dragover" class. */
    .drop-zone:hover, .drop-zone.dragover {
      border-color: var(--accent);
      background: #1e2136;
    }
    .drop-zone svg { width: 40px; height: 40px; margin-bottom: 0.75rem; opacity: 0.6; }
    .drop-zone p { color: var(--text-dim); font-size: 0.9rem; }
    .drop-zone p strong { color: var(--text); }
    /* The native file input is never shown; the script opens it programmatically. */
    #file-input { display: none; }

    /* Result card: same width cap as the drop zone; overflow hidden clips children to the rounded corners. */
    .result-card {
      width: 100%;
      max-width: 720px;
      background: var(--surface);
      border: 1px solid var(--border);
      border-radius: var(--radius);
      overflow: hidden;
    }

    /* Card header row: file info, tab buttons and copy button; wraps when space runs out. */
    .card-header {
      display: flex;
      align-items: center;
      justify-content: space-between;
      padding: 0.75rem 1rem;
      background: var(--surface2);
      border-bottom: 1px solid var(--border);
      gap: 0.5rem;
      flex-wrap: wrap;
    }
    .file-info { font-size: 0.85rem; color: var(--text-dim); }
    .file-info strong { color: var(--text); }

    /* Output-format tabs; ".active" marks the selected one. */
    .tabs { display: flex; gap: 0.25rem; }
    .tab {
      padding: 0.3rem 0.75rem;
      border-radius: 5px;
      border: 1px solid transparent;
      background: transparent;
      color: var(--text-dim);
      font-size: 0.8rem;
      cursor: pointer;
      transition: all 0.12s;
    }
    .tab:hover { color: var(--text); background: var(--surface); }
    .tab.active {
      color: var(--accent);
      border-color: var(--accent-dim);
      background: #1c2040;
    }

    /* Copy button; margin-left:auto pushes it to the end of the header row. */
    .copy-btn {
      padding: 0.3rem 0.75rem;
      border-radius: 5px;
      border: 1px solid var(--border);
      background: transparent;
      color: var(--text-dim);
      font-size: 0.8rem;
      cursor: pointer;
      transition: all 0.12s;
      margin-left: auto;
    }
    .copy-btn:hover { color: var(--text); border-color: var(--accent); }
    /* ".copied" is a success state applied by the script. */
    .copy-btn.copied { color: var(--ok); border-color: var(--ok); }

    /* Output viewport: scrolls vertically once content exceeds 500px. */
    .output-area {
      position: relative;
      max-height: 500px;
      overflow-y: auto;
    }
    /* Monospace output that preserves whitespace but wraps long lines and words. */
    .output-area pre {
      padding: 1rem;
      font-family: var(--mono);
      font-size: 0.8rem;
      line-height: 1.6;
      white-space: pre-wrap;
      word-break: break-word;
      color: var(--text);
    }

    /* Status bar at the bottom of the result card. */
    .status-bar {
      padding: 0.5rem 1rem;
      background: var(--surface2);
      border-top: 1px solid var(--border);
      font-size: 0.78rem;
      color: var(--text-dim);
      display: flex;
      gap: 1.5rem;
      flex-wrap: wrap;
    }
    .status-bar .ok { color: var(--ok); }
    .status-bar .err { color: var(--err); }

    /* Error card: red-tinted panel with the heading on its own line. */
    .error-card {
      width: 100%;
      max-width: 720px;
      background: #2a1a1a;
      border: 1px solid #5c2a2a;
      border-radius: var(--radius);
      padding: 1rem 1.25rem;
      font-size: 0.85rem;
      color: var(--err);
    }
    .error-card strong { display: block; margin-bottom: 0.25rem; }

    /* Loading line and its spinner: a ring with one accent-colored edge, rotated continuously. */
    .loading {
      color: var(--text-dim);
      font-size: 0.9rem;
    }
    .spinner {
      display: inline-block;
      width: 14px; height: 14px;
      border: 2px solid var(--border);
      border-top-color: var(--accent);
      border-radius: 50%;
      animation: spin 0.7s linear infinite;
      vertical-align: middle;
      margin-right: 0.4rem;
    }
    @keyframes spin { to { transform: rotate(360deg); } }

    /* Footer links. */
    footer {
      font-size: 0.78rem;
      color: var(--text-dim);
      text-align: center;
    }
    footer a { color: var(--accent); text-decoration: none; }
    footer a:hover { text-decoration: underline; }

    /* Utility toggled by the script; !important lets it override any component's display value. */
    .hidden { display: none !important; }
  </style>
</head>
<body>
  <header>
    <h1><span>unpdf</span> WASM Playground</h1>
    <p>PDF → Markdown / Text / JSON — runs entirely in your browser</p>
  </header>

  <!-- File picker trigger: click, keyboard activation and drag-and-drop are wired up in the module script. -->
  <div class="drop-zone" id="drop-zone" role="button" tabindex="0" aria-label="Drop a PDF file here or click to select">
    <!-- Decorative icon: hidden from assistive technology, the drop zone already has an aria-label. -->
    <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true" focusable="false">
      <path d="M14.5 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V7.5L14.5 2z"/>
      <polyline points="14 2 14 8 20 8"/>
      <path d="M12 18v-6"/>
      <path d="M9 15l3-3 3 3"/>
    </svg>
    <p><strong>Drop a PDF here</strong> or click to select</p>
    <p style="margin-top:0.3rem;font-size:0.78rem;">Processed entirely in-browser — no uploads, no server</p>
  </div>
  <!-- Kept outside the role="button" element: a button must not contain another interactive control,
       and as a sibling its programmatic click no longer bubbles back through the drop zone.
       It stays invisible via the #file-input rule and is looked up by id from the script. -->
  <input type="file" id="file-input" accept=".pdf,application/pdf">

  <!-- role="status" marks this as a polite live region for the "parsing" message. -->
  <div id="loading-indicator" class="loading hidden" role="status">
    <span class="spinner" aria-hidden="true"></span> Parsing PDF…
  </div>

  <!-- Filled with an error card by the script when parsing fails. -->
  <div id="error-container" class="hidden"></div>

  <div id="result-card" class="result-card hidden">
    <div class="card-header">
      <span class="file-info" id="file-info"></span>
      <div class="tabs" role="tablist">
        <button type="button" class="tab active" data-tab="markdown" role="tab">Markdown</button>
        <button type="button" class="tab" data-tab="text" role="tab">Text</button>
        <button type="button" class="tab" data-tab="json" role="tab">JSON</button>
      </div>
      <button type="button" class="copy-btn" id="copy-btn">Copy</button>
    </div>
    <!-- Focusable and labelled so keyboard users can scroll the (max-height limited) output. -->
    <div class="output-area" tabindex="0" role="region" aria-label="Extracted output">
      <pre id="output"></pre>
    </div>
    <div class="status-bar" id="status-bar"></div>
  </div>

  <footer>
    <a href="https://github.com/iyulab/unpdf" target="_blank" rel="noopener">iyulab/unpdf</a>
    &nbsp;·&nbsp;
    <a href="https://www.npmjs.com/package/@iyulab/unpdf" target="_blank" rel="noopener">npm: @iyulab/unpdf</a>
    &nbsp;·&nbsp; MIT License
  </footer>

  <script type="module">
    import init, { parse, parseWithOptions, ParseOptions } from './pkg/unpdf_wasm.js';

    // Call the package's default export once at module load and keep what it returns;
    // processPdf() awaits this value before calling parse().
    const wasmReady = init();

    // Element references looked up once by id and reused by the handlers and view helpers below.
    const dropZone = document.getElementById('drop-zone');
    const fileInput = document.getElementById('file-input');
    const loadingEl = document.getElementById('loading-indicator');
    const errorContainer = document.getElementById('error-container');
    const resultCard = document.getElementById('result-card');
    const outputEl = document.getElementById('output');
    const fileInfoEl = document.getElementById('file-info');
    const statusBarEl = document.getElementById('status-bar');
    const copyBtn = document.getElementById('copy-btn');

    // Output text per format; the keys match the tab buttons' data-tab values.
    let tabs = { markdown: '', text: '', json: '' };
    // Key into `tabs` for the tab whose text is currently displayed (and copied).
    let activeTab = 'markdown';

    // --- Drop zone ---
    // Open the native file picker when the drop zone is clicked. A click whose target is the
    // file input itself (the programmatic click bubbling up) is ignored so the handler never
    // re-enters itself.
    dropZone.addEventListener('click', e => {
      if (e.target !== fileInput) fileInput.click();
    });

    // Keyboard activation for the role="button" element: Enter or Space opens the picker.
    dropZone.addEventListener('keydown', e => {
      if (e.key !== 'Enter' && e.key !== ' ') return;
      // Without this, Space also scrolls the page while the drop zone has focus.
      e.preventDefault();
      fileInput.click();
    });

    // preventDefault() on dragover is what marks the element as a valid drop target.
    dropZone.addEventListener('dragover', e => { e.preventDefault(); dropZone.classList.add('dragover'); });
    dropZone.addEventListener('dragleave', () => dropZone.classList.remove('dragover'));
    dropZone.addEventListener('drop', e => {
      // Stop the browser from navigating to / opening the dropped file.
      e.preventDefault();
      dropZone.classList.remove('dragover');
      const file = e.dataTransfer?.files?.[0];
      if (file) processPdf(file);
    });

    fileInput.addEventListener('change', () => {
      const file = fileInput.files?.[0];
      // Clear the selection so picking the same file again still fires "change".
      // The File object captured above stays usable after the input is reset.
      fileInput.value = '';
      if (file) processPdf(file);
    });

    // --- Tab switching ---
    // Clicking a tab makes it the only ".active" one, records its data-tab key as the
    // active tab, and shows the stored text for that key in the output element.
    for (const button of document.querySelectorAll('.tab')) {
      button.addEventListener('click', () => {
        for (const other of document.querySelectorAll('.tab')) {
          other.classList.toggle('active', other === button);
        }
        activeTab = button.dataset.tab;
        outputEl.textContent = tabs[activeTab];
      });
    }

    // --- Copy ---
    // Handle of the pending "restore label" timer, kept so repeated clicks replace it
    // instead of stacking several resets.
    let copyResetTimer;

    // Show `label` on the copy button for 1.5 s, then restore the default "Copy" state.
    // `succeeded` controls whether the green ".copied" styling is applied meanwhile.
    function flashCopyState(label, succeeded) {
      clearTimeout(copyResetTimer);
      copyBtn.textContent = label;
      copyBtn.classList.toggle('copied', succeeded);
      copyResetTimer = setTimeout(() => {
        copyBtn.textContent = 'Copy';
        copyBtn.classList.remove('copied');
      }, 1500);
    }

    // Copy the text of the currently active tab to the clipboard.
    copyBtn.addEventListener('click', async () => {
      try {
        await navigator.clipboard.writeText(tabs[activeTab]);
        flashCopyState('Copied!', true);
      } catch (err) {
        // The Clipboard API can be missing (non-secure context) or the write can be denied;
        // tell the user instead of failing silently.
        console.warn('Copy to clipboard failed:', err);
        flashCopyState('Copy failed', false);
      }
    });

    // --- Main processing ---
    // Sequence number of the most recent processPdf() call; lets an older, slower run
    // detect that it has been superseded and must not overwrite newer output.
    let latestRun = 0;

    /**
     * Parse a user-selected file and display the result or an error card.
     *
     * Waits for the WASM initialisation, reads the file into memory, parses it, stores the
     * Markdown / text / pretty-printed JSON renderings in `tabs`, and shows the result card.
     * Any failure along the way is passed to showError().
     *
     * @param {File} file The file picked or dropped by the user.
     * @returns {Promise<void>}
     */
    async function processPdf(file) {
      const run = ++latestRun;
      showLoading();
      let doc;
      try {
        await wasmReady;
        const bytes = new Uint8Array(await file.arrayBuffer());
        // A newer file was chosen while this one was still being read: drop this run.
        if (run !== latestRun) return;

        doc = parse(bytes);

        const md = doc.toMarkdown();
        const txt = doc.toText();
        const json = doc.toJson();
        const pages = doc.pageCount();

        // Metadata is best-effort: unparsable or non-object metadata falls back to {}
        // so that a good document is never reported as a parse failure.
        let meta = {};
        try {
          const parsed = JSON.parse(doc.metadata());
          if (parsed && typeof parsed === 'object') meta = parsed;
        } catch {}

        tabs = { markdown: md, text: txt, json: prettyJson(json) };
        showResult(file.name, file.size, pages, meta);
      } catch (err) {
        // Only the latest run may touch the UI.
        if (run === latestRun) showError(err);
      } finally {
        // Release the document handle if the binding exposes free(); handles created by
        // the WASM glue are presumably not reclaimed promptly by the JS garbage collector.
        doc?.free?.();
      }
    }

    /**
     * Re-serialise a JSON string with two-space indentation.
     * Input that cannot be parsed as JSON is handed back untouched.
     */
    function prettyJson(raw) {
      try {
        const parsed = JSON.parse(raw);
        return JSON.stringify(parsed, null, 2);
      } catch (_ignored) {
        return raw;
      }
    }

    /** Switch the page to its "busy" state: loading line visible, error and result hidden. */
    function showLoading() {
      loadingEl.classList.toggle('hidden', false);
      for (const panel of [errorContainer, resultCard]) {
        panel.classList.toggle('hidden', true);
      }
    }

    /**
     * Fill in and reveal the result card for a successfully parsed file.
     *
     * Reads the Markdown output from the module-level `tabs` object, so `tabs` must be
     * updated before this is called.
     *
     * @param {string} name  File name shown in bold in the card header.
     * @param {number} size  File size in bytes, formatted with formatBytes().
     * @param {number} pages Page count shown in the status bar (presumably an integer).
     * @param {object} meta  Document metadata; only its optional `title` is used.
     */
    function showResult(name, size, pages, meta) {
      loadingEl.classList.add('hidden');
      errorContainer.classList.add('hidden');

      // Built with DOM nodes / textContent so the file name is never interpreted as HTML.
      fileInfoEl.replaceChildren();
      const nameEl = document.createElement('strong');
      nameEl.textContent = name;
      fileInfoEl.append(nameEl, ` · ${formatBytes(size)}`);

      // Reset to markdown tab
      document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
      document.querySelector('[data-tab="markdown"]').classList.add('active');
      activeTab = 'markdown';
      outputEl.textContent = tabs.markdown;

      // Green check mark in front of each status entry (styled by ".status-bar .ok").
      // Purely decorative, so it is hidden from assistive technology.
      const okMark = () => {
        const mark = document.createElement('span');
        mark.className = 'ok';
        mark.textContent = '✓';
        mark.setAttribute('aria-hidden', 'true');
        return mark;
      };

      statusBarEl.replaceChildren();
      // Optional chaining keeps a null/undefined `meta` from throwing here.
      if (meta?.title) {
        statusBarEl.append(okMark(), ` ${meta.title} · `);
      }
      statusBarEl.append(
        okMark(),
        ` ${pages} page${pages !== 1 ? 's' : ''} · ${formatBytes(tabs.markdown.length, 'chars')} markdown`
      );

      resultCard.classList.remove('hidden');
    }

    /**
     * Replace the contents of the error container with a card describing `err` and show it.
     *
     * The message is `err.message` when present, otherwise `String(err)`, so both Error
     * objects and plain thrown values are displayed. A short hint is appended when the
     * message mentions passwords/encryption or an invalid file.
     *
     * @param {*} err The value caught while processing a file.
     */
    function showError(err) {
      loadingEl.classList.add('hidden');
      resultCard.classList.add('hidden');

      const msg = err?.message ?? String(err);
      let hint = '';
      if (/password|encrypt/i.test(msg)) hint = 'This PDF appears to be encrypted. Try passing a password via the API.';
      else if (/not a pdf|invalid/i.test(msg)) hint = 'The file does not appear to be a valid PDF.';

      // Text is inserted via textContent / text nodes only, never as HTML.
      const card = document.createElement('div');
      card.className = 'error-card';
      // role="alert" makes assistive technology announce the error when it appears.
      card.setAttribute('role', 'alert');
      const heading = document.createElement('strong');
      heading.textContent = 'Parse error';
      card.appendChild(heading);
      card.appendChild(document.createTextNode(msg));
      if (hint) {
        const hintEl = document.createElement('span');
        hintEl.style.cssText = 'color:var(--text-dim);margin-top:0.25rem;display:block';
        hintEl.textContent = hint;
        card.appendChild(hintEl);
      }
      errorContainer.replaceChildren(card);
      errorContainer.classList.remove('hidden');
    }

    /**
     * Format a count for display.
     * With unit 'chars' the number is locale-formatted and suffixed with " chars";
     * otherwise it is treated as a byte size and rendered as B, KB or MB (1024-based,
     * one decimal place for KB and MB).
     */
    function formatBytes(n, unit = 'bytes') {
      const KIB = 1024;
      const MIB = KIB * KIB;

      if (unit === 'chars') {
        return n.toLocaleString() + ' chars';
      }
      if (n >= MIB) {
        return (n / MIB).toFixed(1) + ' MB';
      }
      if (n >= KIB) {
        return (n / KIB).toFixed(1) + ' KB';
      }
      return n + ' B';
    }

  </script>
</body>
</html>