pdf_oxide 0.3.24

The fastest Rust PDF library with text extraction: 0.8ms mean, 100% pass rate on 3,830 PDFs. 5× faster than pdf_extract, 17× faster than oxidize_pdf. Extract, create, and edit PDFs.
Documentation
#!/usr/bin/env node

/**
 * DOM Integration Tests for PDF Page Navigation and Editing
 *
 * Tests the PdfPage class with DOM-like element access and manipulation
 */

import { describe, it } from 'node:test';
import assert from 'node:assert';
import { Pdf, PdfBuilder } from '../index.js';
import { promises as fs } from 'node:fs';
import { join } from 'node:path';
import { tmpdir } from 'node:os';

const TEMP_DIR = join(tmpdir(), `pdf-oxide-dom-tests-${Date.now()}`);

describe('PdfPage DOM Access and Manipulation', () => {
  before(async () => {
    try {
      await fs.mkdir(TEMP_DIR, { recursive: true });
    } catch (err) {
      // Directory might already exist
    }
  });

  after(async () => {
    try {
      const files = await fs.readdir(TEMP_DIR);
      for (const file of files) {
        await fs.unlink(join(TEMP_DIR, file));
      }
      await fs.rmdir(TEMP_DIR);
    } catch (err) {
      // Cleanup is best-effort
    }
  });

  describe('Page properties', () => {
    it('should get page index', () => {
      const pdf = Pdf.fromMarkdown('Test');
      const page = pdf.page(0);

      assert.strictEqual(typeof page.getPageIndex, 'function');
      assert.strictEqual(page.getPageIndex(), 0);
    });

    it('should get page dimensions', () => {
      const pdf = Pdf.fromMarkdown('Test');
      const page = pdf.page(0);

      assert.strictEqual(typeof page.getWidth, 'function');
      assert.strictEqual(typeof page.getHeight, 'function');

      const width = page.getWidth();
      const height = page.getHeight();

      assert.strictEqual(typeof width, 'number');
      assert.strictEqual(typeof height, 'number');
      assert.ok(width > 0);
      assert.ok(height > 0);
    });

    it('should return default Letter dimensions', () => {
      const pdf = Pdf.fromMarkdown('Test');
      const page = pdf.page(0);

      // Default dimensions should be US Letter (612 × 792 points)
      assert.ok(page.getWidth() > 0);
      assert.ok(page.getHeight() > 0);
    });
  });

  describe('Element access', () => {
    it('should get children elements', () => {
      const pdf = Pdf.fromMarkdown('# Title\n\nContent here');
      const page = pdf.page(0);

      assert.strictEqual(typeof page.children, 'function');
      const children = page.children();

      assert.ok(Array.isArray(children));
    });

    it('should handle empty pages', () => {
      const pdf = Pdf.fromMarkdown('');
      const page = pdf.page(0);

      const children = page.children();
      assert.ok(Array.isArray(children));
    });
  });

  describe('Text search', () => {
    it('should find text containing query (case-insensitive)', () => {
      const pdf = Pdf.fromMarkdown('Hello World\nThis is a test');
      const page = pdf.page(0);

      assert.strictEqual(typeof page.findTextContaining, 'function');
      // Note: findTextContaining may not work until actual page data is populated
      // This test verifies the method exists and returns expected type
      const results = page.findTextContaining('hello');
      assert.ok(Array.isArray(results));
    });

    it('should search with options', () => {
      const pdf = Pdf.fromMarkdown('Test content\nMore test data');
      const page = pdf.page(0);

      assert.strictEqual(typeof page.findText, 'function');

      // Search without options
      const results1 = page.findText('test');
      assert.ok(Array.isArray(results1));

      // Search with options (case-sensitive would be added in full implementation)
      // For now, just verify method works without options
    });

    it('should return search results with structure', () => {
      const pdf = Pdf.fromMarkdown('Sample text here');
      const page = pdf.page(0);

      const results = page.findText('text');
      assert.ok(Array.isArray(results));

      // If results exist, verify structure
      if (results.length > 0) {
        const result = results[0];
        assert.strictEqual(typeof result.text, 'string');
        assert.strictEqual(typeof result.page_index, 'number');
        assert.ok(typeof result.bbox === 'object');
      }
    });
  });

  describe('Element mutation', () => {
    it('should set text content', () => {
      const pdf = Pdf.fromMarkdown('Original text');
      const page = pdf.page(0);

      assert.strictEqual(typeof page.setText, 'function');

      // Get a text element ID first
      const children = page.children();
      if (children.length > 0) {
        // Try to modify (will error if element doesn't exist, which is ok)
        try {
          page.setText(children[0], 'Modified text');
        } catch (err) {
          // Element ID format may vary; we're just testing the API exists
        }
      }
    });

    it('should add elements', () => {
      const pdf = Pdf.fromMarkdown('Start');
      const page = pdf.page(0);

      assert.strictEqual(typeof page.addElement, 'function');

      // Create new text element
      const elementContent = {
        element_type: 'text',
        data: 'New text content',
      };

      const newId = page.addElement(elementContent);
      assert.strictEqual(typeof newId, 'string');
      assert.ok(newId.length > 0);
    });

    it('should remove elements', () => {
      const pdf = Pdf.fromMarkdown('Content to remove');
      const page = pdf.page(0);

      assert.strictEqual(typeof page.removeElement, 'function');

      // Try removing non-existent element (should error gracefully)
      try {
        page.removeElement('nonexistent_element_id');
      } catch (err) {
        // Expected: element not found
        assert.ok(err instanceof Error);
      }
    });

    it('should add text element and return ID', () => {
      const pdf = Pdf.fromMarkdown('Start');
      const page = pdf.page(0);

      const element = {
        element_type: 'text',
        data: 'Added text',
      };

      const id = page.addElement(element);

      assert.strictEqual(typeof id, 'string');
      assert.ok(id.startsWith('element_'));
    });
  });

  describe('Annotations', () => {
    it('should get annotations', () => {
      const pdf = Pdf.fromMarkdown('Content');
      const page = pdf.page(0);

      assert.strictEqual(typeof page.annotations, 'function');
      const annotations = page.annotations();

      assert.ok(Array.isArray(annotations));
    });

    it('should add annotations', () => {
      const pdf = Pdf.fromMarkdown('Content');
      const page = pdf.page(0);

      assert.strictEqual(typeof page.addAnnotation, 'function');

      const annotation = {
        annotation_type: 'text',
        data: 'Comment content',
      };

      const id = page.addAnnotation(annotation);
      assert.strictEqual(typeof id, 'string');
    });

    it('should have correct annotation structure', () => {
      const pdf = Pdf.fromMarkdown('Content');
      const page = pdf.page(0);

      const annotation = {
        annotation_type: 'highlight',
        data: 'Important text',
      };

      page.addAnnotation(annotation);

      const annotations = page.annotations();
      assert.ok(Array.isArray(annotations));

      // Verify annotation structure if any exist
      if (annotations.length > 0) {
        const annot = annotations[0];
        assert.ok(typeof annot.id === 'string');
        assert.ok(typeof annot.annotation_type === 'string');
      }
    });
  });

  describe('Page state tracking', () => {
    it('should track modifications', () => {
      const pdf = Pdf.fromMarkdown('Test');
      const page = pdf.page(0);

      // Initially page should not be marked as modified
      // (this depends on implementation; add logic as needed)

      // Make a modification
      try {
        const element = {
          element_type: 'text',
          data: 'New content',
        };
        page.addElement(element);
        // Page should now be marked as modified
      } catch (err) {
        // If implementation doesn't track this, that's ok for now
      }
    });
  });

  describe('DOM navigation workflow', () => {
    it('should support complete DOM workflow', () => {
      const pdf = Pdf.fromMarkdown('# Document\n\nWith content');
      const page = pdf.page(0);

      // 1. Get page properties
      assert.ok(page.getPageIndex() >= 0);
      assert.ok(page.getWidth() > 0);
      assert.ok(page.getHeight() > 0);

      // 2. Access elements
      const children = page.children();
      assert.ok(Array.isArray(children));

      // 3. Search for text
      const searchResults = page.findText('content');
      assert.ok(Array.isArray(searchResults));

      // 4. Add new element
      const newElement = {
        element_type: 'text',
        data: 'Added element',
      };
      const elementId = page.addElement(newElement);
      assert.ok(elementId.length > 0);

      // 5. Manage annotations
      const annotation = {
        annotation_type: 'text',
        data: 'Annotation comment',
      };
      const annotationId = page.addAnnotation(annotation);
      assert.ok(annotationId.length > 0);

      const annotations = page.annotations();
      assert.ok(Array.isArray(annotations));
    });
  });

  describe('Integration with Pdf class', () => {
    it('should access multiple pages', () => {
      const pdf = PdfBuilder.create()
        .title('Multi-page Document')
        .fromMarkdown('# Page 1\n\nContent for page 1\n\n# Page 2\n\nContent for page 2');

      const page0 = pdf.page(0);
      assert.strictEqual(page0.getPageIndex(), 0);

      // Note: Multiple pages from markdown may not work as expected
      // This tests the API integration rather than functionality
    });

    it('should save modified pages', async () => {
      const outputPath = join(TEMP_DIR, 'modified-page.pdf');

      const pdf = Pdf.fromMarkdown('Original content');
      const page = pdf.page(0);

      // Make modifications
      const element = {
        element_type: 'text',
        data: 'Added text',
      };
      page.addElement(element);

      // Save page back
      pdf.savePage(page);

      // Save document
      pdf.save(outputPath);

      // Verify file exists
      const stat = await fs.stat(outputPath);
      assert.ok(stat.size > 0);
    });
  });

  describe('Error handling', () => {
    it('should handle invalid element ID', () => {
      const pdf = Pdf.fromMarkdown('Content');
      const page = pdf.page(0);

      assert.throws(() => {
        page.removeElement('invalid_element_xyz');
      });
    });

    it('should handle invalid element type', () => {
      const pdf = Pdf.fromMarkdown('Content');
      const page = pdf.page(0);

      const invalidElement = {
        element_type: 'unknown_type',
        data: 'Some data',
      };

      assert.throws(() => {
        page.addElement(invalidElement);
      });
    });

    it('should handle set_text on non-existent element', () => {
      const pdf = Pdf.fromMarkdown('Content');
      const page = pdf.page(0);

      assert.throws(() => {
        page.setText('nonexistent_id', 'New text');
      });
    });
  });
});