pdf_oxide 0.3.23

The fastest Rust PDF library with text extraction: 0.8ms mean, 100% pass rate on 3,830 PDFs. 5× faster than pdf_extract, 17× faster than oxidize_pdf. Extract, create, and edit PDFs.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
"""
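Python bindings tests for pdf_oxide.

These tests verify the Python API works correctly, including:
- Opening PDF files (from a path or from bytes)
- pathlib.Path and context manager support
- Extracting text, images, spans, outlines, annotations, and paths
- Converting to Markdown
- Converting to HTML
- Creating PDFs from Markdown, HTML, and plain text
- Graphics helpers (colors, blend modes, gradients, line styles, patterns)
- Error handling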
"""

import logging
from pathlib import Path

import pytest

from pdf_oxide import PdfDocument


def test_open_pdf():
    """Open the simple fixture and check version() yields a pair of ints.

    The test is skipped (not failed) when opening or querying the fixture
    raises OSError or RuntimeError.
    """
    # NOTE: a real PDF must exist at tests/fixtures/simple.pdf; without it
    # this test only documents the expected behaviour.
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        assert document is not None
        pdf_version = document.version()
        # The version is expected as a two-element tuple of integers.
        assert isinstance(pdf_version, tuple)
        assert len(pdf_version) == 2
        major, minor = pdf_version[0], pdf_version[1]
        assert isinstance(major, int)
        assert isinstance(minor, int)
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_version():
    """Check that the (major, minor) version pair stays within 1..2 and 0..7.

    Skipped when the fixture cannot be opened or queried.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        major, minor = document.version()
        # PDF versions are typically 1.0 through 2.0.
        assert 1 <= major <= 2
        assert 0 <= minor <= 7
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_page_count():
    """Check that page_count() returns an int of at least one.

    Skipped when the fixture cannot be opened or queried.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        pages = document.page_count()
        assert isinstance(pages, int) and pages >= 1
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_extract_text():
    """Check that extract_text() on the first page returns a string.

    Skipped when the fixture cannot be opened or read.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        page_text = document.extract_text(0)
        # An empty string is acceptable for a minimal fixture; only the
        # type (and non-None-ness) of the result is checked.
        assert page_text is not None
        assert isinstance(page_text, str)
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_extract_text_with_content():
    """Check that the hello_world fixture's first page mentions "hello".

    The comparison is case-insensitive. Skipped when the fixture cannot be
    opened or read.
    """
    try:
        document = PdfDocument("tests/fixtures/hello_world.pdf")
        page_text = document.extract_text(0)
        assert isinstance(page_text, str)
        assert len(page_text) > 0
        # Lower-case the text so both "Hello" and "hello" match.
        lowered = page_text.lower()
        assert "hello" in lowered
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'hello_world.pdf' not available or invalid")


def test_to_markdown():
    """Check that to_markdown() on the first page returns a string.

    Skipped when the fixture cannot be opened or converted.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        rendered = document.to_markdown(0)
        assert rendered is not None
        assert isinstance(rendered, str)
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_to_markdown_with_options():
    """Check that to_markdown() accepts its keyword options one at a time.

    Skipped when the fixture cannot be opened or converted.
    """
    # Each entry is one keyword-argument set passed to to_markdown():
    # heading detection on, heading detection off, layout preservation on.
    option_sets = (
        {"detect_headings": True},
        {"detect_headings": False},
        {"preserve_layout": True},
    )
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        for options in option_sets:
            rendered = document.to_markdown(0, **options)
            assert isinstance(rendered, str)
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_to_html():
    """Check that to_html() on the first page returns a string.

    Skipped when the fixture cannot be opened or converted.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        rendered = document.to_html(0)
        assert rendered is not None
        assert isinstance(rendered, str)
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_to_html_semantic_mode():
    """Check that to_html() with preserve_layout=False returns a string.

    Skipped when the fixture cannot be opened or converted.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        rendered = document.to_html(0, preserve_layout=False)
        # Only the return type is verified; the absence of absolute
        # positioning in semantic output is not asserted here.
        assert isinstance(rendered, str)
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_to_html_layout_mode():
    """Check that layout-preserving HTML carries positioning or style markup.

    The markup check only applies when the output is longer than 100
    characters. Skipped when the fixture cannot be opened or converted.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        rendered = document.to_html(0, preserve_layout=True)
        assert isinstance(rendered, str)
        # Short output is treated as "no real content" and is not inspected.
        if len(rendered) > 100:
            lowered = rendered.lower()
            assert "position" in lowered or "style" in lowered
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_to_markdown_all():
    """Check that to_markdown_all() returns a string for the simple fixture.

    Skipped when the fixture cannot be opened or converted.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        rendered = document.to_markdown_all()
        assert rendered is not None
        assert isinstance(rendered, str)
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_to_markdown_all_multipage():
    """Check whole-document Markdown for the multipage fixture.

    The output must be a non-empty string and, when the document has more
    than one page, contain a "---" separator. Only OSError leads to a skip.
    """
    try:
        document = PdfDocument("tests/fixtures/multipage.pdf")
        rendered = document.to_markdown_all()
        assert isinstance(rendered, str)
        assert len(rendered) > 0
        # Pages are expected to be separated by horizontal rules.
        if document.page_count() > 1:
            assert "---" in rendered
    except OSError:
        pytest.skip("Test fixture 'multipage.pdf' not available")


def test_to_html_all():
    """Check that to_html_all() returns a string for the simple fixture.

    Skipped when the fixture cannot be opened or converted.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        rendered = document.to_html_all()
        assert rendered is not None
        assert isinstance(rendered, str)
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_to_html_all_multipage():
    """Check whole-document HTML for the multipage fixture.

    The output must be a non-empty string and, when the document has more
    than one page, contain a page marker (class="page" or data-page).
    Only OSError leads to a skip.
    """
    try:
        document = PdfDocument("tests/fixtures/multipage.pdf")
        rendered = document.to_html_all()
        assert isinstance(rendered, str)
        assert len(rendered) > 0
        # Multi-page output is expected to wrap or tag each page.
        if document.page_count() > 1:
            assert 'class="page"' in rendered or "data-page" in rendered
    except OSError:
        pytest.skip("Test fixture 'multipage.pdf' not available")


# === pathlib.Path and context manager ===


def test_open_pdf_pathlib():
    """Open 1.pdf via pathlib.Path and compare against the string-path open.

    Skipped when the fixture cannot be opened or read.
    """
    try:
        from_path = PdfDocument(Path("tests/fixtures/1.pdf"))
        assert from_path is not None
        assert from_path.page_count() == 7, "1.pdf has 7 pages"
        pdf_version = from_path.version()
        assert isinstance(pdf_version, tuple)
        assert len(pdf_version) == 2
        assert pdf_version[0] >= 1
        # A document opened from the equivalent string path must agree.
        from_str = PdfDocument("tests/fixtures/1.pdf")
        assert from_path.version() == from_str.version()
        assert from_path.page_count() == from_str.page_count()
        assert from_path.extract_text(0) == from_str.extract_text(0)
        first_page = from_path.extract_text(0)
        assert len(first_page.strip()) > 0, "1.pdf has text on page 0"
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")


def test_context_manager():
    """Use PdfDocument in a with-statement and query it inside the block.

    Skipped when the fixture cannot be opened or read.
    """
    try:
        with PdfDocument("tests/fixtures/1.pdf") as document:
            assert document is not None
            assert document.page_count() == 7, "1.pdf has 7 pages"
            pdf_version = document.version()
            assert isinstance(pdf_version, tuple)
            assert len(pdf_version) == 2
            assert pdf_version[0] >= 1
            first_page = document.extract_text(0)
            assert isinstance(first_page, str), "1.pdf has text"
            assert len(first_page.strip()) > 0, "1.pdf has text"
        # Reaching this point means leaving the with-block raised nothing.
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")


def test_context_manager_with_pathlib():
    """Use the with-statement form when the document is given as a Path.

    Skipped when the fixture cannot be opened or converted.
    """
    fixture = Path("tests/fixtures/1.pdf")
    try:
        with PdfDocument(fixture) as document:
            assert document.page_count() == 7, "1.pdf has 7 pages"
            pdf_version = document.version()
            assert isinstance(pdf_version, tuple)
            assert len(pdf_version) == 2
            assert pdf_version[0] >= 1
            # The conversion result is discarded; only a clean call matters.
            document.to_markdown(0)
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")


def test_context_manager_exception_propagates():
    """Check that an exception raised inside the with-block reaches the caller.

    Skipped when the fixture cannot be opened.
    """
    try:
        # pytest.raises is the outer manager, so it only passes if the
        # ValueError escapes the PdfDocument with-block.
        with pytest.raises(ValueError):
            with PdfDocument("tests/fixtures/1.pdf") as document:
                document.page_count()
                raise ValueError("intentional")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")


# === From Bytes Tests ===


def test_from_bytes_matches_file():
    """Check that from_bytes() and the path constructor agree on one fixture.

    Version, page count and first-page text must match between the two
    documents. The test is skipped when the fixture file cannot be read,
    matching the other fixture-based tests in this module; failures while
    parsing or comparing still fail the test.
    """
    fixture = Path("tests/fixtures/simple.pdf")
    try:
        data = fixture.read_bytes()
    except OSError:
        # Missing or unreadable fixture: nothing to compare against.
        pytest.skip("Test fixture 'simple.pdf' not available")

    doc_path = PdfDocument("tests/fixtures/simple.pdf")
    doc_bytes = PdfDocument.from_bytes(data)

    assert doc_path.version() == doc_bytes.version()
    assert doc_path.page_count() == doc_bytes.page_count()
    assert doc_path.extract_text(0) == doc_bytes.extract_text(0)


def test_from_bytes_roundtrip():
    """Build a PDF with Pdf.from_text, reopen its bytes, and find the text."""
    from pdf_oxide import Pdf

    raw = Pdf.from_text("Hello from bytes!").to_bytes()
    reopened = PdfDocument.from_bytes(raw)

    assert reopened.page_count() >= 1
    assert "Hello from bytes!" in reopened.extract_text(0)


def test_from_bytes_invalid():
    """Check that from_bytes() rejects non-PDF data with an OSError."""
    garbage = b"not a pdf"
    # OSError is the same class as the IOError alias.
    with pytest.raises(OSError):
        PdfDocument.from_bytes(garbage)


# === Error Handling Tests ===


def test_error_handling_nonexistent_file():
    """Check that opening a missing file raises OSError with a useful message."""
    # OSError is the same class as the IOError alias.
    with pytest.raises(OSError) as caught:
        PdfDocument("nonexistent_file_that_does_not_exist.pdf")

    # Either of these fragments counts as a helpful message.
    message = str(caught.value)
    accepted = ("Failed to open PDF", "No such file")
    assert any(fragment in message for fragment in accepted)


def test_error_handling_invalid_page():
    """Check that extract_text() raises RuntimeError for an out-of-range page.

    Skipped when the fixture cannot be opened or queried.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        # Index well past the last page.
        missing_index = document.page_count() + 100

        with pytest.raises(RuntimeError) as caught:
            document.extract_text(missing_index)

        # The message should point at the failed extraction or the page.
        message = str(caught.value)
        assert "Failed to extract text" in message or "page" in message.lower()
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_error_handling_invalid_page_conversion():
    """Check that to_markdown() raises RuntimeError for an out-of-range page.

    Skipped when the fixture cannot be opened or queried.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        # Index well past the last page.
        missing_index = document.page_count() + 100

        with pytest.raises(RuntimeError):
            document.to_markdown(missing_index)
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_repr():
    """Check that repr() of a document names the class and shows a version.

    Skipped when the fixture cannot be opened.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        shown = repr(document)
        assert isinstance(shown, str)
        for fragment in ("PdfDocument", "version="):
            assert fragment in shown
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_multiple_operations():
    """Check that repeated calls on one document give consistent results.

    Skipped when the fixture cannot be opened or processed.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")

        # Asking twice for the version must give equal answers.
        assert document.version() == document.version()

        # Extracting the same page twice must give equal text.
        assert document.extract_text(0) == document.extract_text(0)

        # Both conversions are run before either result is checked.
        as_markdown = document.to_markdown(0)
        as_html = document.to_html(0)
        assert isinstance(as_markdown, str)
        assert isinstance(as_html, str)
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_image_output_dir():
    """Check to_markdown() with an image output directory and without images.

    The image directory lives inside a temporary directory rather than in
    the current working directory, so anything the conversion may write
    there is removed when the test ends. Skipped when the fixture cannot
    be opened or converted.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as scratch:
        image_dir = str(Path(scratch) / "test_images")
        try:
            doc = PdfDocument("tests/fixtures/simple.pdf")

            # Convert with image output directory specified
            markdown = doc.to_markdown(0, image_output_dir=image_dir)
            assert isinstance(markdown, str)

            # Convert without images
            markdown = doc.to_markdown(0, include_images=False)
            assert isinstance(markdown, str)
        except (OSError, RuntimeError):
            pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_all_options_combined():
    """Check to_markdown() and to_html() with every option passed at once.

    The Markdown image directory lives inside a temporary directory rather
    than in the current working directory, so anything the conversion may
    write there is removed when the test ends. Skipped when the fixture
    cannot be opened or converted.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as scratch:
        image_dir = str(Path(scratch) / "output")
        try:
            doc = PdfDocument("tests/fixtures/simple.pdf")

            # Test with all options specified
            markdown = doc.to_markdown(
                0,
                preserve_layout=True,
                detect_headings=False,
                include_images=True,
                image_output_dir=image_dir,
            )
            assert isinstance(markdown, str)

            html = doc.to_html(
                0,
                preserve_layout=True,
                detect_headings=True,
                include_images=False,
                image_output_dir=None,
            )
            assert isinstance(html, str)
        except (OSError, RuntimeError):
            pytest.skip("Test fixture 'simple.pdf' not available or invalid")


# === PDF Creation Tests ===


def test_pdf_from_markdown():
    """Build a PDF from a small Markdown document and inspect its bytes."""
    from pdf_oxide import Pdf

    source = (
        "# Test Document\n"
        "\n"
        "This is a **test** paragraph.\n"
        "\n"
        "## Section 1\n"
        "\n"
        "Some text content.\n"
    )
    built = Pdf.from_markdown(source)
    assert built is not None

    raw = built.to_bytes()
    assert isinstance(raw, bytes)
    assert len(raw) > 0
    # The serialized output must begin with the PDF header marker.
    assert raw[:4] == b"%PDF"


def test_pdf_from_markdown_with_options():
    """Build a PDF from Markdown while passing title and author keywords."""
    from pdf_oxide import Pdf

    metadata = {"title": "Test Title", "author": "Test Author"}
    built = Pdf.from_markdown("# Hello World", **metadata)
    assert built is not None

    raw = built.to_bytes()
    assert len(raw) > 0


def test_pdf_from_html():
    """Build a PDF from a small HTML fragment and inspect its bytes."""
    from pdf_oxide import Pdf

    markup = """
    <h1>Test Document</h1>
    <p>This is a <strong>test</strong> paragraph.</p>
    """
    built = Pdf.from_html(markup)
    assert built is not None

    raw = built.to_bytes()
    assert isinstance(raw, bytes)
    assert len(raw) > 0
    # The serialized output must begin with the PDF header marker.
    assert raw[:4] == b"%PDF"


def test_pdf_from_text():
    """Build a PDF from plain text and inspect its bytes."""
    from pdf_oxide import Pdf

    built = Pdf.from_text("Hello, World!\n\nThis is plain text.")
    assert built is not None

    raw = built.to_bytes()
    assert len(raw) > 0
    # The serialized output must begin with the PDF header marker.
    assert raw[:4] == b"%PDF"


def test_pdf_save_to_file(tmp_path):
    """Save a generated PDF under tmp_path and check a non-empty file appears."""
    from pdf_oxide import Pdf

    target = tmp_path / "output.pdf"
    # save() is given the destination as a plain string path.
    Pdf.from_text("Test content").save(str(target))

    assert target.exists()
    assert target.stat().st_size > 0


# === Advanced Graphics Tests ===


def test_color_creation():
    """Construct Color objects from RGB components and from hex strings."""
    from pdf_oxide import Color

    # Three float components.
    from_rgb = Color(1.0, 0.0, 0.0)
    assert from_rgb is not None

    # Hex strings, with and without the leading '#'.
    for hex_code in ("#FF0000", "00FF00"):
        from_hex = Color.from_hex(hex_code)
        assert from_hex is not None


def test_color_predefined():
    """Call each predefined Color factory and check it returns an object."""
    from pdf_oxide import Color

    for factory_name in ("black", "white", "red", "green", "blue"):
        factory = getattr(Color, factory_name)
        assert factory() is not None


def test_blend_modes():
    """Call every BlendMode factory and check it returns an object."""
    from pdf_oxide import BlendMode

    mode_names = (
        "NORMAL",
        "MULTIPLY",
        "SCREEN",
        "OVERLAY",
        "DARKEN",
        "LIGHTEN",
        "COLOR_DODGE",
        "COLOR_BURN",
        "HARD_LIGHT",
        "SOFT_LIGHT",
        "DIFFERENCE",
        "EXCLUSION",
    )
    # Each name is looked up on BlendMode and called with no arguments.
    for mode_name in mode_names:
        assert getattr(BlendMode, mode_name)() is not None


def test_ext_gstate():
    """Build ExtGState objects through its chained setter methods."""
    from pdf_oxide import BlendMode, ExtGState

    # A single setter call.
    fill_only = ExtGState().fill_alpha(0.5)
    assert fill_only is not None

    # Several setters applied one after another on the returned object.
    state = ExtGState()
    state = state.fill_alpha(0.5)
    state = state.stroke_alpha(0.8)
    state = state.blend_mode(BlendMode.MULTIPLY())
    assert state is not None


def test_ext_gstate_presets():
    """Exercise ExtGState.semi_transparent() and blend-mode-only states."""
    from pdf_oxide import BlendMode, ExtGState

    assert ExtGState.semi_transparent() is not None

    # Blend-mode states are built via blend_mode() rather than a preset.
    with_multiply = ExtGState().blend_mode(BlendMode.MULTIPLY())
    assert with_multiply is not None

    with_screen = ExtGState().blend_mode(BlendMode.SCREEN())
    assert with_screen is not None


def test_linear_gradient():
    """Build a LinearGradient step by step with start, end and two stops."""
    from pdf_oxide import Color, LinearGradient

    # Each setter's return value is carried forward to the next call.
    ramp = LinearGradient()
    ramp = ramp.start(0.0, 0.0)
    ramp = ramp.end(100.0, 100.0)
    ramp = ramp.add_stop(0.0, Color.red())
    ramp = ramp.add_stop(1.0, Color.blue())
    assert ramp is not None


def test_linear_gradient_presets():
    """Exercise the horizontal/vertical presets and a manual two-stop ramp."""
    from pdf_oxide import Color, LinearGradient

    # Preset along the horizontal axis.
    across = LinearGradient.horizontal(100.0, Color.black(), Color.white())
    assert across is not None

    # Preset along the vertical axis.
    down = LinearGradient.vertical(100.0, Color.black(), Color.white())
    assert down is not None

    # Two stops added by hand, without start/end points.
    manual = LinearGradient()
    manual = manual.add_stop(0.0, Color.black())
    manual = manual.add_stop(1.0, Color.white())
    assert manual is not None


def test_radial_gradient():
    """Build a RadialGradient step by step with two circles and two stops."""
    from pdf_oxide import Color, RadialGradient

    # Each setter's return value is carried forward to the next call.
    glow = RadialGradient()
    glow = glow.inner_circle(50.0, 50.0, 0.0)
    glow = glow.outer_circle(50.0, 50.0, 50.0)
    glow = glow.add_stop(0.0, Color.white())
    glow = glow.add_stop(1.0, Color.black())
    assert glow is not None


def test_radial_gradient_centered():
    """Check that RadialGradient.centered() returns an object."""
    from pdf_oxide import RadialGradient

    assert RadialGradient.centered(50.0, 50.0, 50.0) is not None


def test_line_cap():
    """Call every LineCap factory and check it returns an object."""
    from pdf_oxide import LineCap

    for cap_name in ("BUTT", "ROUND", "SQUARE"):
        assert getattr(LineCap, cap_name)() is not None


def test_line_join():
    """Call every LineJoin factory and check it returns an object."""
    from pdf_oxide import LineJoin

    for join_name in ("MITER", "ROUND", "BEVEL"):
        assert getattr(LineJoin, join_name)() is not None


def test_pattern_presets():
    """Check that each PatternPresets factory returns non-empty bytes."""
    from pdf_oxide import Color, PatternPresets

    def _expect_content(produced):
        # Shared check applied to the result of every preset.
        assert isinstance(produced, bytes)
        assert len(produced) > 0

    # Each preset is generated and checked before the next one is built.
    _expect_content(PatternPresets.horizontal_stripes(10.0, 10.0, 5.0, Color.red()))
    _expect_content(PatternPresets.vertical_stripes(10.0, 10.0, 5.0, Color.blue()))
    _expect_content(PatternPresets.checkerboard(10.0, Color.white(), Color.black()))
    _expect_content(PatternPresets.dots(10.0, 2.0, Color.red()))
    _expect_content(PatternPresets.diagonal_lines(10.0, 0.5, Color.black()))
    _expect_content(PatternPresets.crosshatch(10.0, 0.5, Color.black()))


# === Extraction & Structure Tests ===


def test_extract_images():
    """Check that extract_images() returns a list of dicts with size and color keys.

    Skipped when the fixture cannot be opened or read.
    """
    required_keys = ("width", "height", "color_space")
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        found = document.extract_images(0)
        assert isinstance(found, list)
        # An empty list passes; every entry present must have the keys.
        for entry in found:
            assert isinstance(entry, dict)
            for key in required_keys:
                assert key in entry
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_extract_spans():
    """Check that extract_spans() returns a list of objects with span attributes.

    Skipped when the fixture cannot be opened or read.
    """
    expected_attrs = (
        "text",
        "bbox",
        "font_name",
        "font_size",
        "is_bold",
        "is_italic",
    )
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        found = document.extract_spans(0)
        assert isinstance(found, list)
        # An empty list passes; every span present must expose each attribute.
        for piece in found:
            for attr in expected_attrs:
                assert hasattr(piece, attr)
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_extract_spans_repr():
    """Check that repr() of the first extracted span mentions "TextSpan".

    Nothing is asserted when the page yields no spans. Skipped when the
    fixture cannot be opened or read.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        found = document.extract_spans(0)
        if found:
            assert "TextSpan" in repr(found[0])
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_get_outline():
    """Check that get_outline() returns None or a list of title/children dicts.

    Skipped when the fixture cannot be opened or read.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        bookmarks = document.get_outline()
        assert bookmarks is None or isinstance(bookmarks, list)
        # None and an empty list both mean there are no entries to inspect.
        for entry in bookmarks or []:
            assert isinstance(entry, dict)
            assert "title" in entry
            assert "children" in entry
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_get_annotations():
    """Check that get_annotations() returns a list of dicts with a subtype key.

    Skipped when the fixture cannot be opened or read.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        found = document.get_annotations(0)
        assert isinstance(found, list)
        # An empty list passes; every entry present must carry "subtype".
        for entry in found:
            assert isinstance(entry, dict) and "subtype" in entry
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_extract_paths():
    """Check that extract_paths() returns a list of dicts with a bbox key.

    Skipped when the fixture cannot be opened or read.
    """
    try:
        document = PdfDocument("tests/fixtures/simple.pdf")
        found = document.extract_paths(0)
        assert isinstance(found, list)
        # An empty list passes; every entry present must carry "bbox".
        for entry in found:
            assert isinstance(entry, dict) and "bbox" in entry
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")


def test_extract_paths_operations():
    """Check that ``extract_paths`` returns operations with coordinates.

    Each path dict must expose an ``"operations"`` list whose length equals
    ``"operations_count"``. Each operation dict must have a known ``"op"``
    value plus the coordinate keys that operation type requires.
    Skips only when the fixture cannot be opened.
    """
    # Guard the fixture load alone so errors from extract_paths() fail the
    # test instead of being reported as a skip.
    try:
        doc = PdfDocument("tests/fixtures/1.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")

    # Keys each operation type must carry; close_path needs none.
    required_keys = {
        "move_to": ("x", "y"),
        "line_to": ("x", "y"),
        "curve_to": ("cx1", "cy1", "cx2", "cy2", "x", "y"),
        "rectangle": ("x", "y", "width", "height"),
        "close_path": (),
    }

    paths = doc.extract_paths(0)
    assert isinstance(paths, list)
    assert len(paths) > 0, "Expected at least one path"

    for path in paths:
        assert "operations" in path, "Path dict should contain 'operations' field"
        assert isinstance(path["operations"], list)
        assert len(path["operations"]) == path["operations_count"]

        for op in path["operations"]:
            assert isinstance(op, dict)
            assert "op" in op, "Each operation should have an 'op' field"
            op_type = op["op"]
            assert op_type in required_keys
            assert all(key in op for key in required_keys[op_type])


def test_extract_images_invalid_page():
    """An out-of-range page index must make ``extract_images`` raise RuntimeError.

    Skips only when the fixture cannot be opened.
    """
    try:
        doc = PdfDocument("tests/fixtures/simple.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")

    # Kept outside the fixture guard so an unexpected OSError from the call
    # fails the test instead of being reported as a skip.
    with pytest.raises(RuntimeError):
        doc.extract_images(999)


def test_extract_spans_invalid_page():
    """An out-of-range page index must make ``extract_spans`` raise RuntimeError.

    Skips only when the fixture cannot be opened.
    """
    try:
        doc = PdfDocument("tests/fixtures/simple.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")

    # Kept outside the fixture guard so an unexpected OSError from the call
    # fails the test instead of being reported as a skip.
    with pytest.raises(RuntimeError):
        doc.extract_spans(999)


def test_get_annotations_invalid_page():
    """An out-of-range page index must make ``get_annotations`` raise RuntimeError.

    Skips only when the fixture cannot be opened.
    """
    try:
        doc = PdfDocument("tests/fixtures/simple.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")

    # Kept outside the fixture guard so an unexpected OSError from the call
    # fails the test instead of being reported as a skip.
    with pytest.raises(RuntimeError):
        doc.get_annotations(999)


def test_extract_paths_invalid_page():
    """An out-of-range page index must make ``extract_paths`` raise RuntimeError.

    Skips only when the fixture cannot be opened.
    """
    try:
        doc = PdfDocument("tests/fixtures/simple.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")

    # Kept outside the fixture guard so an unexpected OSError from the call
    # fails the test instead of being reported as a skip.
    with pytest.raises(RuntimeError):
        doc.extract_paths(999)


# === Image Bytes Extraction Tests ===


def test_extract_image_bytes_empty():
    """Check the result shape of ``extract_image_bytes`` on the simple fixture.

    The result must be a list. Any entry it contains must be a dict with
    ``width``, ``height``, ``format`` and ``data`` keys, where ``data`` is
    ``bytes`` and ``format`` is ``"png"``. Skips only when the fixture
    cannot be opened.
    """
    # Guard the fixture load alone so errors from extract_image_bytes()
    # fail the test instead of being reported as a skip.
    try:
        doc = PdfDocument("tests/fixtures/simple.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")

    result = doc.extract_image_bytes(0)
    assert isinstance(result, list)
    for img in result:
        assert isinstance(img, dict)
        for key in ("width", "height", "format", "data"):
            assert key in img
        assert isinstance(img["data"], bytes)
        assert img["format"] == "png"


# === PDF from Images Tests ===


def test_pdf_from_image_bytes():
    """Build a PDF from in-memory image bytes and verify it serializes as a PDF."""
    from pdf_oxide import Pdf

    # The helper's payload is JPEG data despite the ``png`` in its name.
    image_data = _create_minimal_png()
    document = Pdf.from_image_bytes(image_data)
    assert document is not None

    serialized = document.to_bytes()
    assert len(serialized) > 0
    # Serialized output must open with the PDF magic header.
    assert serialized[:4] == b"%PDF"


def test_pdf_from_image(tmp_path):
    """Build a PDF from a single image file written into ``tmp_path``."""
    from pdf_oxide import Pdf

    source_file = tmp_path / "test.jpg"
    source_file.write_bytes(_create_minimal_png())

    document = Pdf.from_image(str(source_file))
    assert document is not None
    assert len(document.to_bytes()) > 0


def test_pdf_from_images(tmp_path):
    """Build a PDF from two image files written into ``tmp_path``."""
    from pdf_oxide import Pdf

    image_files = [tmp_path / file_name for file_name in ("img1.jpg", "img2.jpg")]
    for image_file in image_files:
        image_file.write_bytes(_create_minimal_png())

    document = Pdf.from_images([str(image_file) for image_file in image_files])
    assert document is not None
    assert len(document.to_bytes()) > 0


def _create_minimal_png():
    """Return the bytes of a minimal, known-good 1x1 image.

    Despite the ``png`` in the name, the payload is JPEG data: it starts
    with the SOI marker (``FF D8``) and ends with EOI (``FF D9``). The hex
    rows below are grouped by JPEG segment to keep the blob reviewable.
    """
    hex_rows = (
        # SOI + APP0 ("JFIF") header
        "FF D8 FF E0 00 10 4A 46 49 46 00 01 01 00 00 01 00 01 00 00",
        # DQT: one 64-entry quantization table
        "FF DB 00 43 00",
        "08 06 06 07 06 05 08 07 07 07 09 09 08 0A 0C 14",
        "0D 0C 0B 0B 0C 19 12 13 0F 14 1D 1A 1F 1E 1D 1A",
        "1C 1C 20 24 2E 27 20 22 2C 23 1C 1C 28 37 29 2C",
        "30 31 34 34 34 1F 27 39 3D 38 32 3C 2E 33 34 32",
        # SOF0: 8-bit precision, height 1, width 1, one component
        "FF C0 00 0B 08 00 01 00 01 01 01 11 00",
        # DHT (table id 0x00): 16 code-length counts, then 12 symbols
        "FF C4 00 1F 00",
        "00 01 05 01 01 01 01 01 01 00 00 00 00 00 00 00",
        "00 01 02 03 04 05 06 07 08 09 0A 0B",
        # DHT (table id 0x10): 16 code-length counts, then 162 symbols
        "FF C4 00 B5 10",
        "00 02 01 03 03 02 04 03 05 05 04 04 00 00 01 7D",
        "01 02 03 00 04 11 05 12 21 31 41 06 13 51 61 07",
        "22 71 14 32 81 91 A1 08 23 42 B1 C1 15 52 D1 F0",
        "24 33 62 72 82 09 0A 16 17 18 19 1A 25 26 27 28",
        "29 2A 34 35 36 37 38 39 3A 43 44 45 46 47 48 49",
        "4A 53 54 55 56 57 58 59 5A 63 64 65 66 67 68 69",
        "6A 73 74 75 76 77 78 79 7A 83 84 85 86 87 88 89",
        "8A 92 93 94 95 96 97 98 99 9A A2 A3 A4 A5 A6 A7",
        "A8 A9 AA B2 B3 B4 B5 B6 B7 B8 B9 BA C2 C3 C4 C5",
        "C6 C7 C8 C9 CA D2 D3 D4 D5 D6 D7 D8 D9 DA E1 E2",
        "E3 E4 E5 E6 E7 E8 E9 EA F1 F2 F3 F4 F5 F6 F7 F8",
        "F9 FA",
        # SOS header followed by the entropy-coded scan data
        "FF DA 00 08 01 01 00 00 3F 00",
        "FB D5 DB 20 A8 F9",
        # EOI
        "FF D9",
    )
    # bytes.fromhex ignores the spaces that separate whole bytes.
    return bytes.fromhex(" ".join(hex_rows))


# === Form Flattening Tests ===


def test_flatten_forms():
    """Check that ``flatten_forms`` completes without raising.

    The call must succeed even when the document has no form fields.
    Skips only when the fixture cannot be opened.
    """
    try:
        doc = PdfDocument("tests/fixtures/simple.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")

    # Deliberately outside the fixture guard: this test exists to prove the
    # call does not raise, so a RuntimeError here must fail, not skip.
    doc.flatten_forms()


# === PDF Merging Tests ===


def test_merge_from_bytes():
    """Merge one in-memory PDF into a document opened from disk.

    The first PDF is saved to a temporary file and reopened as a
    ``PdfDocument``; the second is merged in from its serialized bytes and
    must contribute exactly one page.
    """
    import os
    import tempfile

    from pdf_oxide import Pdf

    base_pdf = Pdf.from_text("Page 1")
    extra_pdf = Pdf.from_text("Page 2")

    # Reserve a path and close the handle straight away: on Windows an open
    # handle would lock the file against the save/open calls below.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as handle:
        saved_path = handle.name

    try:
        base_pdf.save(saved_path)
        doc = PdfDocument(saved_path)
        merged_pages = doc.merge_from(extra_pdf.to_bytes())
        assert merged_pages == 1, "Should merge 1 page"
    finally:
        # delete=False above means cleanup is our responsibility.
        os.unlink(saved_path)


# === File Embedding Tests ===


def test_embed_file():
    """Check that embedding a small file into a PDF completes without raising.

    Skips only when the fixture cannot be opened.
    """
    try:
        doc = PdfDocument("tests/fixtures/simple.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")

    # Deliberately outside the fixture guard: success here means "no
    # exception", so a RuntimeError from embed_file must fail, not skip.
    doc.embed_file("readme.txt", b"Hello embedded file")


# === Page Labels Tests ===


def test_page_labels():
    """Check that page labels come back as a list of dicts.

    Every label dict must carry ``"start_page"`` and ``"style"`` keys.
    Skips only when the fixture cannot be opened.
    """
    # Guard the fixture load alone so errors from page_labels() fail the
    # test instead of being reported as a skip.
    try:
        doc = PdfDocument("tests/fixtures/simple.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")

    labels = doc.page_labels()
    assert isinstance(labels, list)
    for label in labels:
        assert isinstance(label, dict)
        assert "start_page" in label
        assert "style" in label


# === XMP Metadata Tests ===


def test_xmp_metadata():
    """Check that ``xmp_metadata`` returns either ``None`` or a dict.

    Skips only when the fixture cannot be opened.
    """
    # Guard the fixture load alone so errors from xmp_metadata() fail the
    # test instead of being reported as a skip.
    try:
        doc = PdfDocument("tests/fixtures/simple.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture 'simple.pdf' not available or invalid")

    metadata = doc.xmp_metadata()
    assert metadata is None or isinstance(metadata, dict)


class _PdfOxideLogCapture(logging.Handler):
    """In-memory handler that stores every log record handed to it.

    Intended to be attached directly to the ``pdf_oxide`` logger instead of
    relying on pytest's ``caplog``: ``pyo3_log`` emits on child loggers such
    as ``pdf_oxide.xref`` / ``pdf_oxide.document``, and caplog's per-logger
    level plumbing doesn't always surface those child records reliably.
    """

    def __init__(self):
        # Accept everything from DEBUG upwards; callers filter afterwards.
        super().__init__(logging.DEBUG)
        # Deliberately unannotated: Python 3.8 is supported and the PEP 585
        # ``list[X]`` generic syntax needs 3.9+. ``typing.List`` would work
        # too, but no annotation is simpler for a test helper.
        self.records = []

    def emit(self, record: logging.LogRecord) -> None:
        """Store *record* in ``self.records`` in arrival order."""
        self.records.append(record)


def _capture_pdf_oxide_logs():
    """Attach a capturing handler to the ``pdf_oxide`` logger at DEBUG level.

    Propagation is switched off while capturing. Returns the tuple
    ``(logger, handler, previous_level, previous_propagate)`` so the caller
    can undo every change afterwards.
    """
    pdf_logger = logging.getLogger("pdf_oxide")
    # Snapshot the state we are about to overwrite.
    saved_level, saved_propagate = pdf_logger.level, pdf_logger.propagate

    capture = _PdfOxideLogCapture()
    pdf_logger.addHandler(capture)
    pdf_logger.setLevel(logging.DEBUG)
    pdf_logger.propagate = False
    return pdf_logger, capture, saved_level, saved_propagate


def test_log_level_issue_283_regression():
    """Regression test for #283: the extraction pipeline honors the log level.

    The reproduction from the issue (``extract_text`` on a real PDF) runs
    twice inside one test, so both phases share a single process and a
    single pyo3_log cache state:

    1. After ``set_log_level('debug')`` the pipeline must emit DEBUG records
       on the ``pdf_oxide`` logger tree. This is a sanity check: if it
       fails, either the ``pyo3_log`` bridge is broken or the Rust macros
       emit nothing, and phase 2 would pass vacuously.
    2. After ``set_log_level('error')`` no DEBUG / TRACE / INFO / WARN
       record may leak through. This is the actual regression: before the
       fix the ``extract_log_*!`` macros bypassed the ``log`` crate via
       ``eprintln!`` and so ignored both ``pdf_oxide.set_log_level`` and
       ``logging.basicConfig``.

    The debug phase has to come first. Once ``log::set_max_level`` is
    lowered to Error, the Rust ``log::debug!`` calls are short-circuited and
    never reach pyo3_log, and raising the level back to Debug in the same
    process doesn't always re-enable them because of pyo3_log's per-logger
    level cache.
    """
    import pdf_oxide

    try:
        doc = PdfDocument("tests/fixtures/1.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")

    def extract_every_page():
        # Run text extraction over the whole document to provoke logging.
        for page_index in range(doc.page_count()):
            doc.extract_text(page_index)

    # Remember the Rust-side level so it is restored exactly rather than
    # reset to a hard-coded value; this avoids leaking global state into
    # other tests in the same process.
    saved_rust_level = pdf_oxide.get_log_level()
    logger, handler, saved_level, saved_propagate = _capture_pdf_oxide_logs()
    try:
        # Phase 1: at DEBUG, at least one DEBUG record must be produced.
        pdf_oxide.set_log_level("debug")
        extract_every_page()
        saw_debug = any(rec.levelno == logging.DEBUG for rec in handler.records)
        assert saw_debug, (
            "expected at least one DEBUG record from pdf_oxide at DEBUG level — "
            "if this fails, the pyo3_log bridge is broken and the suppression "
            "assertion below would pass vacuously"
        )

        # Phase 2: at ERROR, nothing below ERROR may get through.
        handler.records.clear()
        pdf_oxide.set_log_level("error")
        extract_every_page()
        leaked = [rec for rec in handler.records if rec.levelno < logging.ERROR]
        assert not leaked, (
            f"DEBUG/TRACE/INFO/WARN records leaked at ERROR level (regression "
            f"of #283): {[(r.name, r.levelname, r.getMessage()) for r in leaked[:5]]}"
        )
    finally:
        pdf_oxide.set_log_level(saved_rust_level)
        logger.removeHandler(handler)
        logger.setLevel(saved_level)
        logger.propagate = saved_propagate


# === Word/Line Extraction Tests ===


def test_extract_words_basic():
    """Check that ``extract_words`` yields non-empty words with text and bbox.

    Skips only when the fixture cannot be opened.
    """
    # Guard the fixture load alone so errors from extract_words() fail the
    # test instead of being reported as a skip.
    try:
        doc = PdfDocument("tests/fixtures/1.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")

    words = doc.extract_words(0)
    assert isinstance(words, list)
    assert len(words) > 0
    for w in words:
        assert hasattr(w, "text")
        assert hasattr(w, "bbox")
        assert isinstance(w.text, str)
        assert len(w.text) > 0


def test_extract_words_with_threshold():
    """Check ``extract_words`` with a custom ``word_gap_threshold``.

    A threshold of 0.5 must yield a non-empty list with at least as many
    words as the default settings. Skips only when the fixture cannot be
    opened.
    """
    # Guard the fixture load alone so errors from extract_words() fail the
    # test instead of being reported as a skip.
    try:
        doc = PdfDocument("tests/fixtures/1.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")

    words_default = doc.extract_words(0)
    words_tight = doc.extract_words(0, word_gap_threshold=0.5)
    assert isinstance(words_tight, list)
    assert len(words_tight) > 0
    # A tighter gap threshold is expected to split text into at least as
    # many words as the default.
    assert len(words_tight) >= len(words_default)


def test_extract_words_with_region_and_threshold():
    """Check ``extract_words`` with both ``region`` and ``word_gap_threshold``.

    With the same threshold, the region-restricted call must not return
    more words than the whole-page call. Skips only when the fixture cannot
    be opened.
    """
    # Guard the fixture load alone so errors from extract_words() fail the
    # test instead of being reported as a skip.
    try:
        doc = PdfDocument("tests/fixtures/1.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")

    words = doc.extract_words(0, region=(0, 0, 300, 400), word_gap_threshold=2.0)
    assert isinstance(words, list)
    # Compare against the full page extracted with the same threshold.
    all_words = doc.extract_words(0, word_gap_threshold=2.0)
    assert len(words) <= len(all_words)


def test_extract_text_lines_basic():
    """Check that ``extract_text_lines`` yields non-empty lines with text and bbox.

    Skips only when the fixture cannot be opened.
    """
    # Guard the fixture load alone so errors from extract_text_lines() fail
    # the test instead of being reported as a skip.
    try:
        doc = PdfDocument("tests/fixtures/1.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")

    lines = doc.extract_text_lines(0)
    assert isinstance(lines, list)
    assert len(lines) > 0
    for line in lines:
        assert hasattr(line, "text")
        assert hasattr(line, "bbox")
        assert isinstance(line.text, str)
        assert len(line.text) > 0


def test_extract_text_lines_with_thresholds():
    """Check ``extract_text_lines`` with custom word and line gap thresholds.

    Skips only when the fixture cannot be opened.
    """
    # Guard the fixture load alone so errors from extract_text_lines() fail
    # the test instead of being reported as a skip.
    try:
        doc = PdfDocument("tests/fixtures/1.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")

    lines = doc.extract_text_lines(0, word_gap_threshold=2.0, line_gap_threshold=5.0)
    assert isinstance(lines, list)
    assert len(lines) > 0


# === Page Layout Params Tests ===


def test_page_layout_params():
    """Check the adaptive layout parameters computed for a page.

    The result must expose the expected attributes, report positive
    thresholds and medians, and mention ``LayoutParams`` in its ``repr``.
    Skips only when the fixture cannot be opened.
    """
    # Guard the fixture load alone so errors from page_layout_params() fail
    # the test instead of being reported as a skip.
    try:
        doc = PdfDocument("tests/fixtures/1.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")

    params = doc.page_layout_params(0)
    expected_attrs = (
        "word_gap_threshold",
        "line_gap_threshold",
        "median_char_width",
        "median_font_size",
        "median_line_spacing",
        "column_count",
    )
    for attr in expected_attrs:
        assert hasattr(params, attr)

    # Sanity checks: thresholds and medians should be positive.
    assert params.word_gap_threshold > 0
    assert params.line_gap_threshold > 0
    assert params.median_char_width > 0
    assert params.median_font_size > 0

    assert "LayoutParams" in repr(params)


def test_page_layout_params_invalid_page():
    """An out-of-range page index must make ``page_layout_params`` raise RuntimeError.

    Skips only when the fixture cannot be opened.
    """
    try:
        doc = PdfDocument("tests/fixtures/1.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")

    # Kept outside the fixture guard so an unexpected OSError from the call
    # fails the test instead of being reported as a skip.
    with pytest.raises(RuntimeError):
        doc.page_layout_params(9999)


# === ExtractionProfile Tests ===


def test_extraction_profile_inspect():
    """Inspect the ``form`` profile: its name, attribute types and ``repr``."""
    from pdf_oxide import ExtractionProfile

    form_profile = ExtractionProfile.form()
    assert form_profile.name == "Form"

    # Numeric tuning knobs are all expected to be floats.
    float_fields = (
        "tj_offset_threshold",
        "word_margin_ratio",
        "space_threshold_em_ratio",
        "space_char_multiplier",
    )
    for field_name in float_fields:
        assert isinstance(getattr(form_profile, field_name), float)
    assert isinstance(form_profile.use_adaptive_threshold, bool)

    rendered = repr(form_profile)
    for fragment in ("ExtractionProfile", "Form"):
        assert fragment in rendered


def test_extraction_profile_available():
    """``ExtractionProfile.available()`` lists at least nine profile names."""
    from pdf_oxide import ExtractionProfile

    profile_names = ExtractionProfile.available()
    assert isinstance(profile_names, list)
    assert len(profile_names) >= 9
    # Spot-check two names that must be present.
    for expected_name in ("Form", "Academic"):
        assert expected_name in profile_names


def test_extraction_profile_all_constructors():
    """Every static profile constructor returns a profile with a non-empty name."""
    from pdf_oxide import ExtractionProfile

    constructor_names = (
        "conservative",
        "aggressive",
        "balanced",
        "academic",
        "policy",
        "form",
        "government",
        "scanned_ocr",
        "adaptive",
    )
    # Resolve all constructors up front, then exercise each in turn.
    factories = [getattr(ExtractionProfile, ctor_name) for ctor_name in constructor_names]
    for factory in factories:
        built = factory()
        assert isinstance(built.name, str)
        assert len(built.name) > 0


def test_extract_words_with_profile():
    """Check ``extract_words`` when given the ``form`` extraction profile.

    Skips only when the fixture cannot be opened.
    """
    from pdf_oxide import ExtractionProfile

    # Guard the fixture load alone so errors from the profile constructor or
    # extract_words() fail the test instead of being reported as a skip.
    try:
        doc = PdfDocument("tests/fixtures/1.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")

    profile = ExtractionProfile.form()
    words = doc.extract_words(0, profile=profile)
    assert isinstance(words, list)
    assert len(words) > 0
    for w in words:
        assert isinstance(w.text, str)


def test_extract_text_lines_with_profile():
    """Check ``extract_text_lines`` when given the ``academic`` extraction profile.

    Skips only when the fixture cannot be opened.
    """
    from pdf_oxide import ExtractionProfile

    # Guard the fixture load alone so errors from the profile constructor or
    # extract_text_lines() fail the test instead of being reported as a skip.
    try:
        doc = PdfDocument("tests/fixtures/1.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")

    profile = ExtractionProfile.academic()
    lines = doc.extract_text_lines(0, profile=profile)
    assert isinstance(lines, list)
    assert len(lines) > 0
    for line in lines:
        assert isinstance(line.text, str)


def test_extract_words_profile_and_threshold():
    """Check ``extract_words`` with a profile plus a ``word_gap_threshold`` override.

    Skips only when the fixture cannot be opened.
    """
    from pdf_oxide import ExtractionProfile

    # Guard the fixture load alone so errors from the profile constructor or
    # extract_words() fail the test instead of being reported as a skip.
    try:
        doc = PdfDocument("tests/fixtures/1.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")

    profile = ExtractionProfile.aggressive()
    words = doc.extract_words(0, word_gap_threshold=1.5, profile=profile)
    assert isinstance(words, list)
    assert len(words) > 0


def test_extract_text_lines_profile_and_thresholds():
    """Check ``extract_text_lines`` with a profile plus both threshold overrides.

    Skips only when the fixture cannot be opened.
    """
    from pdf_oxide import ExtractionProfile

    # Guard the fixture load alone so errors from the profile constructor or
    # extract_text_lines() fail the test instead of being reported as a skip.
    try:
        doc = PdfDocument("tests/fixtures/1.pdf")
    except (OSError, RuntimeError):
        pytest.skip("Test fixture '1.pdf' not available or invalid")

    profile = ExtractionProfile.policy()
    lines = doc.extract_text_lines(
        0,
        word_gap_threshold=2.0,
        line_gap_threshold=5.0,
        profile=profile,
    )
    assert isinstance(lines, list)
    assert len(lines) > 0


# Note: To run these tests successfully, you'll need to:
# 1. Install maturin: pip install maturin
# 2. Build the extension: maturin develop
# 3. Install pytest: pip install pytest
# 4. Create test PDF fixtures in tests/fixtures/
# 5. Run tests: pytest tests/test_python.py