List of all items
Structs
- api::batch::BatchFileResult
- api::batch::BatchRequest
- api::batch::BatchResult
- api::config::ProcessingConfig
- api::filter::FilterConfig
- models::bbox::BoundingBox
- models::bbox::MultiBoundingBox
- models::bbox::Vertex
- models::chunks::ImageChunk
- models::chunks::LineArtChunk
- models::chunks::LineChunk
- models::chunks::TextChunk
- models::document::PdfDocument
- models::list::ListBody
- models::list::ListInterval
- models::list::ListItem
- models::list::ListItemInfo
- models::list::ListLabel
- models::list::PDFList
- models::semantic::SemanticCaption
- models::semantic::SemanticFigure
- models::semantic::SemanticFormula
- models::semantic::SemanticHeaderOrFooter
- models::semantic::SemanticHeading
- models::semantic::SemanticNumberHeading
- models::semantic::SemanticParagraph
- models::semantic::SemanticPicture
- models::semantic::SemanticTable
- models::semantic::SemanticTextNode
- models::table::TableBorder
- models::table::TableBorderCell
- models::table::TableBorderRow
- models::table::TableBordersCollection
- models::table::TableToken
- models::text::TextBlock
- models::text::TextColumn
- models::text::TextLine
- output::toc_builder::TableOfContents
- output::toc_builder::TocEntry
- pdf::annotation_enrichment::AnnotationGroup
- pdf::annotation_enrichment::AnnotationStats
- pdf::annotation_extractor::PdfAnnotation
- pdf::bookmark_extractor::Bookmark
- pdf::chunk_parser::PageChunks
- pdf::encryption::EncryptionInfo
- pdf::font::FontCache
- pdf::font::PdfFont
- pdf::form_extractor::FormField
- pdf::graphics_state::GraphicsState
- pdf::graphics_state::GraphicsStateStack
- pdf::graphics_state::Matrix
- pdf::graphics_state::TextState
- pdf::hyperlink_extractor::PdfHyperlink
- pdf::image_extractor::ExtractedImage
- pdf::loader::PdfMetadata
- pdf::loader::RawPdfDocument
- pdf::metadata_writer::PdfMetadata
- pdf::page_info::PageInfo
- pipeline::error_recovery::PipelineErrors
- pipeline::logging::PipelineTimer
- pipeline::logging::StageRecord
- pipeline::orchestrator::PipelineState
- pipeline::stages::column_detector::ColumnLayout
- pipeline::stages::footnote_linker::FootnoteLinkResult
- pipeline::stages::footnote_linker::LinkedFootnote
- tagged::struct_tree::McidTagInfo
- tagged::struct_tree::StructNode
- utils::diff::Change
- utils::diff::DiffResult
- utils::font_metrics_cache::FontMetricsCache
- utils::font_metrics_cache::TextMeasurement
- utils::image_dedup::DeduplicatedImage
- utils::image_dedup::ImageFingerprint
- utils::image_dedup::ImageRef
- utils::language_detector::LangDetection
- utils::layout_analysis::ContentDensity
- utils::layout_analysis::PageMargins
- utils::sanitizer::SanitizationRule
- utils::statistics::ModeWeightStatistics
- utils::statistics::TextStyle
- utils::xref_index::CrossReferenceIndex
- utils::xref_index::XRefEntry
Enums
- EdgePdfError
- api::config::HybridBackend
- api::config::HybridMode
- api::config::ImageFormat
- api::config::ImageOutput
- api::config::OutputFormat
- api::config::ReadingOrder
- api::config::TableMethod
- models::content::ContentElement
- models::enums::PdfLayer
- models::enums::SemanticType
- models::enums::TextAlignment
- models::enums::TextFormat
- models::enums::TextType
- models::enums::TriageDecision
- models::table::TableTokenType
- pdf::annotation_extractor::AnnotationType
- pdf::form_extractor::FormFieldType
- pipeline::error_recovery::PageResult
- utils::diff::ChangeKind
- utils::layout_analysis::LayoutType
Functions
- api::batch::collect_pdf_files
- api::batch::collect_pdf_files_recursive
- api::batch::process_batch
- api::config_loader::config_to_json
- api::config_loader::load_config_from_file
- api::config_loader::merge_configs
- api::config_loader::parse_config_json
- convert
- convert_bytes
- output::csv::to_csv
- output::docx::to_docx
- output::html::to_html
- output::json::to_json_string
- output::json::write_json
- output::legacy_json::to_legacy_json_string
- output::legacy_json::to_legacy_json_value
- output::markdown::to_markdown
- output::text::to_text
- pdf::annotation_enrichment::annotations_to_markdown
- pdf::annotation_enrichment::compute_stats
- pdf::annotation_enrichment::filter_user_annotations
- pdf::annotation_enrichment::filter_with_content
- pdf::annotation_enrichment::group_by_type_and_page
- pdf::annotation_extractor::extract_annotations
- pdf::bookmark_extractor::extract_bookmarks
- pdf::chunk_parser::extract_page_chunks
- pdf::encryption::detect_encryption
- pdf::encryption::load_with_password
- pdf::font::resolve_page_fonts
- pdf::form_extractor::extract_form_fields
- pdf::hyperlink_extractor::extract_hyperlinks
- pdf::image_extractor::extract_image_chunks
- pdf::image_extractor::extract_image_data
- pdf::line_extractor::extract_line_chunks
- pdf::loader::load_pdf
- pdf::loader::load_pdf_from_bytes
- pdf::metadata_writer::read_metadata
- pdf::metadata_writer::write_metadata
- pdf::page_info::extract_page_info
- pdf::raster_table_ocr::recover_page_raster_table_cell_text
- pdf::raster_table_ocr::recover_raster_table_borders
- pdf::raster_table_ocr::recover_raster_table_text_chunks
- pdf::text_extractor::extract_text_chunks
- pipeline::error_recovery::process_pages_with_recovery
- pipeline::orchestrator::run_pipeline
- pipeline::parallel::configure_thread_pool
- pipeline::parallel::par_extract
- pipeline::parallel::par_map_pages
- pipeline::parallel::par_map_pages_indexed
- pipeline::stages::boxed_heading_promoter::promote_boxed_headings
- pipeline::stages::caption_linker::link_captions
- pipeline::stages::cluster_table_detector::detect_cluster_tables
- pipeline::stages::column_detector::detect_columns
- pipeline::stages::content_filter::filter_content
- pipeline::stages::content_sanitizer::sanitize_content
- pipeline::stages::cross_page_linker::link_cross_page_tables
- pipeline::stages::figure_detector::detect_figures
- pipeline::stages::footnote_detector::detect_footnotes
- pipeline::stages::footnote_linker::link_footnotes
- pipeline::stages::header_footer::detect_headers_footers
- pipeline::stages::heading_detector::detect_headings
- pipeline::stages::id_assignment::assign_ids
- pipeline::stages::list_detector::detect_lists
- pipeline::stages::list_pass2::detect_common_prefix_lists_document
- pipeline::stages::list_pass2::detect_paragraph_lists
- pipeline::stages::nesting_level::assign_nesting_levels
- pipeline::stages::output_builder::build_document
- pipeline::stages::output_builder::build_paged_document
- pipeline::stages::paragraph_detector::detect_paragraphs
- pipeline::stages::reading_order::sort_reading_order
- pipeline::stages::table_content_assigner::assign_content_to_tables
- pipeline::stages::table_detector::detect_table_borders
- pipeline::stages::table_detector::filter_empty_tables
- pipeline::stages::table_detector::filter_suspicious_tables
- pipeline::stages::table_detector::release_pre_cluster_tables
- pipeline::stages::text_block_grouper::group_text_blocks
- pipeline::stages::text_line_grouper::group_text_lines
- pipeline::stages::toc_detector::detect_toc
- pipeline::stages::watermark_detector::detect_watermarks
- pipeline::stages::watermark_detector::remove_watermarks
- tagged::processor::process_tagged_pdf
- tagged::struct_tree::build_mcid_map
- tagged::struct_tree::classify_struct_type
- tagged::struct_tree::extract_struct_tree
- tagged::struct_tree::is_tagged
- utils::diff::diff_documents
- utils::image_dedup::deduplicate
- utils::image_dedup::duplicate_count
- utils::image_dedup::fingerprint
- utils::image_dedup::savings_ratio
- utils::language_detector::detect_language
- utils::layout_analysis::classify_layout
- utils::layout_analysis::compute_density
- utils::layout_analysis::detect_margins
- utils::page_range::filter_pages
- utils::page_range::parse_page_range
- utils::sanitizer::default_rules
- utils::sanitizer::normalize_unicode
- utils::sanitizer::sanitize_text
- utils::text_normalizer::collapse_whitespace
- utils::text_normalizer::full_normalize
- utils::text_normalizer::normalize_pdf_text
- utils::text_normalizer::normalize_typography
- utils::text_normalizer::strip_diacritics
- utils::xycut::xycut_sort