Class PdfToHtmlConverter


  • public class PdfToHtmlConverter
    extends Object
    Experimental

    Converts a PDF to an HTML file.

    • Field Detail

      • pdfDocument

        protected com.aspose.pdf.Document pdfDocument
      • paragraphAbsorber

        protected com.aspose.pdf.ParagraphAbsorber paragraphAbsorber
      • headers

        protected Set<com.aspose.pdf.MarkupSection> headers
      • footers

        protected Set<com.aspose.pdf.MarkupSection> footers
      • pageNumbers

        protected Set<com.aspose.pdf.MarkupSection> pageNumbers
    • Method Detail

      • cleanupContent

        protected com.aspose.pdf.Document cleanupContent(SortedSet<com.flowable.platform.pdf.converter.PdfToHtmlConverter.ConverterFilter> filters)
                                                  throws IOException
        Throws:
        IOException
      • configureHtmlSaveOptions

        protected com.aspose.pdf.HtmlSaveOptions configureHtmlSaveOptions()
      • removeEmptyAndTransparentSections

        protected void removeEmptyAndTransparentSections()
      • removePagesWithSingleRotatedSection

        protected void removePagesWithSingleRotatedSection()
      • removeHeaders

        protected void removeHeaders()
      • removeFooters

        protected void removeFooters()
      • removePageNumbers

        protected void removePageNumbers()
      • removeEmptyPages

        protected void removeEmptyPages()
      • removeTopMargin

        protected void removeTopMargin()
      • removeBottomMargin

        protected void removeBottomMargin()
      • findHeadersOnSinglePage

        protected Set<com.aspose.pdf.MarkupSection> findHeadersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup)
      • findFootersOnSinglePage

        protected Set<com.aspose.pdf.MarkupSection> findFootersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup)
      • findPageNumbersOnSinglePage

        protected Set<com.aspose.pdf.MarkupSection> findPageNumbersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup)
      • findTopMargin

        protected Optional<Double> findTopMargin(List<com.aspose.pdf.MarkupSection> pageSections,
                                                 Set<com.aspose.pdf.MarkupSection> headersOnThePage)
      • findBottomMargin

        protected Optional<Double> findBottomMargin(List<com.aspose.pdf.MarkupSection> pageSections,
                                                    Set<com.aspose.pdf.MarkupSection> footersOnThePage)
      • findHeadersByPosition

        protected Set<com.aspose.pdf.MarkupSection> findHeadersByPosition(com.aspose.pdf.MarkupSection markupSection)
      • findFootersByPosition

        protected Set<com.aspose.pdf.MarkupSection> findFootersByPosition(com.aspose.pdf.MarkupSection markupSection)
      • matchesHeaderRegexp

        protected boolean matchesHeaderRegexp(com.aspose.pdf.MarkupSection markupSection)
      • matchesFooterRegexp

        protected boolean matchesFooterRegexp(com.aspose.pdf.MarkupSection markupSection)
      • matchesPageNumberRegexp

        protected boolean matchesPageNumberRegexp(com.aspose.pdf.MarkupSection markupSection)
      • findDocumentNameOnTopCorner

        protected Set<com.aspose.pdf.MarkupSection> findDocumentNameOnTopCorner(com.aspose.pdf.PageMarkup pageMarkup)
      • matchingSectionsByPosition

        protected Predicate<com.aspose.pdf.MarkupSection> matchingSectionsByPosition(com.aspose.pdf.MarkupSection markupSection)
      • removeTextFragmentsContainingPageNumber

        protected void removeTextFragmentsContainingPageNumber(com.aspose.pdf.MarkupSection markupSection)
      • setPageSize

        protected void setPageSize(com.aspose.pdf.Page page,
                                   com.aspose.pdf.Rectangle rectangle)
      • getFragmentText

        protected String getFragmentText(com.aspose.pdf.TextFragment textFragment)
      • getSectionText

        protected String getSectionText(com.aspose.pdf.MarkupSection section)
      • removeSections

        protected void removeSections(Collection<com.aspose.pdf.MarkupSection> sectionsToRemove)
      • removeTextFragment

        protected void removeTextFragment(com.aspose.pdf.TextFragment fragmentToRemove)
      • getPageIndex

        protected Optional<Integer> getPageIndex(com.aspose.pdf.MarkupSection markupSection)
      • getPageIndex

        protected Optional<Integer> getPageIndex(List<com.aspose.pdf.TextFragment> textFragments)