Class PdfToHtmlConverter

java.lang.Object
com.flowable.platform.pdf.converter.PdfToHtmlConverter

public class PdfToHtmlConverter extends Object
Experimental

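Converts a PDF document to HTML. The PDF is read from an InputStream and the conversion result is returned as an InputStream (see convert).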

  • Field Details

    • configuration

      protected PdfToHtmlConverterConfig configuration
    • pdfDocument

      protected com.aspose.pdf.Document pdfDocument
    • paragraphAbsorber

      protected com.aspose.pdf.ParagraphAbsorber paragraphAbsorber
    • headers

      protected Set<com.aspose.pdf.MarkupSection> headers
    • footers

      protected Set<com.aspose.pdf.MarkupSection> footers
    • pageNumbers

      protected Set<com.aspose.pdf.MarkupSection> pageNumbers
  • Constructor Details

  • Method Details

    • convert

      public InputStream convert(InputStream pdfInputStream, SortedSet<com.flowable.platform.pdf.converter.PdfToHtmlConverter.ConverterFilter> filters) throws IOException
      Throws:
      IOException
    • cleanupContent

      protected com.aspose.pdf.Document cleanupContent(SortedSet<com.flowable.platform.pdf.converter.PdfToHtmlConverter.ConverterFilter> filters) throws IOException
      Throws:
      IOException
    • configureHtmlSaveOptions

      protected com.aspose.pdf.HtmlSaveOptions configureHtmlSaveOptions()
    • removeEmptyAndTransparentSections

      protected void removeEmptyAndTransparentSections()
    • removePagesWithSingleRotatedSection

      protected void removePagesWithSingleRotatedSection()
    • removeHeaders

      protected void removeHeaders()
    • removeFooters

      protected void removeFooters()
    • removePageNumbers

      protected void removePageNumbers()
    • removeEmptyPages

      protected void removeEmptyPages()
    • removeTopMargin

      protected void removeTopMargin()
    • removeBottomMargin

      protected void removeBottomMargin()
    • findHeadersOnSinglePage

      protected Set<com.aspose.pdf.MarkupSection> findHeadersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup)
    • findFootersOnSinglePage

      protected Set<com.aspose.pdf.MarkupSection> findFootersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup)
    • findPageNumbersOnSinglePage

      protected Set<com.aspose.pdf.MarkupSection> findPageNumbersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup)
    • findTopMargin

      protected Optional<Double> findTopMargin(List<com.aspose.pdf.MarkupSection> pageSections, Set<com.aspose.pdf.MarkupSection> headersOnThePage)
    • findBottomMargin

      protected Optional<Double> findBottomMargin(List<com.aspose.pdf.MarkupSection> pageSections, Set<com.aspose.pdf.MarkupSection> footersOnThePage)
    • findHeadersByPosition

      protected Set<com.aspose.pdf.MarkupSection> findHeadersByPosition(com.aspose.pdf.MarkupSection markupSection)
    • findFootersByPosition

      protected Set<com.aspose.pdf.MarkupSection> findFootersByPosition(com.aspose.pdf.MarkupSection markupSection)
    • matchesHeaderRegexp

      protected boolean matchesHeaderRegexp(com.aspose.pdf.MarkupSection markupSection)
    • matchesFooterRegexp

      protected boolean matchesFooterRegexp(com.aspose.pdf.MarkupSection markupSection)
    • matchesPageNumberRegexp

      protected boolean matchesPageNumberRegexp(com.aspose.pdf.MarkupSection markupSection)
    • findDocumentNameOnTopCorner

      protected Set<com.aspose.pdf.MarkupSection> findDocumentNameOnTopCorner(com.aspose.pdf.PageMarkup pageMarkup)
    • matchingSectionsByPosition

      protected Predicate<com.aspose.pdf.MarkupSection> matchingSectionsByPosition(com.aspose.pdf.MarkupSection markupSection)
    • removeTextFragmentsContainingPageNumber

      protected void removeTextFragmentsContainingPageNumber(com.aspose.pdf.MarkupSection markupSection)
    • setPageSize

      protected void setPageSize(com.aspose.pdf.Page page, com.aspose.pdf.Rectangle rectangle)
    • getFragmentText

      protected String getFragmentText(com.aspose.pdf.TextFragment textFragment)
    • getSectionText

      protected String getSectionText(com.aspose.pdf.MarkupSection section)
    • removeSections

      protected void removeSections(Collection<com.aspose.pdf.MarkupSection> sectionsToRemove)
    • removeTextFragment

      protected void removeTextFragment(com.aspose.pdf.TextFragment fragmentToRemove)
    • getPageIndex

      protected Optional<Integer> getPageIndex(com.aspose.pdf.MarkupSection markupSection)
    • getPageIndex

      protected Optional<Integer> getPageIndex(List<com.aspose.pdf.TextFragment> textFragments)