Class PdfToHtmlConverter
- java.lang.Object
-
- com.flowable.platform.pdf.converter.PdfToHtmlConverter
-
public class PdfToHtmlConverter extends Object
Experimental. Converts a PDF to an HTML file.
-
-
Field Summary
Fields
Modifier and Type | Field | Description
protected PdfToHtmlConverterConfig
configuration
protected Set<com.aspose.pdf.MarkupSection>
footers
protected Set<com.aspose.pdf.MarkupSection>
headers
protected Set<com.aspose.pdf.MarkupSection>
pageNumbers
protected com.aspose.pdf.ParagraphAbsorber
paragraphAbsorber
protected com.aspose.pdf.Document
pdfDocument
-
Constructor Summary
Constructors
Constructor | Description
PdfToHtmlConverter(PdfToHtmlConverterConfig configuration)
-
Method Summary
All Methods | Instance Methods | Concrete Methods
Modifier and Type | Method | Description
protected com.aspose.pdf.Document
cleanupContent(SortedSet<com.flowable.platform.pdf.converter.PdfToHtmlConverter.ConverterFilter> filters)
protected com.aspose.pdf.HtmlSaveOptions
configureHtmlSaveOptions()
InputStream
convert(InputStream pdfInputStream, SortedSet<com.flowable.platform.pdf.converter.PdfToHtmlConverter.ConverterFilter> filters)
protected Optional<Double>
findBottomMargin(List<com.aspose.pdf.MarkupSection> pageSections, Set<com.aspose.pdf.MarkupSection> footersOnThePage)
protected Set<com.aspose.pdf.MarkupSection>
findDocumentNameOnTopCorner(com.aspose.pdf.PageMarkup pageMarkup)
protected Set<com.aspose.pdf.MarkupSection>
findFootersByPosition(com.aspose.pdf.MarkupSection markupSection)
protected Set<com.aspose.pdf.MarkupSection>
findFootersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup)
protected Set<com.aspose.pdf.MarkupSection>
findHeadersByPosition(com.aspose.pdf.MarkupSection markupSection)
protected Set<com.aspose.pdf.MarkupSection>
findHeadersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup)
protected Set<com.aspose.pdf.MarkupSection>
findPageNumbersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup)
protected Optional<Double>
findTopMargin(List<com.aspose.pdf.MarkupSection> pageSections, Set<com.aspose.pdf.MarkupSection> headersOnThePage)
protected String
getFragmentText(com.aspose.pdf.TextFragment textFragment)
protected Optional<Integer>
getPageIndex(com.aspose.pdf.MarkupSection markupSection)
protected Optional<Integer>
getPageIndex(List<com.aspose.pdf.TextFragment> textFragments)
protected String
getSectionText(com.aspose.pdf.MarkupSection section)
protected boolean
matchesFooterRegexp(com.aspose.pdf.MarkupSection markupSection)
protected boolean
matchesHeaderRegexp(com.aspose.pdf.MarkupSection markupSection)
protected boolean
matchesPageNumberRegexp(com.aspose.pdf.MarkupSection markupSection)
protected Predicate<com.aspose.pdf.MarkupSection>
matchingSectionsByPosition(com.aspose.pdf.MarkupSection markupSection)
protected void
removeBottomMargin()
protected void
removeEmptyAndTransparentSections()
protected void
removeEmptyPages()
protected void
removeFooters()
protected void
removeHeaders()
protected void
removePageNumbers()
protected void
removePagesWithSingleRotatedSection()
protected void
removeSections(Collection<com.aspose.pdf.MarkupSection> sectionsToRemove)
protected void
removeTextFragment(com.aspose.pdf.TextFragment fragmentToRemove)
protected void
removeTextFragmentsContainingPageNumber(com.aspose.pdf.MarkupSection markupSection)
protected void
removeTopMargin()
protected void
setPageSize(com.aspose.pdf.Page page, com.aspose.pdf.Rectangle rectangle)
-
-
-
Field Detail
-
configuration
protected PdfToHtmlConverterConfig configuration
-
pdfDocument
protected com.aspose.pdf.Document pdfDocument
-
paragraphAbsorber
protected com.aspose.pdf.ParagraphAbsorber paragraphAbsorber
-
headers
protected Set<com.aspose.pdf.MarkupSection> headers
-
footers
protected Set<com.aspose.pdf.MarkupSection> footers
-
pageNumbers
protected Set<com.aspose.pdf.MarkupSection> pageNumbers
-
-
Constructor Detail
-
PdfToHtmlConverter
public PdfToHtmlConverter(PdfToHtmlConverterConfig configuration)
-
-
Method Detail
-
convert
public InputStream convert(InputStream pdfInputStream, SortedSet<com.flowable.platform.pdf.converter.PdfToHtmlConverter.ConverterFilter> filters) throws IOException
- Throws:
IOException
-
cleanupContent
protected com.aspose.pdf.Document cleanupContent(SortedSet<com.flowable.platform.pdf.converter.PdfToHtmlConverter.ConverterFilter> filters) throws IOException
- Throws:
IOException
-
configureHtmlSaveOptions
protected com.aspose.pdf.HtmlSaveOptions configureHtmlSaveOptions()
-
removeEmptyAndTransparentSections
protected void removeEmptyAndTransparentSections()
-
removePagesWithSingleRotatedSection
protected void removePagesWithSingleRotatedSection()
-
removeHeaders
protected void removeHeaders()
-
removeFooters
protected void removeFooters()
-
removePageNumbers
protected void removePageNumbers()
-
removeEmptyPages
protected void removeEmptyPages()
-
removeTopMargin
protected void removeTopMargin()
-
removeBottomMargin
protected void removeBottomMargin()
-
findHeadersOnSinglePage
protected Set<com.aspose.pdf.MarkupSection> findHeadersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup)
-
findFootersOnSinglePage
protected Set<com.aspose.pdf.MarkupSection> findFootersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup)
-
findPageNumbersOnSinglePage
protected Set<com.aspose.pdf.MarkupSection> findPageNumbersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup)
-
findTopMargin
protected Optional<Double> findTopMargin(List<com.aspose.pdf.MarkupSection> pageSections, Set<com.aspose.pdf.MarkupSection> headersOnThePage)
-
findBottomMargin
protected Optional<Double> findBottomMargin(List<com.aspose.pdf.MarkupSection> pageSections, Set<com.aspose.pdf.MarkupSection> footersOnThePage)
-
findHeadersByPosition
protected Set<com.aspose.pdf.MarkupSection> findHeadersByPosition(com.aspose.pdf.MarkupSection markupSection)
-
findFootersByPosition
protected Set<com.aspose.pdf.MarkupSection> findFootersByPosition(com.aspose.pdf.MarkupSection markupSection)
-
matchesHeaderRegexp
protected boolean matchesHeaderRegexp(com.aspose.pdf.MarkupSection markupSection)
-
matchesFooterRegexp
protected boolean matchesFooterRegexp(com.aspose.pdf.MarkupSection markupSection)
-
matchesPageNumberRegexp
protected boolean matchesPageNumberRegexp(com.aspose.pdf.MarkupSection markupSection)
-
findDocumentNameOnTopCorner
protected Set<com.aspose.pdf.MarkupSection> findDocumentNameOnTopCorner(com.aspose.pdf.PageMarkup pageMarkup)
-
matchingSectionsByPosition
protected Predicate<com.aspose.pdf.MarkupSection> matchingSectionsByPosition(com.aspose.pdf.MarkupSection markupSection)
-
removeTextFragmentsContainingPageNumber
protected void removeTextFragmentsContainingPageNumber(com.aspose.pdf.MarkupSection markupSection)
-
setPageSize
protected void setPageSize(com.aspose.pdf.Page page, com.aspose.pdf.Rectangle rectangle)
-
getFragmentText
protected String getFragmentText(com.aspose.pdf.TextFragment textFragment)
-
getSectionText
protected String getSectionText(com.aspose.pdf.MarkupSection section)
-
removeSections
protected void removeSections(Collection<com.aspose.pdf.MarkupSection> sectionsToRemove)
-
removeTextFragment
protected void removeTextFragment(com.aspose.pdf.TextFragment fragmentToRemove)
-
-