Class PdfToHtmlConverter
- java.lang.Object
-
- com.flowable.platform.pdf.converter.PdfToHtmlConverter
-
public class PdfToHtmlConverter extends Object
Experimental. Converts a PDF to an HTML file.
-
-
Field Summary
Fields
Modifier and Type | Field | Description
protected PdfToHtmlConverterConfig | configuration |
protected Set<com.aspose.pdf.MarkupSection> | footers |
protected Set<com.aspose.pdf.MarkupSection> | headers |
protected Set<com.aspose.pdf.MarkupSection> | pageNumbers |
protected com.aspose.pdf.ParagraphAbsorber | paragraphAbsorber |
protected com.aspose.pdf.Document | pdfDocument |
-
Constructor Summary
Constructors
Constructor | Description
PdfToHtmlConverter(PdfToHtmlConverterConfig configuration) |
-
Method Summary
All Methods | Instance Methods | Concrete Methods
Modifier and Type | Method | Description
protected com.aspose.pdf.Document | cleanupContent(SortedSet<com.flowable.platform.pdf.converter.PdfToHtmlConverter.ConverterFilter> filters) |
protected com.aspose.pdf.HtmlSaveOptions | configureHtmlSaveOptions() |
InputStream | convert(InputStream pdfInputStream, SortedSet<com.flowable.platform.pdf.converter.PdfToHtmlConverter.ConverterFilter> filters) |
protected Optional<Double> | findBottomMargin(List<com.aspose.pdf.MarkupSection> pageSections, Set<com.aspose.pdf.MarkupSection> footersOnThePage) |
protected Set<com.aspose.pdf.MarkupSection> | findDocumentNameOnTopCorner(com.aspose.pdf.PageMarkup pageMarkup) |
protected Set<com.aspose.pdf.MarkupSection> | findFootersByPosition(com.aspose.pdf.MarkupSection markupSection) |
protected Set<com.aspose.pdf.MarkupSection> | findFootersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup) |
protected Set<com.aspose.pdf.MarkupSection> | findHeadersByPosition(com.aspose.pdf.MarkupSection markupSection) |
protected Set<com.aspose.pdf.MarkupSection> | findHeadersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup) |
protected Set<com.aspose.pdf.MarkupSection> | findPageNumbersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup) |
protected Optional<Double> | findTopMargin(List<com.aspose.pdf.MarkupSection> pageSections, Set<com.aspose.pdf.MarkupSection> headersOnThePage) |
protected String | getFragmentText(com.aspose.pdf.TextFragment textFragment) |
protected Optional<Integer> | getPageIndex(com.aspose.pdf.MarkupSection markupSection) |
protected Optional<Integer> | getPageIndex(List<com.aspose.pdf.TextFragment> textFragments) |
protected String | getSectionText(com.aspose.pdf.MarkupSection section) |
protected boolean | matchesFooterRegexp(com.aspose.pdf.MarkupSection markupSection) |
protected boolean | matchesHeaderRegexp(com.aspose.pdf.MarkupSection markupSection) |
protected boolean | matchesPageNumberRegexp(com.aspose.pdf.MarkupSection markupSection) |
protected Predicate<com.aspose.pdf.MarkupSection> | matchingSectionsByPosition(com.aspose.pdf.MarkupSection markupSection) |
protected void | removeBottomMargin() |
protected void | removeEmptyAndTransparentSections() |
protected void | removeEmptyPages() |
protected void | removeFooters() |
protected void | removeHeaders() |
protected void | removePageNumbers() |
protected void | removePagesWithSingleRotatedSection() |
protected void | removeSections(Collection<com.aspose.pdf.MarkupSection> sectionsToRemove) |
protected void | removeTextFragment(com.aspose.pdf.TextFragment fragmentToRemove) |
protected void | removeTextFragmentsContainingPageNumber(com.aspose.pdf.MarkupSection markupSection) |
protected void | removeTopMargin() |
protected void | setPageSize(com.aspose.pdf.Page page, com.aspose.pdf.Rectangle rectangle) |
-
-
-
Field Detail
-
configuration
protected PdfToHtmlConverterConfig configuration
-
pdfDocument
protected com.aspose.pdf.Document pdfDocument
-
paragraphAbsorber
protected com.aspose.pdf.ParagraphAbsorber paragraphAbsorber
-
headers
protected Set<com.aspose.pdf.MarkupSection> headers
-
footers
protected Set<com.aspose.pdf.MarkupSection> footers
-
pageNumbers
protected Set<com.aspose.pdf.MarkupSection> pageNumbers
-
-
Constructor Detail
-
PdfToHtmlConverter
public PdfToHtmlConverter(PdfToHtmlConverterConfig configuration)
-
-
Method Detail
-
convert
public InputStream convert(InputStream pdfInputStream, SortedSet<com.flowable.platform.pdf.converter.PdfToHtmlConverter.ConverterFilter> filters) throws IOException
- Throws:
IOException
-
cleanupContent
protected com.aspose.pdf.Document cleanupContent(SortedSet<com.flowable.platform.pdf.converter.PdfToHtmlConverter.ConverterFilter> filters) throws IOException
- Throws:
IOException
-
configureHtmlSaveOptions
protected com.aspose.pdf.HtmlSaveOptions configureHtmlSaveOptions()
-
removeEmptyAndTransparentSections
protected void removeEmptyAndTransparentSections()
-
removePagesWithSingleRotatedSection
protected void removePagesWithSingleRotatedSection()
-
removeHeaders
protected void removeHeaders()
-
removeFooters
protected void removeFooters()
-
removePageNumbers
protected void removePageNumbers()
-
removeEmptyPages
protected void removeEmptyPages()
-
removeTopMargin
protected void removeTopMargin()
-
removeBottomMargin
protected void removeBottomMargin()
-
findHeadersOnSinglePage
protected Set<com.aspose.pdf.MarkupSection> findHeadersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup)
-
findFootersOnSinglePage
protected Set<com.aspose.pdf.MarkupSection> findFootersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup)
-
findPageNumbersOnSinglePage
protected Set<com.aspose.pdf.MarkupSection> findPageNumbersOnSinglePage(com.aspose.pdf.PageMarkup pageMarkup)
-
findTopMargin
protected Optional<Double> findTopMargin(List<com.aspose.pdf.MarkupSection> pageSections, Set<com.aspose.pdf.MarkupSection> headersOnThePage)
-
findBottomMargin
protected Optional<Double> findBottomMargin(List<com.aspose.pdf.MarkupSection> pageSections, Set<com.aspose.pdf.MarkupSection> footersOnThePage)
-
findHeadersByPosition
protected Set<com.aspose.pdf.MarkupSection> findHeadersByPosition(com.aspose.pdf.MarkupSection markupSection)
-
findFootersByPosition
protected Set<com.aspose.pdf.MarkupSection> findFootersByPosition(com.aspose.pdf.MarkupSection markupSection)
-
matchesHeaderRegexp
protected boolean matchesHeaderRegexp(com.aspose.pdf.MarkupSection markupSection)
-
matchesFooterRegexp
protected boolean matchesFooterRegexp(com.aspose.pdf.MarkupSection markupSection)
-
matchesPageNumberRegexp
protected boolean matchesPageNumberRegexp(com.aspose.pdf.MarkupSection markupSection)
-
findDocumentNameOnTopCorner
protected Set<com.aspose.pdf.MarkupSection> findDocumentNameOnTopCorner(com.aspose.pdf.PageMarkup pageMarkup)
-
matchingSectionsByPosition
protected Predicate<com.aspose.pdf.MarkupSection> matchingSectionsByPosition(com.aspose.pdf.MarkupSection markupSection)
-
removeTextFragmentsContainingPageNumber
protected void removeTextFragmentsContainingPageNumber(com.aspose.pdf.MarkupSection markupSection)
-
setPageSize
protected void setPageSize(com.aspose.pdf.Page page, com.aspose.pdf.Rectangle rectangle)
-
getFragmentText
protected String getFragmentText(com.aspose.pdf.TextFragment textFragment)
-
getSectionText
protected String getSectionText(com.aspose.pdf.MarkupSection section)
-
removeSections
protected void removeSections(Collection<com.aspose.pdf.MarkupSection> sectionsToRemove)
-
removeTextFragment
protected void removeTextFragment(com.aspose.pdf.TextFragment fragmentToRemove)
-
-