diff --git a/gradle.properties b/gradle.properties index 8d976cf..cc5c1ec 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,5 +1,5 @@ group = org.xbib.graphics name = graphics -version = 5.0.1 +version = 5.0.2 org.gradle.warning.mode = ALL diff --git a/gradle/test/junit5.gradle b/gradle/test/junit5.gradle index 9e19058..9abeea9 100644 --- a/gradle/test/junit5.gradle +++ b/gradle/test/junit5.gradle @@ -13,6 +13,7 @@ test { file('/var/tmp/gs').mkdirs() systemProperty 'java.awt.headless', 'true' systemProperty 'java.io.tmpdir', '/var/tmp/' + systemProperty 'pdfbox.fontcache', '/var/tmp/pdfbox' systemProperty 'jna.tmpdir', '/var/tmp/' systemProperty 'jna.debug', 'true' systemProperty 'java.util.logging.config.file', 'src/test/resources/logging.properties' diff --git a/graphics-pdfbox/src/main/java/org/xbib/graphics/pdfbox/analyze/DocumentAnalyzer.java b/graphics-pdfbox/src/main/java/org/xbib/graphics/pdfbox/analyze/DocumentAnalyzer.java index 3c845ca..006ccdd 100644 --- a/graphics-pdfbox/src/main/java/org/xbib/graphics/pdfbox/analyze/DocumentAnalyzer.java +++ b/graphics-pdfbox/src/main/java/org/xbib/graphics/pdfbox/analyze/DocumentAnalyzer.java @@ -5,11 +5,10 @@ import org.apache.pdfbox.contentstream.PDFGraphicsStreamEngine; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentInformation; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.font.PDFont; -import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; -import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; import org.apache.pdfbox.pdmodel.graphics.image.PDImage; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; @@ -41,38 +40,64 @@ public class DocumentAnalyzer { List> pages = new ArrayList<>(); int imagecount = 0; int pagecount = document.getNumberOfPages(); + boolean isDocumentColor = false; + 
boolean isDocumentGray = false; for (int i = 0; i < pagecount; i++) { PDPage pdPage = document.getPage(i); Map pageMap = analyze(i, pdPage); + boolean isColor = (boolean) pageMap.get("iscolor"); + if (isColor) { + isDocumentColor = true; + } + boolean isGray = (boolean) pageMap.get("isgray"); + if (isGray) { + isDocumentGray = true; + } List> list = (List>) pageMap.get("images"); imagecount += list.size(); pages.add(pageMap); } - result.put("pages", pages); result.put("imagecount", imagecount); + result.put("iscolor", isDocumentColor); + result.put("isgray", isDocumentGray); + result.put("pages", pages); } catch (Exception e) { - logger.log(Level.SEVERE, e.getMessage(), e); + logger.log(Level.WARNING, e.getMessage(), e); } } + public Map getResult() { + return result; + } + + public boolean isColor() { + return Boolean.TRUE.equals(result.get("iscolor")); /* null-safe: the flag is never stored when the analysis above threw, so report false instead of an NPE on unboxing */ + } + + + public boolean isGray() { + return Boolean.TRUE.equals(result.get("isgray")); /* null-safe for the same reason as isColor() */ + } + private void documentToResult(PDDocument document) { try { - result.put("author", document.getDocumentInformation().getAuthor()); - result.put("creator", document.getDocumentInformation().getCreator()); - result.put("producer", document.getDocumentInformation().getProducer()); - result.put("title", document.getDocumentInformation().getTitle()); + PDDocumentInformation documentInformation = document.getDocumentInformation(); + result.put("author", documentInformation.getAuthor()); + result.put("creator", documentInformation.getCreator()); + result.put("producer", documentInformation.getProducer()); + result.put("title", documentInformation.getTitle()); result.put("pagecount", document.getNumberOfPages()); - Calendar calendar = document.getDocumentInformation().getCreationDate(); + Calendar calendar = documentInformation.getCreationDate(); if (calendar != null) { result.put("creationDate", calendar.toInstant()); } - calendar = document.getDocumentInformation().getModificationDate(); + calendar = documentInformation.getModificationDate(); if 
(calendar != null) { result.put("modificationDate", calendar.toInstant()); } } catch (Exception e) { // NPE if creation/modification dates are borked - /** + /* * java.lang.NullPointerException: null * at java.text.SimpleDateFormat.matchZoneString(SimpleDateFormat.java:1695) ~[?:?] * at java.text.SimpleDateFormat.subParseZoneString(SimpleDateFormat.java:1763) ~[?:?] @@ -85,14 +110,10 @@ public class DocumentAnalyzer { * at org.apache.pdfbox.cos.COSDictionary.getDate(COSDictionary.java:790) ~[pdfbox-2.0.12.jar:2.0.12] * at org.apache.pdfbox.pdmodel.PDDocumentInformation.getCreationDate(PDDocumentInformation.java:212) ~[pdfbox-2.0.12.jar:2.0.12] */ - logger.log(Level.SEVERE, e.getMessage(), e); + logger.log(Level.WARNING, e.getMessage(), e); } } - public Map getResult() { - return result; - } - public Map analyze(int i, PDPage page) throws IOException { Map m = new LinkedHashMap<>(); m.put("page", i); @@ -101,42 +122,46 @@ public class DocumentAnalyzer { m.put("mediabox", Map.of("height", page.getMediaBox().getHeight(), "width", page.getMediaBox().getWidth())); m.put("bleedbox", Map.of("height", page.getBleedBox().getHeight(), "width", page.getBleedBox().getWidth())); m.put("rotation", page.getRotation()); - ImageGraphicsExtractor extractor = new ImageGraphicsExtractor(page); - extractor.process(); - m.put("images", extractor.getList()); - List> fonts = new ArrayList<>(); - PDResources res = page.getResources(); - for (COSName cosName : res.getFontNames()) { - PDFont font = res.getFont(cosName); - if (font != null) { - Map f = new LinkedHashMap<>(); - f.put("name", font.getName()); - f.put("damaged", font.isDamaged()); - f.put("embedded", font.isEmbedded()); - f.put("type", font.getType()); - f.put("subtype", font.getSubType()); - fonts.add(f); - } - } - m.put("fonts", fonts); + PageExtractor pageExtractor = new PageExtractor(page); + pageExtractor.process(); + m.put("images", pageExtractor.getImages()); + m.put("iscolor", pageExtractor.isColor()); + m.put("isgray", 
pageExtractor.isGray()); + FontExtractor fontExtractor = new FontExtractor(page); + fontExtractor.process(); + m.put("fonts", fontExtractor.getFonts()); return m; } - class ImageGraphicsExtractor extends PDFGraphicsStreamEngine { + class PageExtractor extends PDFGraphicsStreamEngine { - private final List> list; + private final List> images; - protected ImageGraphicsExtractor(PDPage page) { + private boolean isColor; + + private boolean isGray; + + protected PageExtractor(PDPage page) { super(page); - this.list = new ArrayList<>(); + this.images = new ArrayList<>(); + this.isColor = false; + this.isGray = false; } public void process() throws IOException { processPage(getPage()); } - public List> getList() { - return list; + public List> getImages() { + return images; + } + + public boolean isColor() { + return isColor; + } + + public boolean isGray() { + return isGray; } @Override @@ -151,15 +176,26 @@ public class DocumentAnalyzer { return; } seen.add(xobject.getCOSObject()); - Map m = new LinkedHashMap<>(); + Map m = new LinkedHashMap<>(); + String colorSpaceName = xobject.getColorSpace().getName(); + boolean isColorSpace = isColorSpaceName(colorSpaceName); + if (isColorSpace) { + this.isColor = true; + } + boolean isGraySpace = isGraySpaceName(colorSpaceName); + if (isGraySpace) { + if (xobject.getBitsPerComponent() > 1) { + this.isGray = true; + } + } m.put("width", xobject.getWidth()); m.put("height", xobject.getHeight()); m.put("bitspercomponent", xobject.getBitsPerComponent()); - m.put("colorspace", xobject.getColorSpace().getName()); - m.put("iscolor", PDDeviceRGB.INSTANCE.getName().equals(xobject.getColorSpace().getName())); - m.put("isgray", PDDeviceGray.INSTANCE.getName().equals(xobject.getColorSpace().getName())); + m.put("colorspace", colorSpaceName); m.put("suffix", xobject.getSuffix()); - list.add(m); + m.put("iscolor", isColorSpace); + m.put("isgray", isGraySpace); + images.add(m); } } @@ -169,12 +205,10 @@ public class DocumentAnalyzer { @Override 
public void moveTo(float x, float y) throws IOException { - } @Override public void lineTo(float x, float y) throws IOException { - } @Override @@ -196,10 +230,26 @@ public class DocumentAnalyzer { @Override public void strokePath() { + String colorSpaceName = getGraphicsState().getStrokingColor().getColorSpace().getName(); + if (isColorSpaceName(colorSpaceName)) { + logger.log(Level.INFO, "strokepath: color true, " + colorSpaceName); + this.isColor = true; + } + if (isGraySpaceName(colorSpaceName)) { + this.isGray = true; + } } @Override public void fillPath(int windingRule) { + String colorSpaceName = getGraphicsState().getNonStrokingColor().getColorSpace().getName(); /* fills paint with the non-stroking colour; the stroking colour only applies to strokePath() */ + if (isColorSpaceName(colorSpaceName)) { + logger.log(Level.INFO, "fillpath: color true " + colorSpaceName); + this.isColor = true; + } + if (isGraySpaceName(colorSpaceName)) { + this.isGray = true; + } } @Override @@ -209,6 +259,51 @@ public class DocumentAnalyzer { @Override public void shadingFill(COSName shadingName) { } + + private boolean isColorSpaceName(String name) { + return "DeviceRGB".equals(name) || + "DeviceCMYK".equals(name) || + "Indexed".equals(name); + } + + private boolean isGraySpaceName(String name) { + return "DeviceGray".equals(name); + } + } + + static class FontExtractor { + + private final List> fonts; + + private final PDResources res; + + public FontExtractor(PDPage page) { + fonts = new ArrayList<>(); + res = page.getResources(); + } + + public void process() { + for (COSName cosName : res.getFontNames()) { + try { + PDFont font = res.getFont(cosName); + if (font != null) { + Map f = new LinkedHashMap<>(); + f.put("name", font.getName()); + f.put("damaged", font.isDamaged()); + f.put("embedded", font.isEmbedded()); + f.put("type", font.getType()); + f.put("subtype", font.getSubType()); + fonts.add(f); + } + } catch (IOException e) { + logger.log(Level.WARNING, e.getMessage(), e); + } + } + } + + public List> getFonts() { + return fonts; + } } } diff --git 
a/graphics-pdfbox/src/test/java/org/xbib/graphics/pdfbox/test/DocumentAnalyzerTest.java b/graphics-pdfbox/src/test/java/org/xbib/graphics/pdfbox/test/DocumentAnalyzerTest.java index a412755..aeec787 100644 --- a/graphics-pdfbox/src/test/java/org/xbib/graphics/pdfbox/test/DocumentAnalyzerTest.java +++ b/graphics-pdfbox/src/test/java/org/xbib/graphics/pdfbox/test/DocumentAnalyzerTest.java @@ -18,13 +18,16 @@ public class DocumentAnalyzerTest { @Test public void testDocument() throws IOException { Path tmp = Files.createTempDirectory("document-analyzer"); - Path path = tmp.resolve("antonio_sample.pdf"); - try (InputStream inputStream = getClass().getResourceAsStream("antonio_sample.pdf"); + String sample = "antonio_sample.pdf"; + Path path = tmp.resolve(sample); + try (InputStream inputStream = getClass().getResourceAsStream(sample); OutputStream outputStream = Files.newOutputStream(path)) { if (inputStream != null) { inputStream.transferTo(outputStream); DocumentAnalyzer documentAnalyzer = new DocumentAnalyzer(path.toFile()); logger.log(Level.INFO, "result = " + documentAnalyzer.getResult()); + logger.log(Level.INFO, "iscolor = " + documentAnalyzer.isColor()); + logger.log(Level.INFO, "isgray = " + documentAnalyzer.isGray()); } } Files.delete(path); diff --git a/graphics-pdfbox/src/test/resources/logging.properties b/graphics-pdfbox/src/test/resources/logging.properties index 4c61377..f6b2e4f 100644 --- a/graphics-pdfbox/src/test/resources/logging.properties +++ b/graphics-pdfbox/src/test/resources/logging.properties @@ -6,3 +6,4 @@ java.util.logging.ConsoleHandler.formatter=java.util.logging.SimpleFormatter org.apache.fontbox.ttf.level=OFF org.apache.fontbox.util.autodetect.FontFileFinder.level=OFF org.apache.pdfbox.pdmodel.font.FileSystemFontProvider.level=OFF +org.apache.pdfbox.contentstream.operator.graphics.level=OFF