From 7b624e8f30482da28d3f2987818795eee867470e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Prante?= Date: Thu, 7 Dec 2023 13:47:58 +0100 Subject: [PATCH] add colorspaces to document analysis, improve barcode analysis --- gradle.properties | 2 +- .../pdfbox/analyze/BarcodeAnalyzer.java | 161 +++++++++--------- .../pdfbox/analyze/DocumentAnalyzer.java | 87 ++++++---- .../pdfbox/test/BarcodeAnalyzerTest.java | 10 +- .../pdfbox/test/DocumentAnalyzerTest.java | 1 + 5 files changed, 141 insertions(+), 120 deletions(-) diff --git a/gradle.properties b/gradle.properties index 29205ff..08f30ec 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,5 +1,5 @@ group = org.xbib.graphics name = graphics -version = 5.2.0 +version = 5.2.1 org.gradle.warning.mode = ALL diff --git a/graphics-pdfbox/src/main/java/org/xbib/graphics/pdfbox/analyze/BarcodeAnalyzer.java b/graphics-pdfbox/src/main/java/org/xbib/graphics/pdfbox/analyze/BarcodeAnalyzer.java index cabb548..dd49a01 100644 --- a/graphics-pdfbox/src/main/java/org/xbib/graphics/pdfbox/analyze/BarcodeAnalyzer.java +++ b/graphics-pdfbox/src/main/java/org/xbib/graphics/pdfbox/analyze/BarcodeAnalyzer.java @@ -28,68 +28,73 @@ import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Objects; import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; public class BarcodeAnalyzer { - private static final Logger logger = Logger.getLogger(BarcodeAnalyzer.class.getName()); - - private final List resultList; + private final BarcodeAnalyzerResult result; private final Set seen; + private float saturationMin; + private float brightnessMin; + + private int maximumBlankPixelDelimiterCount; + public BarcodeAnalyzer() { - this.resultList = new ArrayList<>(); + this.result = new BarcodeAnalyzerResult(); this.seen = new HashSet<>(); + this.saturationMin = 0.10f; + this.brightnessMin = 0.80f; +
this.maximumBlankPixelDelimiterCount = 20; + } + + public void setSaturationMin(float saturationMin) { + this.saturationMin = saturationMin; + } + + public void setBrightnessMin(float brightnessMin) { + this.brightnessMin = brightnessMin; + } + + public void setMaximumBlankPixelDelimiterCount(int maximumBlankPixelDelimiterCount) { + this.maximumBlankPixelDelimiterCount = maximumBlankPixelDelimiterCount; } public void process(File file, int pageNumber) throws IOException { try (PDDocument document = Loader.loadPDF(file)) { - process(document.getPage(pageNumber), 20); + if (pageNumber >= 0 && pageNumber < document.getNumberOfPages()) { + process(document.getPage(pageNumber)); + } } } - public void process(File file, int pageNumber, int maximumBlankPixelDelimiterCount) - throws IOException { - try (PDDocument document = Loader.loadPDF(file)) { - process(document.getPage(pageNumber), maximumBlankPixelDelimiterCount); - } - } - - public void process(PDPage pdPage, int maximumBlankPixelDelimiterCount) - throws IOException { + public void process(PDPage pdPage) throws IOException { + Objects.requireNonNull(pdPage); PDResources pdResources = pdPage.getResources(); for (COSName name : pdResources.getXObjectNames()) { PDXObject xobject = pdResources.getXObject(name); seen.add(xobject.getCOSObject()); if (xobject instanceof PDImageXObject imageXObject) { - BufferedImage image = imageXObject.getImage(); - extractBarcodeArrayByAreas(image, maximumBlankPixelDelimiterCount); + extractBarcode(imageXObject.getImage()); } } PageExtractor pageExtractor = new PageExtractor(pdPage, seen); pageExtractor.process(); for (PDImageXObject imageXObject : pageExtractor.getImages()) { - BufferedImage image = imageXObject.getImage(); - extractBarcodeArrayByAreas(image, maximumBlankPixelDelimiterCount); + extractBarcode(imageXObject.getImage()); } } - public List getResultList() { - return resultList; + public List> getResult() { + return result.getResult(); } - private void 
extractBarcodeArrayByAreas(BufferedImage image, int maximumBlankPixelDelimiterCount) { - float saturationMin = 0.10f; - float brightnessMin = 0.80f; - BufferedImage blackAndWhiteImage = getThresholdImage(image, saturationMin, brightnessMin); - List areaList = getAllAreaByColor(blackAndWhiteImage, null, 0, 0, 0, - maximumBlankPixelDelimiterCount); - logger.log(Level.FINE, "found " + areaList + " rectangles"); - for (Rectangle rectangle : areaList) { + private void extractBarcode(BufferedImage image) { + for (Rectangle rectangle : getAreas(computeBlackAndWhite(image))) { if (rectangle.x < 0) { rectangle.x = 0; } @@ -103,51 +108,32 @@ public class BarcodeAnalyzer { rectangle.width = image.getWidth() - rectangle.x; } BufferedImage croppedImage = image.getSubimage(rectangle.x, rectangle.y, rectangle.width, rectangle.height); - Result result = decodeBarcode(croppedImage); - if (result != null) { - resultList.add(result); + LuminanceSource source = new BufferedImageLuminanceSource(croppedImage); + BinaryBitmap bitmap = new BinaryBitmap(new HybridBinarizer(source)); + Reader reader = new MultiFormatReader(); + try { + Result result = reader.decode(bitmap); + if (result != null) { + this.result.add(Map.entry(result.getBarcodeFormat().name(), result.getText())); + } + } catch (NotFoundException | ChecksumException | FormatException e) { + // ignore 'not found' or other problems } } } - private BufferedImage getThresholdImage(BufferedImage image, float saturationMin, float brightnessMin) { - BufferedImage result = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_INT_ARGB); - Graphics2D graphics2D = result.createGraphics(); - graphics2D.drawRenderedImage(image, null); - graphics2D.dispose(); - computeBlackAndWhite(image, result, saturationMin, brightnessMin); - return result; - } - - private Result decodeBarcode(BufferedImage bufferedImage) { - LuminanceSource source = new BufferedImageLuminanceSource(bufferedImage); - BinaryBitmap bitmap = new 
BinaryBitmap(new HybridBinarizer(source)); - Reader reader = new MultiFormatReader(); - try { - return reader.decode(bitmap); - } catch (NotFoundException | ChecksumException | FormatException e) { - // ignore 'not found' or other problems - } - return null; - } - - private List getAllAreaByColor(BufferedImage in, BufferedImage out, - int redColor, int greenColor, int blueColor, - int maximumBlankPixelDelimiterCount) { - int w = in.getWidth(); - int h = in.getHeight(); + private List getAreas(BufferedImage bufferedImage) { + int redColor = 0; + int greenColor = 0; + int blueColor = 0; + int w = bufferedImage.getWidth(); + int h = bufferedImage.getHeight(); int pixel; List areaList = new ArrayList<>(); - Graphics2D gc = null; - if (out != null) { - gc = out.createGraphics(); - gc.setColor(new Color(1f, 0f, 0f)); - } int maximumBlankPixelDelimiterCountDouble = maximumBlankPixelDelimiterCount * 2; for (int x = 0; x < w; x++) { for (int y = 0; y < h; y++) { - pixel = in.getRGB(x, y); - int alpha = ((pixel >> 24) & 0xFF); + pixel = bufferedImage.getRGB(x, y); int red = ((pixel >> 16) & 0xFF); int green = ((pixel >> 8) & 0xFF); int blue = (pixel & 0xFF); @@ -167,12 +153,6 @@ public class BarcodeAnalyzer { if (isInArea) { continue; } - pixel = 0; - pixel = pixel | (alpha << 24); - pixel = pixel | (0); - pixel = pixel | (255 << 8); - pixel = pixel | (0); - isInArea = false; for (Rectangle rectangle : areaList) { Rectangle intersection = rectangle.intersection(rect); if (intersection.width > 0 && intersection.height > 0) { @@ -206,24 +186,25 @@ public class BarcodeAnalyzer { areaList.remove(rectToRemove); } } - if (out != null) { - out.setRGB(x, y, pixel); - gc.draw(rect); - } } } } return areaList; } - private void computeBlackAndWhite(BufferedImage in, BufferedImage out, float saturationMin, float brightnessMin) { - int w = in.getWidth(); - int h = in.getHeight(); + private BufferedImage computeBlackAndWhite(BufferedImage bufferedImage) { + BufferedImage 
outputBufferedImage = + new BufferedImage(bufferedImage.getWidth(), bufferedImage.getHeight(), BufferedImage.TYPE_INT_ARGB); + Graphics2D graphics2D = outputBufferedImage.createGraphics(); + graphics2D.drawRenderedImage(bufferedImage, null); + graphics2D.dispose(); + int w = bufferedImage.getWidth(); + int h = bufferedImage.getHeight(); int pixel; float[] hsb = new float[3]; for (int x = 0; x < w; x++) { for (int y = 0; y < h; y++) { - pixel = in.getRGB(x, y); + pixel = bufferedImage.getRGB(x, y); int alpha = ((pixel >> 24) & 0xFF); int red = ((pixel >> 16) & 0xFF); int green = ((pixel >> 8) & 0xFF); @@ -243,9 +224,27 @@ public class BarcodeAnalyzer { pixel = pixel | (red << 16); pixel = pixel | (green << 8); pixel = pixel | (blue); - out.setRGB(x, y, pixel); + outputBufferedImage.setRGB(x, y, pixel); } } + return outputBufferedImage; + } + + public static class BarcodeAnalyzerResult { + + private final List> result; + + private BarcodeAnalyzerResult() { + this.result = new ArrayList<>(); + } + + public List> getResult() { + return result; + } + + private void add(Map.Entry entry) { + result.add(entry); + } } private static class PageExtractor extends PDFGraphicsStreamEngine { diff --git a/graphics-pdfbox/src/main/java/org/xbib/graphics/pdfbox/analyze/DocumentAnalyzer.java b/graphics-pdfbox/src/main/java/org/xbib/graphics/pdfbox/analyze/DocumentAnalyzer.java index 956818e..d1fdcdd 100644 --- a/graphics-pdfbox/src/main/java/org/xbib/graphics/pdfbox/analyze/DocumentAnalyzer.java +++ b/graphics-pdfbox/src/main/java/org/xbib/graphics/pdfbox/analyze/DocumentAnalyzer.java @@ -18,6 +18,7 @@ import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Calendar; +import java.util.Collection; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -44,6 +45,7 @@ public class DocumentAnalyzer { try (PDDocument document = Loader.loadPDF(file)) { documentInformationToResult(document.getDocumentInformation()); 
List> pages = new ArrayList<>(); + Collection colorspaces = new LinkedHashSet<>(); int documentimagecount = 0; int pagecount = document.getNumberOfPages(); boolean isDocumentColor = false; @@ -55,6 +57,7 @@ public class DocumentAnalyzer { for (int i = 0; i < pagecount; i++) { PDPage pdPage = document.getPage(i); Map pageMap = analyzePage(i, pdPage, seen); + colorspaces.addAll((Collection) pageMap.get("colorspaces")); boolean isColor = (boolean) pageMap.get("iscolor"); if (isColor) { isDocumentColor = true; @@ -79,6 +82,7 @@ public class DocumentAnalyzer { } result.put("pagecount", pagecount); result.put("imagecount", documentimagecount); + result.put("colorspaces", colorspaces); result.put("isimage", pagecount > 0 && isDocumentImage); result.put("iscolor", isDocumentColor); result.put("isgray", isDocumentGray); @@ -103,6 +107,11 @@ public class DocumentAnalyzer { return (int) result.get("pagecount"); } + @SuppressWarnings("unchecked") + public Collection getColorSpaces() { + return (Collection) result.get("colorspaces"); + } + public boolean isColor() { return (boolean) result.get("iscolor"); } @@ -173,11 +182,18 @@ public class DocumentAnalyzer { m.put("cropbox", Map.of("height", page.getCropBox().getHeight(), "width", page.getCropBox().getWidth())); m.put("mediabox", Map.of("height", page.getMediaBox().getHeight(), "width", page.getMediaBox().getWidth())); m.put("bleedbox", Map.of("height", page.getBleedBox().getHeight(), "width", page.getBleedBox().getWidth())); + if (page.getTrimBox() != null) { + m.put("trimbox", Map.of("height", page.getTrimBox().getHeight(), "width", page.getTrimBox().getWidth())); + } + if (page.getArtBox() != null) { + m.put("artbox", Map.of("height", page.getArtBox().getHeight(), "width", page.getArtBox().getWidth())); + } checkBoxDimensions(page, m); m.put("rotation", page.getRotation()); PageExtractor pageExtractor = new PageExtractor(page, seen); pageExtractor.process(); m.put("images", pageExtractor.getImages()); + 
m.put("colorspaces", pageExtractor.getColorSpaces()); m.put("iscolor", pageExtractor.isColor()); m.put("isgray", pageExtractor.isGray()); FontExtractor fontExtractor = new FontExtractor(page); @@ -190,10 +206,17 @@ public class DocumentAnalyzer { List isA4 = new ArrayList<>(); List isLetter = new ArrayList<>(); List isLandscape= new ArrayList<>(); - List.of(Map.entry("bbox", page.getBBox()), + List> boxes = new ArrayList<>(List.of(Map.entry("bbox", page.getBBox()), Map.entry("cropbox", page.getCropBox()), Map.entry("mediabox", page.getMediaBox()), - Map.entry("bleedbox", page.getBleedBox())).forEach(e -> { + Map.entry("bleedbox", page.getBleedBox()))); + if (page.getTrimBox() != null) { + boxes.add(Map.entry("trimbox", page.getTrimBox())); + } + if (page.getArtBox() != null) { + boxes.add(Map.entry("artbox", page.getArtBox())); + } + boxes.forEach(e -> { String boxName = e.getKey(); PDRectangle rect = e.getValue(); Set set = new LinkedHashSet<>(); @@ -392,16 +415,19 @@ public class DocumentAnalyzer { private final Set seen; - private boolean isColor; + private final Collection colorSpaces; private boolean isGray; + private boolean isBlackWhite; + private PageExtractor(PDPage page, Set seen) { super(page); this.seen = seen; this.images = new ArrayList<>(); - this.isColor = false; + this.colorSpaces = new LinkedHashSet<>(); this.isGray = false; + this.isBlackWhite = false; } public void process() throws IOException { @@ -412,14 +438,22 @@ public class DocumentAnalyzer { return images; } + public Collection getColorSpaces() { + return colorSpaces; + } + public boolean isColor() { - return isColor; + return isColor(colorSpaces); } public boolean isGray() { return isGray; } + public boolean isBlackWhite() { + return isBlackWhite; + } + @Override public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) { } @@ -434,14 +468,15 @@ public class DocumentAnalyzer { seen.add(xobject.getCOSObject()); Map m = new LinkedHashMap<>(); String colorSpaceName = 
xobject.getColorSpace().getName(); - boolean isColorSpace = isColorSpaceName(colorSpaceName); - if (isColorSpace) { - this.isColor = true; + colorSpaces.add(colorSpaceName); + if (isColor(List.of(colorSpaceName))) { + m.put("iscolor", true); } - boolean isGraySpace = isGraySpaceName(colorSpaceName); - if (isGraySpace) { + if ("DeviceGray".equals(colorSpaceName)) { + // black & white check if (xobject.getBitsPerComponent() > 1) { this.isGray = true; + m.put("isgray", true); } } m.put("width", xobject.getWidth()); @@ -449,8 +484,6 @@ public class DocumentAnalyzer { m.put("bitspercomponent", xobject.getBitsPerComponent()); m.put("colorspace", colorSpaceName); m.put("suffix", xobject.getSuffix()); - m.put("iscolor", isColorSpace); - m.put("isgray", isGraySpace); images.add(m); } } @@ -486,24 +519,12 @@ public class DocumentAnalyzer { @Override public void strokePath() { - String colorSpaceName = getGraphicsState().getStrokingColor().getColorSpace().getName(); - if (isColorSpaceName(colorSpaceName)) { - this.isColor = true; - } - if (isGraySpaceName(colorSpaceName)) { - this.isGray = true; - } + colorSpaces.add(getGraphicsState().getStrokingColor().getColorSpace().getName()); } @Override public void fillPath(int windingRule) { - String colorSpaceName = getGraphicsState().getStrokingColor().getColorSpace().getName(); - if (isColorSpaceName(colorSpaceName)) { - this.isColor = true; - } - if (isGraySpaceName(colorSpaceName)) { - this.isGray = true; - } + colorSpaces.add(getGraphicsState().getStrokingColor().getColorSpace().getName()); } @Override @@ -514,15 +535,15 @@ public class DocumentAnalyzer { public void shadingFill(COSName shadingName) { } - private boolean isColorSpaceName(String name) { - return "DeviceRGB".equals(name) || - "DeviceCMYK".equals(name) || - "ICCBased".equals(name) || - "Indexed".equals(name); + private boolean isColor(Collection colorSpaceNames) { + return colorSpaceNames.contains("DeviceRGB") || + colorSpaceNames.contains("DeviceCYMK") || + 
colorSpaceNames.contains("ICCBased") || + colorSpaceNames.contains("Indexed"); } - private boolean isGraySpaceName(String name) { - return "DeviceGray".equals(name); + private boolean isGray(Collection colorSpaceNames) { + return colorSpaceNames.contains("DeviceGray"); } } diff --git a/graphics-pdfbox/src/test/java/org/xbib/graphics/pdfbox/test/BarcodeAnalyzerTest.java b/graphics-pdfbox/src/test/java/org/xbib/graphics/pdfbox/test/BarcodeAnalyzerTest.java index 2cf151d..bae7813 100644 --- a/graphics-pdfbox/src/test/java/org/xbib/graphics/pdfbox/test/BarcodeAnalyzerTest.java +++ b/graphics-pdfbox/src/test/java/org/xbib/graphics/pdfbox/test/BarcodeAnalyzerTest.java @@ -2,13 +2,13 @@ package org.xbib.graphics.pdfbox.test; import org.junit.jupiter.api.Test; import org.xbib.graphics.pdfbox.analyze.BarcodeAnalyzer; -import org.xbib.graphics.zxing.Result; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; @@ -19,16 +19,16 @@ public class BarcodeAnalyzerTest { @Test public void testBarcodeAnalysis() throws IOException { Path tmp = Files.createTempDirectory("barcode-analyzer"); - String sample = "394_394-F3GIS2JO.pdf"; + String sample = "test.pdf"; Path path = tmp.resolve(sample); try (InputStream inputStream = getClass().getResourceAsStream(sample); OutputStream outputStream = Files.newOutputStream(path)) { if (inputStream != null) { inputStream.transferTo(outputStream); BarcodeAnalyzer barcodeAnalyzer = new BarcodeAnalyzer(); - barcodeAnalyzer.process(path.toFile(), 0, 5); - for (Result result : barcodeAnalyzer.getResultList()) { - logger.log(Level.INFO, "barcodeFormat = " + result.getBarcodeFormat() + " value = " + result.getText()); + barcodeAnalyzer.process(path.toFile(), 0); + for (Map.Entry entry : barcodeAnalyzer.getResult()) { + logger.log(Level.INFO, "barcode format = " + entry.getKey() + " 
value = " + entry.getValue()); } } } diff --git a/graphics-pdfbox/src/test/java/org/xbib/graphics/pdfbox/test/DocumentAnalyzerTest.java b/graphics-pdfbox/src/test/java/org/xbib/graphics/pdfbox/test/DocumentAnalyzerTest.java index 5871453..c864e31 100644 --- a/graphics-pdfbox/src/test/java/org/xbib/graphics/pdfbox/test/DocumentAnalyzerTest.java +++ b/graphics-pdfbox/src/test/java/org/xbib/graphics/pdfbox/test/DocumentAnalyzerTest.java @@ -28,6 +28,7 @@ public class DocumentAnalyzerTest { documentAnalyzer.process(path.toFile()); logger.log(Level.INFO, "result = " + documentAnalyzer.getResult()); logger.log(Level.INFO, "isvalid = " + documentAnalyzer.isValid()); + logger.log(Level.INFO, "colorspaces = " + documentAnalyzer.getColorSpaces()); logger.log(Level.INFO, "iscolor = " + documentAnalyzer.isColor()); logger.log(Level.INFO, "isgray = " + documentAnalyzer.isGray()); logger.log(Level.INFO, "isA4 = " + documentAnalyzer.isA4());