do not assume color space is present when analyzing document images

This commit is contained in:
Jörg Prante 2025-02-04 13:45:39 +01:00
parent b13aa5f3ef
commit a323076ca3
4 changed files with 37 additions and 3 deletions
gradle.properties
graphics-pdfbox/src
main/java/org/xbib/graphics/pdfbox/analyze
test
java/org/xbib/graphics/pdfbox/test
resources/org/xbib/graphics/pdfbox/test

View file

@ -1,3 +1,3 @@
group = org.xbib.graphics
name = graphics
version = 5.6.0
version = 5.6.1

View file

@ -10,6 +10,7 @@ import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
import org.apache.pdfbox.pdmodel.graphics.image.PDImage;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
@ -545,12 +546,18 @@ public class DocumentAnalyzer {
/**
 * Records the name of the color space of the current stroking color.
 *
 * The color space may be absent, in which case nothing is recorded.
 * The color space is looked up only once and never dereferenced
 * without a null check.
 */
@Override
public void strokePath() {
    PDColorSpace pdColorSpace = getGraphicsState().getStrokingColor().getColorSpace();
    // a color without a color space contributes nothing to the analysis
    if (pdColorSpace != null) {
        colorSpaces.add(pdColorSpace.getName());
    }
}
/**
 * Records the name of the color space of the current non-stroking
 * (fill) color.
 *
 * Fill operators paint with the non-stroking color, so that color is
 * inspected here rather than the stroking color. The color space may be
 * absent, in which case nothing is recorded.
 *
 * @param windingRule the winding rule of the fill operation; not used here
 */
@Override
public void fillPath(int windingRule) {
    PDColorSpace pdColorSpace = getGraphicsState().getNonStrokingColor().getColorSpace();
    // a color without a color space contributes nothing to the analysis
    if (pdColorSpace != null) {
        colorSpaces.add(pdColorSpace.getName());
    }
}
@Override

View file

@ -41,4 +41,31 @@ public class DocumentAnalyzerTest {
Files.delete(path);
Files.delete(tmp);
}
/**
 * Runs the document analyzer over the bundled sample PDF and logs the
 * analysis results.
 *
 * The sample is copied from the classpath into a fresh temporary directory.
 * The copy is closed before the analyzer reads the file, and the temporary
 * file and directory are removed even when the analysis throws.
 * If the sample is not on the classpath, the test does nothing.
 *
 * @throws IOException if copying, analyzing or cleaning up fails
 */
@Test
public void testPdfXDocument() throws IOException {
    String sample = "20250017556.pdf";
    Path tmp = Files.createTempDirectory("document-analyzer");
    Path path = tmp.resolve(sample);
    try {
        try (InputStream inputStream = getClass().getResourceAsStream(sample)) {
            if (inputStream == null) {
                // sample resource not available: nothing to analyze
                return;
            }
            Files.copy(inputStream, path);
        }
        // the copy is complete and closed before the analyzer opens the file
        DocumentAnalyzer documentAnalyzer = new DocumentAnalyzer();
        documentAnalyzer.process(path.toFile());
        logger.log(Level.INFO, "result = " + documentAnalyzer.getResult());
        logger.log(Level.INFO, "isvalid = " + documentAnalyzer.isValid());
        logger.log(Level.INFO, "suffixes = " + documentAnalyzer.getSuffixes());
        logger.log(Level.INFO, "colorspaces = " + documentAnalyzer.getColorSpaces());
        logger.log(Level.INFO, "iscolor = " + documentAnalyzer.isColor());
        logger.log(Level.INFO, "isgray = " + documentAnalyzer.isGray());
        logger.log(Level.INFO, "isA4 = " + documentAnalyzer.isA4());
        logger.log(Level.INFO, "isLetter = " + documentAnalyzer.isLetter());
        logger.log(Level.INFO, "islandscape = " + documentAnalyzer.isLandscape());
        logger.log(Level.INFO, "isimage = " + documentAnalyzer.isImage());
    } finally {
        // always clean up, whether or not the sample was ever copied
        Files.deleteIfExists(path);
        Files.deleteIfExists(tmp);
    }
}
}