Do not reuse image readers; dispose of the reader after each image to curb excessive memory use

This commit is contained in:
Jörg Prante 2022-11-16 00:03:15 +01:00
parent 3df4a2aaf4
commit 8c86ed45b8
2 changed files with 51 additions and 84 deletions

View file

@ -1,5 +1,5 @@
group = org.xbib.graphics
name = graphics
version = 4.3.0
version = 4.3.1
org.gradle.warning.mode = ALL

View file

@ -1,5 +1,7 @@
package org.xbib.graphics.ghostscript;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import javax.imageio.ImageIO;
import javax.imageio.ImageReadParam;
import javax.imageio.ImageReader;
@ -33,11 +35,9 @@ import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
@ -47,18 +47,17 @@ public class PDFRasterizer implements Closeable {
// Class-wide java.util.logging logger, named after this class.
private static final Logger logger = Logger.getLogger(PDFRasterizer.class.getName());
// Upper bound for a single image file: 128 * 1024 * 1024 bytes (128 MiB).
private static final int MAX_IMAGE_SIZE_BYTES = 128 * 1024 * 1024;
private String creator;
private String author;
private String subject;
// Image readers keyed by file suffix; filled by createImageReaders() in the constructor.
private final Map<String, ImageReader> imageReaders;
// Per-instance path below the JVM temp directory; assigned in the constructor.
private final Path tmpPath;
/**
 * Creates a rasterizer.
 *
 * Populates {@code imageReaders} via {@code createImageReaders()} and sets
 * {@code tmpPath} to a child of the directory named by the
 * {@code java.io.tmpdir} system property (falling back to {@code /var/tmp}
 * when the property is unset); the child name is this object's
 * {@code toString()} value.
 */
public PDFRasterizer() {
this.imageReaders = createImageReaders();
this.tmpPath = Paths.get(System.getProperty("java.io.tmpdir", "/var/tmp")).resolve(toString());
}
@ -76,7 +75,6 @@ public class PDFRasterizer implements Closeable {
/**
 * Closes this rasterizer by passing the {@code imageReaders} map to
 * {@code disposeImageReaders}.
 *
 * @throws IOException declared by the {@link Closeable} contract
 */
@Override
public void close() throws IOException {
disposeImageReaders(imageReaders);
}
public synchronized void convert(Path source, Path target) throws IOException {
@ -211,7 +209,7 @@ public class PDFRasterizer implements Closeable {
public synchronized int mergeImagesToPDF(Path sourceDir, Path targetFile, String globPattern) throws IOException {
logger.info("mergeImagesToPDF: source=" + sourceDir + " target=" + targetFile + " glob =" + globPattern);
int pagecount = 0;
AtomicInteger pagecount = new AtomicInteger();
PathMatcher pathMatcher = sourceDir.getFileSystem().getPathMatcher("glob:" + globPattern);
List<PDDocument> coverPageDocs = new ArrayList<>();
try (Stream<Path> files = Files.list(sourceDir);
@ -244,43 +242,47 @@ public class PDFRasterizer implements Closeable {
newPage.setCropBox(page.getCropBox());
newPage.setMediaBox(page.getMediaBox());
newPage.setRotation(page.getRotation());
pagecount++;
pagecount.incrementAndGet();
}
coverPageDocs.add(doc);
}
} else if (isImageSuffix(path)) {
logger.log(Level.FINE, "found image " + path);
long size = Files.size(path);
if (size > 128 * 1024 * 1024) {
logger.log(Level.WARNING, "skipping image because too large: " + path + " size = " + size);
if (size > MAX_IMAGE_SIZE_BYTES) {
logger.log(Level.WARNING, "skipping image because larger than: " + path + " size = " + size);
} else {
BufferedImage bufferedImage = readImage(path);
PDPage page = new PDPage(new PDRectangle(bufferedImage.getWidth(), bufferedImage.getHeight()));
pdDocument.addPage(page);
PDImageXObject pdImageXObject = LosslessFactory.createFromImage(pdDocument, bufferedImage);
if (pdImageXObject != null) {
// true = use FlateDecode to compress
PDPageContentStream pdPageContentStream =
new PDPageContentStream(pdDocument, page, PDPageContentStream.AppendMode.APPEND, true);
pdPageContentStream.drawImage(pdImageXObject, 0, 0);
pdPageContentStream.close();
pagecount++;
} else {
logger.log(Level.WARNING, "unable to create PDImageXObject from buffered image from " + path);
throw new IOException("unable to create PDImageXObject from buffered image");
}
bufferedImage.flush();
readImage(path, bufferedImage -> {
try {
PDPage page = new PDPage(new PDRectangle(bufferedImage.getWidth(), bufferedImage.getHeight()));
pdDocument.addPage(page);
PDImageXObject pdImageXObject = LosslessFactory.createFromImage(pdDocument, bufferedImage);
if (pdImageXObject != null) {
// true = use FlateDecode to compress
PDPageContentStream pdPageContentStream =
new PDPageContentStream(pdDocument, page, PDPageContentStream.AppendMode.APPEND, true);
pdPageContentStream.drawImage(pdImageXObject, 0, 0);
pdPageContentStream.close();
pagecount.incrementAndGet();
} else {
logger.log(Level.WARNING, "unable to create PDImageXObject from buffered image from " + path);
throw new IOException("unable to create PDImageXObject from buffered image");
}
} catch (IOException e) {
logger.log(Level.SEVERE, e.getMessage(), e);
}
});
}
}
}
pdDocument.save(outputStream);
logger.info("mergeImagesToPDF: done, " + pagecount + " pages");
logger.info("mergeImagesToPDF: done, " + pagecount.get() + " pages");
} finally {
for (PDDocument pd : coverPageDocs) {
pd.close();
}
}
return pagecount;
return pagecount.get();
}
public synchronized void scalePDF(Path sourceFile,
@ -381,33 +383,6 @@ public class PDFRasterizer implements Closeable {
string.endsWith(".tif");
}
/**
 * Builds an insertion-ordered map from file suffix to {@link ImageReader}.
 *
 * Readers are looked up with {@code getImageReader} for the formats
 * {@code png}, {@code pnm}, {@code jpeg} and {@code tiff}. A format for which
 * the lookup returns {@code null} gets no entry. The JPEG reader is registered
 * under both {@code jpg} and {@code jpeg}, the TIFF reader under both
 * {@code tif} and {@code tiff}; each pair shares one reader instance.
 *
 * @return the suffix-to-reader map, possibly empty, never {@code null}
 */
private Map<String, ImageReader> createImageReaders() {
Map<String, ImageReader> map = new LinkedHashMap<>();
ImageReader pngReader = getImageReader("png");
if (pngReader != null) {
logger.log(Level.FINE, "PNG reader: " + pngReader.getClass().getName());
map.put("png", pngReader);
}
ImageReader pnmReader = getImageReader("pnm");
if (pnmReader != null) {
logger.log(Level.FINE, "PNM reader: " + pnmReader.getClass().getName());
map.put("pnm", pnmReader);
}
ImageReader jpegReader = getImageReader("jpeg");
if (jpegReader != null) {
logger.log(Level.FINE, "JPEG reader: " + jpegReader.getClass().getName());
// both common suffixes map to the same reader instance
map.put("jpg", jpegReader);
map.put("jpeg", jpegReader);
}
ImageReader tiffReader = getImageReader("tiff");
if (tiffReader != null) {
logger.log(Level.FINE, "TIFF reader: " + tiffReader.getClass().getName());
// both common suffixes map to the same reader instance
map.put("tif", tiffReader);
map.put("tiff", tiffReader);
}
return map;
}
private ImageReader getImageReader(String formatName) {
Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName(formatName);
if (readers.hasNext()) {
@ -416,37 +391,29 @@ public class PDFRasterizer implements Closeable {
return null;
}
/**
 * Calls {@link ImageReader#dispose()} on every value of the given map.
 *
 * A {@code null} map is ignored. An exception thrown while disposing one
 * reader is logged at WARNING level (message only) and does not stop the
 * remaining readers from being disposed.
 *
 * @param map suffix-to-reader map, may be {@code null}
 */
private void disposeImageReaders(Map<String, ImageReader> map) {
if (map != null) {
for (Map.Entry<String, ImageReader> entry : map.entrySet()) {
try {
entry.getValue().dispose();
} catch (Exception e) {
// best effort: keep disposing the remaining readers
logger.log(Level.WARNING, e.getMessage());
}
}
}
}
private BufferedImage readImage(Path path) throws IOException {
private void readImage(Path path, Consumer<BufferedImage> consumer) throws IOException {
String suffix = getSuffix(path.getFileName().toString().toLowerCase(Locale.ROOT));
if ("jpg".equals(suffix)) {
suffix = "jpeg";
}
if ("tif".equals(suffix)) {
suffix = "tiff";
}
ImageInputStream imageInputStream = ImageIO.createImageInputStream(path.toFile());
if (imageInputStream != null) {
ImageReader imageReader = imageReaders.get(suffix);
if (imageReader != null) {
imageReader.setInput(imageInputStream);
ImageReadParam param = imageReader.getDefaultReadParam();
BufferedImage bufferedImage = imageReader.read(0, param);
logger.log(Level.FINE, "path = " + path + " loaded, width = " + bufferedImage.getWidth() +
" height = " + bufferedImage.getHeight() +
" color model = " + bufferedImage.getColorModel());
imageInputStream.close();
return bufferedImage;
} else {
throw new IOException("no image reader found for " + suffix);
}
ImageReader imageReader = getImageReader(suffix);
if (imageReader != null) {
logger.log(Level.FINE, "using image reader for " + suffix + " = " + imageReader.getClass().getName());
imageReader.setInput(imageInputStream);
ImageReadParam param = imageReader.getDefaultReadParam();
BufferedImage bufferedImage = imageReader.read(0, param);
logger.log(Level.FINE, "path = " + path + " loaded, width = " + bufferedImage.getWidth() +
" height = " + bufferedImage.getHeight() +
" color model = " + bufferedImage.getColorModel());
imageInputStream.close();
consumer.accept(bufferedImage);
imageReader.dispose();
} else {
throw new IOException("no image input stream possible for " + path);
throw new IOException("no image reader found for " + suffix);
}
}