Do not try to reuse image readers; enforce disposal of the reader after each image to tackle excessive memory usage

This commit is contained in:
Jörg Prante 2022-11-16 00:03:15 +01:00
parent 3df4a2aaf4
commit 8c86ed45b8
2 changed files with 51 additions and 84 deletions

View file

@ -1,5 +1,5 @@
group = org.xbib.graphics group = org.xbib.graphics
name = graphics name = graphics
version = 4.3.0 version = 4.3.1
org.gradle.warning.mode = ALL org.gradle.warning.mode = ALL

View file

@ -1,5 +1,7 @@
package org.xbib.graphics.ghostscript; package org.xbib.graphics.ghostscript;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import javax.imageio.ImageIO; import javax.imageio.ImageIO;
import javax.imageio.ImageReadParam; import javax.imageio.ImageReadParam;
import javax.imageio.ImageReader; import javax.imageio.ImageReader;
@ -33,11 +35,9 @@ import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Calendar; import java.util.Calendar;
import java.util.Iterator; import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@ -47,18 +47,17 @@ public class PDFRasterizer implements Closeable {
private static final Logger logger = Logger.getLogger(PDFRasterizer.class.getName()); private static final Logger logger = Logger.getLogger(PDFRasterizer.class.getName());
private static final int MAX_IMAGE_SIZE_BYTES = 128 * 1024 * 1024;
private String creator; private String creator;
private String author; private String author;
private String subject; private String subject;
private final Map<String, ImageReader> imageReaders;
private final Path tmpPath; private final Path tmpPath;
public PDFRasterizer() { public PDFRasterizer() {
this.imageReaders = createImageReaders();
this.tmpPath = Paths.get(System.getProperty("java.io.tmpdir", "/var/tmp")).resolve(toString()); this.tmpPath = Paths.get(System.getProperty("java.io.tmpdir", "/var/tmp")).resolve(toString());
} }
@ -76,7 +75,6 @@ public class PDFRasterizer implements Closeable {
@Override @Override
public void close() throws IOException { public void close() throws IOException {
disposeImageReaders(imageReaders);
} }
public synchronized void convert(Path source, Path target) throws IOException { public synchronized void convert(Path source, Path target) throws IOException {
@ -211,7 +209,7 @@ public class PDFRasterizer implements Closeable {
public synchronized int mergeImagesToPDF(Path sourceDir, Path targetFile, String globPattern) throws IOException { public synchronized int mergeImagesToPDF(Path sourceDir, Path targetFile, String globPattern) throws IOException {
logger.info("mergeImagesToPDF: source=" + sourceDir + " target=" + targetFile + " glob =" + globPattern); logger.info("mergeImagesToPDF: source=" + sourceDir + " target=" + targetFile + " glob =" + globPattern);
int pagecount = 0; AtomicInteger pagecount = new AtomicInteger();
PathMatcher pathMatcher = sourceDir.getFileSystem().getPathMatcher("glob:" + globPattern); PathMatcher pathMatcher = sourceDir.getFileSystem().getPathMatcher("glob:" + globPattern);
List<PDDocument> coverPageDocs = new ArrayList<>(); List<PDDocument> coverPageDocs = new ArrayList<>();
try (Stream<Path> files = Files.list(sourceDir); try (Stream<Path> files = Files.list(sourceDir);
@ -244,17 +242,18 @@ public class PDFRasterizer implements Closeable {
newPage.setCropBox(page.getCropBox()); newPage.setCropBox(page.getCropBox());
newPage.setMediaBox(page.getMediaBox()); newPage.setMediaBox(page.getMediaBox());
newPage.setRotation(page.getRotation()); newPage.setRotation(page.getRotation());
pagecount++; pagecount.incrementAndGet();
} }
coverPageDocs.add(doc); coverPageDocs.add(doc);
} }
} else if (isImageSuffix(path)) { } else if (isImageSuffix(path)) {
logger.log(Level.FINE, "found image " + path); logger.log(Level.FINE, "found image " + path);
long size = Files.size(path); long size = Files.size(path);
if (size > 128 * 1024 * 1024) { if (size > MAX_IMAGE_SIZE_BYTES) {
logger.log(Level.WARNING, "skipping image because too large: " + path + " size = " + size); logger.log(Level.WARNING, "skipping image because larger than: " + path + " size = " + size);
} else { } else {
BufferedImage bufferedImage = readImage(path); readImage(path, bufferedImage -> {
try {
PDPage page = new PDPage(new PDRectangle(bufferedImage.getWidth(), bufferedImage.getHeight())); PDPage page = new PDPage(new PDRectangle(bufferedImage.getWidth(), bufferedImage.getHeight()));
pdDocument.addPage(page); pdDocument.addPage(page);
PDImageXObject pdImageXObject = LosslessFactory.createFromImage(pdDocument, bufferedImage); PDImageXObject pdImageXObject = LosslessFactory.createFromImage(pdDocument, bufferedImage);
@ -264,23 +263,26 @@ public class PDFRasterizer implements Closeable {
new PDPageContentStream(pdDocument, page, PDPageContentStream.AppendMode.APPEND, true); new PDPageContentStream(pdDocument, page, PDPageContentStream.AppendMode.APPEND, true);
pdPageContentStream.drawImage(pdImageXObject, 0, 0); pdPageContentStream.drawImage(pdImageXObject, 0, 0);
pdPageContentStream.close(); pdPageContentStream.close();
pagecount++; pagecount.incrementAndGet();
} else { } else {
logger.log(Level.WARNING, "unable to create PDImageXObject from buffered image from " + path); logger.log(Level.WARNING, "unable to create PDImageXObject from buffered image from " + path);
throw new IOException("unable to create PDImageXObject from buffered image"); throw new IOException("unable to create PDImageXObject from buffered image");
} }
bufferedImage.flush(); } catch (IOException e) {
logger.log(Level.SEVERE, e.getMessage(), e);
}
});
} }
} }
} }
pdDocument.save(outputStream); pdDocument.save(outputStream);
logger.info("mergeImagesToPDF: done, " + pagecount + " pages"); logger.info("mergeImagesToPDF: done, " + pagecount.get() + " pages");
} finally { } finally {
for (PDDocument pd : coverPageDocs) { for (PDDocument pd : coverPageDocs) {
pd.close(); pd.close();
} }
} }
return pagecount; return pagecount.get();
} }
public synchronized void scalePDF(Path sourceFile, public synchronized void scalePDF(Path sourceFile,
@ -381,33 +383,6 @@ public class PDFRasterizer implements Closeable {
string.endsWith(".tif"); string.endsWith(".tif");
} }
private Map<String, ImageReader> createImageReaders() {
Map<String, ImageReader> map = new LinkedHashMap<>();
ImageReader pngReader = getImageReader("png");
if (pngReader != null) {
logger.log(Level.FINE, "PNG reader: " + pngReader.getClass().getName());
map.put("png", pngReader);
}
ImageReader pnmReader = getImageReader("pnm");
if (pnmReader != null) {
logger.log(Level.FINE, "PNM reader: " + pnmReader.getClass().getName());
map.put("pnm", pnmReader);
}
ImageReader jpegReader = getImageReader("jpeg");
if (jpegReader != null) {
logger.log(Level.FINE, "JPEG reader: " + jpegReader.getClass().getName());
map.put("jpg", jpegReader);
map.put("jpeg", jpegReader);
}
ImageReader tiffReader = getImageReader("tiff");
if (tiffReader != null) {
logger.log(Level.FINE, "TIFF reader: " + tiffReader.getClass().getName());
map.put("tif", tiffReader);
map.put("tiff", tiffReader);
}
return map;
}
private ImageReader getImageReader(String formatName) { private ImageReader getImageReader(String formatName) {
Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName(formatName); Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName(formatName);
if (readers.hasNext()) { if (readers.hasNext()) {
@ -416,24 +391,18 @@ public class PDFRasterizer implements Closeable {
return null; return null;
} }
private void disposeImageReaders(Map<String, ImageReader> map) { private void readImage(Path path, Consumer<BufferedImage> consumer) throws IOException {
if (map != null) {
for (Map.Entry<String, ImageReader> entry : map.entrySet()) {
try {
entry.getValue().dispose();
} catch (Exception e) {
logger.log(Level.WARNING, e.getMessage());
}
}
}
}
private BufferedImage readImage(Path path) throws IOException {
String suffix = getSuffix(path.getFileName().toString().toLowerCase(Locale.ROOT)); String suffix = getSuffix(path.getFileName().toString().toLowerCase(Locale.ROOT));
if ("jpg".equals(suffix)) {
suffix = "jpeg";
}
if ("tif".equals(suffix)) {
suffix = "tiff";
}
ImageInputStream imageInputStream = ImageIO.createImageInputStream(path.toFile()); ImageInputStream imageInputStream = ImageIO.createImageInputStream(path.toFile());
if (imageInputStream != null) { ImageReader imageReader = getImageReader(suffix);
ImageReader imageReader = imageReaders.get(suffix);
if (imageReader != null) { if (imageReader != null) {
logger.log(Level.FINE, "using image reader for " + suffix + " = " + imageReader.getClass().getName());
imageReader.setInput(imageInputStream); imageReader.setInput(imageInputStream);
ImageReadParam param = imageReader.getDefaultReadParam(); ImageReadParam param = imageReader.getDefaultReadParam();
BufferedImage bufferedImage = imageReader.read(0, param); BufferedImage bufferedImage = imageReader.read(0, param);
@ -441,13 +410,11 @@ public class PDFRasterizer implements Closeable {
" height = " + bufferedImage.getHeight() + " height = " + bufferedImage.getHeight() +
" color model = " + bufferedImage.getColorModel()); " color model = " + bufferedImage.getColorModel());
imageInputStream.close(); imageInputStream.close();
return bufferedImage; consumer.accept(bufferedImage);
imageReader.dispose();
} else { } else {
throw new IOException("no image reader found for " + suffix); throw new IOException("no image reader found for " + suffix);
} }
} else {
throw new IOException("no image input stream possible for " + path);
}
} }
private static String getSuffix(String filename) { private static String getSuffix(String filename) {