/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.ocr;

import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import javax.imageio.ImageIO;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.external.ExternalParser;
import org.apache.tika.parser.image.ImageParser;
import org.apache.tika.parser.image.TiffParser;
import org.apache.tika.parser.jpeg.JpegParser;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * Parser that runs the external {@code tesseract} command-line program on an
 * image and emits the recognised text as XHTML.
 *
 * <p>The parser only advertises its supported media types when the tesseract
 * executable could be started successfully; otherwise it reports no supported
 * types and {@link #parse(InputStream, ContentHandler, Metadata, ParseContext)}
 * returns without doing anything.
 *
 * <p>A {@link TesseractOCRConfig} found in the {@link ParseContext} overrides
 * the built-in default configuration.
 */
public class TesseractOCRParser extends AbstractParser {
    private static final long serialVersionUID = -8167538283213097265L;

    /** Configuration used when the parse context does not supply one. */
    private static final TesseractOCRConfig DEFAULT_CONFIG = new TesseractOCRConfig();

    /** Image types handed to tesseract. */
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
            new HashSet<MediaType>(Arrays.asList(
                    MediaType.image("png"),
                    MediaType.image("jpeg"),
                    MediaType.image("tiff"),
                    MediaType.image("x-ms-bmp"),
                    MediaType.image("gif"))));

    /**
     * Cache of availability checks, keyed by the full path of the executable.
     * NOTE(review): this is a plain HashMap shared by all instances; it is not
     * synchronised. Confirm whether parsers are used from several threads.
     */
    private static final Map<String, Boolean> TESSERACT_PRESENT = new HashMap<String, Boolean>();

    /** Parser the input is handed to after OCR has run. */
    private static final Parser _TMP_IMAGE_METADATA_PARSER = new CompositeImageParser();

    /**
     * Returns the supported image types if tesseract is available for the
     * effective configuration, otherwise an empty set.
     *
     * @param context parse context, optionally carrying a {@link TesseractOCRConfig}
     * @return the supported media types, or an empty set when tesseract is missing
     */
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        TesseractOCRConfig config = context.get(TesseractOCRConfig.class, DEFAULT_CONFIG);
        if (this.hasTesseract(config)) {
            return SUPPORTED_TYPES;
        }
        return Collections.emptySet();
    }

    /**
     * Sets {@code TESSDATA_PREFIX} in the environment of the process to be
     * started. The configured tessdata path wins; if it is empty the tesseract
     * path is used; if both are empty the environment is left untouched.
     *
     * @param config effective OCR configuration
     * @param pb     builder of the tesseract process
     */
    private void setEnv(TesseractOCRConfig config, ProcessBuilder pb) {
        String tessdataPrefix = "TESSDATA_PREFIX";
        Map<String, String> env = pb.environment();
        if (!config.getTessdataPath().isEmpty()) {
            env.put(tessdataPrefix, config.getTessdataPath());
        } else if (!config.getTesseractPath().isEmpty()) {
            env.put(tessdataPrefix, config.getTesseractPath());
        }
    }

    /**
     * Tells whether the tesseract executable for the given configuration is
     * usable, as reported by {@link ExternalParser#check}. The result is cached
     * per executable path, so the check runs only once for each path.
     *
     * @param config effective OCR configuration
     * @return {@code true} if the check succeeded for this executable path
     */
    private boolean hasTesseract(TesseractOCRConfig config) {
        String tesseract = config.getTesseractPath() + TesseractOCRParser.getTesseractProg();
        Boolean cached = TESSERACT_PRESENT.get(tesseract);
        if (cached != null) {
            return cached;
        }
        String[] checkCmd = new String[]{tesseract};
        boolean hasTesseract = ExternalParser.check(checkCmd);
        TESSERACT_PRESENT.put(tesseract, hasTesseract);
        return hasTesseract;
    }

    /**
     * Runs OCR on an in-memory image. The image is painted onto an RGB
     * {@link BufferedImage}, written to a temporary PNG file and then parsed
     * through {@link #parse(InputStream, ContentHandler, Metadata, ParseContext)}.
     *
     * @param image    image to recognise
     * @param handler  receiver of the XHTML output
     * @param metadata metadata passed through to the stream-based parse
     * @param context  parse context, optionally carrying a {@link TesseractOCRConfig}
     * @throws IOException   if the temporary file cannot be written or read
     * @throws SAXException  if the handler rejects an event
     * @throws TikaException if OCR fails or the temporary resources cannot be disposed
     */
    public void parse(Image image, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
        TemporaryResources tmp = new TemporaryResources();
        try {
            int w = image.getWidth(null);
            int h = image.getHeight(null);
            BufferedImage bImage = new BufferedImage(w, h, BufferedImage.TYPE_INT_RGB);
            // Copy the pixels of the source image; without this step the PNG
            // handed to tesseract would be an empty canvas.
            Graphics2D g2d = bImage.createGraphics();
            try {
                g2d.drawImage(image, 0, 0, null);
            } finally {
                g2d.dispose();
            }
            File file = tmp.createTemporaryFile();
            // Close the writer before the file is read back.
            try (FileOutputStream fos = new FileOutputStream(file)) {
                ImageIO.write(bImage, "png", fos);
            }
            try (TikaInputStream tis = TikaInputStream.get(file)) {
                this.parse(tis, handler, metadata, context);
            }
        } finally {
            // Dispose last, once no stream holds the temporary file open.
            tmp.dispose();
        }
    }

    /**
     * Runs tesseract on the given image stream and writes the recognised text
     * to the handler, then passes the stream to the composite image parser.
     *
     * <p>Nothing happens when tesseract is unavailable. OCR is skipped when the
     * input length lies outside the configured minimum/maximum file size, but
     * the stream is still handed to the composite image parser.
     *
     * @param stream   image data
     * @param handler  receiver of the XHTML output
     * @param metadata metadata to pass to the handlers and the image parser
     * @param context  parse context, optionally carrying a {@link TesseractOCRConfig}
     * @throws IOException   if reading the input or the OCR result fails
     * @throws SAXException  if the handler rejects an event
     * @throws TikaException if OCR times out or is interrupted, or the temporary
     *                       resources cannot be disposed
     */
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
        TesseractOCRConfig config = context.get(TesseractOCRConfig.class, DEFAULT_CONFIG);
        if (!this.hasTesseract(config)) {
            return;
        }
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        TemporaryResources tmp = new TemporaryResources();
        File ocrText = null;
        try {
            TikaInputStream tikaStream = TikaInputStream.get(stream, tmp);
            File input = tikaStream.getFile();
            long size = tikaStream.getLength();
            if (size >= config.getMinFileSizeToOcr() && size <= config.getMaxFileSizeToOcr()) {
                File outputBase = tmp.createTemporaryFile();
                // The result is read from "<base>.txt". Remember that path
                // before running OCR so it is cleaned up even if OCR fails.
                ocrText = new File(outputBase.getAbsolutePath() + ".txt");
                this.doOCR(input, outputBase, config);
                if (ocrText.exists()) {
                    try (InputStream ocrStream = new FileInputStream(ocrText)) {
                        this.extractOutput(ocrStream, xhtml);
                    }
                }
            }
            _TMP_IMAGE_METADATA_PARSER.parse(tikaStream, handler, metadata, context);
        } finally {
            try {
                tmp.dispose();
            } finally {
                // The ".txt" file is not tracked by tmp, so remove it here.
                if (ocrText != null) {
                    ocrText.delete();
                }
            }
        }
    }

    /**
     * Starts tesseract on {@code input} with {@code output} as the output base
     * name and waits for it to finish, for at most the configured timeout.
     *
     * @param input  image file to recognise
     * @param output output base path passed to tesseract
     * @param config effective OCR configuration (language, page segmentation mode, timeout)
     * @throws IOException   if the process cannot be started
     * @throws TikaException if waiting is interrupted or the timeout (in seconds) elapses;
     *                       the process is destroyed in both cases
     */
    private void doOCR(File input, File output, TesseractOCRConfig config) throws IOException, TikaException {
        String[] cmd = new String[]{
                config.getTesseractPath() + TesseractOCRParser.getTesseractProg(),
                input.getPath(),
                output.getPath(),
                "-l", config.getLanguage(),
                "-psm", config.getPageSegMode()};
        ProcessBuilder pb = new ProcessBuilder(cmd);
        this.setEnv(config, pb);
        final Process process = pb.start();
        // Nothing is sent to the process, so close its stdin right away.
        process.getOutputStream().close();
        InputStream out = process.getInputStream();
        InputStream err = process.getErrorStream();
        // Drain stdout and stderr on background threads.
        this.logStream("OCR MSG", out, input);
        this.logStream("OCR ERROR", err, input);
        FutureTask<Integer> waitTask = new FutureTask<Integer>(new Callable<Integer>() {

            @Override
            public Integer call() throws Exception {
                return process.waitFor();
            }
        });
        Thread waitThread = new Thread(waitTask);
        waitThread.start();
        try {
            waitTask.get(config.getTimeout(), TimeUnit.SECONDS);
        } catch (InterruptedException e) {
            waitThread.interrupt();
            process.destroy();
            // Keep the interrupt visible to the caller.
            Thread.currentThread().interrupt();
            throw new TikaException("TesseractOCRParser interrupted", e);
        } catch (ExecutionException ignored) {
            // Deliberately ignored: a failure while waiting for the process is
            // treated like a normal return. The exit code is not inspected.
        } catch (TimeoutException e) {
            waitThread.interrupt();
            process.destroy();
            throw new TikaException("TesseractOCRParser timeout", e);
        }
    }

    /**
     * Copies the UTF-8 text of {@code stream} into a single {@code div} element
     * of a new XHTML document. The stream is closed when reading ends.
     *
     * @param stream OCR result text
     * @param xhtml  XHTML handler receiving the text
     * @throws SAXException if the handler rejects an event
     * @throws IOException  if reading the stream fails
     */
    private void extractOutput(InputStream stream, XHTMLContentHandler xhtml) throws SAXException, IOException {
        xhtml.startDocument();
        xhtml.startElement("div");
        try (InputStreamReader reader = new InputStreamReader(stream, StandardCharsets.UTF_8)) {
            char[] buffer = new char[1024];
            for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
                if (n > 0) {
                    xhtml.characters(buffer, 0, n);
                }
            }
        }
        xhtml.endElement("div");
        xhtml.endDocument();
    }

    /**
     * Starts a thread that reads {@code stream} to its end as UTF-8 and writes
     * the collected text to the debug log, prefixed with the stream label and
     * the input file. The stream is closed when the thread is done.
     *
     * @param logType label identifying the stream in the log message
     * @param stream  process output to drain
     * @param file    input file the process is working on
     */
    private void logStream(final String logType, final InputStream stream, final File file) {
        new Thread() {

            @Override
            public void run() {
                InputStreamReader reader = new InputStreamReader(stream, StandardCharsets.UTF_8);
                StringBuilder out = new StringBuilder();
                char[] buffer = new char[1024];
                try {
                    for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
                        out.append(buffer, 0, n);
                    }
                } catch (IOException ignored) {
                    // Best effort: log whatever was read before the failure.
                } finally {
                    IOUtils.closeQuietly(stream);
                }
                String msg = logType + " [" + file + "]: " + out;
                LogFactory.getLog(TesseractOCRParser.class).debug(msg);
            }
        }.start();
    }

    /**
     * Returns the file name of the tesseract executable for this platform.
     *
     * @return {@code "tesseract.exe"} when the {@code os.name} system property
     *         starts with {@code "Windows"}, otherwise {@code "tesseract"}
     */
    static String getTesseractProg() {
        return System.getProperty("os.name").startsWith("Windows") ? "tesseract.exe" : "tesseract";
    }

    /**
     * Composite of the generic image, JPEG and TIFF parsers, built on a fresh
     * {@link MediaTypeRegistry}.
     */
    private static class CompositeImageParser extends CompositeParser {
        private static final long serialVersionUID = -2398203346206381382L;
        private static final List<Parser> imageParsers =
                Arrays.asList(new Parser[]{new ImageParser(), new JpegParser(), new TiffParser()});

        CompositeImageParser() {
            super(new MediaTypeRegistry(), imageParsers);
        }
    }
}

