/*
 * Decompiled with CFR 0.152.
 */
package org.apache.jackrabbit.oak.plugins.tika;

import com.google.common.io.ByteSource;
import com.google.common.io.CountingInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.jackrabbit.oak.commons.IOUtils;
import org.apache.jackrabbit.oak.commons.io.LazyInputStream;
import org.apache.jackrabbit.oak.plugins.blob.datastore.TextWriter;
import org.apache.jackrabbit.oak.plugins.tika.BinaryResource;
import org.apache.jackrabbit.oak.plugins.tika.BinaryStats;
import org.apache.jackrabbit.oak.plugins.tika.TikaHelper;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.WriteOutContentHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Extracts text from binaries using a pool of worker threads.
 *
 * <p>Binaries handed to {@link #extract(Iterable)} are wrapped in work items and
 * placed on a bounded queue. Each worker thread takes items from that queue,
 * parses the binary through the parser supplied by {@link TikaHelper} and
 * records the outcome (text, empty, or error) in the {@link TextWriter}.
 * {@link #close()} enqueues a shutdown marker, waits for the workers to finish
 * and logs the collected statistics.
 *
 * <p>All counters are atomic because they are updated from the worker threads.
 */
class TextExtractor
implements Closeable {
    private static final Logger log = LoggerFactory.getLogger(TextExtractor.class);
    private static final Logger parserError = LoggerFactory.getLogger("org.apache.jackrabbit.oak.plugins.tika.ParserError");
    /** A progress line is logged every time this many binaries have been processed. */
    private static final int PROGRESS_BATCH_SIZE = 1000;
    /** Default write limit handed to the content handler. */
    private static final int MAX_EXTRACT_LENGTH = 100000;
    /** Sentinel returned by {@link #parseStringValue} when parsing failed. */
    private static final String ERROR_TEXT = "TextExtractionError";
    private final TextWriter textWriter;
    /** Marker item; a worker that takes it re-queues it for its siblings and exits. */
    private final WorkItem SHUTDOWN_SIGNAL = new WorkItem(null);
    private BlockingQueue<WorkItem> inputQueue;
    private ExecutorService executorService;
    private int threadPoolSize = Runtime.getRuntime().availableProcessors();
    private int queueSize = 100;
    private final AtomicInteger errorCount = new AtomicInteger();
    private final AtomicLong timeTaken = new AtomicLong();
    private final AtomicInteger extractionCount = new AtomicInteger();
    private final AtomicInteger textWrittenCount = new AtomicInteger();
    private final AtomicInteger parserErrorCount = new AtomicInteger();
    private final AtomicInteger processedCount = new AtomicInteger();
    private final AtomicInteger emptyCount = new AtomicInteger();
    private final AtomicInteger notSupportedCount = new AtomicInteger();
    private final AtomicInteger alreadyExtractedCount = new AtomicInteger();
    private final AtomicLong extractedTextSize = new AtomicLong();
    private final AtomicLong nonEmptyExtractedTextSize = new AtomicLong();
    private final AtomicLong totalSizeRead = new AtomicLong();
    private int maxExtractedLength = MAX_EXTRACT_LENGTH;
    private File tikaConfig;
    private TikaHelper tika;
    private boolean initialized;
    private BinaryStats stats;
    private boolean closed;

    /**
     * @param textWriter sink that receives extracted text as well as the
     *                   "empty" and "error" markers
     */
    public TextExtractor(TextWriter textWriter) {
        this.textWriter = textWriter;
    }

    /**
     * Queues every given binary for extraction, starting the worker pool on
     * first use. The call blocks while the bounded queue is full.
     *
     * @param binaries binaries to process
     * @throws InterruptedException if interrupted while waiting for queue space
     * @throws IOException          if initialization fails
     */
    public void extract(Iterable<BinaryResource> binaries) throws InterruptedException, IOException {
        initialize();
        for (BinaryResource binary : binaries) {
            inputQueue.put(new WorkItem(binary));
        }
    }

    /**
     * Signals the workers to stop, waits for them to drain the queue and logs
     * the final statistics. Subsequent calls are no-ops.
     */
    @Override
    public void close() {
        if (closed) {
            return;
        }
        // The queue only exists once extract() has triggered initialization;
        // closing an unused extractor must not fail.
        if (inputQueue != null && !inputQueue.isEmpty()) {
            log.info("Shutting down the extractor. Pending task count {}", inputQueue.size());
        }
        if (executorService != null) {
            try {
                inputQueue.put(SHUTDOWN_SIGNAL);
                executorService.shutdown();
                executorService.awaitTermination(10L, TimeUnit.DAYS);
            }
            catch (InterruptedException e) {
                // The orderly shutdown did not complete; interrupt the workers
                // so they are not left blocked on the queue.
                executorService.shutdownNow();
                Thread.currentThread().interrupt();
            }
        }
        dumpStats();
        closed = true;
    }

    /** Sets the configuration file passed to {@link TikaHelper} on initialization. */
    public void setTikaConfig(File tikaConfig) {
        this.tikaConfig = tikaConfig;
    }

    /** Sets the number of worker threads; read when the pool is created. */
    public void setThreadPoolSize(int threadPoolSize) {
        this.threadPoolSize = threadPoolSize;
    }

    /** Sets optional statistics used to report percentages in progress logs. */
    public void setStats(BinaryStats stats) {
        this.stats = stats;
    }

    /** Logs a summary of all counters collected so far. */
    private void dumpStats() {
        StringWriter sw = new StringWriter();
        PrintWriter pw = new PrintWriter(sw);
        pw.println("Text extraction stats");
        pw.printf("\t Processed Count           : %d%n", processedCount.get());
        pw.printf("\t   Extraction Count        : %d%n", extractionCount.get());
        pw.printf("\t     Empty Count           : %d%n", emptyCount.get());
        pw.printf("\t     Text Written Count    : %d%n", textWrittenCount.get());
        pw.printf("\t   Parser Error Count      : %d%n", parserErrorCount.get());
        pw.printf("\t   Error Count             : %d%n", errorCount.get());
        pw.printf("\t   Not Supported Count     : %d%n", notSupportedCount.get());
        pw.printf("\t   Already processed Count : %d%n", alreadyExtractedCount.get());
        pw.printf("\t Total bytes read          : %s%n", IOUtils.humanReadableByteCount(totalSizeRead.get()));
        pw.printf("\t Total text extracted      : %s%n", IOUtils.humanReadableByteCount(extractedTextSize.get()));
        pw.printf("\t   Non empty text          : %s%n", IOUtils.humanReadableByteCount(nonEmptyExtractedTextSize.get()));
        pw.printf("\t Time taken                : %d sec%n", timeTaken.get() / 1000L);
        pw.close();
        log.info(sw.toString());
    }

    /**
     * Logs a progress line whenever {@code count} is a multiple of
     * {@link #PROGRESS_BATCH_SIZE}. Percentages are included only when stats
     * have been supplied via {@link #setStats(BinaryStats)}.
     *
     * @param count number of binaries processed so far
     */
    private void dumpProgress(int count) {
        if (count % PROGRESS_BATCH_SIZE != 0) {
            return;
        }
        String progress = "";
        if (stats != null) {
            int extracted = extractionCount.get();
            double processedPercent = percentage(count, stats.getTotalCount());
            double indexedPercent = percentage(extracted, stats.getIndexedCount());
            progress = String.format("(%1.2f%%) (Extraction stats %d/%d %1.2f%%, Ignored count %d)", processedPercent, extracted, stats.getIndexedCount(), indexedPercent, notSupportedCount.get());
        }
        log.info("Processed {} {} binaries so far ...", count, progress);
    }

    /** Returns {@code part} as a percentage of {@code whole}, or 0 when {@code whole} is 0. */
    private static double percentage(double part, double whole) {
        if (whole == 0) {
            // Avoid NaN/Infinity showing up in the progress log.
            return 0;
        }
        return part / whole * 100.0;
    }

    /**
     * Creates the queue, the Tika helper and the worker pool on first call;
     * later calls return immediately.
     */
    private synchronized void initialize() throws IOException {
        if (initialized) {
            return;
        }
        inputQueue = new ArrayBlockingQueue<WorkItem>(queueSize);
        tika = new TikaHelper(tikaConfig);
        initializeExecutorService();
        initialized = true;
    }

    /**
     * Processes a single binary: skips it when its mime type is missing or
     * unsupported or when the blob is already processed, otherwise parses it
     * and records text, "empty" or "error" in the text writer.
     *
     * @param source binary to process
     * @throws IOException propagated from the text writer
     */
    private void extractText(BinaryResource source) throws IOException {
        String type = source.getMimeType();
        if (type == null || !tika.isSupportedMediaType(type)) {
            log.trace("Ignoring binary content for node {} due to unsupported (or null) jcr:mimeType [{}]", source, type);
            notSupportedCount.incrementAndGet();
            return;
        }
        String blobId = source.getBlobId();
        if (textWriter.isProcessed(blobId)) {
            alreadyExtractedCount.incrementAndGet();
            return;
        }
        Metadata metadata = new Metadata();
        metadata.set("Content-Type", type);
        if (source.getEncoding() != null) {
            metadata.set("Content-Encoding", source.getEncoding());
        }
        String extractedContent = parseStringValue(source.getByteSource(), metadata, source.getPath());
        if (ERROR_TEXT.equals(extractedContent)) {
            textWriter.markError(blobId);
            return;
        }
        if (extractedContent == null) {
            // Nothing was read from the binary; leave the blob unmarked.
            return;
        }
        extractedContent = extractedContent.trim();
        if (extractedContent.isEmpty()) {
            textWriter.markEmpty(blobId);
            emptyCount.incrementAndGet();
        } else {
            nonEmptyExtractedTextSize.addAndGet(extractedContent.length());
            textWriter.write(blobId, extractedContent);
            textWrittenCount.incrementAndGet();
        }
    }

    /** Creates a fixed pool and starts one long-running {@link Extractor} per thread. */
    private void initializeExecutorService() {
        executorService = Executors.newFixedThreadPool(threadPoolSize);
        for (int i = 0; i < threadPoolSize; ++i) {
            executorService.submit(new Extractor());
        }
        log.info("Initialized text extractor pool with {} threads", threadPoolSize);
    }

    /**
     * Parses the given bytes and returns the text collected by the handler.
     *
     * @param byteSource content to parse; opened lazily
     * @param metadata   metadata passed to the parser
     * @param path       path of the binary, used only in the failure log message
     * @return the extracted text; {@link #ERROR_TEXT} if parsing failed for a
     *         reason other than reaching the write limit; {@code null} if no
     *         bytes were read from the stream
     */
    private String parseStringValue(ByteSource byteSource, Metadata metadata, String path) {
        WriteOutContentHandler handler = new WriteOutContentHandler(maxExtractedLength);
        long start = System.currentTimeMillis();
        long size = 0L;
        try {
            CountingInputStream stream = new CountingInputStream(new LazyInputStream(byteSource));
            try {
                tika.getParser().parse(stream, handler, metadata, new ParseContext());
            }
            finally {
                // Record how much was consumed even when parsing fails.
                size = stream.getCount();
                stream.close();
            }
        }
        catch (LinkageError ignored) {
            // Deliberately ignored; whatever the handler collected so far is
            // still used below. NOTE(review): presumably raised when an
            // optional parser library is missing from the classpath - confirm.
        }
        catch (Throwable t) {
            // Reaching the handler's write limit is a normal way to stop;
            // anything else counts as a parser failure.
            if (!handler.isWriteLimitReached(t)) {
                parserErrorCount.incrementAndGet();
                parserError.debug("Failed to extract text from a binary property: {} This is a fairly common case, and nothing to worry about. The stack trace is included to help improve the text extraction feature.", path, t);
                return ERROR_TEXT;
            }
        }
        String result = handler.toString();
        timeTaken.addAndGet(System.currentTimeMillis() - start);
        if (size > 0L) {
            extractedTextSize.addAndGet(result.length());
            extractionCount.incrementAndGet();
            totalSizeRead.addAndGet(size);
            return result;
        }
        return null;
    }

    /** Queue element wrapping one binary; a {@code null} source is used for the shutdown marker. */
    private static class WorkItem {
        final BinaryResource source;

        private WorkItem(BinaryResource source) {
            this.source = source;
        }

        @Override
        public String toString() {
            return source != null ? source.toString() : "<EMPTY>";
        }
    }

    /**
     * Worker loop: takes items from the queue until the shutdown marker is
     * seen or the thread is interrupted.
     */
    private class Extractor
    implements Runnable {
        private Extractor() {
        }

        @Override
        public void run() {
            while (true) {
                WorkItem workItem = null;
                try {
                    workItem = inputQueue.take();
                    if (workItem == SHUTDOWN_SIGNAL) {
                        // Put the marker back so the remaining workers see it too.
                        inputQueue.put(SHUTDOWN_SIGNAL);
                        return;
                    }
                    extractText(workItem.source);
                    dumpProgress(processedCount.incrementAndGet());
                }
                catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    return;
                }
                catch (Exception e) {
                    // A failure on one item must not stop the worker.
                    errorCount.incrementAndGet();
                    log.warn("Error occurred while processing {}", workItem, e);
                }
            }
        }
    }
}

