/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.ocr;

import java.io.File;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.tika.TikaTest;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.image.BPGParser;
import org.apache.tika.parser.image.HeifParser;
import org.apache.tika.parser.image.ICNSParser;
import org.apache.tika.parser.image.ImageParser;
import org.apache.tika.parser.image.JpegParser;
import org.apache.tika.parser.image.PSDParser;
import org.apache.tika.parser.image.TiffParser;
import org.apache.tika.parser.image.WebPParser;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.parser.ocr.TesseractOCRParser;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Assumptions;
import org.junit.jupiter.api.Test;

public class TesseractOCRParserTest
extends TikaTest {
    /**
     * Reports whether the OCR-dependent tests in this class can run.
     *
     * <p>Creates a fresh {@link TesseractOCRParser}, initializes it with an empty parameter map
     * and returns whatever {@code hasTesseract()} reports for it.
     *
     * @return the value of {@code hasTesseract()} on a freshly initialized parser
     * @throws TikaConfigException declared for the parser calls made here
     */
    public static boolean canRun() throws TikaConfigException {
        TesseractOCRParser parser = new TesseractOCRParser();
        // Collections.emptyMap() is the generic, type-safe form of the raw EMPTY_MAP constant.
        parser.initialize(Collections.emptyMap());
        return parser.hasTesseract();
    }

    /**
     * Runs OCR on {@code testOCR_spacing.png} twice: once without an explicit OCR config,
     * expecting "The quick" in the XML, and once with preserve-interword-spacing enabled,
     * expecting "The" and "quick" to be separated by 5 to 20 whitespace characters.
     *
     * <p>Skipped when {@link #canRun()} returns {@code false}.
     *
     * @throws Exception if parsing fails
     */
    @Test
    public void testInterwordSpacing() throws Exception {
        Assumptions.assumeTrue(canRun(), "can run OCR");

        String defaultXml = getXML("testOCR_spacing.png", getMetadata(MediaType.image("png"))).xml;
        assertContains("The quick", defaultXml);

        TesseractOCRConfig spacingConfig = new TesseractOCRConfig();
        spacingConfig.setPreserveInterwordSpacing(true);
        ParseContext parseContext = new ParseContext();
        // Pass the config with its own static type so it agrees with the Class key.
        parseContext.set(TesseractOCRConfig.class, spacingConfig);

        String spacedXml =
                getXML("testOCR_spacing.png", getMetadata(MediaType.image("png")), parseContext).xml;
        Matcher wideGap = Pattern.compile("The\\s{5,20}quick").matcher(spacedXml);
        Assertions.assertTrue(wideGap.find());
    }

    /**
     * Builds metadata carrying a parser override for the "OCR-" variant of a media type.
     *
     * @param mediaType media type whose type is kept and whose subtype is prefixed with "OCR-"
     * @return new metadata with {@code CONTENT_TYPE_PARSER_OVERRIDE} set to the string form of
     *         the prefixed media type
     */
    private Metadata getMetadata(MediaType mediaType) {
        String ocrSubtype = "OCR-" + mediaType.getSubtype();
        String overrideValue = new MediaType(mediaType.getType(), ocrSubtype).toString();

        Metadata result = new Metadata();
        result.set(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE, overrideValue);
        return result;
    }

    /**
     * Returns a media type with the same type as the argument and with a leading "ocr-"
     * removed from its subtype; subtypes without that prefix are carried over unchanged.
     *
     * @param mediaType media type to strip
     * @return a new media type built from the original type and the stripped subtype
     */
    private MediaType deOCR(MediaType mediaType) {
        final String ocrPrefix = "ocr-";
        String original = mediaType.getSubtype();
        String stripped = original.startsWith(ocrPrefix)
                ? original.substring(ocrPrefix.length())
                : original;
        return new MediaType(mediaType.getType(), stripped);
    }

    /**
     * Checks that OCR output for {@code testTIFF_multipage.tif} contains "Page 2".
     *
     * <p>Skipped when {@link #canRun()} returns {@code false}.
     *
     * @throws Exception if parsing fails
     */
    @Test
    public void confirmMultiPageTiffHandling() throws Exception {
        Assumptions.assumeTrue(canRun(), "can run OCR");
        Metadata tiffMetadata = getMetadata(MediaType.image("tiff"));
        String ocrXml = getXML("testTIFF_multipage.tif", tiffMetadata).xml;
        assertContains("Page 2", ocrXml);
    }

    /**
     * Checks that a {@link TesseractOCRConfig} with skip-OCR enabled, supplied through the
     * parse context, keeps "Page 2" out of the output for {@code testTIFF_multipage.tif}.
     *
     * <p>Skipped when {@link #canRun()} returns {@code false}.
     *
     * @throws Exception if parsing fails
     */
    @Test
    public void confirmRuntimeSkipOCR() throws Exception {
        Assumptions.assumeTrue(canRun(), "can run OCR");

        TesseractOCRConfig skipConfig = new TesseractOCRConfig();
        skipConfig.setSkipOcr(true);
        ParseContext context = new ParseContext();
        // Pass the config with its own static type so it agrees with the Class key.
        context.set(TesseractOCRConfig.class, skipConfig);

        String xml =
                getXML("testTIFF_multipage.tif", getMetadata(MediaType.image("tiff")), context).xml;
        assertNotContained("Page 2", xml);
    }

    /**
     * OCRs {@code testRotated+10.png} with rotation enabled and resize set to 100, then checks
     * that the metadata records ImageMagick use, a rotation of 10.0 (tolerance 0.01), and that
     * the expected sentence appears in the extracted text.
     *
     * <p>Skipped when {@link #canRun()} or {@code hasImageMagick()} returns {@code false}.
     *
     * @throws Exception if parsing fails
     */
    @Test
    public void testPositiveRotateOCR() throws Exception {
        Assumptions.assumeTrue(canRun());
        TesseractOCRParser p = new TesseractOCRParser();
        Assumptions.assumeTrue(p.hasImageMagick());

        TesseractOCRConfig config = new TesseractOCRConfig();
        config.setApplyRotation(true);
        config.setResize(100);
        ParseContext parseContext = new ParseContext();
        // Pass the config with its own static type so it agrees with the Class key.
        parseContext.set(TesseractOCRConfig.class, config);

        Metadata metadata = getMetadata(MediaType.image("png"));
        String ocr = getText("testRotated+10.png", metadata, parseContext);

        Assertions.assertEquals("true", metadata.get(TesseractOCRParser.IMAGE_MAGICK));
        Assertions.assertEquals(10.0,
                Double.parseDouble(metadata.get(TesseractOCRParser.IMAGE_ROTATION)), 0.01);
        assertContains("Its had resolving otherwise she contented therefore", ocr);
    }

    /**
     * OCRs {@code testRotated-10.png} with rotation enabled and resize set to 100, then checks
     * that the metadata records ImageMagick use, a rotation of -10.0 (tolerance 0.01), and that
     * the expected sentence appears in the extracted text.
     *
     * <p>Skipped when {@link #canRun()} or {@code hasImageMagick()} returns {@code false}.
     *
     * @throws Exception if parsing fails
     */
    @Test
    public void testNegativeRotateOCR() throws Exception {
        // Check both preconditions before building any config, as the positive-rotation test does.
        Assumptions.assumeTrue(canRun());
        TesseractOCRParser p = new TesseractOCRParser();
        Assumptions.assumeTrue(p.hasImageMagick());

        TesseractOCRConfig config = new TesseractOCRConfig();
        config.setApplyRotation(true);
        config.setResize(100);
        ParseContext parseContext = new ParseContext();
        // Pass the config with its own static type so it agrees with the Class key.
        parseContext.set(TesseractOCRConfig.class, config);

        Metadata metadata = getMetadata(MediaType.image("png"));
        String ocr = getText("testRotated-10.png", metadata, parseContext);

        Assertions.assertEquals("true", metadata.get(TesseractOCRParser.IMAGE_MAGICK));
        Assertions.assertEquals(-10.0,
                Double.parseDouble(metadata.get(TesseractOCRParser.IMAGE_ROTATION)), 0.01);
        assertContains("Its had resolving otherwise she contented therefore", ocr);
    }

    /**
     * Loads {@code /test-configs/TIKA-2705-tesseract.xml} and checks the default config of the
     * {@link TesseractOCRParser} found in it: timeout 241 seconds, HOCR output type, language
     * "ceb", and apply-rotation {@code false}.
     *
     * @throws Exception if the config cannot be loaded
     */
    @Test
    public void testConfig() throws Exception {
        try (InputStream configStream = getResourceAsStream("/test-configs/TIKA-2705-tesseract.xml")) {
            Parser rootParser = new TikaConfig(configStream).getParser();
            Parser found = findParser(rootParser, TesseractOCRParser.class);
            Assertions.assertNotNull(found);

            TesseractOCRConfig defaults = ((TesseractOCRParser) found).getDefaultConfig();
            Assertions.assertEquals(241, defaults.getTimeoutSeconds());
            Assertions.assertEquals(TesseractOCRConfig.OUTPUT_TYPE.HOCR, defaults.getOutputType());
            Assertions.assertEquals("ceb", defaults.getLanguage());
            Assertions.assertEquals(false, defaults.isApplyRotation());
        }
    }

    /**
     * Loads {@code /test-configs/tika-config-tesseract-load-langs.xml}, checks that the
     * configured {@link TesseractOCRParser} reports at least one language, and then checks that
     * parsing with the language set to "zzz" throws a {@link TikaException}.
     *
     * <p>Skipped when {@link #canRun()} returns {@code false}.
     *
     * @throws Exception if the config cannot be loaded
     */
    @Test
    public void testPreloadLangs() throws Exception {
        Assumptions.assumeTrue(canRun());

        TikaConfig config;
        try (InputStream is = getResourceAsStream("/test-configs/tika-config-tesseract-load-langs.xml")) {
            config = new TikaConfig(is);
        }
        Parser tesseractOCRParser = findParser(config.getParser(), TesseractOCRParser.class);
        Assertions.assertNotNull(tesseractOCRParser);
        Set<String> langs = ((TesseractOCRParser) tesseractOCRParser).getLangs();
        Assertions.assertFalse(langs.isEmpty());

        TesseractOCRConfig tesseractOCRConfig = new TesseractOCRConfig();
        tesseractOCRConfig.setLanguage("zzz");
        ParseContext parseContext = new ParseContext();
        // Pass the config with its own static type so it agrees with the Class key.
        parseContext.set(TesseractOCRConfig.class, tesseractOCRConfig);

        Parser autoDetectParser = new AutoDetectParser(config);
        Metadata metadata = getMetadata(MediaType.image("png"));
        // assertThrows states the expectation directly instead of fail() plus an empty catch.
        Assertions.assertThrows(TikaException.class,
                () -> getRecursiveMetadata("testOCR_spacing.png", autoDetectParser, metadata,
                        parseContext, false),
                "should have thrown exception");
    }

    /**
     * Loads {@code /test-configs/tika-config-tesseract-arbitrary.xml} and checks that the
     * default config's "other tesseract config" entries hold "0.75" for
     * {@code textord_initialx_ile} and "0.15625" for {@code textord_noise_hfract}.
     *
     * @throws Exception if the config cannot be loaded
     */
    @Test
    public void testArbitraryParams() throws Exception {
        try (InputStream configStream =
                     getResourceAsStream("/test-configs/tika-config-tesseract-arbitrary.xml")) {
            Parser rootParser = new TikaConfig(configStream).getParser();
            Parser found = findParser(rootParser, TesseractOCRParser.class);
            Assertions.assertNotNull(found);

            TesseractOCRConfig defaults = ((TesseractOCRParser) found).getDefaultConfig();
            Assertions.assertEquals("0.75",
                    defaults.getOtherTesseractConfig().get("textord_initialx_ile"));
            Assertions.assertEquals("0.15625",
                    defaults.getOtherTesseractConfig().get("textord_noise_hfract"));
        }
    }

    /**
     * Prints a comparison between the media types supported by the dedicated image parsers and
     * those supported by {@link TesseractOCRParser}.
     *
     * <p>Tesseract types whose subtype starts with "ocr-" are stripped of that prefix and
     * compared against the image parsers' types; every image type without such a counterpart is
     * printed. Tesseract types without the prefix are printed as formats lacking a dedicated
     * image parser.
     *
     * <p>NOTE(review): this method carries no {@code @Test} annotation, so the test runner will
     * not execute it; presumably it is a manual development aid. Confirm before annotating.
     *
     * @throws Exception declared by the original signature
     */
    public void showCoverage() throws Exception {
        // Typed as Set<MediaType> so the for-each loop below can iterate MediaType elements.
        Set<MediaType> imageParserMimes = new HashSet<>();
        Parser[] imageParsers = {
            new BPGParser(), new HeifParser(), new ICNSParser(), new ImageParser(),
            new JpegParser(), new PSDParser(), new TiffParser(), new WebPParser()
        };
        for (Parser imageParser : imageParsers) {
            imageParserMimes.addAll(imageParser.getSupportedTypes(new ParseContext()));
        }

        Set<MediaType> literalTesseractMimes = new HashSet<>();
        Set<MediaType> ocrTesseractMimes = new HashSet<>();
        for (MediaType mt : new TesseractOCRParser().getSupportedTypes(new ParseContext())) {
            if (mt.getSubtype().startsWith("ocr-")) {
                ocrTesseractMimes.add(deOCR(mt));
            } else {
                literalTesseractMimes.add(mt);
            }
        }

        for (MediaType mt : imageParserMimes) {
            if (!ocrTesseractMimes.contains(mt)) {
                System.out.println("tesseract isn't currently configured to handle: " + mt);
            }
        }
        for (MediaType mt : literalTesseractMimes) {
            System.out.println("We don't have dedicated image parsers for these formats, which are handled by tesseract: " + mt);
        }
    }

    /**
     * Checks the path setters for the tesseract, tessdata and ImageMagick paths: a non-empty
     * path comes back with exactly one trailing {@link File#separator}, whether or not the
     * separator was supplied, and an empty path comes back empty.
     */
    @Test
    public void testTrailingSlashInPathBehavior() {
        TesseractOCRParser parser = new TesseractOCRParser();

        String tesseractDir = "blah" + File.separator;
        parser.setTesseractPath("blah");
        Assertions.assertEquals(tesseractDir, parser.getTesseractPath());
        parser.setTesseractPath(tesseractDir);
        Assertions.assertEquals(tesseractDir, parser.getTesseractPath());
        parser.setTesseractPath("");
        Assertions.assertEquals("", parser.getTesseractPath());

        String tessdataDir = "blahdata" + File.separator;
        parser.setTessdataPath("blahdata");
        Assertions.assertEquals(tessdataDir, parser.getTessdataPath());
        parser.setTessdataPath(tessdataDir);
        Assertions.assertEquals(tessdataDir, parser.getTessdataPath());
        parser.setTessdataPath("");
        Assertions.assertEquals("", parser.getTessdataPath());

        String imageMagickDir = "imagemagickpath" + File.separator;
        parser.setImageMagickPath("imagemagickpath");
        Assertions.assertEquals(imageMagickDir, parser.getImageMagickPath());
        parser.setImageMagickPath(imageMagickDir);
        Assertions.assertEquals(imageMagickDir, parser.getImageMagickPath());
        parser.setImageMagickPath("");
        Assertions.assertEquals("", parser.getImageMagickPath());
    }

    /**
     * Checks that setting the tesseract path to "blahdeblahblah" is accepted and that the
     * getter returns it with a trailing {@link File#separator}.
     */
    @Test
    public void testBogusPathCheck() {
        String bogusPath = "blahdeblahblah";
        TesseractOCRParser parser = new TesseractOCRParser();
        parser.setTesseractPath(bogusPath);
        String expected = bogusPath + File.separator;
        Assertions.assertEquals(expected, parser.getTesseractPath());
    }

    /**
     * Loads the language set once as a baseline, then loads it 20 more times through
     * {@link #getLangs()} and checks that each result equals the baseline.
     *
     * <p>Skipped when {@link #canRun()} returns {@code false} or the baseline set is empty.
     *
     * @throws Exception if loading the languages fails
     */
    @Test
    public void testThreadJoinInLoadingLangs() throws Exception {
        Assumptions.assumeTrue(canRun());
        Set<String> baseline = getLangs();
        Assumptions.assumeTrue(!baseline.isEmpty());

        int remaining = 20;
        while (remaining-- > 0) {
            Assertions.assertEquals(baseline, getLangs());
        }
    }

    /**
     * Creates a new {@link TesseractOCRParser} with language preloading enabled, initializes it
     * with an empty parameter map and returns the languages it reports.
     *
     * @return the set returned by the parser's {@code getLangs()}
     * @throws Exception if initialization fails
     */
    private Set<String> getLangs() throws Exception {
        TesseractOCRParser parser = new TesseractOCRParser();
        parser.setPreloadLangs(true);
        // Collections.emptyMap() is the generic, type-safe form of the raw EMPTY_MAP constant.
        parser.initialize(Collections.emptyMap());
        return parser.getLangs();
    }
}

