package com.johnsnowlabs.nlp.util.io;

import com.johnsnowlabs.nlp.DocumentAssembler;
import com.johnsnowlabs.nlp.Finisher;
import com.johnsnowlabs.nlp.HasOutputAnnotationCol;
import com.johnsnowlabs.nlp.annotators.Tokenizer;
import com.johnsnowlabs.nlp.annotators.common.TaggedSentence;
import com.johnsnowlabs.nlp.annotators.common.TaggedWord;
import com.johnsnowlabs.nlp.util.io.ResourceHelper;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.net.URL;
import java.net.URLDecoder;
import java.util.Enumeration;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.spark.ml.Pipeline;
import org.apache.spark.ml.PipelineModel;
import org.apache.spark.ml.Transformer;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.apache.spark.sql.SparkSession$implicits$;
import org.apache.spark.sql.expressions.Window$;
import org.apache.spark.sql.functions$;
import org.apache.spark.util.CollectionAccumulator;
import scala.Array$;
import scala.Enumeration;
import scala.None$;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Predef$DummyImplicit$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Seq;
import scala.collection.SeqLike;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.Map$;
import scala.collection.mutable.StringBuilder;
import scala.math.Ordering$;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag$;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.TypeTags;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;
import scala.runtime.BoxesRunTime;
import scala.runtime.ScalaRunTime$;

/* compiled from: ResourceHelper.scala */
/* loaded from: input_file:com/johnsnowlabs/nlp/util/io/ResourceHelper$.class */
public final class ResourceHelper$ {
    /**
     * Singleton instance holder. The declaration shows {@code null}, but the private
     * constructor of this class assigns {@code this} to it.
     */
    public static final ResourceHelper$ MODULE$ = null;
    /** Spark session obtained once in the private constructor and exposed through {@link #spark()}. */
    private final SparkSession spark;

    // Class initialisation creates the one instance; the constructor stores it in MODULE$.
    static {
        new ResourceHelper$();
    }

    /**
     * Returns the Spark session held by this singleton.
     *
     * @return the session stored in the {@code spark} field by the constructor
     */
    public SparkSession spark() {
        return this.spark;
    }

    /**
     * Opens every file the iterator still has to offer and exposes them as one stream.
     *
     * <p>The next file status is taken from the iterator unconditionally, so the iterator
     * must have at least one remaining element. When it was the last element, the file's
     * own stream is returned; otherwise that stream is chained in front of the stream built
     * recursively from the remaining elements.
     *
     * @param fileSystem     file system used to open each path
     * @param remoteIterator iterator over the files to read, consumed by this call
     * @return a single stream, or a {@link SequenceInputStream} over all remaining files
     */
    public InputStream com$johnsnowlabs$nlp$util$io$ResourceHelper$$inputStreamOrSequence(FileSystem fileSystem, RemoteIterator<LocatedFileStatus> remoteIterator) {
        LocatedFileStatus head = remoteIterator.next();
        if (!remoteIterator.hasNext()) {
            // Last file: no chaining needed.
            return fileSystem.open(head.getPath());
        }
        InputStream headStream = fileSystem.open(head.getPath());
        InputStream tailStream = com$johnsnowlabs$nlp$util$io$ResourceHelper$$inputStreamOrSequence(fileSystem, remoteIterator);
        return new SequenceInputStream(headStream, tailStream);
    }

    /**
     * Strips a leading build-output prefix of the form
     * {@code ...target<sep>...scala-...<sep>...classes<sep>} from a path, where
     * {@code <sep>} is {@link File#separator}.
     *
     * <p>The pattern is identical to the one previously produced through Scala string
     * interpolation; it is now assembled with plain Java concatenation so the method no
     * longer needs Scala runtime helpers.
     *
     * @param str path to clean up
     * @return {@code str} without the matched prefix, or {@code str} unchanged when the
     *         whole string does not match the prefix followed by {@code .*}
     */
    public String com$johnsnowlabs$nlp$util$io$ResourceHelper$$fixTarget(String str) {
        // A backslash in front of the separator makes it a literal in the regex
        // ("\/" on Unix-like systems, "\\" on Windows).
        String escapedSeparator = "\\" + File.separator;
        String targetPrefix = "^.*target" + escapedSeparator
                + ".*scala-.*" + escapedSeparator
                + ".*classes" + escapedSeparator;
        if (!str.matches(targetPrefix + ".*")) {
            return str;
        }
        return str.replaceFirst(targetPrefix, "");
    }

    /**
     * Returns a stream for the named resource.
     *
     * <p>The resource is first looked up with {@code getClass().getResourceAsStream}. When
     * that lookup yields {@code null}, the result of the fallback closure
     * {@code ResourceHelper$$anonfun$getResourceStream$1} (built from {@code str}) is
     * returned instead; that closure's behaviour is defined outside this method.
     *
     * @param str resource name or path
     * @return the class-path stream, or whatever the fallback closure produces
     */
    public InputStream getResourceStream(String str) {
        InputStream fromClassPath = getClass().getResourceAsStream(str);
        // Option.apply maps null to None, so getOrElse only evaluates the fallback on a miss.
        Option maybeStream = Option$.MODULE$.apply(fromClassPath);
        ResourceHelper$$anonfun$getResourceStream$1 fallback = new ResourceHelper$$anonfun$getResourceStream$1(str);
        return (InputStream) maybeStream.getOrElse(fallback);
    }

    /**
     * Resolves a resource name to a URL.
     *
     * <p>The lookup is tried relative to this class first and, if that finds nothing,
     * through this class's class loader.
     *
     * @param str resource name
     * @return the resource URL, or {@code null} when neither lookup finds it
     */
    public URL getResourceFile(String str) {
        URL viaClass = getClass().getResource(str);
        return viaClass != null ? viaClass : getClass().getClassLoader().getResource(str);
    }

    /**
     * Lists the immediate children of a resource directory.
     *
     * <p>The resource is resolved with {@link #getResourceFile(String)} and the resolved URL
     * is printed to standard output. Two URL protocols are handled:
     * <ul>
     *   <li>{@code file}, when the directory exists on disk: its files are listed, sorted and
     *       mapped through the two {@code listResourceDirectory} closures;</li>
     *   <li>{@code jar}: the archive's entries are scanned and the distinct, sorted first-level
     *       names below the directory prefix are returned, each prefixed with the directory.</li>
     * </ul>
     *
     * <p>The jar file is now closed once its entries have been scanned; previously it was
     * left open.
     *
     * @param str resource directory path
     * @return the sorted child paths
     * @throws FileNotFoundException         when the resource cannot be resolved
     * @throws UnsupportedOperationException when the URL is neither an existing {@code file}
     *                                       location nor a {@code jar} URL
     */
    public Seq<String> listResourceDirectory(String str) {
        URL resourceFile = getResourceFile(str);
        System.out.println(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"listDirectory ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{resourceFile})));
        if (resourceFile == null) {
            throw new FileNotFoundException(str);
        }
        if (resourceFile.getProtocol().equals("file") && new File(resourceFile.toURI()).exists()) {
            return (Seq) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(new File(resourceFile.toURI()).listFiles()).sorted(Ordering$.MODULE$.ordered(Predef$.MODULE$.$conforms()))).map(new ResourceHelper$$anonfun$listResourceDirectory$1(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)))).map(new ResourceHelper$$anonfun$listResourceDirectory$2(), Array$.MODULE$.fallbackCanBuildFrom(Predef$DummyImplicit$.MODULE$.dummyImplicit()));
        }
        if (!resourceFile.getProtocol().equals("jar")) {
            throw new UnsupportedOperationException(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Cannot list files for URL ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{resourceFile})));
        }
        // The URL path looks like "file:<jar path>!<entry>": skip the 5-character "file:" prefix
        // and cut at the "!" to get the archive location.
        String jarPath = URLDecoder.decode(resourceFile.getPath().substring(5, resourceFile.getPath().indexOf("!")), "UTF-8");
        // Directory prefix without a leading separator and with exactly one trailing separator.
        String dirPrefix = new StringBuilder().append(new StringOps(Predef$.MODULE$.augmentString(new StringOps(Predef$.MODULE$.augmentString(str)).stripPrefix(new StringOps(Predef$.MODULE$.augmentString(File.separator)).replaceAllLiterally("\\", "/")))).stripSuffix(File.separator)).append(new StringOps(Predef$.MODULE$.augmentString(File.separator)).replaceAllLiterally("\\", "/")).toString();
        ArrayBuffer arrayBuffer = new ArrayBuffer();
        JarFile jarFile = new JarFile(jarPath);
        try {
            Enumeration<JarEntry> entries = jarFile.entries();
            while (entries.hasMoreElements()) {
                String entryName = new StringOps(Predef$.MODULE$.augmentString(entries.nextElement().getName())).stripPrefix(File.separator);
                if (entryName.startsWith(dirPrefix)) {
                    String child = entryName.substring(dirPrefix.length());
                    // Keep only the first path segment below the directory.
                    int slash = child.indexOf("/");
                    if (slash >= 0) {
                        child = child.substring(0, slash);
                    }
                    if (new StringOps(Predef$.MODULE$.augmentString(child)).nonEmpty()) {
                        arrayBuffer.append(Predef$.MODULE$.wrapRefArray(new String[]{new StringBuilder().append(dirPrefix).append(child).toString()}));
                    }
                }
            }
        } finally {
            // Release the archive handle even when scanning fails.
            jarFile.close();
        }
        return (Seq) ((SeqLike) arrayBuffer.distinct()).sorted(Ordering$String$.MODULE$);
    }

    /**
     * Reads a text source into a Spark dataset and optionally post-processes it.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>{@code z2} and {@code z4} must have the same value, otherwise {@code require} fails;</li>
     *   <li>the text at {@code str} is read with {@code spark().read().textFile};</li>
     *   <li>{@code z}: lines are mapped and filtered through the two
     *       {@code createDatasetFromText} closures (defined outside this method);</li>
     *   <li>{@code z2}: a {@code filename} column is added from {@code input_file_name()};</li>
     *   <li>{@code z4}: rows are grouped by {@code filename}, their {@code value}s collected
     *       and joined with a single space into {@code text}, and {@code value} is dropped;</li>
     *   <li>{@code z3}: an {@code id} column is added;</li>
     *   <li>column {@code value} is renamed to {@code text}.</li>
     * </ol>
     *
     * <p>Because step 1 forces {@code z2 == z4}, the {@code row_number} variant of the id
     * column (taken only when {@code z2} is set and {@code z4} is not) cannot be reached.
     *
     * @param str path of the text source
     * @param z   apply the map/filter closures to each line
     * @param z2  add the {@code filename} column
     * @param z3  add the {@code id} column
     * @param z4  aggregate all lines of a file into one row
     * @return the resulting dataset
     */
    public Dataset<?> createDatasetFromText(String str, boolean z, boolean z2, boolean z3, boolean z4) {
        Predef$.MODULE$.require(z2 == z4, new ResourceHelper$$anonfun$createDatasetFromText$1());
        Dataset rows = spark().read().textFile(str);
        if (z) {
            rows = rows.as(spark().implicits().newStringEncoder()).map(new ResourceHelper$$anonfun$createDatasetFromText$2(), spark().implicits().newStringEncoder()).filter(new ResourceHelper$$anonfun$createDatasetFromText$3());
        }
        if (z2) {
            rows = rows.withColumn("filename", functions$.MODULE$.input_file_name());
        }
        if (z4) {
            Column collectedValues = functions$.MODULE$.collect_list(spark().implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"value"}))).$(Nil$.MODULE$)).as("value");
            Column joinedText = functions$.MODULE$.concat_ws(" ", Predef$.MODULE$.wrapRefArray(new Column[]{spark().implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"value"}))).$(Nil$.MODULE$)}));
            rows = rows.groupBy("filename", Predef$.MODULE$.wrapRefArray(new String[0])).agg(collectedValues, Predef$.MODULE$.wrapRefArray(new Column[0])).withColumn("text", joinedText).drop("value");
        }
        if (z3) {
            if (z2 && !z4) {
                rows = rows.withColumn("id", functions$.MODULE$.row_number().over(Window$.MODULE$.partitionBy("filename", Predef$.MODULE$.wrapRefArray(new String[0])).orderBy("filename", Predef$.MODULE$.wrapRefArray(new String[0]))));
            } else {
                rows = rows.withColumn("id", functions$.MODULE$.monotonically_increasing_id());
            }
        }
        return rows.withColumnRenamed("value", "text");
    }

    /**
     * Returns the constant {@code true}.
     *
     * <p>NOTE(review): the {@code $default$2} suffix matches the Scala compiler's naming for the
     * default of a method's 2nd parameter, presumably that of {@code createDatasetFromText};
     * confirm against ResourceHelper.scala.
     */
    public boolean createDatasetFromText$default$2() {
        return true;
    }

    /**
     * Returns the constant {@code false}.
     *
     * <p>NOTE(review): the {@code $default$3} suffix matches the Scala compiler's naming for the
     * default of a method's 3rd parameter, presumably that of {@code createDatasetFromText};
     * confirm against ResourceHelper.scala.
     */
    public boolean createDatasetFromText$default$3() {
        return false;
    }

    /**
     * Returns the constant {@code false}.
     *
     * <p>NOTE(review): the {@code $default$4} suffix matches the Scala compiler's naming for the
     * default of a method's 4th parameter, presumably that of {@code createDatasetFromText};
     * confirm against ResourceHelper.scala.
     */
    public boolean createDatasetFromText$default$4() {
        return false;
    }

    /**
     * Returns the constant {@code false}.
     *
     * <p>NOTE(review): the {@code $default$5} suffix matches the Scala compiler's naming for the
     * default of a method's 5th parameter, presumably that of {@code createDatasetFromText};
     * confirm against ResourceHelper.scala.
     */
    public boolean createDatasetFromText$default$5() {
        return false;
    }

    /**
     * Loads an external resource as an immutable key/value map.
     *
     * <p>With {@code LINE_BY_LINE}, each line of the source stream is converted by the
     * {@code ResourceHelper$$anonfun$3} closure and the results are collected into a map. The
     * stream is now closed in a {@code finally} block, so it is released even when reading or
     * parsing throws.
     *
     * <p>With {@code SPARK_DATASET}, the resource is loaded through Spark using the
     * {@code format} and {@code delimiter} options, its columns are named {@code key} and
     * {@code value}, and every row is passed to the {@code parseKeyValueText} closure together
     * with a local mutable map, which is then converted to an immutable map.
     *
     * <p>NOTE(review): the closure fed to {@code foreach} is not visible here; if it fills the
     * local map inside a Spark task, the driver's copy may stay empty outside local mode.
     * Confirm.
     *
     * @param externalResource resource descriptor (path, read mode and options)
     * @return the parsed entries
     * @throws Exception when the read mode is neither {@code LINE_BY_LINE} nor
     *                   {@code SPARK_DATASET}
     */
    public Map<String, String> parseKeyValueText(ExternalResource externalResource) {
        Enumeration.Value readAs = externalResource.readAs();
        Enumeration.Value LINE_BY_LINE = ReadAs$.MODULE$.LINE_BY_LINE();
        if (LINE_BY_LINE != null ? LINE_BY_LINE.equals(readAs) : readAs == null) {
            ResourceHelper.SourceStream sourceStream = new ResourceHelper.SourceStream(externalResource.path());
            try {
                // toMap consumes the lazy line iterator before the stream is closed.
                Map<String, String> parsed = sourceStream.content().getLines().map(new ResourceHelper$$anonfun$3(externalResource)).toMap(Predef$.MODULE$.$conforms());
                return parsed;
            } finally {
                sourceStream.close();
            }
        }
        Enumeration.Value SPARK_DATASET = ReadAs$.MODULE$.SPARK_DATASET();
        if (SPARK_DATASET != null ? !SPARK_DATASET.equals(readAs) : readAs != null) {
            throw new Exception("Unsupported readAs");
        }
        Dataset df = spark().read().options(externalResource.options()).format((String) externalResource.options().apply("format")).options(externalResource.options()).option("delimiter", (String) externalResource.options().apply("delimiter")).load(externalResource.path()).toDF(Predef$.MODULE$.wrapRefArray(new String[]{"key", "value"}));
        scala.collection.mutable.Map empty = Map$.MODULE$.empty();
        // The TypeCreator below describes scala.Tuple2[String, String] for the product encoder.
        df.as(spark().implicits().newProductEncoder(package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: com.johnsnowlabs.nlp.util.io.ResourceHelper$$typecreator10$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticClass("scala.Tuple2"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$), universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})));
            }
        }))).foreach(new ResourceHelper$$anonfun$parseKeyValueText$1(empty));
        return empty.toMap(Predef$.MODULE$.$conforms());
    }

    /**
     * Loads an external resource as an array of lines.
     *
     * <p>With {@code LINE_BY_LINE}, all lines of the source stream are read into an array. The
     * stream is now closed in a {@code finally} block, so it is released even when reading
     * throws.
     *
     * <p>With {@code SPARK_DATASET}, the resource is loaded through Spark using the
     * {@code format} option, every row is handed to the {@code parseLines} closure together
     * with a collection accumulator, and the accumulated values are mapped through
     * {@code ResourceHelper$$anonfun$5} into a string array. The accumulator is reset
     * afterwards.
     *
     * @param externalResource resource descriptor (path, read mode and options)
     * @return the lines of the resource
     * @throws Exception when the read mode is neither {@code LINE_BY_LINE} nor
     *                   {@code SPARK_DATASET}
     */
    public String[] parseLines(ExternalResource externalResource) {
        Enumeration.Value readAs = externalResource.readAs();
        Enumeration.Value LINE_BY_LINE = ReadAs$.MODULE$.LINE_BY_LINE();
        if (LINE_BY_LINE != null ? LINE_BY_LINE.equals(readAs) : readAs == null) {
            ResourceHelper.SourceStream sourceStream = new ResourceHelper.SourceStream(externalResource.path());
            try {
                // toArray consumes the lazy line iterator before the stream is closed.
                return (String[]) sourceStream.content().getLines().toArray(ClassTag$.MODULE$.apply(String.class));
            } finally {
                sourceStream.close();
            }
        }
        Enumeration.Value SPARK_DATASET = ReadAs$.MODULE$.SPARK_DATASET();
        if (SPARK_DATASET != null ? !SPARK_DATASET.equals(readAs) : readAs != null) {
            throw new Exception("Unsupported readAs");
        }
        Dataset load = spark().read().options(externalResource.options()).format((String) externalResource.options().apply("format")).load(externalResource.path());
        CollectionAccumulator collectionAccumulator = spark().sparkContext().collectionAccumulator();
        load.as(spark().implicits().newStringEncoder()).foreach(new ResourceHelper$$anonfun$parseLines$1(collectionAccumulator));
        String[] collected = (String[]) Predef$.MODULE$.refArrayOps(collectionAccumulator.value().toArray()).map(new ResourceHelper$$anonfun$5(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
        collectionAccumulator.reset();
        return collected;
    }

    /**
     * Loads an external resource as an array of string pairs.
     *
     * <p>With {@code LINE_BY_LINE}, the lines of the source stream are filtered by
     * {@code ResourceHelper$$anonfun$6}, converted by {@code ResourceHelper$$anonfun$7} and
     * collected into an array. The stream is now closed in a {@code finally} block, so it is
     * released even when reading or parsing throws.
     *
     * <p>With {@code SPARK_DATASET}, the resource is loaded through Spark using the
     * {@code format} option, every row is handed to the {@code parseTupleText} closure
     * together with a collection accumulator, and the accumulated values are converted by
     * {@code ResourceHelper$$anonfun$9}. The accumulator is reset afterwards.
     *
     * @param externalResource resource descriptor (path, read mode and options)
     * @return the parsed pairs
     * @throws Exception when the read mode is neither {@code LINE_BY_LINE} nor
     *                   {@code SPARK_DATASET}
     */
    public Tuple2<String, String>[] parseTupleText(ExternalResource externalResource) {
        Enumeration.Value readAs = externalResource.readAs();
        Enumeration.Value LINE_BY_LINE = ReadAs$.MODULE$.LINE_BY_LINE();
        if (LINE_BY_LINE != null ? LINE_BY_LINE.equals(readAs) : readAs == null) {
            ResourceHelper.SourceStream sourceStream = new ResourceHelper.SourceStream(externalResource.path());
            try {
                // toArray consumes the lazy line iterator before the stream is closed.
                Tuple2<String, String>[] parsed = (Tuple2[]) sourceStream.content().getLines().filter(new ResourceHelper$$anonfun$6()).map(new ResourceHelper$$anonfun$7(externalResource)).toArray(ClassTag$.MODULE$.apply(Tuple2.class));
                return parsed;
            } finally {
                sourceStream.close();
            }
        }
        Enumeration.Value SPARK_DATASET = ReadAs$.MODULE$.SPARK_DATASET();
        if (SPARK_DATASET != null ? !SPARK_DATASET.equals(readAs) : readAs != null) {
            throw new Exception("Unsupported readAs");
        }
        Dataset load = spark().read().options(externalResource.options()).format((String) externalResource.options().apply("format")).load(externalResource.path());
        CollectionAccumulator collectionAccumulator = spark().sparkContext().collectionAccumulator();
        load.as(spark().implicits().newStringEncoder()).foreach(new ResourceHelper$$anonfun$parseTupleText$1(collectionAccumulator));
        Tuple2<String, String>[] collected = (Tuple2[]) Predef$.MODULE$.refArrayOps(collectionAccumulator.value().toArray()).map(new ResourceHelper$$anonfun$9(externalResource), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
        collectionAccumulator.reset();
        return collected;
    }

    /**
     * Loads an external resource as an array of tagged sentences.
     *
     * <p>With {@code LINE_BY_LINE}, the lines of the source stream are filtered by
     * {@code ResourceHelper$$anonfun$11}, converted to {@code TaggedWord[]} rows by
     * {@code ResourceHelper$$anonfun$12}, and each row is then turned into a
     * {@link TaggedSentence} by the {@code parseTupleSentences$1} closure. The stream is now
     * closed in a {@code finally} block, so it is released even when reading or parsing throws.
     *
     * <p>With {@code SPARK_DATASET}, the resource is loaded through Spark using the
     * {@code format} option, filtered by {@code ResourceHelper$$anonfun$13}, mapped by
     * {@code ResourceHelper$$anonfun$14}, collected to the driver and converted by the
     * {@code parseTupleSentences$2} closure.
     *
     * @param externalResource resource descriptor (path, read mode and options)
     * @return the parsed sentences
     * @throws Exception when the read mode is neither {@code LINE_BY_LINE} nor
     *                   {@code SPARK_DATASET}
     */
    public TaggedSentence[] parseTupleSentences(ExternalResource externalResource) {
        Enumeration.Value readAs = externalResource.readAs();
        Enumeration.Value LINE_BY_LINE = ReadAs$.MODULE$.LINE_BY_LINE();
        if (LINE_BY_LINE != null ? LINE_BY_LINE.equals(readAs) : readAs == null) {
            TaggedWord[][] taggedWordArr;
            ResourceHelper.SourceStream sourceStream = new ResourceHelper.SourceStream(externalResource.path());
            try {
                // toArray consumes the lazy line iterator before the stream is closed.
                taggedWordArr = (TaggedWord[][]) sourceStream.content().getLines().filter(new ResourceHelper$$anonfun$11()).map(new ResourceHelper$$anonfun$12(externalResource)).toArray(ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(TaggedWord.class)));
            } finally {
                sourceStream.close();
            }
            return (TaggedSentence[]) Predef$.MODULE$.refArrayOps(taggedWordArr).map(new ResourceHelper$$anonfun$parseTupleSentences$1(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(TaggedSentence.class)));
        }
        Enumeration.Value SPARK_DATASET = ReadAs$.MODULE$.SPARK_DATASET();
        if (SPARK_DATASET != null ? !SPARK_DATASET.equals(readAs) : readAs != null) {
            throw new Exception("Unsupported readAs");
        }
        Dataset filter = spark().read().options(externalResource.options()).format((String) externalResource.options().apply("format")).load(externalResource.path()).as(spark().implicits().newStringEncoder()).filter(new ResourceHelper$$anonfun$13());
        ResourceHelper$$anonfun$14 resourceHelper$$anonfun$14 = new ResourceHelper$$anonfun$14(externalResource);
        SparkSession$implicits$ implicits = spark().implicits();
        TypeTags universe = package$.MODULE$.universe();
        // The TypeCreator below names TaggedWord as the element type for the array encoder.
        return (TaggedSentence[]) Predef$.MODULE$.refArrayOps((TaggedWord[][]) filter.map(resourceHelper$$anonfun$14, implicits.newProductArrayEncoder(universe.TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: com.johnsnowlabs.nlp.util.io.ResourceHelper$$typecreator20$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("com.johnsnowlabs.nlp.annotators.common.TaggedWord").asType().toTypeConstructor();
            }
        }))).collect()).map(new ResourceHelper$$anonfun$parseTupleSentences$2(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(TaggedSentence.class)));
    }

    /**
     * Loads an external resource into an immutable map built by the
     * {@code flattenRevertValuesAsKeys} closures.
     *
     * <p>With {@code LINE_BY_LINE}, every line of the source stream is passed to the
     * {@code flattenRevertValuesAsKeys$1} closure together with a local mutable map. The
     * stream is now closed in a {@code finally} block, so it is released even when reading or
     * parsing throws.
     *
     * <p>With {@code SPARK_DATASET}, the resource is loaded through Spark using the
     * {@code format} option and every row is passed to the
     * {@code flattenRevertValuesAsKeys$2} closure together with a local mutable map.
     *
     * <p>NOTE(review): the closure fed to the Spark {@code foreach} is not visible here; if it
     * fills the local map inside a Spark task, the driver's copy may stay empty outside local
     * mode. Confirm.
     *
     * @param externalResource resource descriptor (path, read mode and options)
     * @return an immutable snapshot of the map filled by the closure
     * @throws Exception when the read mode is neither {@code LINE_BY_LINE} nor
     *                   {@code SPARK_DATASET}
     */
    public Map<String, String> flattenRevertValuesAsKeys(ExternalResource externalResource) {
        Enumeration.Value readAs = externalResource.readAs();
        Enumeration.Value LINE_BY_LINE = ReadAs$.MODULE$.LINE_BY_LINE();
        if (LINE_BY_LINE != null ? LINE_BY_LINE.equals(readAs) : readAs == null) {
            scala.collection.mutable.Map apply = Map$.MODULE$.apply(Nil$.MODULE$);
            ResourceHelper.SourceStream sourceStream = new ResourceHelper.SourceStream(externalResource.path());
            try {
                sourceStream.content().getLines().foreach(new ResourceHelper$$anonfun$flattenRevertValuesAsKeys$1(externalResource, apply));
            } finally {
                sourceStream.close();
            }
            return apply.toMap(Predef$.MODULE$.$conforms());
        }
        Enumeration.Value SPARK_DATASET = ReadAs$.MODULE$.SPARK_DATASET();
        if (SPARK_DATASET != null ? !SPARK_DATASET.equals(readAs) : readAs != null) {
            throw new Exception("Unsupported readAs");
        }
        Dataset load = spark().read().options(externalResource.options()).format((String) externalResource.options().apply("format")).load(externalResource.path());
        scala.collection.mutable.Map empty = Map$.MODULE$.empty();
        load.as(spark().implicits().newStringEncoder()).foreach(new ResourceHelper$$anonfun$flattenRevertValuesAsKeys$2(externalResource, empty));
        return empty.toMap(Predef$.MODULE$.$conforms());
    }

    /**
     * Builds a word-count map from an external resource.
     *
     * <p>With {@code LINE_BY_LINE}, every line of the source stream is passed to the
     * {@code wordCount$1} closure together with the caller-supplied {@code map} and a regex
     * compiled from the {@code tokenPattern} option; {@code map} itself is returned. The
     * stream is now closed in a {@code finally} block, so it is released even when building
     * the closure or reading the lines throws (for example when {@code tokenPattern} is
     * missing).
     *
     * <p>With {@code SPARK_DATASET}, the resource is loaded through Spark using the
     * {@code format} option and transformed either by the supplied pipeline model or, when
     * none is given, by a pipeline of {@code DocumentAssembler}, {@code Tokenizer} (using
     * {@code tokenPattern}) and {@code Finisher}. The {@code finished} column is then passed
     * row by row to the {@code wordCount$2} closure together with a new map whose default
     * value is {@code 0L}; that new map is returned and {@code map} is not used.
     *
     * <p>NOTE(review): the closure fed to the Spark {@code foreach} is not visible here; if it
     * fills the local map inside a Spark task, the driver's copy may stay empty outside local
     * mode. Confirm.
     *
     * @param externalResource resource descriptor (path, read mode and options)
     * @param map              map to fill in {@code LINE_BY_LINE} mode
     * @param option           optional pre-built pipeline for {@code SPARK_DATASET} mode
     * @return the word-count map
     * @throws IllegalArgumentException when the read mode is neither {@code LINE_BY_LINE} nor
     *                                  {@code SPARK_DATASET}
     * @throws FileNotFoundException    when {@code map} is still empty after reading in
     *                                  {@code LINE_BY_LINE} mode
     */
    public scala.collection.mutable.Map<String, Object> wordCount(ExternalResource externalResource, scala.collection.mutable.Map<String, Object> map, Option<PipelineModel> option) {
        Enumeration.Value readAs = externalResource.readAs();
        Enumeration.Value LINE_BY_LINE = ReadAs$.MODULE$.LINE_BY_LINE();
        if (LINE_BY_LINE != null ? LINE_BY_LINE.equals(readAs) : readAs == null) {
            ResourceHelper.SourceStream sourceStream = new ResourceHelper.SourceStream(externalResource.path());
            try {
                sourceStream.content().getLines().foreach(new ResourceHelper$$anonfun$wordCount$1(map, new StringOps(Predef$.MODULE$.augmentString((String) externalResource.options().apply("tokenPattern"))).r()));
            } finally {
                sourceStream.close();
            }
            if (map.isEmpty()) {
                throw new FileNotFoundException("Word count dictionary for spell checker does not exist or is empty");
            }
            return map;
        }
        Enumeration.Value SPARK_DATASET = ReadAs$.MODULE$.SPARK_DATASET();
        if (SPARK_DATASET != null ? !SPARK_DATASET.equals(readAs) : readAs != null) {
            throw new IllegalArgumentException("format not available for word count");
        }
        Dataset load = spark().read().options(externalResource.options()).format((String) externalResource.options().apply("format")).load(externalResource.path());
        Dataset transform;
        if (option.isDefined()) {
            transform = ((PipelineModel) option.get()).transform(load);
        } else {
            transform = new Pipeline().setStages(new Transformer[]{new DocumentAssembler().setInputCol("value"), ((Tokenizer) ((HasOutputAnnotationCol) new Tokenizer().setInputCols((Seq<String>) Predef$.MODULE$.wrapRefArray(new String[]{"document"}))).setOutputCol("token")).setTargetPattern((String) externalResource.options().apply("tokenPattern")), new Finisher().setInputCols((Seq<String>) Predef$.MODULE$.wrapRefArray(new String[]{"token"})).setOutputCols((Seq<String>) Predef$.MODULE$.wrapRefArray(new String[]{"finished"})).setAnnotationSplitSymbol("--")}).fit(load).transform(load);
        }
        scala.collection.mutable.Map<String, Object> withDefaultValue = Map$.MODULE$.empty().withDefaultValue(BoxesRunTime.boxToLong(0L));
        transform.select("finished", Predef$.MODULE$.wrapRefArray(new String[0])).as(spark().implicits().newStringEncoder()).foreach(new ResourceHelper$$anonfun$wordCount$2(withDefaultValue));
        return withDefaultValue;
    }

    /**
     * Returns a new empty mutable map whose default value is the boxed long {@code 0}.
     *
     * <p>NOTE(review): the {@code $default$2} suffix matches the Scala compiler's naming for the
     * default of a method's 2nd parameter, presumably that of {@code wordCount}; confirm
     * against ResourceHelper.scala.
     */
    public scala.collection.mutable.Map<String, Object> wordCount$default$2() {
        return Map$.MODULE$.empty().withDefaultValue(BoxesRunTime.boxToLong(0L));
    }

    /**
     * Returns Scala's {@code None}.
     *
     * <p>NOTE(review): the {@code $default$3} suffix matches the Scala compiler's naming for the
     * default of a method's 3rd parameter, presumably that of {@code wordCount}; confirm
     * against ResourceHelper.scala.
     */
    public Option<PipelineModel> wordCount$default$3() {
        return None$.MODULE$;
    }

    /**
     * Creates the singleton: stores this instance in {@code MODULE$} and obtains the Spark
     * session through {@code SparkSession.builder().getOrCreate()}.
     */
    private ResourceHelper$() {
        // Store the instance first, then obtain the session from the builder.
        MODULE$ = this;
        this.spark = SparkSession$.MODULE$.builder().getOrCreate();
    }
}
