/*
 * Decompiled with CFR 0.152.
 */
package com.johnsnowlabs.ml.crf;

import com.johnsnowlabs.ml.crf.CrfDataset;
import com.johnsnowlabs.ml.crf.DatasetEncoder;
import com.johnsnowlabs.ml.crf.DatasetEncoder$;
import com.johnsnowlabs.ml.crf.DatasetMetadata;
import com.johnsnowlabs.ml.crf.DatasetReader$;
import com.johnsnowlabs.ml.crf.Instance;
import com.johnsnowlabs.ml.crf.InstanceLabels;
import com.johnsnowlabs.ml.crf.SparseArray;
import com.johnsnowlabs.ml.crf.TextSentenceAttrs;
import com.johnsnowlabs.ml.crf.TextSentenceLabels;
import com.johnsnowlabs.ml.crf.WordAttrs;
import com.johnsnowlabs.ml.crf.WordAttrs$;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import scala.Array$;
import scala.Function0;
import scala.Function1;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Serializable;
import scala.Some;
import scala.Tuple2;
import scala.collection.GenTraversableOnce;
import scala.collection.Iterable;
import scala.collection.Iterator;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SeqLike;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.TraversableOnce$;
import scala.collection.generic.GenericTraversableTemplate;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.StringBuilder;
import scala.io.BufferedSource;
import scala.io.Codec$;
import scala.io.Source;
import scala.io.Source$;
import scala.math.Ordering;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;

public final class DatasetReader$ {
    // Singleton instance of this class; it is assigned by the private constructor below.
    public static final DatasetReader$ MODULE$;

    static {
        // Instantiating the class runs the constructor, which stores the new instance in MODULE$.
        new DatasetReader$();
    }

    /**
     * Opens {@code file} as a character source using the fallback system codec.
     *
     * <p>Paths ending in {@code ".gz"} are decompressed on the fly through a
     * {@link GzipCompressorInputStream}; every other path is opened directly.
     *
     * <p>The returned source is handed to the caller still open; closing it is the
     * caller's responsibility.
     *
     * @param file path of the file to open
     * @return an open source over the (possibly decompressed) file contents
     */
    private Source getSource(String file) {
        if (!file.endsWith(".gz")) {
            return Source$.MODULE$.fromFile(file, Codec$.MODULE$.fallbackSystemCodec());
        }
        FileInputStream fis = new FileInputStream(file);
        try {
            GzipCompressorInputStream zis = new GzipCompressorInputStream((InputStream)fis);
            return Source$.MODULE$.fromInputStream((InputStream)zis, Codec$.MODULE$.fallbackSystemCodec());
        } catch (Throwable failure) {
            // Creating the gzip wrapper can fail (for example when the file is not
            // valid gzip data). Without this cleanup the already opened file handle
            // would leak, because nothing else holds a reference to it.
            try {
                fis.close();
            } catch (Throwable closeFailure) {
                failure.addSuppressed(closeFailure);
            }
            throw failure;
        }
    }

    /**
     * Lazily reads a tab-separated file and groups its lines into labelled sentences.
     *
     * <p>Each line is split on tab characters. A line with more than one column
     * contributes one token: the first column is the label and every following
     * column is a feature of the form {@code name=value} (a feature without
     * {@code '='} gets an empty value). A line with at most one column ends the
     * current sentence; the accumulated labels and tokens are then emitted as one
     * pair and the accumulators are reset.
     *
     * <p>After the last line has been consumed, any tokens still accumulated are
     * emitted as a final sentence, so a file that does not end with a separator
     * line no longer loses its last sentence.
     *
     * <p>NOTE(review): the source opened through {@code getSource} is never closed
     * here; the result is lazy, so closing it inside this method would break
     * iteration. Confirm whether callers are expected to exhaust the result.
     *
     * @param file path of the file to read ({@code ".gz"} files are decompressed by {@code getSource})
     * @param skipLines number of leading lines to drop before parsing
     * @return a lazily evaluated sequence of (labels, word attributes) pairs, one per sentence
     */
    private TraversableOnce<Tuple2<TextSentenceLabels, TextSentenceAttrs>> readWithLabels(String file, int skipLines) {
        Iterator lines = this.getSource(file).getLines().drop(skipLines);
        // Mutable accumulators shared by both closures below.
        ObjectRef labels = ObjectRef.create((Object)new ArrayBuffer());
        ObjectRef tokens2 = ObjectRef.create((Object)new ArrayBuffer());
        Iterator sentences = lines.flatMap((Function1)new Serializable(labels, tokens2){
            public static final long serialVersionUID = 0L;
            private final ObjectRef labels$1;
            private final ObjectRef tokens$1;

            public final GenTraversableOnce<Tuple2<TextSentenceLabels, TextSentenceAttrs>> apply(String line) {
                Iterable iterable;
                String[] words = line.split("\t");
                if (words.length <= 1) {
                    // Separator line: emit the sentence collected so far, if any.
                    iterable = Option$.MODULE$.option2Iterable(DatasetReader$.MODULE$.com$johnsnowlabs$ml$crf$DatasetReader$$addToResultIfExists$1(this.labels$1, this.tokens$1));
                } else {
                    Tuple2[] attrValues = (Tuple2[])Predef$.MODULE$.refArrayOps((Object[])Predef$.MODULE$.refArrayOps((Object[])words).drop(1)).map((Function1)new Serializable(this){
                        public static final long serialVersionUID = 0L;

                        public final Tuple2<String, String> apply(String feature) {
                            // NOTE(review): with a plain split, a value that itself contains '='
                            // keeps only the part before the second '=', and a feature made up
                            // solely of '=' characters yields an empty array, so attrValue[0]
                            // throws. Left unchanged here; confirm whether such input can occur.
                            String[] attrValue = feature.split("=");
                            String attr = attrValue[0];
                            String value = Predef$.MODULE$.refArrayOps((Object[])attrValue).size() == 1 ? "" : attrValue[1];
                            return new Tuple2((Object)attr, (Object)value);
                        }
                    }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
                    ((ArrayBuffer)this.tokens$1.elem).append((Seq)Predef$.MODULE$.wrapRefArray((Object[])new WordAttrs[]{new WordAttrs((Seq<Tuple2<String, String>>)Predef$.MODULE$.wrapRefArray((Object[])attrValues), WordAttrs$.MODULE$.apply$default$2())}));
                    ((ArrayBuffer)this.labels$1.elem).append((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{(String)Predef$.MODULE$.refArrayOps((Object[])words).head()}));
                    // Token lines only accumulate; nothing is emitted yet.
                    iterable = Option$.MODULE$.option2Iterable((Option)None$.MODULE$);
                }
                return iterable;
            }
            {
                this.labels$1 = labels$1;
                this.tokens$1 = tokens$1;
            }
        });
        // Iterator.++ takes its argument by name and only evaluates it once the left-hand
        // iterator is exhausted, so this flush runs after the last line has been parsed.
        // It emits the trailing sentence when the file does not end with a separator line,
        // and emits nothing when the accumulators are already empty.
        return sentences.$plus$plus((Function0)new Serializable(labels, tokens2){
            public static final long serialVersionUID = 0L;
            private final ObjectRef labels$1;
            private final ObjectRef tokens$1;

            public final GenTraversableOnce<Tuple2<TextSentenceLabels, TextSentenceAttrs>> apply() {
                return Option$.MODULE$.option2Iterable(DatasetReader$.MODULE$.com$johnsnowlabs$ml$crf$DatasetReader$$addToResultIfExists$1(this.labels$1, this.tokens$1));
            }
            {
                this.labels$1 = labels$1;
                this.tokens$1 = tokens$1;
            }
        });
    }

    /**
     * Compiler-generated default-argument accessor.
     *
     * <p>Judging by its name, this supplies the default for the second parameter
     * ({@code skipLines}) of {@code readWithLabels}.
     *
     * @return {@code 0}
     */
    private int readWithLabels$default$2() {
        final int defaultSkipLines = 0;
        return defaultSkipLines;
    }

    /**
     * Encodes textual sentences into a {@link CrfDataset}.
     *
     * <p>A fresh {@link DatasetEncoder} is created with its default constructor
     * argument. Every (labels, attributes) pair from {@code source} is turned into
     * an ({@link InstanceLabels}, {@link Instance}) pair by calling
     * {@code getFeatures} on that encoder once per word. The result is materialised
     * into an array before {@code getMetadata()} is read from the encoder.
     *
     * <p>NOTE(review): {@code getFeatures} presumably registers previously unseen
     * labels and attributes in the encoder; that is not visible in this file.
     *
     * @param source sentences as (labels, word attributes) pairs; consumed once
     * @return the encoded instances together with the encoder's metadata
     */
    public CrfDataset encodeDataset(TraversableOnce<Tuple2<TextSentenceLabels, TextSentenceAttrs>> source) {
        DatasetEncoder metadata = new DatasetEncoder(DatasetEncoder$.MODULE$.$lessinit$greater$default$1());
        Tuple2[] instances = (Tuple2[])TraversableOnce$.MODULE$.MonadOps(source).map((Function1)new Serializable(metadata){
            public static final long serialVersionUID = 0L;
            public final DatasetEncoder metadata$1;

            // Encodes one sentence; a null pair results in a MatchError.
            public final Tuple2<InstanceLabels, Instance> apply(Tuple2<TextSentenceLabels, TextSentenceAttrs> x0$1) {
                Tuple2<TextSentenceLabels, TextSentenceAttrs> tuple2 = x0$1;
                if (tuple2 != null) {
                    TextSentenceLabels textLabels = (TextSentenceLabels)tuple2._1();
                    TextSentenceAttrs textSentence = (TextSentenceAttrs)tuple2._2();
                    // The previous label starts at the encoder's start label for every sentence
                    // and is updated after each word by the inner closure.
                    ObjectRef prevLabel = ObjectRef.create((Object)this.metadata$1.startLabel());
                    // Labels and words are paired positionally, each pair is encoded, and the
                    // resulting (labelId, features) pairs are split back into two sequences.
                    Tuple2 tuple22 = ((GenericTraversableTemplate)((TraversableLike)textLabels.labels().zip(textSentence.words(), Seq$.MODULE$.canBuildFrom())).map((Function1)new Serializable(this, prevLabel){
                        public static final long serialVersionUID = 0L;
                        private final /* synthetic */ anonfun.2 $outer;
                        private final ObjectRef prevLabel$1;

                        // Encodes one (label, word) pair given the previous label.
                        public final Tuple2<Object, SparseArray> apply(Tuple2<String, WordAttrs> x0$2) {
                            Tuple2<String, WordAttrs> tuple2 = x0$2;
                            if (tuple2 != null) {
                                WordAttrs word;
                                Seq attrs;
                                String label = (String)tuple2._1();
                                // String attributes are passed to the encoder as "name=value" strings,
                                // numeric attributes as the word's float array.
                                Tuple2<Object, SparseArray> tuple22 = this.$outer.metadata$1.getFeatures((String)this.prevLabel$1.elem, label, (Seq<String>)(attrs = (Seq)(word = (WordAttrs)tuple2._2()).strAttrs().map((Function1)new Serializable(this){
                                    public static final long serialVersionUID = 0L;

                                    public final String apply(Tuple2<String, String> a) {
                                        return new StringBuilder().append((Object)((String)a._1())).append((Object)"=").append(a._2()).toString();
                                    }
                                }, Seq$.MODULE$.canBuildFrom())), (Seq<Object>)Predef$.MODULE$.wrapFloatArray(word.numAttrs()));
                                if (tuple22 != null) {
                                    Tuple2 tuple23;
                                    int labelId = tuple22._1$mcI$sp();
                                    SparseArray features = (SparseArray)tuple22._2();
                                    // The re-wrapping below is a decompiled tuple destructuring; the values are unchanged.
                                    Tuple2 tuple24 = tuple23 = new Tuple2((Object)BoxesRunTime.boxToInteger((int)labelId), (Object)features);
                                    int labelId2 = tuple24._1$mcI$sp();
                                    SparseArray features2 = (SparseArray)tuple24._2();
                                    // The current label becomes the previous label for the next word.
                                    this.prevLabel$1.elem = label;
                                    Tuple2 tuple25 = new Tuple2((Object)BoxesRunTime.boxToInteger((int)labelId2), (Object)features2);
                                    return tuple25;
                                }
                                throw new MatchError(tuple22);
                            }
                            throw new MatchError(tuple2);
                        }
                        {
                            if ($outer == null) {
                                throw null;
                            }
                            this.$outer = $outer;
                            this.prevLabel$1 = prevLabel$1;
                        }
                    }, Seq$.MODULE$.canBuildFrom())).unzip((Function1)Predef$.MODULE$.$conforms());
                    if (tuple22 != null) {
                        Tuple2 tuple23;
                        Seq labels = (Seq)tuple22._1();
                        Seq features = (Seq)tuple22._2();
                        Tuple2 tuple24 = tuple23 = new Tuple2((Object)labels, (Object)features);
                        Seq labels2 = (Seq)tuple24._1();
                        Seq features2 = (Seq)tuple24._2();
                        Tuple2 tuple25 = new Tuple2((Object)new InstanceLabels((Seq<Object>)labels2), (Object)new Instance((Seq<SparseArray>)features2));
                        return tuple25;
                    }
                    throw new MatchError((Object)tuple22);
                }
                throw new MatchError(tuple2);
            }
            {
                this.metadata$1 = metadata$1;
            }
        }).toArray(ClassTag$.MODULE$.apply(Tuple2.class));
        // toArray above has already consumed the whole source, so the metadata is read
        // only after every sentence has passed through the encoder.
        return new CrfDataset((Seq<Tuple2<InstanceLabels, Instance>>)Predef$.MODULE$.wrapRefArray((Object[])instances), metadata.getMetadata());
    }

    /**
     * Translates the textual labels of a sentence into label ids.
     *
     * <p>Each label is looked up in {@code metadata.label2Id()}; a label with no
     * entry is encoded as {@code -1}.
     *
     * @param labels textual labels of one sentence
     * @param metadata metadata providing the label-to-id mapping
     * @return the label ids wrapped in an {@link InstanceLabels}
     */
    public InstanceLabels com$johnsnowlabs$ml$crf$DatasetReader$$encodeLabels(TextSentenceLabels labels, DatasetMetadata metadata) {
        // Lookup function applied to every label text.
        Function1 toLabelId = (Function1)new Serializable(metadata){
            public static final long serialVersionUID = 0L;
            private final DatasetMetadata metadata$2;

            public final int apply(String text) {
                // Fallback supplier used when the label is missing from the mapping.
                Function0 unknownLabelId = (Function0)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final int apply() {
                        return this.apply$mcI$sp();
                    }

                    public int apply$mcI$sp() {
                        return -1;
                    }
                };
                Object boxedId = this.metadata$2.label2Id().getOrElse((Object)text, unknownLabelId);
                return BoxesRunTime.unboxToInt((Object)boxedId);
            }
            {
                this.metadata$2 = metadata$2;
            }
        };
        Seq encodedIds = (Seq)labels.labels().map(toLabelId, Seq$.MODULE$.canBuildFrom());
        return new InstanceLabels((Seq<Object>)encodedIds);
    }

    /**
     * Encodes the words of a sentence into sparse feature vectors using existing metadata.
     *
     * <p>For every word, each string attribute is looked up in
     * {@code metadata.attr2Id()} under the key {@code name=value} and, if present,
     * contributes the pair (id, 1.0f). Each numeric attribute is looked up under the
     * key {@code "num" + index} and, if present, contributes (id, value). Attributes
     * whose key is not in the mapping are dropped. The pairs are sorted by id,
     * de-duplicated and wrapped in a {@link SparseArray}.
     *
     * @param sentence word attributes of one sentence
     * @param metadata metadata providing the attribute-to-id mapping
     * @return one {@link SparseArray} per word, wrapped in an {@link Instance}
     */
    public Instance encodeSentence(TextSentenceAttrs sentence, DatasetMetadata metadata) {
        Seq items = (Seq)sentence.words().map((Function1)new Serializable(metadata){
            public static final long serialVersionUID = 0L;
            public final DatasetMetadata metadata$3;

            // Encodes a single word.
            public final SparseArray apply(WordAttrs word) {
                // String attributes: known "name=value" keys become (id, 1.0f); unknown keys are skipped.
                Seq strAttrs = (Seq)((TraversableLike)word.strAttrs().flatMap((Function1)new Serializable(this){
                    public static final long serialVersionUID = 0L;
                    private final /* synthetic */ anonfun.6 $outer;

                    public final Iterable<Object> apply(Tuple2<String, String> x0$3) {
                        Tuple2<String, String> tuple2 = x0$3;
                        if (tuple2 != null) {
                            String name = (String)tuple2._1();
                            String value = (String)tuple2._2();
                            String key = new StringBuilder().append((Object)name).append((Object)"=").append((Object)value).toString();
                            Iterable iterable = Option$.MODULE$.option2Iterable(this.$outer.metadata$3.attr2Id().get((Object)key));
                            return iterable;
                        }
                        throw new MatchError(tuple2);
                    }
                    {
                        if ($outer == null) {
                            throw null;
                        }
                        this.$outer = $outer;
                    }
                }, Seq$.MODULE$.canBuildFrom())).map((Function1)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final Tuple2<Object, Object> apply(int x$3) {
                        return new Tuple2((Object)BoxesRunTime.boxToInteger((int)x$3), (Object)BoxesRunTime.boxToFloat((float)1.0f));
                    }
                }, Seq$.MODULE$.canBuildFrom());
                // Numeric attributes: the i-th value is looked up under the key "num" + i and,
                // when that key is known, becomes (id, value).
                Tuple2[] numAttrs = (Tuple2[])Predef$.MODULE$.refArrayOps((Object[])Predef$.MODULE$.floatArrayOps(word.numAttrs()).zipWithIndex(Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)))).flatMap((Function1)new Serializable(this){
                    public static final long serialVersionUID = 0L;
                    private final /* synthetic */ anonfun.6 $outer;

                    public final Iterable<Tuple2<Object, Object>> apply(Tuple2<Object, Object> x0$4) {
                        Tuple2<Object, Object> tuple2 = x0$4;
                        if (tuple2 != null) {
                            float value = BoxesRunTime.unboxToFloat((Object)tuple2._1());
                            int idx = tuple2._2$mcI$sp();
                            String key = new StringBuilder().append((Object)"num").append((Object)BoxesRunTime.boxToInteger((int)idx)).toString();
                            Option attr = this.$outer.metadata$3.attr2Id().get((Object)key);
                            Iterable iterable = Option$.MODULE$.option2Iterable(attr.map((Function1)new Serializable(this, value){
                                public static final long serialVersionUID = 0L;
                                private final float value$1;

                                public final Tuple2<Object, Object> apply(int attrName) {
                                    return new Tuple2((Object)BoxesRunTime.boxToInteger((int)attrName), (Object)BoxesRunTime.boxToFloat((float)this.value$1));
                                }
                                {
                                    this.value$1 = value$1;
                                }
                            }));
                            return iterable;
                        }
                        throw new MatchError(tuple2);
                    }
                    {
                        if ($outer == null) {
                            throw null;
                        }
                        this.$outer = $outer;
                    }
                }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
                Seq id2value = (Seq)strAttrs.$plus$plus((GenTraversableOnce)Predef$.MODULE$.refArrayOps((Object[])numAttrs), Seq$.MODULE$.canBuildFrom());
                // Sort by attribute id, then drop repeated (id, value) pairs.
                // NOTE(review): distinct compares whole pairs, so the same id with two
                // different values would appear twice - confirm this cannot happen.
                Tuple2[] attrValues = (Tuple2[])((TraversableOnce)((SeqLike)id2value.sortBy((Function1)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final int apply(Tuple2<Object, Object> id) {
                        return id._1$mcI$sp();
                    }
                }, (Ordering)Ordering.Int$.MODULE$)).distinct()).toArray(ClassTag$.MODULE$.apply(Tuple2.class));
                return new SparseArray(attrValues);
            }
            {
                this.metadata$3 = metadata$3;
            }
        }, Seq$.MODULE$.canBuildFrom());
        return new Instance((Seq<SparseArray>)items);
    }

    /**
     * Reads a labelled dataset file and encodes it, building the metadata from its contents.
     *
     * @param file path of the file to read
     * @param skipLines number of leading lines to drop before parsing
     * @return the encoded dataset produced by {@code encodeDataset}
     */
    public CrfDataset readAndEncode(String file, int skipLines) {
        return this.encodeDataset(this.readWithLabels(file, skipLines));
    }

    /**
     * Reads a labelled dataset file and encodes every sentence against existing metadata.
     *
     * <p>Labels are encoded with {@code encodeLabels} and word attributes with
     * {@code encodeSentence}, both using the supplied {@code metadata}.
     *
     * @param file path of the file to read
     * @param skipLines number of leading lines to drop before parsing
     * @param metadata metadata used to look up label and attribute ids
     * @return the encoded (labels, instance) pairs, one per sentence
     */
    public TraversableOnce<Tuple2<InstanceLabels, Instance>> readAndEncode(String file, int skipLines, DatasetMetadata metadata) {
        TraversableOnce<Tuple2<TextSentenceLabels, TextSentenceAttrs>> sentences = this.readWithLabels(file, skipLines);
        return TraversableOnce$.MODULE$.MonadOps(sentences).map((Function1)new Serializable(metadata){
            public static final long serialVersionUID = 0L;
            private final DatasetMetadata metadata$4;

            // Encodes one (labels, attributes) pair; a null pair is rejected with a MatchError.
            public final Tuple2<InstanceLabels, Instance> apply(Tuple2<TextSentenceLabels, TextSentenceAttrs> pair) {
                if (pair == null) {
                    throw new MatchError(pair);
                }
                TextSentenceLabels textLabels = (TextSentenceLabels)pair._1();
                TextSentenceAttrs textAttrs = (TextSentenceAttrs)pair._2();
                InstanceLabels encodedLabels = DatasetReader$.MODULE$.com$johnsnowlabs$ml$crf$DatasetReader$$encodeLabels(textLabels, this.metadata$4);
                Instance encodedSentence = DatasetReader$.MODULE$.encodeSentence(textAttrs, this.metadata$4);
                return new Tuple2((Object)encodedLabels, (Object)encodedSentence);
            }
            {
                this.metadata$4 = metadata$4;
            }
        });
    }

    /**
     * Emits the sentence accumulated so far, if there is one, and resets the accumulators.
     *
     * <p>When the token buffer is non-empty, the current label and token buffers are
     * wrapped into a (labels, attributes) pair and both references are replaced with
     * fresh empty buffers. When it is empty, nothing is changed.
     *
     * @param labels$1 reference holding the label buffer of the sentence in progress
     * @param tokens$1 reference holding the token buffer of the sentence in progress
     * @return {@code Some} with the accumulated sentence, or {@code None} when no tokens were collected
     */
    public final Option com$johnsnowlabs$ml$crf$DatasetReader$$addToResultIfExists$1(ObjectRef labels$1, ObjectRef tokens$1) {
        if (!((ArrayBuffer)tokens$1.elem).nonEmpty()) {
            return None$.MODULE$;
        }
        TextSentenceLabels sentenceLabels = new TextSentenceLabels((Seq<String>)((ArrayBuffer)labels$1.elem));
        TextSentenceAttrs sentenceAttrs = new TextSentenceAttrs((Seq<WordAttrs>)((ArrayBuffer)tokens$1.elem));
        Tuple2 sentence = new Tuple2((Object)sentenceLabels, (Object)sentenceAttrs);
        // Start fresh buffers so the emitted sentence keeps the old ones to itself.
        labels$1.elem = new ArrayBuffer();
        tokens$1.elem = new ArrayBuffer();
        return new Some((Object)sentence);
    }

    /** Private constructor: stores the newly created instance in the {@code MODULE$} singleton field. */
    private DatasetReader$() {
        MODULE$ = this;
    }
}

