package cc.factorie.app.nlp.segment;

import scala.MatchError;
import scala.Option;
import scala.Predef$;
import scala.Serializable;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.Iterable;
import scala.collection.Iterator;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.HashMap;
import scala.collection.mutable.HashMap$;
import scala.collection.mutable.HashSet;
import scala.math.package$;
import scala.runtime.BoxesRunTime;

/* compiled from: PunktSentenceSegmenter.scala */
/* loaded from: input_file:cc/factorie/app/nlp/segment/PunktSentenceSegmenter$Punkt$PunktTrainer.class */
/**
 * Trainer for the Punkt sentence segmenter (decompiled from the Scala source
 * {@code PunktSentenceSegmenter.scala}).
 *
 * <p>The trainer accumulates three frequency distributions (token types, sentence starters and
 * collocations) plus two counters while consuming tokens, and writes what it learns
 * (abbreviation types, orthographic context, sentence starters, collocations) into the shared
 * {@code PunktParameters} instance passed to the constructor.
 *
 * <p>Method and field names follow the Scala compiler's conventions: {@code x()} is a getter,
 * {@code x_$eq(v)} is the matching setter and {@code m$default$N()} supplies the default value
 * of the N-th parameter of {@code m}.
 *
 * <p>NOTE(review): the body of {@code loop$1} was not recovered by the decompiler and throws
 * {@link UnsupportedOperationException}. As a consequence {@link #reclassifyAbbrevTypes(List)},
 * {@link #findAbbrevTypes()} and every training entry point fail at runtime in this
 * decompiled form.
 */
public class PunktSentenceSegmenter$Punkt$PunktTrainer extends PunktSentenceSegmenter$Punkt$PunktBase {
    private final Option<String> trainText;
    private final boolean verbose;
    // Same object that is handed to the superclass constructor; public only because the Scala
    // compiler name-mangles it for access from the generated closure classes.
    public final PunktSentenceSegmenter$Punkt$PunktParameters cc$factorie$app$nlp$segment$PunktSentenceSegmenter$Punkt$PunktTrainer$$params;
    private PunktSentenceSegmenter$Punkt$UnigramFreqDist typeFreqDist;
    private PunktSentenceSegmenter$Punkt$UnigramFreqDist sentenceStarterFreqDist;
    private PunktSentenceSegmenter$Punkt$BigramFreqDist collocationFreqDist;
    private int numPeriodTokens;
    private int sentenceBreakCount;
    private boolean finalized;
    private final double ABBREV;
    private boolean IGNORE_ABBREV_PENALTY;
    private int ABBREV_BACKOFF;
    private double COLLOCATION;
    private int SENT_STARTER;
    private boolean INCLUDE_ALL_COLLOCS;
    private boolean INCLUDE_ABBREV_COLLOCS;
    private int MIN_COLLOC_FREQ;

    /** @return the optional training text supplied at construction time */
    public Option<String> trainText() {
        return this.trainText;
    }

    /** @return the verbose flag supplied at construction time */
    public boolean verbose() {
        return this.verbose;
    }

    /** @return the frequency distribution keyed by token type */
    public PunktSentenceSegmenter$Punkt$UnigramFreqDist typeFreqDist() {
        return this.typeFreqDist;
    }

    /** Replaces the token-type frequency distribution. */
    public void typeFreqDist_$eq(PunktSentenceSegmenter$Punkt$UnigramFreqDist punktSentenceSegmenter$Punkt$UnigramFreqDist) {
        this.typeFreqDist = punktSentenceSegmenter$Punkt$UnigramFreqDist;
    }

    /** @return the frequency distribution of candidate sentence starters */
    public PunktSentenceSegmenter$Punkt$UnigramFreqDist sentenceStarterFreqDist() {
        return this.sentenceStarterFreqDist;
    }

    /** Replaces the sentence-starter frequency distribution. */
    public void sentenceStarterFreqDist_$eq(PunktSentenceSegmenter$Punkt$UnigramFreqDist punktSentenceSegmenter$Punkt$UnigramFreqDist) {
        this.sentenceStarterFreqDist = punktSentenceSegmenter$Punkt$UnigramFreqDist;
    }

    /** @return the frequency distribution of candidate collocations (type pairs) */
    public PunktSentenceSegmenter$Punkt$BigramFreqDist collocationFreqDist() {
        return this.collocationFreqDist;
    }

    /** Replaces the collocation frequency distribution. */
    public void collocationFreqDist_$eq(PunktSentenceSegmenter$Punkt$BigramFreqDist punktSentenceSegmenter$Punkt$BigramFreqDist) {
        this.collocationFreqDist = punktSentenceSegmenter$Punkt$BigramFreqDist;
    }

    /** @return the number of tokens seen so far for which {@code periodFinal()} was true */
    public int numPeriodTokens() {
        return this.numPeriodTokens;
    }

    /** Sets the period-final token counter. */
    public void numPeriodTokens_$eq(int i) {
        this.numPeriodTokens = i;
    }

    /** @return the running total accumulated from {@link #getSentenceBreakCount(Iterable)} */
    public int sentenceBreakCount() {
        return this.sentenceBreakCount;
    }

    /** Sets the sentence-break counter. */
    public void sentenceBreakCount_$eq(int i) {
        this.sentenceBreakCount = i;
    }

    /** @return {@code true} once {@link #finalizeTraining(boolean)} has run since the last training pass */
    public boolean finalized() {
        return this.finalized;
    }

    /** Sets the finalized flag. */
    public void finalized_$eq(boolean z) {
        this.finalized = z;
    }

    /** @return the score threshold used when classifying abbreviations (initialised to 0.3) */
    public double ABBREV() {
        return this.ABBREV;
    }

    /** @return the IGNORE_ABBREV_PENALTY setting (initialised to {@code false}) */
    public boolean IGNORE_ABBREV_PENALTY() {
        return this.IGNORE_ABBREV_PENALTY;
    }

    /** Sets IGNORE_ABBREV_PENALTY. */
    public void IGNORE_ABBREV_PENALTY_$eq(boolean z) {
        this.IGNORE_ABBREV_PENALTY = z;
    }

    /** @return the count below which a type is eligible as a rare abbreviation (initialised to 5) */
    public int ABBREV_BACKOFF() {
        return this.ABBREV_BACKOFF;
    }

    /** Sets ABBREV_BACKOFF. */
    public void ABBREV_BACKOFF_$eq(int i) {
        this.ABBREV_BACKOFF = i;
    }

    /** @return the COLLOCATION setting (initialised to 7.88) */
    public double COLLOCATION() {
        return this.COLLOCATION;
    }

    /** Sets COLLOCATION. */
    public void COLLOCATION_$eq(double d) {
        this.COLLOCATION = d;
    }

    /** @return the SENT_STARTER setting (initialised to 30) */
    public int SENT_STARTER() {
        return this.SENT_STARTER;
    }

    /** Sets SENT_STARTER. */
    public void SENT_STARTER_$eq(int i) {
        this.SENT_STARTER = i;
    }

    /** @return whether every period-final token pair is a collocation candidate (initialised to {@code false}) */
    public boolean INCLUDE_ALL_COLLOCS() {
        return this.INCLUDE_ALL_COLLOCS;
    }

    /** Sets INCLUDE_ALL_COLLOCS. */
    public void INCLUDE_ALL_COLLOCS_$eq(boolean z) {
        this.INCLUDE_ALL_COLLOCS = z;
    }

    /** @return whether pairs starting with an abbreviation are collocation candidates (initialised to {@code false}) */
    public boolean INCLUDE_ABBREV_COLLOCS() {
        return this.INCLUDE_ABBREV_COLLOCS;
    }

    /** Sets INCLUDE_ABBREV_COLLOCS. */
    public void INCLUDE_ABBREV_COLLOCS_$eq(boolean z) {
        this.INCLUDE_ABBREV_COLLOCS = z;
    }

    /** @return the MIN_COLLOC_FREQ setting (initialised to 1) */
    public int MIN_COLLOC_FREQ() {
        return this.MIN_COLLOC_FREQ;
    }

    /** Sets MIN_COLLOC_FREQ. */
    public void MIN_COLLOC_FREQ_$eq(int i) {
        this.MIN_COLLOC_FREQ = i;
    }

    /**
     * Trains on raw text: tokenizes it with {@code tokenizeWords} (not defined in this class,
     * presumably inherited from the base class) and feeds the tokens to the training pass.
     *
     * @param str text to learn from
     * @param z   when {@code true}, progress lines are printed to standard output
     * @param z2  when {@code true}, {@link #finalizeTraining(boolean)} is invoked afterwards
     */
    public void train(String str, boolean z, boolean z2) {
        trainTokensLogic(tokenizeWords(str), z, trainTokensLogic$default$3());
        if (z2) {
            finalizeTraining(z);
        }
    }

    /** @return default for the verbose parameter of {@code train}: {@code false} */
    public boolean train$default$2() {
        return false;
    }

    /** @return default for the finalize parameter of {@code train}: {@code true} */
    public boolean train$default$3() {
        return true;
    }

    /**
     * Trains on an already tokenized buffer.
     *
     * @param arrayBuffer tokens to learn from
     * @param z           when {@code true}, progress lines are printed to standard output
     * @param z2          when {@code true}, {@link #finalizeTraining(boolean)} is invoked afterwards
     */
    public void trainTokens(ArrayBuffer<PunktSentenceSegmenter$Punkt$PunktToken> arrayBuffer, boolean z, boolean z2) {
        trainTokensLogic(arrayBuffer, z, trainTokensLogic$default$3());
        if (z2) {
            finalizeTraining(z);
        }
    }

    /** @return default for the verbose parameter of {@code trainTokens}: {@code false} */
    public boolean trainTokens$default$2() {
        return false;
    }

    /** @return default for the finalize parameter of {@code trainTokens}: {@code true} */
    public boolean trainTokens$default$3() {
        return true;
    }

    /**
     * Core training pass over one batch of tokens. In order it:
     * <ol>
     *   <li>clears the finalized flag;</li>
     *   <li>counts every token type and every period-final token;</li>
     *   <li>re-scores the batch's unique types and adds/removes abbreviation types in the
     *       parameters according to the {@link #ABBREV()} threshold;</li>
     *   <li>runs {@code annotateFirstPass} (defined outside this class) and
     *       {@link #annotateOrthographyData(ArrayBuffer)};</li>
     *   <li>adds the batch's sentence-break count to the running total;</li>
     *   <li>walks adjacent token pairs to collect rare abbreviations, sentence-starter
     *       candidates and collocation candidates.</li>
     * </ol>
     *
     * @param arrayBuffer tokens of the batch
     * @param z           when {@code true}, decisions are printed to standard output
     * @param z2          not read by this method
     */
    private void trainTokensLogic(ArrayBuffer<PunktSentenceSegmenter$Punkt$PunktToken> arrayBuffer, boolean z, boolean z2) {
        finalized_$eq(false);

        // Pass 1: type frequencies and number of period-final tokens.
        Iterator tokenIt = arrayBuffer.iterator();
        while (tokenIt.hasNext()) {
            PunktSentenceSegmenter$Punkt$PunktToken token = (PunktSentenceSegmenter$Punkt$PunktToken) tokenIt.next();
            PunktSentenceSegmenter$Punkt$UnigramFreqDist typeCounts = typeFreqDist();
            typeCounts.update(token.ty(), BoxesRunTime.boxToInteger(BoxesRunTime.unboxToInt(typeCounts.apply(token.ty())) + 1));
            if (token.periodFinal()) {
                numPeriodTokens_$eq(numPeriodTokens() + 1);
            }
        }

        // Pass 2: each entry is (type, score, isAdd); update the abbreviation set accordingly.
        Iterator abbrevIt = reclassifyAbbrevTypes(uniqueTypes(arrayBuffer).toList()).iterator();
        while (abbrevIt.hasNext()) {
            Tuple3 entry = (Tuple3) abbrevIt.next();
            if (entry == null) {
                throw new MatchError(entry);
            }
            String abbrevType = (String) entry._1();
            double score = BoxesRunTime.unboxToDouble(entry._2());
            boolean isAdd = BoxesRunTime.unboxToBoolean(entry._3());
            if (score >= ABBREV()) {
                if (isAdd) {
                    this.cc$factorie$app$nlp$segment$PunktSentenceSegmenter$Punkt$PunktTrainer$$params.abbrevTypes().$plus$eq(abbrevType);
                    if (z) {
                        Predef$.MODULE$.println(new StringOps(Predef$.MODULE$.augmentString(" Abbreviation (isAdd: %s): [%6.4f] %s")).format(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToBoolean(isAdd), BoxesRunTime.boxToDouble(score), abbrevType})));
                    }
                }
            } else if (!isAdd) {
                this.cc$factorie$app$nlp$segment$PunktSentenceSegmenter$Punkt$PunktTrainer$$params.abbrevTypes().$minus$eq(abbrevType);
                if (z) {
                    Predef$.MODULE$.println(new StringOps(Predef$.MODULE$.augmentString(" Removed abbreviation: [%6.4f] %s")).format(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToDouble(score), abbrevType})));
                }
            }
        }

        // The annotations below are made with the abbreviation set as updated above.
        annotateFirstPass(arrayBuffer);
        annotateOrthographyData(arrayBuffer);
        sentenceBreakCount_$eq(sentenceBreakCount() + getSentenceBreakCount(arrayBuffer));

        // Pass 3: inspect adjacent pairs; only pairs whose first token ends in a period matter.
        Iterator pairIt = PunktSentenceSegmenter$Punkt$.MODULE$.iteratePairs(arrayBuffer).iterator();
        while (pairIt.hasNext()) {
            Tuple2 pair = (Tuple2) pairIt.next();
            if (pair == null) {
                throw new MatchError(pair);
            }
            PunktSentenceSegmenter$Punkt$PunktToken first = (PunktSentenceSegmenter$Punkt$PunktToken) pair._1();
            PunktSentenceSegmenter$Punkt$PunktToken second = (PunktSentenceSegmenter$Punkt$PunktToken) pair._2();
            if (first.periodFinal()) {
                if (isRareAbbrevType(first, second)) {
                    this.cc$factorie$app$nlp$segment$PunktSentenceSegmenter$Punkt$PunktTrainer$$params.abbrevTypes().$plus$eq(first.typeNoPeriod());
                    if (z) {
                        Predef$.MODULE$.println(new StringOps(Predef$.MODULE$.augmentString(" Rare Abbrev: %s")).format(Predef$.MODULE$.genericWrapArray(new Object[]{first.ty()})));
                    }
                }
                if (isPotentialSentenceStarter(first, second)) {
                    PunktSentenceSegmenter$Punkt$UnigramFreqDist starterCounts = sentenceStarterFreqDist();
                    starterCounts.update(second.ty(), BoxesRunTime.boxToInteger(BoxesRunTime.unboxToInt(starterCounts.apply(second.ty())) + 1));
                }
                if (isPotentialCollocation(first, second)) {
                    PunktSentenceSegmenter$Punkt$BigramFreqDist collocCounts = collocationFreqDist();
                    Tuple2 key = new Tuple2(first.typeNoPeriod(), second.typeNoSentPeriod());
                    collocCounts.update(key, BoxesRunTime.boxToInteger(BoxesRunTime.unboxToInt(collocCounts.apply(key)) + 1));
                }
            }
        }
    }

    /** @return default for the verbose parameter of {@code trainTokensLogic}: {@code false} */
    private boolean trainTokensLogic$default$2() {
        return false;
    }

    /** @return default for the third parameter of {@code trainTokensLogic}: {@code true} */
    private boolean trainTokensLogic$default$3() {
        return true;
    }

    /**
     * Rebuilds the sentence-starter and collocation sets in the parameters from the current
     * frequency distributions, then marks the trainer as finalized.
     *
     * <p>The filter and per-element actions live in generated closure classes that are not
     * part of this file; presumably they register each result in the parameters and print it
     * when {@code z} is set - confirm against those classes.
     *
     * @param z verbose flag handed to the per-element closures
     */
    public void finalizeTraining(boolean z) {
        this.cc$factorie$app$nlp$segment$PunktSentenceSegmenter$Punkt$PunktTrainer$$params.clearSentenceStarters();
        findSentenceStarters().withFilter(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$finalizeTraining$1(this)).foreach(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$finalizeTraining$2(this, z));
        this.cc$factorie$app$nlp$segment$PunktSentenceSegmenter$Punkt$PunktTrainer$$params.clearCollocations();
        findCollocations().withFilter(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$finalizeTraining$3(this)).foreach(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$finalizeTraining$4(this, z));
        finalized_$eq(true);
    }

    /** @return default for the verbose parameter of {@code finalizeTraining}: {@code false} */
    public boolean finalizeTraining$default$1() {
        return false;
    }

    /**
     * Prunes the collected statistics by frequency.
     *
     * @param i  threshold for the orthographic context; pruning of it only happens when {@code i > 1}
     * @param i2 threshold passed to {@code thresholdFreq} of the type distribution
     * @param i3 threshold passed to {@code thresholdFreq} of the collocation distribution
     * @param i4 threshold passed to {@code thresholdFreq} of the sentence-starter distribution
     */
    public void freqThreshold(int i, int i2, int i3, int i4) {
        if (i > 1) {
            // Grab the current context before clearing so the closure can copy entries back.
            // NOTE(review): this relies on clearOrthoContext() installing a fresh map rather
            // than emptying this one in place - confirm in PunktParameters.
            HashMap<String, Object> previousContext = this.cc$factorie$app$nlp$segment$PunktSentenceSegmenter$Punkt$PunktTrainer$$params.orthoContext();
            this.cc$factorie$app$nlp$segment$PunktSentenceSegmenter$Punkt$PunktTrainer$$params.clearOrthoContext();
            typeFreqDist().withFilter(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$freqThreshold$1(this)).withFilter(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$freqThreshold$2(this, i)).foreach(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$freqThreshold$3(this, previousContext));
        }
        typeFreqDist_$eq(typeFreqDist().thresholdFreq(i2));
        collocationFreqDist_$eq(collocationFreqDist().thresholdFreq(i3));
        sentenceStarterFreqDist_$eq(sentenceStarterFreqDist().thresholdFreq(i4));
    }

    /** @return default for the first parameter of {@code freqThreshold}: 2 */
    public int freqThreshold$default$1() {
        return 2;
    }

    /** @return default for the second parameter of {@code freqThreshold}: 2 */
    public int freqThreshold$default$2() {
        return 2;
    }

    /** @return default for the third parameter of {@code freqThreshold}: 2 */
    public int freqThreshold$default$3() {
        return 2;
    }

    /** @return default for the fourth parameter of {@code freqThreshold}: 2 */
    public int freqThreshold$default$4() {
        return 2;
    }

    /**
     * Records, for every token, the orthographic flag that corresponds to its position context
     * (Internal, Initial or Unknown) combined with its {@code firstCase()}.
     *
     * <p>The context starts as Internal and is updated after each token from that token's
     * sentence-break, number, initial, ellipsis and abbreviation properties.
     *
     * @param arrayBuffer tokens to scan, in document order
     */
    public void annotateOrthographyData(ArrayBuffer<PunktSentenceSegmenter$Punkt$PunktToken> arrayBuffer) {
        Serializable context = PunktSentenceSegmenter$Punkt$Internal$.MODULE$;
        Iterator tokenIt = arrayBuffer.iterator();
        while (tokenIt.hasNext()) {
            PunktSentenceSegmenter$Punkt$PunktToken token = (PunktSentenceSegmenter$Punkt$PunktToken) tokenIt.next();

            // A paragraph start upgrades any context other than Unknown to Initial.
            if (token.paraStart()) {
                Object unknown = PunktSentenceSegmenter$Punkt$Unknown$.MODULE$;
                if (context != null ? !context.equals(unknown) : unknown != null) {
                    context = PunktSentenceSegmenter$Punkt$Initial$.MODULE$;
                }
            }
            // A line start downgrades an Internal context to Unknown.
            if (token.lineStart()) {
                Object internal = PunktSentenceSegmenter$Punkt$Internal$.MODULE$;
                if (context != null ? context.equals(internal) : internal == null) {
                    context = PunktSentenceSegmenter$Punkt$Unknown$.MODULE$;
                }
            }

            // Look up the flag for (context, case); the fallback comes from a generated
            // closure, and a result of 0 means there is nothing to record.
            int flag = BoxesRunTime.unboxToInt(PunktSentenceSegmenter$Punkt$.MODULE$.orthoMap().getOrElse(new Tuple2(context, token.firstCase()), new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$1(this)));
            if (flag != 0) {
                this.cc$factorie$app$nlp$segment$PunktSentenceSegmenter$Punkt$PunktTrainer$$params.addOrthoContext(token.typeNoSentPeriod(), flag);
            }

            // Decide the context in which the next token will be seen.
            if (token.sentenceBreak()) {
                if (token.isNumber() || token.isInitial()) {
                    context = PunktSentenceSegmenter$Punkt$Unknown$.MODULE$;
                } else {
                    context = PunktSentenceSegmenter$Punkt$Initial$.MODULE$;
                }
            } else if (token.ellipsis() || token.abbr()) {
                context = PunktSentenceSegmenter$Punkt$Unknown$.MODULE$;
            } else {
                context = PunktSentenceSegmenter$Punkt$Internal$.MODULE$;
            }
        }
    }

    /**
     * Decides whether the first token of a pair should be treated as a rare abbreviation.
     *
     * @param punktSentenceSegmenter$Punkt$PunktToken  candidate token
     * @param punktSentenceSegmenter$Punkt$PunktToken2 token that follows the candidate
     * @return {@code true} when the candidate is a non-abbreviation sentence break, is not yet a
     *         known abbreviation, occurs fewer than {@link #ABBREV_BACKOFF()} times, and the
     *         following token either starts with internal punctuation or is lower-case with an
     *         orthographic context that has the ORTHO_BEG_UC flag but not ORTHO_MID_UC
     */
    public boolean isRareAbbrevType(PunktSentenceSegmenter$Punkt$PunktToken punktSentenceSegmenter$Punkt$PunktToken, PunktSentenceSegmenter$Punkt$PunktToken punktSentenceSegmenter$Punkt$PunktToken2) {
        if (punktSentenceSegmenter$Punkt$PunktToken.abbr() || !punktSentenceSegmenter$Punkt$PunktToken.sentenceBreak()) {
            return false;
        }
        String candidateType = punktSentenceSegmenter$Punkt$PunktToken.typeNoSentPeriod();
        // Occurrences of the type itself plus those of the type with its last character dropped.
        int occurrences = BoxesRunTime.unboxToInt(typeFreqDist().apply(candidateType)) + BoxesRunTime.unboxToInt(typeFreqDist().apply(new StringOps(Predef$.MODULE$.augmentString(candidateType)).dropRight(1)));
        if (this.cc$factorie$app$nlp$segment$PunktSentenceSegmenter$Punkt$PunktTrainer$$params.abbrevTypes().contains(candidateType) || occurrences >= ABBREV_BACKOFF()) {
            return false;
        }
        if (super.languageVars().internalPunctuation().contains((CharSequence) new StringOps(Predef$.MODULE$.augmentString(punktSentenceSegmenter$Punkt$PunktToken2.token())).take(1))) {
            return true;
        }
        if (!punktSentenceSegmenter$Punkt$PunktToken2.firstLower()) {
            return false;
        }
        int nextContext = BoxesRunTime.unboxToInt(this.cc$factorie$app$nlp$segment$PunktSentenceSegmenter$Punkt$PunktTrainer$$params.orthoContext().apply(punktSentenceSegmenter$Punkt$PunktToken2.typeNoSentPeriod()));
        return PunktSentenceSegmenter$Punkt$.MODULE$.hasFlag(nextContext, PunktSentenceSegmenter$Punkt$.MODULE$.ORTHO_BEG_UC()) && !PunktSentenceSegmenter$Punkt$.MODULE$.hasFlag(nextContext, PunktSentenceSegmenter$Punkt$.MODULE$.ORTHO_MID_UC());
    }

    /**
     * @param punktSentenceSegmenter$Punkt$PunktToken  first token of the pair
     * @param punktSentenceSegmenter$Punkt$PunktToken2 token that follows it
     * @return {@code true} when the first token is a sentence break that is neither a number nor
     *         an initial, and the second token is alphabetic
     */
    public boolean isPotentialSentenceStarter(PunktSentenceSegmenter$Punkt$PunktToken punktSentenceSegmenter$Punkt$PunktToken, PunktSentenceSegmenter$Punkt$PunktToken punktSentenceSegmenter$Punkt$PunktToken2) {
        return punktSentenceSegmenter$Punkt$PunktToken.sentenceBreak() && !punktSentenceSegmenter$Punkt$PunktToken.isNumber() && !punktSentenceSegmenter$Punkt$PunktToken.isInitial() && punktSentenceSegmenter$Punkt$PunktToken2.isAlpha();
    }

    /**
     * @param punktSentenceSegmenter$Punkt$PunktToken  first token of the pair
     * @param punktSentenceSegmenter$Punkt$PunktToken2 token that follows it
     * @return {@code true} when both tokens are non-punctuation and either all collocations are
     *         enabled, abbreviation collocations are enabled and the first token is an
     *         abbreviation, or the first token is a sentence break that is a number or an initial
     */
    public boolean isPotentialCollocation(PunktSentenceSegmenter$Punkt$PunktToken punktSentenceSegmenter$Punkt$PunktToken, PunktSentenceSegmenter$Punkt$PunktToken punktSentenceSegmenter$Punkt$PunktToken2) {
        return (INCLUDE_ALL_COLLOCS() || ((INCLUDE_ABBREV_COLLOCS() && punktSentenceSegmenter$Punkt$PunktToken.abbr()) || (punktSentenceSegmenter$Punkt$PunktToken.sentenceBreak() && (punktSentenceSegmenter$Punkt$PunktToken.isNumber() || punktSentenceSegmenter$Punkt$PunktToken.isInitial())))) && punktSentenceSegmenter$Punkt$PunktToken.isNonPunctuation() && punktSentenceSegmenter$Punkt$PunktToken2.isNonPunctuation();
    }

    /**
     * Collects collocation results from the collocation distribution into a new buffer.
     * The selection and scoring are done by generated closures that are not part of this file;
     * they receive the buffer and the total of all type counts.
     *
     * @return buffer of ((firstType, secondType), value) entries filled by the closures
     */
    public ArrayBuffer<Tuple2<Tuple2<String, String>, Object>> findCollocations() {
        ArrayBuffer<Tuple2<Tuple2<String, String>, Object>> found = new ArrayBuffer<>();
        collocationFreqDist().withFilter(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$findCollocations$1(this)).withFilter(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$findCollocations$2(this)).foreach(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$findCollocations$3(this, found, sum(typeFreqDist().values())));
        return found;
    }

    /**
     * Adds up boxed integers.
     *
     * @param iterable values to add; each element is unboxed as an {@code int}
     * @return the {@code int} total (0 for an empty input)
     */
    public int sum(Iterable<Object> iterable) {
        Iterator valueIt = iterable.iterator();
        int total = 0;
        while (valueIt.hasNext()) {
            total += BoxesRunTime.unboxToInt(valueIt.next());
        }
        return total;
    }

    /**
     * Scores the given types as abbreviation candidates.
     *
     * <p>Delegates to {@code loop$1} with the input list, an empty accumulator and the total of
     * all type counts. NOTE(review): {@code loop$1} was not decompiled, so this call currently
     * always throws {@link UnsupportedOperationException}.
     *
     * @param list types to score
     * @return list of (type, score, isAdd) triples
     */
    public List<Tuple3<String, Object, Object>> reclassifyAbbrevTypes(List<String> list) {
        return loop$1(loop$default$1$1(list), loop$default$2$1(), sum(typeFreqDist().values()));
    }

    /**
     * Log-likelihood ratio comparing the observed rate {@code i2 / i4} with a fixed rate of 0.99:
     * {@code -2 * ((i3*ln(p) + (i-i3)*ln(1-p)) - (i3*ln(0.99) + (i-i3)*ln(0.01)))} with
     * {@code p = i2 / i4}.
     *
     * <p>The ratio is evaluated in floating point, so {@code i4 == 0} yields NaN or an infinity
     * instead of an exception.
     *
     * @param i  number of trials
     * @param i2 numerator of the observed rate
     * @param i3 number of successes among the {@code i} trials
     * @param i4 denominator of the observed rate
     * @return the log-likelihood ratio
     */
    public double dunningLogLikelihood(int i, int i2, int i3, int i4) {
        // Cast before dividing: int / int would truncate the rate to 0 whenever i2 < i4,
        // which makes log(p) negative infinity.
        double p = ((double) i2) / i4;
        return (-2.0d) * (((i3 * package$.MODULE$.log(p)) + ((i - i3) * package$.MODULE$.log(1.0d - p))) - ((i3 * package$.MODULE$.log(0.99d)) + ((i - i3) * package$.MODULE$.log(1.0d - 0.99d))));
    }

    /**
     * Log-likelihood ratio for a pair of counts, built from three rates:
     * {@code p = i2 / i4}, {@code p1 = i3 / i} and {@code p2 = (i2 - i3) / (i4 - i)}.
     * The {@code p1} term is skipped when {@code i == i3} and the {@code p2} term when
     * {@code i2 == i3}, because those terms would contain {@code 0 * ln(0)}.
     *
     * <p>The rates are evaluated in floating point, so a zero denominator yields NaN or an
     * infinity instead of an exception.
     *
     * @param i  count of the first item
     * @param i2 count of the second item
     * @param i3 count of both items together
     * @param i4 total count
     * @return the log-likelihood ratio
     */
    public double colLogLikelihood(int i, int i2, int i3, int i4) {
        // Cast before dividing: int / int would truncate each of these rates toward zero.
        double p = ((double) i2) / i4;
        double p1 = ((double) i3) / i;
        double p2 = ((double) (i2 - i3)) / (i4 - i);
        return (-2.0d) * (((((i3 * package$.MODULE$.log(p)) + ((i - i3) * package$.MODULE$.log(1.0d - p))) + (((i2 - i3) * package$.MODULE$.log(p)) + ((((i4 - i) - i2) + i3) * package$.MODULE$.log(1.0d - p)))) - (i == i3 ? 0.0d : (i3 * package$.MODULE$.log(p1)) + ((i - i3) * package$.MODULE$.log(1.0d - p1)))) - (i2 == i3 ? 0.0d : ((i2 - i3) * package$.MODULE$.log(p2)) + ((((i4 - i) - i2) + i3) * package$.MODULE$.log(1.0d - p2))));
    }

    /**
     * Clears the abbreviation set in the parameters and rebuilds it from the whole type
     * distribution: the keys are filtered by a generated closure, scored with
     * {@link #reclassifyAbbrevTypes(List)}, filtered again and handed to a final closure
     * (presumably the one that registers accepted abbreviations - confirm in that class).
     */
    public void findAbbrevTypes() {
        this.cc$factorie$app$nlp$segment$PunktSentenceSegmenter$Punkt$PunktTrainer$$params.clearAbbrevs();
        reclassifyAbbrevTypes(((TraversableOnce) typeFreqDist().keys().filter(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$3(this))).toList()).withFilter(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$findAbbrevTypes$1(this)).withFilter(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$findAbbrevTypes$2(this)).foreach(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$findAbbrevTypes$3(this));
    }

    /**
     * @param iterable tokens to inspect
     * @return a new set holding the {@code ty()} value of every token
     */
    public HashSet<String> uniqueTypes(Iterable<PunktSentenceSegmenter$Punkt$PunktToken> iterable) {
        HashSet<String> types = new HashSet<>();
        Iterator tokenIt = iterable.iterator();
        while (tokenIt.hasNext()) {
            types.$plus$eq(((PunktSentenceSegmenter$Punkt$PunktToken) tokenIt.next()).ty());
        }
        return types;
    }

    /**
     * Derives sentence-starter results from the sentence-starter distribution through a chain
     * of generated filter/map closures (not part of this file). Two of the closures receive the
     * total of all type counts.
     *
     * @return (type, value) pairs produced by the closure chain
     */
    public Iterable<Tuple2<String, Object>> findSentenceStarters() {
        int totalTypeCount = sum(typeFreqDist().values());
        return (Iterable) ((TraversableLike) ((TraversableLike) sentenceStarterFreqDist().withFilter(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$findSentenceStarters$1(this)).withFilter(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$findSentenceStarters$2(this)).map(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$findSentenceStarters$3(this), HashMap$.MODULE$.canBuildFrom())).withFilter(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$findSentenceStarters$4(this)).map(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$findSentenceStarters$5(this, totalTypeCount), HashMap$.MODULE$.canBuildFrom())).withFilter(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$findSentenceStarters$6(this, totalTypeCount)).map(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$findSentenceStarters$7(this), HashMap$.MODULE$.canBuildFrom());
    }

    /**
     * @param iterable tokens to inspect
     * @return how many tokens satisfy the generated predicate (presumably
     *         {@code sentenceBreak()} - confirm in the closure class)
     */
    public int getSentenceBreakCount(Iterable<PunktSentenceSegmenter$Punkt$PunktToken> iterable) {
        return iterable.count(new PunktSentenceSegmenter$Punkt$PunktTrainer$$anonfun$getSentenceBreakCount$1(this));
    }

    /*
     * Tail-recursive helper behind reclassifyAbbrevTypes(List).
     *
     * The decompiler (JADX) could not reconstruct this method: it reported removed duplicated
     * regions at B:25:0x008b, B:28:0x012a, B:31:0x012e and B:32:0x00a4 and skipped a dump of
     * 379 instructions. The real logic must be recovered from the original class file or the
     * Scala source; until then every call fails with UnsupportedOperationException.
     */
    private final scala.collection.immutable.List loop$1(scala.collection.immutable.List r8, scala.collection.immutable.List r9, int r10) {
        throw new UnsupportedOperationException("Method not decompiled: cc.factorie.app.nlp.segment.PunktSentenceSegmenter$Punkt$PunktTrainer.loop$1(scala.collection.immutable.List, scala.collection.immutable.List, int):scala.collection.immutable.List");
    }

    /** @return default for the first parameter of {@code loop$1}: the list being reclassified */
    private final List loop$default$1$1(List list) {
        return list;
    }

    /** @return default for the second parameter of {@code loop$1}: the empty list */
    private final List loop$default$2$1() {
        return Nil$.MODULE$;
    }

    /**
     * Creates a trainer with empty statistics and default settings, and immediately trains
     * (with finalization) when training text is supplied.
     *
     * <p>JADX note: the {@code super} call was moved to the top of the constructor, which may
     * differ from the initialisation order in the original bytecode.
     *
     * @param option optional training text; when defined it is passed to {@code train}
     * @param z      verbose flag, stored and also used for the initial training run
     * @param punktSentenceSegmenter$Punkt$PunktLanguageVars language settings, forwarded to the base class
     * @param punktSentenceSegmenter$Punkt$PunktParameters   parameter object that receives the training results
     */
    public PunktSentenceSegmenter$Punkt$PunktTrainer(Option<String> option, boolean z, PunktSentenceSegmenter$Punkt$PunktLanguageVars punktSentenceSegmenter$Punkt$PunktLanguageVars, PunktSentenceSegmenter$Punkt$PunktParameters punktSentenceSegmenter$Punkt$PunktParameters) {
        super(punktSentenceSegmenter$Punkt$PunktLanguageVars, punktSentenceSegmenter$Punkt$PunktParameters);
        this.trainText = option;
        this.verbose = z;
        this.cc$factorie$app$nlp$segment$PunktSentenceSegmenter$Punkt$PunktTrainer$$params = punktSentenceSegmenter$Punkt$PunktParameters;
        this.typeFreqDist = new PunktSentenceSegmenter$Punkt$UnigramFreqDist();
        this.sentenceStarterFreqDist = new PunktSentenceSegmenter$Punkt$UnigramFreqDist();
        this.collocationFreqDist = new PunktSentenceSegmenter$Punkt$BigramFreqDist();
        this.numPeriodTokens = 0;
        this.sentenceBreakCount = 0;
        this.finalized = false;
        this.ABBREV = 0.3d;
        this.IGNORE_ABBREV_PENALTY = false;
        this.ABBREV_BACKOFF = 5;
        this.COLLOCATION = 7.88d;
        this.SENT_STARTER = 30;
        this.INCLUDE_ALL_COLLOCS = false;
        this.INCLUDE_ABBREV_COLLOCS = false;
        this.MIN_COLLOC_FREQ = 1;
        // Training must come last: it reads the distributions and settings initialised above.
        if (option.isDefined()) {
            train((String) option.get(), z, true);
        }
    }
}
