/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.ml.feature;

import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.ml.feature.RegexTokenizer;
import org.apache.spark.ml.feature.TokenizerTestData;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

/**
 * JUnit suite exercising {@link RegexTokenizer} through the Java API.
 *
 * <p>Each test runs against its own local {@link JavaSparkContext}, created in
 * {@link #setUp()} and stopped in {@link #tearDown()}.
 */
public class JavaTokenizerSuite {
    private transient JavaSparkContext jsc;
    private transient SQLContext jsql;

    /** Starts a local Spark context and wraps it in a {@link SQLContext}. */
    @Before
    public void setUp() {
        this.jsc = new JavaSparkContext("local", "JavaTokenizerSuite");
        this.jsql = new SQLContext(this.jsc);
    }

    /** Stops the Spark context (if one was created) and drops both context references. */
    @After
    public void tearDown() {
        // JUnit still runs @After when @Before throws; without this guard a failed
        // setUp would surface as a NullPointerException here instead of the real cause.
        if (this.jsc != null) {
            this.jsc.stop();
        }
        this.jsc = null;
        // The SQLContext wraps the context stopped above, so do not keep it around.
        this.jsql = null;
    }

    /**
     * Tokenizes two sample sentences and checks that the produced {@code tokens}
     * column equals the {@code wantedTokens} column carried by each fixture row.
     *
     * <p>The tokenizer is configured with the pattern {@code \s}, gaps enabled,
     * lower-casing disabled and a minimum token length of 3; the fixture expects
     * the two-character word "of" to be absent from the output.
     */
    @Test
    public void regexTokenizer() {
        RegexTokenizer myRegExTokenizer = new RegexTokenizer()
            .setInputCol("rawText")
            .setOutputCol("tokens")
            .setPattern("\\s")
            .setGaps(true)
            .setToLowercase(false)
            .setMinTokenLength(3);

        JavaRDD<TokenizerTestData> rdd = this.jsc.parallelize(Arrays.asList(
            new TokenizerTestData("Test of tok.", new String[]{"Test", "tok."}),
            new TokenizerTestData("Te,st.  punct", new String[]{"Te,st.", "punct"})));
        DataFrame dataset = this.jsql.createDataFrame(rdd, TokenizerTestData.class);

        Row[] pairs = myRegExTokenizer.transform(dataset)
            .select("tokens", "wantedTokens")
            .collect();

        // Guard against a vacuous pass: the loop below checks nothing if no rows come back.
        Assert.assertEquals("unexpected number of result rows", 2, pairs.length);
        for (Row r : pairs) {
            // Column 0 is the tokenizer output (actual), column 1 the fixture's
            // wantedTokens (expected); JUnit takes the expected value first.
            Assert.assertEquals(r.get(1), r.get(0));
        }
    }
}

