package jsat.text;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import jsat.linear.SparseVector;
import jsat.linear.Vec;
import jsat.text.tokenizer.Tokenizer;
import jsat.text.wordweighting.WordWeighting;

/* loaded from: input_file:jsat/text/HashedTextVectorCreator.class */
/**
 * Converts text into fixed-length sparse vectors using the hashing trick:
 * each token produced by the tokenizer is mapped to a vector index derived
 * from the token's {@link String#hashCode()}, and the value at that index is
 * incremented once per occurrence. Distinct tokens may collide on the same
 * index. The configured {@link WordWeighting} is applied to the resulting
 * count vector before it is returned.
 */
public class HashedTextVectorCreator implements TextVectorCreator {
    private static final long serialVersionUID = 1081388790985568192L;

    /** Length of every vector produced; always greater than 1. */
    private final int dimensionSize;
    /** Splits the input text into the tokens that get hashed. */
    private final Tokenizer tokenizer;
    /** Weighting applied to the raw hashed counts. */
    private final WordWeighting weighting;

    /**
     * Creates a new hashed text vector creator.
     *
     * @param i the length of the vectors to produce, must be greater than 1
     * @param tokenizer the tokenizer used to break text into tokens
     * @param wordWeighting the weighting applied to each vector after the
     *        token counts have been accumulated
     * @throws ArithmeticException if {@code i} is less than or equal to 1
     */
    public HashedTextVectorCreator(int i, Tokenizer tokenizer, WordWeighting wordWeighting) {
        if (i <= 1) {
            // The exception type is kept for backward compatibility; the
            // message now states the constraint that is actually enforced.
            throw new ArithmeticException("Vector dimension must be greater than 1, but was " + i);
        }
        this.dimensionSize = i;
        this.tokenizer = tokenizer;
        this.weighting = wordWeighting;
    }

    /**
     * Converts the given text into a vector, using freshly allocated working
     * storage for the tokenizer.
     *
     * @param str the text to convert
     * @return the weighted, hashed vector for the text
     */
    @Override
    public Vec newText(String str) {
        return newText(str, new StringBuilder(), new ArrayList<String>());
    }

    /**
     * Converts the given text into a vector, handing the supplied working
     * storage to the tokenizer.
     *
     * @param str the text to convert
     * @param sb the builder passed through to the tokenizer as work space
     * @param list the list passed to the tokenizer; every entry it holds after
     *        tokenization is hashed into the vector. This method does not
     *        clear the list itself.
     * @return the weighted, hashed vector for the text
     */
    @Override
    public Vec newText(String str, StringBuilder sb, List<String> list) {
        tokenizer.tokenize(str, sb, list);
        SparseVector counts = new SparseVector(dimensionSize);
        for (String token : list) {
            counts.increment(indexFor(token), 1.0d);
        }
        weighting.applyTo(counts);
        return counts;
    }

    /**
     * Maps a token to an index in {@code [0, dimensionSize)}.
     */
    private int indexFor(String token) {
        // Take the remainder BEFORE the absolute value. Math.abs(Integer.MIN_VALUE)
        // is still negative, so abs-then-mod could yield a negative index for a
        // token whose hash is Integer.MIN_VALUE. For every other hash value this
        // produces exactly the same index as abs-then-mod, so existing feature
        // positions are unchanged.
        return Math.abs(token.hashCode() % dimensionSize);
    }
}
