package org.apache.mahout.math.hadoop.similarity;

import com.google.common.base.Preconditions;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.StringTuple;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.VectorSimilarityMeasure;

/* loaded from: input_file:org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.class */
/**
 * Driver for a map-only Hadoop job that measures the distance between every input vector and a
 * set of "seed" vectors.
 *
 * <p>Two output styles are supported, selected by the {@code outType} option:
 * <ul>
 *   <li>{@code pw} (pairwise, the default): keys are {@link StringTuple}s and values are
 *       {@link DoubleWritable}s, produced by {@code VectorDistanceMapper};</li>
 *   <li>{@code v} (vector): keys are {@link Text} and values are {@link VectorWritable}s,
 *       produced by {@code VectorDistanceInvertedMapper}.</li>
 * </ul>
 * The out type is matched case-insensitively everywhere in this class.
 */
public class VectorDistanceSimilarityJob extends AbstractJob {
    public static final String SEEDS = "seeds";
    public static final String SEEDS_PATH_KEY = "seedsPath";
    public static final String DISTANCE_MEASURE_KEY = "vectorDistSim.measure";
    public static final String OUT_TYPE_KEY = "outType";
    public static final String MAX_DISTANCE = "maxDistance";

    /** Out-type token selecting pairwise output. */
    private static final String PAIRWISE_OUT_TYPE = "pw";
    /** Out-type token selecting vector output. */
    private static final String VECTOR_OUT_TYPE = "v";

    /**
     * Command-line entry point; hands the arguments to {@link ToolRunner} with a fresh
     * {@link Configuration}.
     *
     * @param strArr raw command-line arguments
     * @throws Exception whatever the underlying job run throws
     */
    public static void main(String[] strArr) throws Exception {
        ToolRunner.run(new Configuration(), new VectorDistanceSimilarityJob(), strArr);
    }

    /**
     * Parses the command line, validates every argument and then launches the job.
     *
     * <p>All validation happens before the output directory is touched, so a bad argument never
     * costs the caller an existing output directory.
     *
     * @param strArr raw command-line arguments
     * @return {@code -1} if argument parsing yields nothing to run, {@code 0} once the job has
     *         completed successfully
     * @throws IllegalArgumentException if the seeds option is missing, the out type is neither
     *         {@code pw} nor {@code v}, or {@code maxDistance} is not a number greater than zero
     * @throws Exception if the job itself fails
     */
    public int run(String[] strArr) throws Exception {
        addInputOption();
        addOutputOption();
        addOption(DefaultOptionCreator.distanceMeasureOption().create());
        addOption(SEEDS, "s", "The set of vectors to compute distances against.  Must fit in memory on the mapper");
        addOption(MAX_DISTANCE, "mx", "set an upper-bound on distance (double) such that any pair of vectors with a distance greater than this value is ignored in the output. Ignored for non pairwise output!");
        addOption(DefaultOptionCreator.overwriteOption().create());
        addOption(OUT_TYPE_KEY, "ot", "[pw|v] -- Define the output style: pairwise, the default, (pw) or vector (v).  Pairwise is a tuple of <seed, other, distance>, vector is <other, <Vector of size the number of seeds>>.", PAIRWISE_OUT_TYPE);
        if (parseArguments(strArr) == null) {
            return -1;
        }

        Path inputPath = getInputPath();
        Path outputPath = getOutputPath();

        // The seeds option is not registered as required, so it can be absent; fail with an
        // explicit message instead of passing null to the Path constructor.
        String seedsOption = getOption(SEEDS);
        Preconditions.checkArgument(seedsOption != null,
                "the --" + SEEDS + " option is required: path to the vectors to compute distances against");
        Path seedsPath = new Path(seedsOption);

        // Fall back to squared Euclidean distance when no measure was requested.
        String measureClassName = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
        if (measureClassName == null) {
            measureClassName = SquaredEuclideanDistanceMeasure.class.getName();
        }
        DistanceMeasure distanceMeasure = ClassUtils.instantiateAs(measureClassName, DistanceMeasure.class);

        String outType = getOption(OUT_TYPE_KEY, PAIRWISE_OUT_TYPE);
        checkOutType(outType);

        // maxDistance is only honoured for pairwise output. The comparison is case-insensitive so
        // that it agrees with the mapper selection in the static run(...) method.
        Double maxDistance = null;
        if (PAIRWISE_OUT_TYPE.equalsIgnoreCase(outType)) {
            String maxDistanceOption = getOption(MAX_DISTANCE);
            if (maxDistanceOption != null) {
                maxDistance = Double.valueOf(maxDistanceOption);
                Preconditions.checkArgument(maxDistance > 0.0d, "value for maxDistance must be greater than zero");
            }
        }

        // Only now, with every argument validated, remove a pre-existing output directory.
        if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
            HadoopUtil.delete(getConf(), outputPath);
        }

        run(getConf(), inputPath, seedsPath, outputPath, distanceMeasure, outType, maxDistance);
        return 0;
    }

    /**
     * Runs the job without a maximum-distance bound.
     *
     * @param configuration   Hadoop configuration; job settings are written into it
     * @param path            input path (sequence file input)
     * @param path2           path of the seed vectors
     * @param path3           output path
     * @param distanceMeasure measure whose class name is passed to the mappers
     * @param str             output type, {@code pw} or {@code v} (case-insensitive)
     * @throws IllegalArgumentException if {@code str} is not a recognised output type
     * @throws IllegalStateException    if the job does not complete successfully
     */
    public static void run(Configuration configuration, Path path, Path path2, Path path3, DistanceMeasure distanceMeasure, String str) throws IOException, ClassNotFoundException, InterruptedException {
        run(configuration, path, path2, path3, distanceMeasure, str, null);
    }

    /**
     * Configures and runs the map-only distance job, blocking until it finishes.
     *
     * @param configuration   Hadoop configuration; the seeds path, distance measure class name and
     *                        (when given) the maximum distance are written into it
     * @param path            input path (sequence file input)
     * @param path2           path of the seed vectors
     * @param path3           output path; deleted before the job starts
     * @param distanceMeasure measure whose class name is passed to the mappers
     * @param str             output type, {@code pw} or {@code v} (case-insensitive)
     * @param d               optional upper bound on distance, or {@code null} for none
     * @throws IllegalArgumentException if {@code str} is not a recognised output type
     * @throws IllegalStateException    if the job does not complete successfully
     */
    public static void run(Configuration configuration, Path path, Path path2, Path path3, DistanceMeasure distanceMeasure, String str, Double d) throws IOException, ClassNotFoundException, InterruptedException {
        // Reject a bad out type before mutating the configuration or building the job.
        checkOutType(str);
        boolean pairwise = PAIRWISE_OUT_TYPE.equalsIgnoreCase(str);

        // These settings are written before the Job is constructed, as in the original ordering.
        // NOTE(review): presumably the Job snapshots the configuration at construction time, so
        // this order matters -- confirm before moving anything.
        if (d != null) {
            configuration.set(MAX_DISTANCE, String.valueOf(d));
        }
        configuration.set(DISTANCE_MEASURE_KEY, distanceMeasure.getClass().getName());
        configuration.set(SEEDS_PATH_KEY, path2.toString());

        Job job = new Job(configuration, "Vector Distance Similarity: seeds: " + path2 + " input: " + path);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        if (pairwise) {
            job.setMapOutputKeyClass(StringTuple.class);
            job.setOutputKeyClass(StringTuple.class);
            job.setMapOutputValueClass(DoubleWritable.class);
            job.setOutputValueClass(DoubleWritable.class);
            job.setMapperClass(VectorDistanceMapper.class);
        } else {
            job.setMapOutputKeyClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setMapOutputValueClass(VectorWritable.class);
            job.setOutputValueClass(VectorWritable.class);
            job.setMapperClass(VectorDistanceInvertedMapper.class);
        }
        // Map-only job: mapper output is the final output.
        job.setNumReduceTasks(0);
        FileInputFormat.addInputPath(job, path);
        FileOutputFormat.setOutputPath(job, path3);
        job.setJarByClass(VectorDistanceSimilarityJob.class);

        // NOTE(review): the output path is removed unconditionally here, regardless of the
        // command-line overwrite flag. Kept as-is because existing callers may rely on it.
        HadoopUtil.delete(configuration, path3);
        if (!job.waitForCompletion(true)) {
            throw new IllegalStateException("VectorDistance Similarity failed processing " + path2);
        }
    }

    /** Throws {@link IllegalArgumentException} unless the out type is "pw" or "v", ignoring case. */
    private static void checkOutType(String outType) {
        if (!PAIRWISE_OUT_TYPE.equalsIgnoreCase(outType) && !VECTOR_OUT_TYPE.equalsIgnoreCase(outType)) {
            throw new IllegalArgumentException("Invalid outType specified: " + outType);
        }
    }
}
