/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.sx.graph;

import eu.dnetlib.dhp.schema.scholexplorer.DLIRelation;
import eu.dnetlib.dhp.utils.DHPUtils;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;

/**
 * Spark job step that derives "similar" relations between a record identifier and its
 * original object identifier, for both dataset and publication records.
 *
 * <p>Each input line is handed to {@link DHPUtils#getJPathString} twice, once with
 * {@link #IDJSONPATH} and once with {@link #OBJIDPATH}; the two extracted strings become
 * the source and the target of a {@link DLIRelation}.
 */
public class SparkSXGeneratePidSimlarity {
    /** JSON path used to extract the record identifier from an input line. */
    static final String IDJSONPATH = "$.id";
    /** JSON path used to extract the original object identifier from an input line. */
    static final String OBJIDPATH = "$.originalObjIdentifier";

    /**
     * Builds the relations with type {@code "similar"} and writes them to
     * {@code targetPath + "/pid_simRel"}, overwriting whatever is already stored there.
     *
     * <p>Input is read from {@code inputPath + "/dataset/*"} and
     * {@code inputPath + "/publication/*"}. The two sets of pairs are unioned, converted to
     * {@link DLIRelation} beans and de-duplicated once more at the Dataset level before writing.
     * No explicit output format is set, so the writer's default data source is used.
     *
     * @param spark      session used to create and write the resulting Dataset
     * @param sc         context used to read the input text files
     * @param inputPath  base directory containing the {@code dataset} and {@code publication} folders
     * @param targetPath base directory under which {@code pid_simRel} is written
     */
    public static void generateDataFrame(SparkSession spark, JavaSparkContext sc, String inputPath, String targetPath) {
        JavaPairRDD<String, String> datasetSimRel = extractPidSimRel(sc, inputPath + "/dataset/*");
        JavaPairRDD<String, String> publicationSimRel = extractPidSimRel(sc, inputPath + "/publication/*");

        JavaRDD<DLIRelation> simRel = datasetSimRel.union(publicationSimRel).map(pair -> {
            DLIRelation relation = new DLIRelation();
            relation.setSource(pair._1());
            relation.setTarget(pair._2());
            relation.setRelType("similar");
            return relation;
        });

        spark.createDataset(simRel.rdd(), Encoders.bean(DLIRelation.class))
            .distinct()
            .write()
            .mode(SaveMode.Overwrite)
            .save(targetPath + "/pid_simRel");
    }

    /**
     * Reads the text files matching {@code inputGlob} and returns the distinct
     * (id, original object identifier) pairs worth keeping.
     *
     * <p>A pair is dropped when the part of the id following the first {@code "|"} equals,
     * ignoring case, the part of the original object identifier following the first
     * {@code "::"}; such a pair carries no additional identifier information.
     *
     * @param sc        context used to read the text files
     * @param inputGlob path (may contain wildcards) of the files to read
     * @return distinct pairs whose two suffixes differ
     */
    private static JavaPairRDD<String, String> extractPidSimRel(JavaSparkContext sc, String inputGlob) {
        return sc.textFile(inputGlob)
            .mapToPair(json -> new Tuple2<>(
                DHPUtils.getJPathString(IDJSONPATH, json),
                DHPUtils.getJPathString(OBJIDPATH, json)))
            .filter(pair -> !StringUtils.substringAfter(pair._1(), "|")
                .equalsIgnoreCase(StringUtils.substringAfter(pair._2(), "::")))
            .distinct();
    }
}

