/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.swh;

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.SparkSessionSupport;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.swh.models.LastVisitData;
import java.io.InputStream;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

/**
 * Spark job that builds an action set attaching Software Heritage identifiers to graph software records.
 *
 * <p>The job reads a sequence file of serialized {@link LastVisitData} records and a text file of
 * JSON-serialized {@link Software} records, joins them on the repository URL, and writes one
 * {@link AtomicAction} per match to a gzip-compressed Hadoop sequence file.
 */
public class PrepareSWHActionsets {
    private static final Logger log = LoggerFactory.getLogger(PrepareSWHActionsets.class);
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /** Classpath location of the JSON document describing the accepted command-line arguments. */
    private static final String ARGUMENTS_RESOURCE = "/eu/dnetlib/dhp/swh/input_prepare_swh_actionsets.json";

    /** Column holding the graph software identifier. */
    private static final String ID_COLUMN = "id";

    /** Column both inputs are joined on. */
    private static final String REPO_URL_COLUMN = "repoUrl";

    /**
     * Column holding the snapshot value read from the last-visit data. The same spelling is used in the
     * schema, the projection and the row lookup so the job does not rely on case-insensitive column
     * resolution ({@code Row.fieldIndex} matches names exactly).
     */
    private static final String SWH_ID_COLUMN = "swhId";

    /**
     * Job entry point.
     *
     * <p>Reads {@code isSparkSessionManaged} (defaults to {@code true} when absent),
     * {@code lastVisitsPath}, {@code softwareInputPath} and {@code actionsetsPath} from the parsed
     * arguments, then builds and stores the action set.
     *
     * @param args command-line arguments, parsed according to the bundled JSON argument description
     * @param <I> unused; retained so the public signature stays unchanged
     * @throws IllegalStateException if the bundled argument description is not on the classpath
     * @throws Exception if argument parsing or the Spark job fails
     */
    public static <I extends Result> void main(String[] args) throws Exception {
        final String jsonConfiguration;
        // Close the resource stream deterministically and fail with a clear message when it is missing.
        try (InputStream configStream = PrepareSWHActionsets.class.getResourceAsStream(ARGUMENTS_RESOURCE)) {
            if (configStream == null) {
                throw new IllegalStateException("Missing classpath resource: " + ARGUMENTS_RESOURCE);
            }
            // Explicit charset: the single-argument overload falls back to the platform default.
            jsonConfiguration = IOUtils.toString(configStream, "UTF-8");
        }

        ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        String inputPath = parser.get("lastVisitsPath");
        log.info("inputPath: {}", inputPath);

        String softwareInputPath = parser.get("softwareInputPath");
        log.info("softwareInputPath: {}", softwareInputPath);

        String outputPath = parser.get("actionsetsPath");
        log.info("outputPath: {}", outputPath);

        SparkConf conf = new SparkConf();
        SparkSessionSupport.runWithSparkSession(conf, isSparkSessionManaged, spark -> {
            JavaPairRDD<Text, Text> softwareRDD = prepareActionsets(spark, inputPath, softwareInputPath);
            softwareRDD.saveAsHadoopFile(
                outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
        });
    }

    /**
     * Loads the last-visit records and exposes them as a two-column data frame.
     *
     * <p>Only the value of each sequence-file entry is used; it is deserialized as
     * {@link LastVisitData}. Records lacking an origin or a snapshot are dropped.
     *
     * @param spark active Spark session
     * @param inputPath path of the sequence file holding the serialized last-visit records
     * @return data frame with nullable string columns {@code repoUrl} (origin) and {@code swhId} (snapshot)
     */
    private static Dataset<Row> loadSWHData(SparkSession spark, String inputPath) {
        JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        JavaRDD<Row> swhRDD = sc
            .sequenceFile(inputPath, Text.class, Text.class)
            .map(entry -> entry._2().toString())
            .map(json -> OBJECT_MAPPER.readValue(json, LastVisitData.class))
            .filter(visit -> visit.getOrigin() != null && visit.getSnapshot() != null)
            .map(visit -> RowFactory.create(visit.getOrigin(), visit.getSnapshot()));

        List<StructField> fields = Arrays.asList(
            DataTypes.createStructField(REPO_URL_COLUMN, DataTypes.StringType, true),
            DataTypes.createStructField(SWH_ID_COLUMN, DataTypes.StringType, true));
        StructType schema = DataTypes.createStructType(fields);

        return spark.createDataFrame(swhRDD, schema);
    }

    /**
     * Loads the graph software records that declare a code repository URL.
     *
     * @param spark active Spark session
     * @param softwareInputPath path of the text file with one JSON-serialized {@link Software} per line
     * @return data frame with columns {@code id} and {@code repoUrl} (taken from
     *         {@code codeRepositoryUrl.value})
     */
    private static Dataset<Row> loadGraphSoftwareData(SparkSession spark, String softwareInputPath) {
        return spark
            .read()
            .textFile(softwareInputPath)
            .map(
                (MapFunction<String, Software>) json -> OBJECT_MAPPER.readValue(json, Software.class),
                Encoders.bean(Software.class))
            .filter((FilterFunction<Software>) software -> software.getCodeRepositoryUrl() != null)
            .select(functions.col(ID_COLUMN), functions.col("codeRepositoryUrl.value").as(REPO_URL_COLUMN));
    }

    /**
     * Joins both inputs on the repository URL and serializes one {@link AtomicAction} per match.
     *
     * @param spark active Spark session
     * @param inputPath path of the sequence file holding the serialized last-visit records
     * @param softwareInputPath path of the text file holding the JSON-serialized software records
     * @return pairs whose key is the canonical name of the action class and whose value is the
     *         JSON-serialized action
     */
    private static JavaPairRDD<Text, Text> prepareActionsets(
            SparkSession spark, String inputPath, String softwareInputPath) {
        Dataset<Row> swhDF = loadSWHData(spark, inputPath);
        Dataset<Row> graphSoftwareDF = loadGraphSoftwareData(spark, softwareInputPath);

        Dataset<Row> joinedDF = graphSoftwareDF
            .join(swhDF, REPO_URL_COLUMN)
            .select(ID_COLUMN, SWH_ID_COLUMN);

        return joinedDF
            .map((MapFunction<Row, Software>) PrepareSWHActionsets::toSoftware, Encoders.bean(Software.class))
            .toJavaRDD()
            .map(software -> new AtomicAction<>(Software.class, software))
            .mapToPair(
                (PairFunction<AtomicAction<Software>, Text, Text>) action -> new Tuple2<>(
                    new Text(action.getClazz().getCanonicalName()),
                    new Text(OBJECT_MAPPER.writeValueAsString(action))));
    }

    /**
     * Builds the software record carried by one action.
     *
     * <p>The record holds only the graph identifier, a single {@code swhid} pid formatted as
     * {@code swh:1:snp:<value>}, and a collected-from entry naming Software Heritage.
     *
     * @param row joined row exposing the {@code id} and {@code swhId} columns
     * @return the populated software record
     */
    private static Software toSoftware(Row row) {
        Software software = new Software();
        software.setId(row.getString(row.fieldIndex(ID_COLUMN)));

        Qualifier pidType = OafMapperUtils
            .qualifier("swhid", "Software Hash Identifier", "dnet:pid_types", "dnet:pid_types");
        DataInfo dataInfo = OafMapperUtils
            .dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "");
        String snapshotPid = String.format("swh:1:snp:%s", row.getString(row.fieldIndex(SWH_ID_COLUMN)));
        software.setPid(Arrays.asList(OafMapperUtils.structuredProperty(snapshotPid, pidType, dataInfo)));

        KeyValue collectedFrom = new KeyValue();
        collectedFrom.setKey("10|openaire____::dbfd07503aaa1ed31beed7dec942f3f4");
        collectedFrom.setValue("Software Heritage");
        software.setCollectedfrom(Arrays.asList(collectedFrom));

        return software;
    }
}

