/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.sx.graph;

import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.Predicate;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import net.minidev.json.JSONArray;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

/**
 * Spark job that reads a text file of JSON records (one record per line) and writes
 * the records matching each requested entity kind to its own gzip-compressed output
 * directory.
 *
 * <p>Supported entity names are {@code dataset}, {@code unknown}, {@code relation} and
 * {@code publication}. Records are classified by the static predicates of this class,
 * which inspect the {@code $.id}, {@code $.source} and {@code $.target} JSON paths.
 */
public class SparkExtractEntitiesJob {
    static final String IDJSONPATH = "$.id";
    static final String SOURCEJSONPATH = "$.source";
    static final String TARGETJSONPATH = "$.target";

    /** Classpath location of the JSON description of the accepted command-line arguments. */
    private static final String PARAMETERS_RESOURCE =
        "/eu/dnetlib/dhp/sx/graph/argumentparser/input_extract_entities_parameters.json";

    /**
     * Job entry point.
     *
     * <p>Arguments read from the parser: {@code master}, {@code sourcePath},
     * {@code targetPath}, {@code targetDir} and {@code entities} (a comma-separated list
     * of entity names, matched case-insensitively after trimming). For each requested
     * entity the matching lines are saved under
     * {@code <targetPath>/<entity>/<targetDir>}.
     *
     * @param args command-line arguments, parsed by {@link ArgumentApplicationParser}
     * @throws Exception if the parameter specification cannot be loaded, the arguments
     *     cannot be parsed, or the Spark job fails
     */
    public static void main(String[] args) throws Exception {
        ArgumentApplicationParser parser = new ArgumentApplicationParser(readParameterSpec());
        parser.parseArgument(args);

        SparkSession spark = SparkSession.builder()
            .appName(SparkExtractEntitiesJob.class.getSimpleName())
            .master(parser.get("master"))
            .getOrCreate();
        JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

        String inputPath = parser.get("sourcePath");
        String targetPath = parser.get("targetPath");
        String tdir = parser.get("targetDir");
        JavaRDD<String> inputRDD = sc.textFile(inputPath);
        List<String> entities = Arrays.stream(parser.get("entities").split(","))
            .map(String::trim)
            .collect(Collectors.toList());

        if (isRequested(entities, "dataset")) {
            inputRDD.filter(SparkExtractEntitiesJob::isDataset)
                .saveAsTextFile(outputPath(targetPath, "dataset", tdir), GzipCodec.class);
        }
        if (isRequested(entities, "unknown")) {
            inputRDD.filter(SparkExtractEntitiesJob::isUnknown)
                .saveAsTextFile(outputPath(targetPath, "unknown", tdir), GzipCodec.class);
        }
        if (isRequested(entities, "relation")) {
            inputRDD.filter(SparkExtractEntitiesJob::isRelation)
                .saveAsTextFile(outputPath(targetPath, "relation", tdir), GzipCodec.class);
        }
        if (isRequested(entities, "publication")) {
            inputRDD.filter(SparkExtractEntitiesJob::isPublication)
                .saveAsTextFile(outputPath(targetPath, "publication", tdir), GzipCodec.class);
        }
    }

    /**
     * Tells whether the record's {@code $.id} is non-blank and starts with {@code "60|"}.
     *
     * @param json a JSON record
     * @return {@code true} if the id carries the {@code "60|"} prefix
     */
    public static boolean isDataset(String json) {
        return idStartsWith(json, "60|");
    }

    /**
     * Tells whether the record's {@code $.id} is non-blank and starts with {@code "50|"}.
     *
     * @param json a JSON record
     * @return {@code true} if the id carries the {@code "50|"} prefix
     */
    public static boolean isPublication(String json) {
        return idStartsWith(json, "50|");
    }

    /**
     * Tells whether the record's {@code $.id} is non-blank and starts with {@code "70|"}.
     *
     * @param json a JSON record
     * @return {@code true} if the id carries the {@code "70|"} prefix
     */
    public static boolean isUnknown(String json) {
        return idStartsWith(json, "70|");
    }

    /**
     * Tells whether the record has both a non-blank {@code $.source} and a non-blank
     * {@code $.target}.
     *
     * @param json a JSON record
     * @return {@code true} if both values are present and non-blank
     */
    public static boolean isRelation(String json) {
        String source = getJPathString(SOURCEJSONPATH, json);
        String target = getJPathString(TARGETJSONPATH, json);
        return StringUtils.isNotBlank(source) && StringUtils.isNotBlank(target);
    }

    /**
     * Evaluates a JsonPath expression against a JSON document and returns the result as
     * a string.
     *
     * <p>If the path yields a string, that string is returned. If it yields a non-empty
     * {@link JSONArray} whose first element is a string, that element is returned. In
     * every other case, including any failure while evaluating the path, the empty
     * string is returned. This method never returns {@code null}.
     *
     * @param jsonPath the JsonPath expression to evaluate
     * @param json the JSON document
     * @return the extracted string, or {@code ""} when no string value could be extracted
     */
    public static String getJPathString(String jsonPath, String json) {
        try {
            Object value = JsonPath.read(json, jsonPath);
            if (value instanceof JSONArray && !((JSONArray) value).isEmpty()) {
                // Unwrap single-level array results; the type is checked below rather
                // than relying on a ClassCastException for non-string elements.
                value = ((JSONArray) value).get(0);
            }
            return value instanceof String ? (String) value : "";
        } catch (Exception ignored) {
            // Deliberately best-effort: this runs inside record filters, where a
            // malformed line or a missing path must mean "no match", not a failed job.
            return "";
        }
    }

    /** Reads the argument specification from the classpath as UTF-8 text, closing the stream. */
    private static String readParameterSpec() throws IOException {
        try (InputStream in = SparkExtractEntitiesJob.class.getResourceAsStream(PARAMETERS_RESOURCE)) {
            if (in == null) {
                throw new IllegalStateException("Missing classpath resource: " + PARAMETERS_RESOURCE);
            }
            return IOUtils.toString(in, "UTF-8");
        }
    }

    /** Returns whether {@code entity} appears in {@code entities}, ignoring case. */
    private static boolean isRequested(List<String> entities, String entity) {
        return entities.stream().anyMatch(entity::equalsIgnoreCase);
    }

    /** Builds the output location {@code <targetPath>/<entity>/<tdir>}. */
    private static String outputPath(String targetPath, String entity, String tdir) {
        return targetPath + "/" + entity + "/" + tdir;
    }

    /** Returns whether the record's {@code $.id} is non-blank and starts with {@code prefix}. */
    private static boolean idStartsWith(String json, String prefix) {
        String id = getJPathString(IDJSONPATH, json);
        return StringUtils.isNotBlank(id) && id.startsWith(prefix);
    }
}

