/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.actionmanager.webcrawl;

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.SparkSessionSupport;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
import java.io.InputStream;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

/**
 * Spark job that turns web-crawl JSON records into an action set of
 * result/organization affiliation relations.
 *
 * <p>The job reads the crawl dump, keeps only the institutions whose
 * {@code country_code} is {@code IE}, removes the works whose {@code id} appears
 * in a black list, and for every remaining (DOI, ROR) pair writes two
 * {@link Relation}s (one per direction) wrapped in {@link AtomicAction}s to a
 * BZip2-compressed Hadoop sequence file.
 */
public class CreateActionSetFromWebEntries
implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(CreateActionSetFromWebEntries.class);
    private static final String DOI_PREFIX = "50|doi_________::";
    private static final String ROR_PREFIX = "20|ror_________::";
    private static final String PMID_PREFIX = "50|pmid________::";
    private static final String PMCID_PREFIX = "50|pmc_________::";
    public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /** Classpath location of the JSON file describing the accepted command-line arguments. */
    private static final String PARAMETERS_RESOURCE = "/eu/dnetlib/dhp/actionmanager/webcrawl/as_parameters.json";

    /** Key/name of the {@code collectedfrom} entry attached to every produced relation. */
    private static final String WEB_CRAWL_ID = "10|openaire____::fb98a192f6a055ba495ef414c330834b";
    private static final String WEB_CRAWL_NAME = "Web Crawl";

    // NOTE(review): "crasswalk" looks like a typo for "crosswalk"; it is kept byte-for-byte
    // because the value is emitted in the output and may be matched downstream - confirm before changing.
    private static final String PROVENANCE_CLASS_ID = "sysimport:crasswalk:webcrawl";
    private static final String PROVENANCE_CLASS_NAME = "Imported from Webcrawl";
    private static final String PROVENANCE_SCHEME = "dnet:provenanceActions";
    private static final String TRUST = "0.9";

    private static final String REL_TYPE = "resultOrganization";
    private static final String SUB_REL_TYPE = "affiliation";
    private static final String IS_AUTHOR_INSTITUTION_OF = "isAuthorInstitutionOf";
    private static final String HAS_AUTHOR_INSTITUTION = "hasAuthorInstitution";

    /**
     * Entry point: parses the arguments and runs {@link #createActionSet} inside a Spark session.
     *
     * <p>Arguments read: {@code isSparkSessionManaged} (optional, defaults to {@code true}),
     * {@code sourcePath}, {@code outputPath} and {@code blackListPath}.
     *
     * @param args command-line arguments, parsed according to the bundled parameter description
     * @throws IllegalStateException if the parameter description resource is not on the classpath
     * @throws Exception if argument parsing or the Spark job fails
     */
    public static void main(String[] args) throws Exception {
        final String jsonConfiguration;
        // Close the resource stream deterministically and decode it with an explicit charset.
        try (InputStream parametersStream = CreateActionSetFromWebEntries.class.getResourceAsStream(PARAMETERS_RESOURCE)) {
            if (parametersStream == null) {
                throw new IllegalStateException("Missing classpath resource: " + PARAMETERS_RESOURCE);
            }
            jsonConfiguration = IOUtils.toString(parametersStream, StandardCharsets.UTF_8);
        }

        ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional.ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        String blackListInputPath = parser.get("blackListPath");
        log.info("blackListInputPath: {}", blackListInputPath);

        SparkConf conf = new SparkConf();
        SparkSessionSupport.runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> createActionSet(spark, inputPath, outputPath, blackListInputPath));
    }

    /**
     * Builds the action set and writes it to {@code outputPath}.
     *
     * @param spark the active Spark session
     * @param inputPath location of the web-crawl JSON records
     * @param outputPath destination of the sequence file; keys are the payload class name,
     *        values the JSON-serialized {@link AtomicAction}
     * @param blackListInputPath location of the JSON black list (records with an {@code OpenAlexId} field)
     */
    public static void createActionSet(SparkSession spark, String inputPath, String outputPath, String blackListInputPath) {
        Dataset<Row> dataset = readWebCrawl(spark, inputPath)
            .filter("country_code=='IE'")
            .drop("publication_year");
        Dataset<Row> blackList = readBlackList(spark, blackListInputPath);

        dataset
            // Left join + "OpenAlexId is null" keeps only the works that are NOT black-listed.
            .join(blackList, dataset.col("id").equalTo(blackList.col("OpenAlexId")), "left")
            .filter((FilterFunction<Row>) r -> r.getAs("OpenAlexId") == null)
            .drop("OpenAlexId")
            .flatMap((FlatMapFunction<Row, Relation>) row -> {
                String rorValue = row.getAs("ror");
                String doi = row.getAs("doi");
                String ror = ROR_PREFIX + IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", rorValue));
                return createAffiliationRelationPairDOI(doi, ror).iterator();
            }, Encoders.bean(Relation.class))
            .toJavaRDD()
            // Rows decoded through Encoders.bean(Relation.class) are Relation instances.
            .map((Function<Relation, AtomicAction<Relation>>) p -> new AtomicAction<>(Relation.class, p))
            .mapToPair((PairFunction<AtomicAction<Relation>, Text, Text>) aa -> new Tuple2<>(
                new Text(aa.getClazz().getCanonicalName()),
                new Text(OBJECT_MAPPER.writeValueAsString(aa))))
            .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);
    }

    /**
     * Reads the crawl dump and flattens it to one distinct row per (work, institution).
     *
     * @return rows with columns {@code id}, {@code doi}, {@code ror}, {@code country_code}
     *         and {@code publication_year}
     */
    private static Dataset<Row> readWebCrawl(SparkSession spark, String inputPath) {
        StructType webInfo = StructType.fromDDL("`id` STRING , `doi` STRING, `ids` STRUCT<`pmid` :STRING, `pmcid`: STRING >, `publication_year` STRING, `authorships` ARRAY<STRUCT <`institutions`: ARRAY <STRUCT <`ror`: STRING, `country_code` :STRING>>>>");
        return spark.read()
            .schema(webInfo)
            .json(inputPath)
            // First explode: one row per authorship.
            .withColumn("authors", functions.explode(functions.col("authorships")))
            .selectExpr("id", "doi", "ids", "publication_year", "authors.institutions as institutions")
            // Second explode: one row per institution of each authorship.
            .withColumn("institution", functions.explode(functions.col("institutions")))
            .selectExpr("id", "doi", "institution.ror as ror", "institution.country_code as country_code", "publication_year")
            .distinct();
    }

    /** Reads the JSON black list and keeps only its {@code OpenAlexId} column. */
    private static Dataset<Row> readBlackList(SparkSession spark, String inputPath) {
        return spark.read().json(inputPath).select("OpenAlexId");
    }

    /**
     * Creates the relation pair between the result identified by a PMC id and an organization.
     * Not referenced by {@link #createActionSet} in this class.
     *
     * @param pmcid the raw PMC identifier, or {@code null}
     * @param ror the organization identifier
     * @return the two relations, or an empty list when {@code pmcid} is {@code null}
     */
    private static List<Relation> createAffiliationRelationPairPMCID(String pmcid, String ror) {
        if (pmcid == null) {
            return new ArrayList<>();
        }
        String normalized = PidCleaner.normalizePidValue(PidType.pmc.toString(), removeResolver("PMC", pmcid));
        return createAffiliationRelationPair(PMCID_PREFIX + IdentifierFactory.md5(normalized), ror);
    }

    /**
     * Creates the relation pair between the result identified by a PMID and an organization.
     * Not referenced by {@link #createActionSet} in this class.
     *
     * @param pmid the raw PMID, or {@code null}
     * @param ror the organization identifier
     * @return the two relations, or an empty list when {@code pmid} is {@code null}
     */
    private static List<Relation> createAffiliationRelationPairPMID(String pmid, String ror) {
        if (pmid == null) {
            return new ArrayList<>();
        }
        String normalized = PidCleaner.normalizePidValue(PidType.pmid.toString(), removeResolver("PMID", pmid));
        return createAffiliationRelationPair(PMID_PREFIX + IdentifierFactory.md5(normalized), ror);
    }

    /**
     * Strips a fixed-length leading part from {@code pid}, depending on {@code pidType}:
     * 33 characters for {@code "PMID"}, 43 for {@code "PMC"} (then re-prefixed with
     * {@code "PMC"}) and 16 for {@code "DOI"}.
     *
     * <p>NOTE(review): the offsets are presumably the lengths of the resolver URL prefixes
     * (16 matches {@code https://doi.org/}); the PMID and PMC offsets look one character longer
     * than the usual resolver prefixes - confirm against real data before enabling those paths.
     *
     * @param pidType one of {@code "PMID"}, {@code "PMC"}, {@code "DOI"}
     * @param pid the raw identifier; must be at least as long as the stripped prefix
     * @return the identifier without the leading part
     * @throws IllegalArgumentException if {@code pidType} is not supported
     * @throws StringIndexOutOfBoundsException if {@code pid} is shorter than the stripped prefix
     */
    private static String removeResolver(String pidType, String pid) {
        switch (pidType) {
            case "PMID":
                return pid.substring(33);
            case "PMC":
                return "PMC" + pid.substring(43);
            case "DOI":
                return pid.substring(16);
            default:
                throw new IllegalArgumentException("Unsupported pid type: " + pidType);
        }
    }

    /**
     * Creates the relation pair between the result identified by a DOI and an organization.
     *
     * @param doi the raw DOI as found in the crawl record, or {@code null}
     * @param ror the organization identifier
     * @return the two relations, or an empty list when {@code doi} is {@code null}
     */
    private static List<Relation> createAffiliationRelationPairDOI(String doi, String ror) {
        if (doi == null) {
            return new ArrayList<>();
        }
        String normalized = PidCleaner.normalizePidValue(PidType.doi.toString(), removeResolver("DOI", doi));
        return createAffiliationRelationPair(DOI_PREFIX + IdentifierFactory.md5(normalized), ror);
    }

    /**
     * Builds both directions of the affiliation relation between a result and an organization.
     *
     * @param resultId identifier of the result
     * @param orgId identifier of the organization
     * @return a mutable list holding the organization-to-result relation followed by the inverse one
     */
    private static List<Relation> createAffiliationRelationPair(String resultId, String orgId) {
        List<Relation> newRelations = new ArrayList<>();
        newRelations.add(newAffiliationRelation(orgId, resultId, IS_AUTHOR_INSTITUTION_OF));
        newRelations.add(newAffiliationRelation(resultId, orgId, HAS_AUTHOR_INSTITUTION));
        return newRelations;
    }

    /**
     * Builds one affiliation relation. Each call creates its own {@code collectedfrom} list and
     * {@link DataInfo}, so the two relations of a pair never share mutable state.
     */
    private static Relation newAffiliationRelation(String source, String target, String relClass) {
        return OafMapperUtils.getRelation(
            source,
            target,
            REL_TYPE,
            SUB_REL_TYPE,
            relClass,
            Arrays.asList(OafMapperUtils.keyValue(WEB_CRAWL_ID, WEB_CRAWL_NAME)),
            OafMapperUtils.dataInfo(
                false,
                null,
                false,
                false,
                OafMapperUtils.qualifier(PROVENANCE_CLASS_ID, PROVENANCE_CLASS_NAME, PROVENANCE_SCHEME, PROVENANCE_SCHEME),
                TRUST),
            null);
    }
}

