/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.actionmanager.personentity;

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.orcid.model.Author;
import eu.dnetlib.dhp.collection.orcid.model.Employment;
import eu.dnetlib.dhp.collection.orcid.model.Pid;
import eu.dnetlib.dhp.collection.orcid.model.Work;
import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.SparkSessionSupport;
import eu.dnetlib.dhp.common.person.CoAuthorshipIterator;
import eu.dnetlib.dhp.common.person.Coauthors;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Person;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
import eu.dnetlib.dhp.utils.DHPUtils;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

public class ExtractPerson
implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(ExtractPerson.class);

    /** Query run against the database to fetch the project/person rows whose person identifier is an ORCID. */
    private static final String QUERY = "SELECT * FROM project_person WHERE pid_type = 'ORCID'";

    /** Shared JSON (de)serializer used for the intermediate files and the final action set. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private static final String OPENAIRE_PREFIX = "openaire____";
    private static final String SEPARATOR = "::";

    /** Key of the "collected from" entry attached to everything derived from the ORCID dump. */
    private static final String orcidKey = "10|" + OPENAIRE_PREFIX + SEPARATOR + DHPUtils.md5("orcid");

    // Identifier prefixes of the relation targets, one per supported persistent identifier type.
    private static final String DOI_PREFIX = "50|doi_________::";
    private static final String PMID_PREFIX = "50|pmid________::";
    private static final String ARXIV_PREFIX = "50|arXiv_______::";
    private static final String PMCID_PREFIX = "50|pmcid_______::";
    private static final String ROR_PREFIX = "20|ror_________::";

    /** Prefix of person identifiers; SEPARATOR and the md5 of the ORCID are appended by the callers. */
    private static final String PERSON_PREFIX = ModelSupport.getIdPrefix(Person.class) + "|orcid_______";

    /** Prefix of project identifiers. */
    private static final String PROJECT_ID_PREFIX = ModelSupport.getIdPrefix(Project.class) + "|";

    // Provenance action of the information imported from ORCID.
    public static final String ORCID_AUTHORS_CLASSID = "sysimport:crosswalk:orcid";
    public static final String ORCID_AUTHORS_CLASSNAME = "Imported from ORCID";

    // Provenance action of the information imported from the funder database.
    public static final String FUNDER_AUTHORS_CLASSID = "sysimport:crosswalk:funderdatabase";
    public static final String FUNDER_AUTHORS_CLASSNAME = "Imported from Funder Database";

    // Datasource used as "collected from" for the project participation relations.
    public static final String OPENAIRE_DATASOURCE_ID = "10|infrastruct_::f66f1bd369679b5b077dcdf006089556";
    public static final String OPENAIRE_DATASOURCE_NAME = "OpenAIRE";

    /** "Collected from" list built from the OpenAIRE datasource id and name. */
    public static List<KeyValue> collectedfromOpenAIRE = OafMapperUtils
        .listKeyValues(new String[] {OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME});

    /** DataInfo attached to ORCID-derived persons and relations (trailing "0.91" is presumably the trust value). */
    public static final DataInfo ORCIDDATAINFO = OafMapperUtils.dataInfo(
        false,
        null,
        false,
        false,
        OafMapperUtils.qualifier(
            ORCID_AUTHORS_CLASSID, ORCID_AUTHORS_CLASSNAME, "dnet:provenanceActions", "dnet:provenanceActions"),
        "0.91");

    /** DataInfo attached to the relations derived from the funder database (same trailing "0.91" value). */
    public static final DataInfo FUNDERDATAINFO = OafMapperUtils.dataInfo(
        false,
        null,
        false,
        false,
        OafMapperUtils.qualifier(
            FUNDER_AUTHORS_CLASSID, FUNDER_AUTHORS_CLASSNAME, "dnet:provenanceActions", "dnet:provenanceActions"),
        "0.91");

    /**
     * Job entry point: parses the command line, then runs the three extraction/assembly steps
     * inside a Spark session.
     *
     * <p>Steps, in order: remove any previous content of {@code outputPath}; extract persons and
     * ORCID-based relations into {@code workingDir}; dump the project participation relations read
     * from the database into {@code workingDir + "/project"}; assemble everything into the action
     * set written to {@code outputPath}.
     *
     * @param args command line arguments, described by the bundled {@code as_parameters.json}
     * @throws IOException if the parameter descriptor cannot be read
     * @throws ParseException if the command line cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        final String jsonConfiguration;
        // Read the descriptor with an explicit charset (the single-argument IOUtils.toString relies on
        // the platform default) and close the resource stream once it has been consumed.
        try (InputStream descriptor = Objects.requireNonNull(
            ExtractPerson.class.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/personentity/as_parameters.json"),
            "parameter descriptor as_parameters.json not found on the classpath")) {
            jsonConfiguration = IOUtils.toString(descriptor, StandardCharsets.UTF_8);
        }

        ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        // Defaults to a managed session when the flag is not provided.
        Boolean isSparkSessionManaged = Optional.ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        String inputPath = parser.get("inputPath");
        log.info("inputPath {}", inputPath);
        String outputPath = parser.get("outputPath");
        log.info("outputPath {}", outputPath);
        String workingDir = parser.get("workingDir");
        log.info("workingDir {}", workingDir);

        // Database coordinates are deliberately not logged.
        String dbUrl = parser.get("postgresUrl");
        String dbUser = parser.get("postgresUser");
        String dbPassword = parser.get("postgresPassword");
        String hdfsNameNode = parser.get("hdfsNameNode");

        SparkConf conf = new SparkConf();
        SparkSessionSupport.runWithSparkSession(conf, isSparkSessionManaged, spark -> {
            HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration());
            ExtractPerson.extractInfoForActionSetFromORCID(spark, inputPath, workingDir);
            ExtractPerson.extractInfoForActionSetFromProjects(
                spark, inputPath, workingDir, dbUrl, dbUser, dbPassword, workingDir + "/project", hdfsNameNode);
            ExtractPerson.createActionSet(spark, outputPath, workingDir);
        });
    }

    /**
     * Dumps the person/project participation relations found in the database to a text file on
     * HDFS, one JSON-serialized {@link Relation} per line.
     *
     * @param spark not used by this step
     * @param inputPath not used by this step
     * @param workingDir not used by this step
     * @param dbUrl JDBC url of the database holding the {@code project_person} table
     * @param dbUser database user
     * @param dbPassword database password
     * @param hdfsPath path of the file to create
     * @param hdfsNameNode value assigned to {@code fs.defaultFS}
     * @throws IOException if the file system cannot be obtained or the output file cannot be created
     * @throws RuntimeException wrapping any {@link IOException} raised while closing the resources
     */
    private static void extractInfoForActionSetFromProjects(SparkSession spark, String inputPath, String workingDir, String dbUrl, String dbUser, String dbPassword, String hdfsPath, String hdfsNameNode) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", hdfsNameNode);
        FileSystem fileSystem = FileSystem.get(conf);
        FSDataOutputStream fos = fileSystem.create(new Path(hdfsPath));

        // The writer is the FIRST resource so that the HDFS stream it wraps is closed even when the
        // DbClient constructor fails; previously the stream leaked in that case. Resources are closed
        // in reverse order: the database client first, then the writer (which flushes and closes fos).
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
             DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) {
            dbClient.processResults(QUERY, rs -> ExtractPerson.writeRelation(ExtractPerson.getRelationWithProject(rs), writer));
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds the participation relation described by the current row of the result set.
     *
     * @param rs result set positioned on a row exposing the {@code project}, {@code pid} and
     *           {@code role} columns
     * @return the relation between the person and the project
     * @throws RuntimeException wrapping the {@link SQLException} raised while reading a column
     */
    public static Relation getRelationWithProject(ResultSet rs) {
        final String project;
        final String orcid;
        final String role;
        try {
            project = rs.getString("project");
            orcid = rs.getString("pid");
            role = rs.getString("role");
        }
        catch (SQLException e) {
            throw new RuntimeException(e);
        }
        return ExtractPerson.getProjectRelation(project, orcid, role);
    }

    /**
     * Creates the validated "participatesToProject" relation going from a person to a project.
     *
     * @param project project code; the part before the first "::" is kept as is, the part after it
     *                is replaced by its md5
     * @param orcid ORCID of the person, hashed to build the source identifier
     * @param role role of the person in the project; stored as the "role" property when not blank
     * @return the relation, carrying properties only when a role is available
     */
    private static Relation getProjectRelation(String project, String orcid, String role) {
        String personId = PERSON_PREFIX + SEPARATOR + IdentifierFactory.md5(orcid);

        String projectNamespace = StringUtils.substringBefore(project, SEPARATOR);
        String projectLocalId = StringUtils.substringAfter(project, SEPARATOR);
        String projectId = PROJECT_ID_PREFIX + projectNamespace + SEPARATOR + IdentifierFactory.md5(projectLocalId);

        Relation participation = OafMapperUtils.getRelation(
            personId, projectId, "projectPerson", "participation", "participatesToProject",
            collectedfromOpenAIRE, FUNDERDATAINFO, null);
        participation.setValidated(Boolean.TRUE);

        // The role is the only possible property: the list is set only when there is one.
        if (StringUtils.isNotBlank(role)) {
            KeyValue roleProperty = new KeyValue();
            roleProperty.setKey("role");
            roleProperty.setValue(role);

            List<KeyValue> properties = new ArrayList<>();
            properties.add(roleProperty);
            participation.setProperties(properties);
        }
        return participation;
    }

    /**
     * Appends a relation to the writer as a single line of JSON.
     *
     * @param relation2 relation to serialize
     * @param writer destination; a line separator is written after the JSON
     * @throws RuntimeException wrapping the {@link IOException} raised by serialization or writing
     */
    protected static void writeRelation(Relation relation2, BufferedWriter writer) {
        try {
            final String json = OBJECT_MAPPER.writeValueAsString(relation2);
            writer.write(json);
            writer.newLine();
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Assembles the action set: persons and every kind of relation found under the working
     * directory are wrapped into atomic actions and stored as a BZip2-compressed sequence file.
     *
     * <p>Each record has the canonical name of the payload class as key and the JSON of the
     * atomic action as value.
     *
     * @param spark active Spark session
     * @param outputPath destination of the sequence file
     * @param workingDir directory holding the "people", "authorship", "coauthorship",
     *                   "affiliation" and "project" intermediate outputs
     */
    private static void createActionSet(SparkSession spark, String outputPath, String workingDir) {
        Dataset<Person> people = spark
            .read()
            .textFile(workingDir + "/people")
            .map(
                (MapFunction<String, Person>) json -> OBJECT_MAPPER.readValue(json, Person.class),
                Encoders.bean(Person.class));

        people
            .toJavaRDD()
            .map(ExtractPerson::toAtomicAction)
            .union(
                ExtractPerson.getRelations(spark, workingDir + "/authorship")
                    .toJavaRDD()
                    .map(ExtractPerson::toAtomicAction))
            .union(
                ExtractPerson.getRelations(spark, workingDir + "/coauthorship")
                    .toJavaRDD()
                    .map(ExtractPerson::toAtomicAction))
            .union(
                ExtractPerson.getRelations(spark, workingDir + "/affiliation")
                    .toJavaRDD()
                    .map(ExtractPerson::toAtomicAction))
            .union(
                ExtractPerson.getRelations(spark, workingDir + "/project")
                    .toJavaRDD()
                    .map(ExtractPerson::toAtomicAction))
            .mapToPair(
                (PairFunction<AtomicAction, Text, Text>) action -> new Tuple2<>(
                    new Text(action.getClazz().getCanonicalName()),
                    new Text(OBJECT_MAPPER.writeValueAsString(action))))
            .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);
    }

    /** Wraps an entity or relation into an atomic action typed after the payload's runtime class. */
    @SuppressWarnings({"rawtypes", "unchecked"}) // raw on purpose: persons and relations share one RDD
    private static AtomicAction toAtomicAction(Oaf payload) {
        return new AtomicAction(payload.getClass(), payload);
    }

    /**
     * Derives persons and person-centric relations from the ORCID tables and stores them, as
     * gzip-compressed JSON, in four sub-directories of the working directory: "people",
     * "authorship", "coauthorship" and "affiliation". Existing outputs are overwritten.
     *
     * @param spark active Spark session
     * @param inputPath location of the "Authors", "Works" and "Employments" parquet tables; the
     *                  table name is appended without any separator, so the value presumably has
     *                  to end with one (to be confirmed against the caller's configuration)
     * @param workingDir directory under which the four outputs are written
     */
    private static void extractInfoForActionSetFromORCID(SparkSession spark, String inputPath, String workingDir) {
        Dataset authors = spark.read().parquet(inputPath + "Authors").as(Encoders.bean(Author.class));
        // Keep only the works having at least one pid whose schema is doi, pmc, pmid or arxiv (case-insensitive).
        Dataset works = spark.read().parquet(inputPath + "Works").as(Encoders.bean(Work.class)).filter((FilterFunction & Serializable)w -> Optional.ofNullable(w.getPids()).isPresent() && w.getPids().stream().anyMatch(p -> p.getSchema().equalsIgnoreCase("doi") || p.getSchema().equalsIgnoreCase("pmc") || p.getSchema().equalsIgnoreCase("pmid") || p.getSchema().equalsIgnoreCase("arxiv")));
        Dataset employmentDataset = spark.read().parquet(inputPath + "Employments").as(Encoders.bean(Employment.class));
        // Inner join on the orcid column, keeping the Employment side only: employments without a matching author are dropped.
        Dataset employment = employmentDataset.joinWith(authors, employmentDataset.col("orcid").equalTo((Object)authors.col("orcid"))).map((MapFunction & Serializable)t2 -> (Employment)t2._1(), Encoders.bean(Employment.class));
        // --- people: one Person per ORCID author ---
        authors.map((MapFunction & Serializable)op -> {
            Person person = new Person();
            person.setId(DHPUtils.generateIdentifier((String)op.getOrcid(), (String)PERSON_PREFIX));
            // Missing textual fields are replaced by empty strings, missing lists by empty lists.
            person.setBiography(Optional.ofNullable(op.getBiography()).orElse(""));
            KeyValue kv = OafMapperUtils.keyValue((String)orcidKey, (String)ModelConstants.ORCID_DS);
            kv.setDataInfo(null);
            person.setCollectedfrom(Arrays.asList(kv));
            person.setAlternativeNames((List)Optional.ofNullable(op.getOtherNames()).orElse(new ArrayList()));
            person.setFamilyName(Optional.ofNullable(op.getFamilyName()).orElse(""));
            person.setGivenName(Optional.ofNullable(op.getGivenName()).orElse(""));
            // The other pids of the author are copied using their schema as both class id and class name...
            person.setPid((List)Optional.ofNullable(op.getOtherPids()).map(v -> v.stream().map(p -> OafMapperUtils.structuredProperty((String)p.getValue(), (String)p.getSchema(), (String)p.getSchema(), (String)"dnet:pid_types", (String)"dnet:pid_types", null)).collect(Collectors.toList())).orElse(new ArrayList()));
            // ...and the ORCID itself is appended as an additional pid carrying its own DataInfo.
            person.getPid().add(OafMapperUtils.structuredProperty((String)op.getOrcid(), (String)"orcid", (String)"Open Researcher and Contributor ID", (String)"dnet:pid_types", (String)"dnet:pid_types", (DataInfo)OafMapperUtils.dataInfo((Boolean)false, null, (Boolean)false, (Boolean)false, (Qualifier)OafMapperUtils.qualifier((String)"sysimport:crosswalk:entityregistry", (String)"sysimport:crosswalk:entityregistry", (String)"dnet:pid_types", (String)"dnet:pid_types"), (String)"0.91")));
            person.setDateofcollection(op.getLastModifiedDate());
            person.setOriginalId(Arrays.asList(op.getOrcid()));
            person.setDataInfo(ORCIDDATAINFO);
            return person;
        }, Encoders.bean(Person.class)).write().option("compression", "gzip").mode(SaveMode.Overwrite).json(workingDir + "/people");
        // --- authorship: the relations produced by getAuthorshipRelationIterator for each work ---
        works.flatMap(ExtractPerson::getAuthorshipRelationIterator, Encoders.bean(Relation.class)).write().option("compression", "gzip").mode(SaveMode.Overwrite).json(workingDir + "/authorship");
        // --- coauthorship ---
        // 1. emit a (pid value, orcid) pair for every supported pid of every work;
        // 2. group the pairs by pid value and collect the orcids of each group into a Coauthors bean;
        // 3. expand each bean into relations through CoAuthorshipIterator;
        // 4. group the relations by source + target and keep the first of each group, i.e. one relation per key.
        Dataset coauthorship = works.flatMap((FlatMapFunction & Serializable)w -> {
            ArrayList lista = new ArrayList();
            w.getPids().stream().forEach(p -> {
                if (p.getSchema().equalsIgnoreCase("doi") || p.getSchema().equalsIgnoreCase("pmc") || p.getSchema().equalsIgnoreCase("pmid") || p.getSchema().equalsIgnoreCase("arxiv")) {
                    lista.add(new Tuple2((Object)p.getValue(), (Object)w.getOrcid()));
                }
            });
            return lista.iterator();
        }, Encoders.tuple((Encoder)Encoders.STRING(), (Encoder)Encoders.STRING())).groupByKey(Tuple2::_1, Encoders.STRING()).mapGroups((MapGroupsFunction & Serializable)(k, it) -> ExtractPerson.extractCoAuthors(it), Encoders.bean(Coauthors.class)).flatMap((FlatMapFunction & Serializable)c -> new CoAuthorshipIterator(c.getCoauthors()), Encoders.bean(Relation.class)).groupByKey((MapFunction & Serializable)r -> r.getSource() + r.getTarget(), Encoders.STRING()).mapGroups((MapGroupsFunction & Serializable)(k, it) -> (Relation)it.next(), Encoders.bean(Relation.class));
        coauthorship.write().option("compression", "gzip").mode(SaveMode.Overwrite).json(workingDir + "/coauthorship");
        // --- affiliation: only the employments whose affiliation identifier exists and has the "ror" schema ---
        employment.filter((FilterFunction & Serializable)e -> Optional.ofNullable(e.getAffiliationId()).isPresent()).filter((FilterFunction & Serializable)e -> e.getAffiliationId().getSchema().equalsIgnoreCase("ror")).map(ExtractPerson::getAffiliationRelation, Encoders.bean(Relation.class)).write().option("compression", "gzip").mode(SaveMode.Overwrite).json(workingDir + "/affiliation");
    }

    /**
     * Loads the relations stored as text at the given path, one JSON document per line.
     *
     * @param spark active Spark session
     * @param path location of the text file(s) to read
     * @return the deserialized relations
     */
    private static Dataset<Relation> getRelations(SparkSession spark, String path) {
        Dataset<String> jsonLines = spark.read().textFile(path);
        MapFunction<String, Relation> parseRelation = line -> OBJECT_MAPPER.readValue(line, Relation.class);
        return jsonLines.map(parseRelation, Encoders.bean(Relation.class));
    }

    /**
     * Collects the second component of every pair into a {@link Coauthors} bean, preserving the
     * iteration order.
     *
     * @param it pairs whose second component is the value to collect (the callers pass
     *           (pid value, orcid) pairs sharing the same pid value)
     * @return a bean holding all the collected values
     */
    private static Coauthors extractCoAuthors(Iterator<Tuple2<String, String>> it) {
        ArrayList<String> orcids = new ArrayList<>();
        it.forEachRemaining(pair -> orcids.add(pair._2()));

        Coauthors group = new Coauthors();
        group.setCoauthors(orcids);
        return group;
    }

    /**
     * Creates the validated "isAffiliatedWith" relation going from a person to the organization
     * identified by the ROR of the employment.
     *
     * @param row employment record; its affiliation identifier must not be null
     * @return the relation, with "startDate" and/or "endDate" properties when those dates are not blank
     */
    private static Relation getAffiliationRelation(Employment row) {
        String personId = PERSON_PREFIX + SEPARATOR + IdentifierFactory.md5(row.getOrcid());
        String normalizedRor = PidCleaner.normalizePidValue("ROR", row.getAffiliationId().getValue());
        String organizationId = ROR_PREFIX + IdentifierFactory.md5(normalizedRor);

        List<KeyValue> collectedFrom = Arrays.asList(OafMapperUtils.keyValue(orcidKey, ModelConstants.ORCID_DS));
        Relation affiliation = OafMapperUtils.getRelation(
            personId, organizationId, "organizationPerson", "affiliation", "isAffiliatedWith",
            collectedFrom, ORCIDDATAINFO, null);
        affiliation.setValidated(Boolean.TRUE);

        List<KeyValue> properties = new ArrayList<>();
        addDateProperty(properties, "startDate", row.getStartDate());
        addDateProperty(properties, "endDate", row.getEndDate());
        // Properties are left unset, rather than set to an empty list, when no date is available.
        if (!properties.isEmpty()) {
            affiliation.setProperties(properties);
        }
        return affiliation;
    }

    /** Appends a key/value property to the list unless the value is null, empty or blank. */
    private static void addDateProperty(List<KeyValue> properties, String key, String value) {
        if (StringUtils.isBlank(value)) {
            return;
        }
        KeyValue property = new KeyValue();
        property.setKey(key);
        property.setValue(value);
        properties.add(property);
    }

    /**
     * Produces the authorship relations of a work: one per pid for which {@code getRelation}
     * returns a relation, in pid order.
     *
     * @param w work whose pids are inspected
     * @return iterator over the relations; empty when the work has no pid list
     */
    @NotNull
    private static Iterator<Relation> getAuthorshipRelationIterator(Work w) {
        List<Relation> relations = new ArrayList<>();
        List<Pid> pids = w.getPids();
        if (pids == null) {
            return relations.iterator();
        }
        for (Pid pid : pids) {
            Relation authorship = ExtractPerson.getRelation(w.getOrcid(), pid);
            // getRelation answers null for the pid schemas it does not handle.
            if (authorship != null) {
                relations.add(authorship);
            }
        }
        return relations.iterator();
    }

    /**
     * Creates the validated "hasAuthored" relation going from a person to the result identified
     * by the given pid.
     *
     * <p>The pid schema is matched case-insensitively, consistently with the filters applied to
     * the works in this class, and both {@code pmc} (the value those filters look for) and
     * {@code pmcid} designate a PubMed Central identifier. The previous case-sensitive match on
     * {@code "pmcid"} alone never produced a relation for works that the filters had selected
     * because of a {@code pmc} pid.
     *
     * @param orcid ORCID of the author, hashed to build the source identifier
     * @param pid persistent identifier of the authored result
     * @return the relation, or {@code null} when the pid schema is missing or not one of
     *         doi, pmid, arxiv, pmc/pmcid
     */
    private static Relation getRelation(String orcid, Pid pid) {
        String source = PERSON_PREFIX + SEPARATOR + IdentifierFactory.md5(orcid);

        // "literal".equalsIgnoreCase(schema) is null-safe and locale-independent.
        String schema = pid.getSchema();
        String targetPrefix;
        PidType pidType;
        if ("doi".equalsIgnoreCase(schema)) {
            targetPrefix = DOI_PREFIX;
            pidType = PidType.doi;
        } else if ("pmid".equalsIgnoreCase(schema)) {
            targetPrefix = PMID_PREFIX;
            pidType = PidType.pmid;
        } else if ("arxiv".equalsIgnoreCase(schema)) {
            targetPrefix = ARXIV_PREFIX;
            pidType = PidType.arXiv;
        } else if ("pmc".equalsIgnoreCase(schema) || "pmcid".equalsIgnoreCase(schema)) {
            targetPrefix = PMCID_PREFIX;
            pidType = PidType.pmc;
        } else {
            // Unsupported schema: the caller filters out null relations.
            return null;
        }

        String target = targetPrefix
            + IdentifierFactory.md5(PidCleaner.normalizePidValue(pidType.toString(), pid.getValue()));

        Relation relation2 = OafMapperUtils.getRelation(
            source, target, "resultPerson", "authorship", "hasAuthored",
            Arrays.asList(OafMapperUtils.keyValue(orcidKey, ModelConstants.ORCID_DS)), ORCIDDATAINFO, null);
        relation2.setValidated(Boolean.valueOf(true));
        return relation2;
    }
}

