/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.actionmanager.personentity;

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.personentity.ASConstants;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.orcid.model.Author;
import eu.dnetlib.dhp.collection.orcid.model.Employment;
import eu.dnetlib.dhp.collection.orcid.model.Pid;
import eu.dnetlib.dhp.collection.orcid.model.Work;
import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.SparkSessionSupport;
import eu.dnetlib.dhp.common.person.CoAuthorshipIterator;
import eu.dnetlib.dhp.common.person.Coauthors;
import eu.dnetlib.dhp.common.person.Constants;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Person;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
import eu.dnetlib.dhp.utils.DHPUtils;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

/**
 * Spark job that assembles an action set made of {@link Person} entities and of
 * {@link Relation}s linking persons to results, organizations and projects.
 *
 * <p>The job runs four steps in sequence (see {@link #main(String[])}):
 * <ol>
 *   <li>reads the {@code Authors} and {@code Employments} parquet datasets under the input path and
 *       writes persons and person-organization affiliations to the working directory;</li>
 *   <li>reads the {@code project_person} table through {@link DbClient} and writes
 *       person-project relations to {@code <workingDir>/project};</li>
 *   <li>reads the publisher JSON input and writes authorship and co-authorship relations to
 *       {@code <workingDir>/publishers};</li>
 *   <li>merges everything into a sequence file of serialized {@link AtomicAction}s.</li>
 * </ol>
 */
public class ExtractPerson implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(ExtractPerson.class);
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /** Classpath location of the JSON file describing the accepted command-line parameters. */
    private static final String PARAMETERS_RESOURCE = "/eu/dnetlib/dhp/actionmanager/personentity/as_parameters.json";

    /** Key/value pair used as {@code collectedfrom} for relations attributed to OpenAIRE. */
    private static final String OPENAIRE_DATASOURCE_ID = "10|infrastruct_::f66f1bd369679b5b077dcdf006089556";
    private static final String OPENAIRE_DATASOURCE_NAME = "OpenAIRE";

    /** Identifier prefixes of the relation targets built by this class. */
    private static final String DOI_ID_PREFIX = "50|doi_________::";
    private static final String PMID_ID_PREFIX = "50|pmid________::";
    private static final String ARXIV_ID_PREFIX = "50|arXiv_______::";
    private static final String PMCID_ID_PREFIX = "50|pmcid_______::";
    private static final String ROR_ID_PREFIX = "20|ror_________::";
    private static final String OPENORGS_ID_PREFIX = "20|openorgs____::";

    /** Scheme id / scheme name used for every pid qualifier created by {@link #getPerson(Author)}. */
    private static final String PID_TYPES = "dnet:pid_types";

    /** Query selecting the person-project associations processed by this job. */
    private static final String PROJECT_PERSON_QUERY = "SELECT * FROM project_person WHERE pid_type = 'ORCID'";

    /**
     * Entry point: parses the arguments described by {@link #PARAMETERS_RESOURCE}, removes any
     * previous content of {@code outputPath} and runs the extraction steps inside a Spark session.
     *
     * @param args command-line arguments
     * @throws IOException    if the parameter description cannot be read
     * @throws ParseException if the arguments cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        final String jsonConfiguration;
        // Close the resource stream and decode it explicitly as UTF-8 rather than with the
        // platform default charset.
        try (InputStream is = Objects.requireNonNull(
                ExtractPerson.class.getResourceAsStream(PARAMETERS_RESOURCE),
                "missing classpath resource " + PARAMETERS_RESOURCE)) {
            jsonConfiguration = IOUtils.toString(is, StandardCharsets.UTF_8);
        }
        ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
        String inputPath = parser.get("inputPath");
        log.info("inputPath {}", inputPath);
        String outputPath = parser.get("outputPath");
        log.info("outputPath {}", outputPath);
        String workingDir = parser.get("workingDir");
        log.info("workingDir {}", workingDir);
        String publisherInputPath = parser.get("publisherInputPath");
        log.info("publisherInputPath {}", publisherInputPath);
        // Database coordinates are intentionally not logged.
        String dbUrl = parser.get("postgresUrl");
        String dbUser = parser.get("postgresUser");
        String dbPassword = parser.get("postgresPassword");
        String hdfsNameNode = parser.get("hdfsNameNode");

        SparkConf conf = new SparkConf();
        SparkSessionSupport.runWithSparkSession(conf, isSparkSessionManaged, spark -> {
            HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration());
            extractInfoForActionSetFromORCID(spark, inputPath, workingDir);
            extractInfoForActionSetFromProjects(
                dbUrl, dbUser, dbPassword, workingDir + "/project", hdfsNameNode, isSparkSessionManaged);
            extractInfoForActionSetFromPublisher(spark, publisherInputPath, workingDir);
            createActionSet(spark, outputPath, workingDir);
        });
    }

    /**
     * Reads the publisher JSON input and writes, as gzip-compressed JSON under
     * {@code <workingDir>/publishers}, the authorship relations (one per author/affiliation/role
     * row) together with the co-authorship relations among the ORCID authors of the same DOI.
     * Relations sharing source, relation class and target are merged into one.
     *
     * @param spark      the active Spark session
     * @param inputPath  location of the publisher JSON files
     * @param workingDir directory receiving the {@code publishers} output
     */
    private static void extractInfoForActionSetFromPublisher(SparkSession spark, String inputPath, String workingDir) {
        Dataset<Row> df = spark
            .read()
            .schema(Constants.PUBLISHER_INPUT_SCHEMA)
            .json(inputPath)
            .where("doi is not null");

        // One row per (doi, author, pid), restricted to ORCID pids.
        Dataset<Row> allAuthors = df
            .selectExpr("doi", "explode(authors) as author")
            .selectExpr(
                "doi",
                "author.contributor_roles as roles",
                "author.corresponding as corresponding",
                "author.matchings as affs",
                "explode(author.pids) as pid")
            .where("pid.schema = 'orcid'");

        // One row per active affiliation of each author.
        Dataset<Row> affiliated = allAuthors
            .selectExpr("explode (affs) as affiliation", "doi", "corresponding", "roles", "pid.value as orcid")
            .where("affiliation.Status = 'active'")
            .selectExpr(
                "affiliation.Value as orgid",
                "affiliation.PID as orgpid",
                "affiliation.Confidence as trust",
                "doi",
                "corresponding",
                "roles",
                "orcid");

        // Both branches must expose the same columns in the same order, because the union below is
        // positional: orgid, orgpid, trust, doi, corresponding, orcid, roleschema, rolevalue, rolename.
        Dataset<Row> withoutRoles = affiliated
            .where("roles is null")
            .selectExpr("*", " '' AS roleschema", " '' AS rolevalue", "'' AS rolename")
            .drop("roles");
        Dataset<Row> withRoles = affiliated
            .where("roles is not null")
            .selectExpr("orgid", "orgpid", "trust", "doi", "corresponding", "explode(roles) as role", "orcid")
            .selectExpr("*", "role.schema as roleschema", "role.value as rolevalue", "role.name as rolename")
            .drop("role");
        Dataset<Row> authors = withoutRoles.union(withRoles);

        Dataset<Relation> authorship = authors
            .map((MapFunction<Row, Relation>) ExtractPerson::getAuthorshipRelation, Encoders.bean(Relation.class));

        Dataset<Coauthors> coauthorsByDoi = allAuthors
            .selectExpr("doi", "pid.value as orcid")
            .groupByKey((MapFunction<Row, String>) r -> r.getAs("doi"), Encoders.STRING())
            .mapGroups(
                (MapGroupsFunction<String, Row, Coauthors>) (k, it) -> extractCoAuthorsRow(it),
                Encoders.bean(Coauthors.class));

        mergeByKey(authorship.union(coAuthorshipRelations(coauthorsByDoi)))
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(workingDir + "/publishers");
    }

    /**
     * Expands each co-author list through {@link CoAuthorshipIterator} and keeps a single relation
     * for every distinct (source, target) pair.
     */
    private static Dataset<Relation> coAuthorshipRelations(Dataset<Coauthors> coauthors) {
        return coauthors
            .flatMap(
                (FlatMapFunction<Coauthors, Relation>) c -> new CoAuthorshipIterator(c.getCoauthors()),
                Encoders.bean(Relation.class))
            .groupByKey((MapFunction<Relation, String>) r -> r.getSource() + r.getTarget(), Encoders.STRING())
            .mapGroups((MapGroupsFunction<String, Relation, Relation>) (k, it) -> it.next(), Encoders.bean(Relation.class));
    }

    /**
     * Groups relations by source + relation class + target and merges each group into one relation
     * through {@link #mergeRelation(Iterator)}.
     */
    private static Dataset<Relation> mergeByKey(Dataset<Relation> relations) {
        return relations
            .groupByKey(
                (MapFunction<Relation, String>) r -> r.getSource() + r.getRelClass() + r.getTarget(),
                Encoders.STRING())
            .mapGroups(
                (MapGroupsFunction<String, Relation, Relation>) (k, it) -> mergeRelation(it),
                Encoders.bean(Relation.class));
    }

    /**
     * Folds all the relations of a group into one using {@link MergeUtils#mergeRelation}.
     *
     * @param it iterator over the relations of one group; must yield at least one element
     * @return the merged relation
     */
    private static Relation mergeRelation(Iterator<Relation> it) {
        Relation merged = it.next();
        while (it.hasNext()) {
            merged = MergeUtils.mergeRelation(merged, it.next());
        }
        return merged;
    }

    /**
     * Builds the {@code hasAuthored} relation between a person (identified by ORCID) and a result
     * (identified by DOI) from one row of the publisher input, attaching the declared affiliation,
     * the corresponding-author flag and the role as relation properties when available.
     *
     * @param a row exposing the columns {@code doi}, {@code orcid}, {@code trust}, {@code orgid},
     *          {@code orgpid}, {@code corresponding}, {@code roleschema}, {@code rolevalue} and
     *          {@code rolename}
     * @return the authorship relation
     */
    @NotNull
    private static Relation getAuthorshipRelation(Row a) {
        String doi = a.getAs("doi");
        String orcid = a.getAs("orcid");
        String target = DOI_ID_PREFIX
            + IdentifierFactory.md5(PidCleaner.normalizePidValue(PidType.doi.toString(), Constants.removePrefixUrl(doi)));
        String source = personId(Constants.removePrefixUrl(orcid));

        Relation relation = OafMapperUtils.getRelation(
            source,
            target,
            "resultPerson",
            "authorship",
            "hasAuthored",
            OafMapperUtils.listKeyValues(OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME),
            null,
            null);

        Double trust = a.getAs("trust");
        String orgId = a.getAs("orgid");
        if (StringUtils.isNotBlank(orgId)) {
            KeyValue affiliation = new KeyValue();
            affiliation.setKey("declared_affiliation");
            // NOTE(review): a null orgpid with a non-blank orgid fails here with a
            // NullPointerException, exactly as before; confirm whether such rows can occur.
            String orgPid = a.getAs("orgpid");
            if (orgPid.equalsIgnoreCase("ror")) {
                affiliation.setValue(orgId);
            } else {
                affiliation.setValue(
                    OPENORGS_ID_PREFIX + IdentifierFactory.md5(PidCleaner.normalizePidValue("OPENORGS", orgId)));
            }
            // String.valueOf yields the literal "null" when the confidence is missing.
            affiliation.setDataInfo(OafMapperUtils.dataInfo(false, "openaire", true, false, null, String.valueOf(trust)));
            addProperty(relation, affiliation);
        }

        // NOTE(review): the flag is compared with the string "true"; if the input schema declares
        // this column with a non-string type the check never matches - confirm against
        // Constants.PUBLISHER_INPUT_SCHEMA.
        if ("true".equals(a.getAs("corresponding"))) {
            addProperty(relation, newProperty("corresponding", "true"));
        }

        String roleSchema = a.getAs("roleschema");
        if (StringUtils.isNotBlank(roleSchema)) {
            // Schema and value are concatenated without separator; a null value is rendered as "null".
            String roleValue = a.getAs("rolevalue");
            addProperty(relation, newProperty("role", roleSchema + roleValue));
        } else {
            String roleName = a.getAs("rolename");
            if (StringUtils.isNotBlank(roleName)) {
                addProperty(relation, newProperty("role", roleName));
            }
        }
        return relation;
    }

    /** Creates a {@link KeyValue} carrying the given key and value. */
    private static KeyValue newProperty(String key, String value) {
        KeyValue kv = new KeyValue();
        kv.setKey(key);
        kv.setValue(value);
        return kv;
    }

    /** Appends a property to the relation, creating the property list on first use. */
    private static void addProperty(Relation relation, KeyValue property) {
        if (relation.getProperties() == null) {
            relation.setProperties(new ArrayList<>());
        }
        relation.getProperties().add(property);
    }

    /** Builds the person identifier from an ORCID: person prefix, {@code ::}, md5 of the ORCID. */
    private static String personId(String orcid) {
        return Constants.PERSON_PREFIX + "::" + IdentifierFactory.md5(orcid);
    }

    /**
     * Builds the {@code isAffiliatedWith} relation between a person and a ROR organization from a
     * row exposing the columns {@code orcid} and {@code orgid}. Not referenced by the other methods
     * of this class.
     *
     * @param a the input row
     * @return the affiliation relation, collected from OpenAIRE
     */
    @NotNull
    private static Relation getAffiliationRelation(Row a) {
        String orcid = a.getAs("orcid");
        String orgId = a.getAs("orgid");
        String source = personId(Constants.removePrefixUrl(orcid));
        String target = ROR_ID_PREFIX + IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", orgId));
        return OafMapperUtils.getRelation(
            source,
            target,
            "organizationPerson",
            "affiliation",
            "isAffiliatedWith",
            OafMapperUtils.listKeyValues(OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME),
            null,
            null);
    }

    /**
     * Dumps the person-project associations found in the database to a single file, one
     * JSON-serialized {@link Relation} per line, encoded as UTF-8.
     *
     * @param dbUrl        JDBC url of the database
     * @param dbUser       database user
     * @param dbPassword   database password
     * @param hdfsPath     path of the file to create
     * @param hdfsNameNode value assigned to {@code fs.defaultFS}
     * @param exec         when {@code false} the method does nothing
     * @throws IOException if the output file cannot be created or closed
     */
    private static void extractInfoForActionSetFromProjects(String dbUrl, String dbUser, String dbPassword, String hdfsPath, String hdfsNameNode, Boolean exec) throws IOException {
        if (!exec.booleanValue()) {
            return;
        }
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", hdfsNameNode);
        // The FileSystem is deliberately left open: FileSystem.get may hand out a cached instance
        // shared with other users in the same JVM.
        FileSystem fileSystem = FileSystem.get(conf);
        // The output stream is a managed resource so that it is released even when the database
        // client cannot be created.
        try (FSDataOutputStream fos = fileSystem.create(new Path(hdfsPath))) {
            try (DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword);
                 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8))) {
                dbClient.processResults(PROJECT_PERSON_QUERY, rs -> writeRelation(getRelationWithProject(rs), writer));
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Maps the current row of the result set (columns {@code project}, {@code pid}, {@code role})
     * to a person-project relation.
     *
     * @param rs result set positioned on the row to convert
     * @return the relation for the current row
     * @throws RuntimeException wrapping any {@link SQLException} raised while reading the row
     */
    public static Relation getRelationWithProject(ResultSet rs) {
        try {
            String project = rs.getString("project");
            String orcid = rs.getString("pid");
            String role = rs.getString("role");
            return getProjectRelation(project, orcid, role);
        } catch (SQLException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds the validated {@code participatesToProject} relation between a person and a project.
     *
     * @param project project identifier in the form {@code <prefix>::<local id>}
     * @param orcid   ORCID of the person
     * @param role    role of the person in the project; added as property when not blank
     * @return the relation
     */
    private static Relation getProjectRelation(String project, String orcid, String role) {
        String source = personId(orcid);
        String target = Constants.PROJECT_ID_PREFIX
            + StringUtils.substringBefore(project, "::")
            + "::"
            + IdentifierFactory.md5(StringUtils.substringAfter(project, "::"));

        Relation relation = OafMapperUtils.getRelation(
            source,
            target,
            "projectPerson",
            "participation",
            "participatesToProject",
            ASConstants.collectedfromOpenAIRE,
            ASConstants.FUNDERDATAINFO,
            null);
        relation.setValidated(true);

        if (StringUtils.isNotBlank(role)) {
            List<KeyValue> properties = new ArrayList<>();
            properties.add(newProperty("role", role));
            relation.setProperties(properties);
        }
        return relation;
    }

    /**
     * Writes the relation as one JSON line.
     *
     * @param relation2 the relation to serialize
     * @param writer    destination writer
     * @throws RuntimeException wrapping any {@link IOException} raised while serializing or writing
     */
    protected static void writeRelation(Relation relation2, BufferedWriter writer) {
        try {
            String json = OBJECT_MAPPER.writeValueAsString(relation2);
            writer.write(json);
            writer.newLine();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Writes persons and affiliations derived from the parquet datasets under {@code inputPath}. */
    private static void extractInfoForActionSetFromORCID(SparkSession spark, String inputPath, String workingDir) {
        writePerson(spark, inputPath, workingDir);
        writeAffiliations(spark, inputPath, workingDir);
    }

    /**
     * Writes to {@code <workingDir>/affiliation} one relation for every employment that belongs to
     * a known author and whose affiliation identifier has schema {@code ror} (case-insensitive).
     * Employments without affiliation identifier, or with a null schema, are skipped.
     */
    private static void writeAffiliations(SparkSession spark, String inputPath, String workingDir) {
        Dataset<Employment> employmentDataset = spark
            .read()
            .parquet(inputPath + "Employments")
            .as(Encoders.bean(Employment.class));
        Dataset<Author> authors = spark
            .read()
            .parquet(inputPath + "Authors")
            .as(Encoders.bean(Author.class));

        // Inner join used only to discard employments whose ORCID has no author record.
        Dataset<Employment> employment = employmentDataset
            .joinWith(authors, employmentDataset.col("orcid").equalTo(authors.col("orcid")))
            .map((MapFunction<Tuple2<Employment, Author>, Employment>) Tuple2::_1, Encoders.bean(Employment.class));

        employment
            .filter(
                (FilterFunction<Employment>) e -> e.getAffiliationId() != null
                    && "ror".equalsIgnoreCase(e.getAffiliationId().getSchema()))
            .map((MapFunction<Employment, Relation>) ExtractPerson::getAffiliationRelation, Encoders.bean(Relation.class))
            .write()
            .option("compression", "gzip")
            .mode(SaveMode.Overwrite)
            .json(workingDir + "/affiliation");
    }

    /**
     * Writes to {@code <workingDir>/coauthorship} the co-authorship relations among the ORCIDs
     * sharing the same value of a relevant pid in the {@code Works} dataset. Not invoked by
     * {@link #main(String[])}.
     */
    private static void writeCoAuthorship(SparkSession spark, String inputPath, String workingDir) {
        Dataset<Coauthors> coauthorsByPid = spark
            .read()
            .parquet(inputPath + "Works")
            .as(Encoders.bean(Work.class))
            .flatMap(
                (FlatMapFunction<Work, Tuple2<String, String>>) ExtractPerson::pidOrcidPairs,
                Encoders.tuple(Encoders.STRING(), Encoders.STRING()))
            .groupByKey((MapFunction<Tuple2<String, String>, String>) Tuple2::_1, Encoders.STRING())
            .mapGroups(
                (MapGroupsFunction<String, Tuple2<String, String>, Coauthors>) (k, it) -> extractCoAuthors(it),
                Encoders.bean(Coauthors.class));

        coAuthorshipRelations(coauthorsByPid)
            .write()
            .option("compression", "gzip")
            .mode(SaveMode.Overwrite)
            .json(workingDir + "/coauthorship");
    }

    /**
     * Emits one (pid value, ORCID) pair for every relevant pid of the work; a work without pids
     * yields nothing, consistently with {@link #isAllowedPidType(Work)}.
     */
    private static Iterator<Tuple2<String, String>> pidOrcidPairs(Work work) {
        if (work.getPids() == null) {
            return Collections.emptyIterator();
        }
        return work
            .getPids()
            .stream()
            .filter(p -> isRelevantSchema(p.getSchema()))
            .map(p -> new Tuple2<>(p.getValue(), work.getOrcid()))
            .collect(Collectors.toList())
            .iterator();
    }

    /**
     * Writes to {@code <workingDir>/authorship} the authorship relations derived from the
     * {@code Works} dataset. Not invoked by {@link #main(String[])}.
     */
    private static void writeAuthorship(SparkSession spark, String inputPath, String workingDir) {
        Dataset<Work> works = spark
            .read()
            .parquet(inputPath + "Works")
            .as(Encoders.bean(Work.class))
            .filter((FilterFunction<Work>) ExtractPerson::isAllowedPidType);

        works
            .flatMap(
                (FlatMapFunction<Work, Relation>) ExtractPerson::getAuthorshipRelationIterator,
                Encoders.bean(Relation.class))
            .write()
            .option("compression", "gzip")
            .mode(SaveMode.Overwrite)
            .json(workingDir + "/authorship");
    }

    /** Converts every record of the {@code Authors} dataset and writes it to {@code <workingDir>/people}. */
    private static void writePerson(SparkSession spark, String inputPath, String workingDir) {
        Dataset<Author> authors = spark
            .read()
            .parquet(inputPath + "Authors")
            .as(Encoders.bean(Author.class));

        authors
            .map((MapFunction<Author, Person>) ExtractPerson::getPerson, Encoders.bean(Person.class))
            .write()
            .option("compression", "gzip")
            .mode(SaveMode.Overwrite)
            .json(workingDir + "/people");
    }

    /**
     * Tells whether a pid schema is one of those {@link #getRelation(String, Pid)} can turn into a
     * result identifier: {@code doi}, {@code pmid}, {@code arxiv}, {@code pmc} or {@code pmcid},
     * compared ignoring case.
     *
     * @param schema the pid schema, possibly {@code null}
     * @return {@code true} for a supported schema, {@code false} otherwise (including {@code null})
     */
    private static boolean isRelevantSchema(String schema) {
        return "doi".equalsIgnoreCase(schema)
            || "pmc".equalsIgnoreCase(schema)
            || "pmcid".equalsIgnoreCase(schema)
            || "pmid".equalsIgnoreCase(schema)
            || "arxiv".equalsIgnoreCase(schema);
    }

    /**
     * Returns the authorship relations of a work, one for each pid with a supported schema.
     *
     * @param w the work
     * @return iterator over the relations; empty when the work has no pids
     */
    @NotNull
    private static Iterator<Relation> getAuthorshipRelationIterator(Work w) {
        if (w.getPids() == null) {
            return Collections.emptyIterator();
        }
        return w
            .getPids()
            .stream()
            .map(pid -> getRelation(w.getOrcid(), pid))
            .filter(Objects::nonNull)
            .collect(Collectors.toList())
            .iterator();
    }

    /** Tells whether the work carries at least one pid with a relevant schema. */
    private static boolean isAllowedPidType(Work w) {
        return w.getPids() != null
            && w.getPids().stream().anyMatch(p -> isRelevantSchema(p.getSchema()));
    }

    /**
     * Converts an author record into a {@link Person}. Missing biography, family name and given
     * name become empty strings; missing alternative names and pids become empty lists. The ORCID
     * is always appended as the last pid and is the only original id.
     *
     * @param op the author record
     * @return the person entity
     */
    @NotNull
    private static Person getPerson(Author op) {
        Person person = new Person();
        person.setId(DHPUtils.generateIdentifier(op.getOrcid(), Constants.PERSON_PREFIX));
        person.setBiography(Optional.ofNullable(op.getBiography()).orElse(""));

        KeyValue collectedFrom = OafMapperUtils.keyValue(Constants.ORCID_KEY, ModelConstants.ORCID_DS);
        collectedFrom.setDataInfo(null);
        person.setCollectedfrom(Collections.singletonList(collectedFrom));

        person.setAlternativeNames(Optional.ofNullable(op.getOtherNames()).orElseGet(ArrayList::new));
        person.setFamilyName(Optional.ofNullable(op.getFamilyName()).orElse(""));
        person.setGivenName(Optional.ofNullable(op.getGivenName()).orElse(""));

        // The pid list must be mutable: the ORCID is appended after the other pids.
        person.setPid(new ArrayList<>());
        if (op.getOtherPids() != null) {
            op.getOtherPids().forEach(
                p -> person.getPid().add(
                    OafMapperUtils.structuredProperty(p.getValue(), p.getSchema(), p.getSchema(), PID_TYPES, PID_TYPES, null)));
        }
        person.getPid().add(
            OafMapperUtils.structuredProperty(
                op.getOrcid(), "orcid", "Open Researcher and Contributor ID", PID_TYPES, PID_TYPES, null));

        person.setDateofcollection(op.getLastModifiedDate());
        person.setOriginalId(Arrays.asList(op.getOrcid()));
        person.setDataInfo(Constants.ORCIDDATAINFO);
        return person;
    }

    /**
     * Reads relations serialized as one JSON document per line.
     *
     * @param spark the active Spark session
     * @param path  location of the relations
     * @return the relations, or an empty dataset when {@code path} does not exist
     */
    private static Dataset<Relation> getRelations(SparkSession spark, String path) {
        if (!HdfsSupport.exists(path, spark.sparkContext().hadoopConfiguration())) {
            return spark.emptyDataset(Encoders.bean(Relation.class));
        }
        return spark
            .read()
            .textFile(path)
            .map(
                (MapFunction<String, Relation>) value -> OBJECT_MAPPER.readValue(value, Relation.class),
                Encoders.bean(Relation.class));
    }

    /**
     * Collects the ORCIDs (second element of each pair, stripped through
     * {@link Constants#removePrefixUrl}) of a group into a {@link Coauthors} bean.
     */
    private static Coauthors extractCoAuthors(Iterator<Tuple2<String, String>> it) {
        ArrayList<String> orcids = new ArrayList<>();
        it.forEachRemaining(pair -> orcids.add(Constants.removePrefixUrl(pair._2())));
        Coauthors coauthors = new Coauthors();
        coauthors.setCoauthors(orcids);
        return coauthors;
    }

    /**
     * Collects the {@code orcid} column (stripped through {@link Constants#removePrefixUrl}) of a
     * group of rows into a {@link Coauthors} bean.
     */
    private static Coauthors extractCoAuthorsRow(Iterator<Row> it) {
        ArrayList<String> orcids = new ArrayList<>();
        it.forEachRemaining(row -> {
            String orcid = row.getAs("orcid");
            orcids.add(Constants.removePrefixUrl(orcid));
        });
        Coauthors coauthors = new Coauthors();
        coauthors.setCoauthors(orcids);
        return coauthors;
    }

    /**
     * Builds the validated {@code isAffiliatedWith} relation between the person owning the
     * employment and the ROR organization it refers to; non-blank start and end dates are attached
     * as {@code startDate} / {@code endDate} properties.
     *
     * @param row the employment; its affiliation identifier must not be {@code null}
     * @return the affiliation relation
     */
    private static Relation getAffiliationRelation(Employment row) {
        String source = personId(row.getOrcid());
        String target = ROR_ID_PREFIX
            + IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAffiliationId().getValue()));

        Relation relation = OafMapperUtils.getRelation(
            source,
            target,
            "organizationPerson",
            "affiliation",
            "isAffiliatedWith",
            Arrays.asList(OafMapperUtils.keyValue(Constants.ORCID_KEY, ModelConstants.ORCID_DS)),
            Constants.ORCIDDATAINFO,
            null);
        relation.setValidated(true);

        List<KeyValue> properties = new ArrayList<>();
        // isNotBlank already covers the null case.
        if (StringUtils.isNotBlank(row.getStartDate())) {
            properties.add(newProperty("startDate", row.getStartDate()));
        }
        if (StringUtils.isNotBlank(row.getEndDate())) {
            properties.add(newProperty("endDate", row.getEndDate()));
        }
        if (!properties.isEmpty()) {
            relation.setProperties(properties);
        }
        return relation;
    }

    /**
     * Builds the validated {@code hasAuthored} relation between a person and the result identified
     * by the given pid.
     *
     * <p>The schema is matched ignoring case, in line with {@link #isRelevantSchema(String)}; both
     * {@code pmc} and {@code pmcid} designate a PubMed Central identifier.
     *
     * @param orcid ORCID of the author
     * @param pid   pid of the result
     * @return the relation, or {@code null} when the pid schema is missing or not supported
     */
    private static Relation getRelation(String orcid, Pid pid) {
        String schema = pid.getSchema();
        final String prefix;
        final PidType type;
        if ("doi".equalsIgnoreCase(schema)) {
            prefix = DOI_ID_PREFIX;
            type = PidType.doi;
        } else if ("pmid".equalsIgnoreCase(schema)) {
            prefix = PMID_ID_PREFIX;
            type = PidType.pmid;
        } else if ("arxiv".equalsIgnoreCase(schema)) {
            prefix = ARXIV_ID_PREFIX;
            type = PidType.arXiv;
        } else if ("pmc".equalsIgnoreCase(schema) || "pmcid".equalsIgnoreCase(schema)) {
            prefix = PMCID_ID_PREFIX;
            type = PidType.pmc;
        } else {
            return null;
        }

        String source = personId(orcid);
        String target = prefix + IdentifierFactory.md5(PidCleaner.normalizePidValue(type.toString(), pid.getValue()));
        Relation relation = OafMapperUtils.getRelation(
            source,
            target,
            "resultPerson",
            "authorship",
            "hasAuthored",
            Collections.singletonList(OafMapperUtils.keyValue(Constants.ORCID_KEY, ModelConstants.ORCID_DS)),
            Constants.ORCIDDATAINFO,
            null);
        relation.setValidated(true);
        return relation;
    }

    /**
     * Wraps persons and merged relations into {@link AtomicAction}s and saves them to
     * {@code outputPath} as a BZip2-compressed sequence file whose key is the canonical name of the
     * payload class and whose value is the JSON serialization of the action.
     *
     * @param spark      the active Spark session
     * @param outputPath destination of the action set
     * @param workingDir directory holding the {@code people}, {@code affiliation}, {@code project}
     *                   and {@code publishers} intermediate outputs
     */
    // AtomicAction is used as a raw type so that actions with different payload types can be
    // combined in a single RDD.
    @SuppressWarnings({"rawtypes", "unchecked"})
    private static void createActionSet(SparkSession spark, String outputPath, String workingDir) {
        Dataset<Person> people = spark
            .read()
            .textFile(workingDir + "/people")
            .map(
                (MapFunction<String, Person>) value -> OBJECT_MAPPER.readValue(value, Person.class),
                Encoders.bean(Person.class));

        Dataset<Relation> relations = getRelations(spark, workingDir + "/affiliation")
            .union(getRelations(spark, workingDir + "/project"))
            .union(getRelations(spark, workingDir + "/publishers"));
        Dataset<Relation> mergedRelations = mergeByKey(relations);

        people
            .toJavaRDD()
            .map((Function<Person, AtomicAction>) p -> new AtomicAction(p.getClass(), p))
            .union(
                mergedRelations
                    .toJavaRDD()
                    .map((Function<Relation, AtomicAction>) r -> new AtomicAction(r.getClass(), r)))
            .mapToPair(
                (PairFunction<AtomicAction, Text, Text>) aa -> new Tuple2<>(
                    new Text(aa.getClazz().getCanonicalName()),
                    new Text(OBJECT_MAPPER.writeValueAsString(aa))))
            .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);
    }
}

