/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.enrich.orcid;

import eu.dnetlib.dhp.application.AbstractScalaApplication;
import eu.dnetlib.dhp.enrich.orcid.AuthorEnricher$;
import eu.dnetlib.dhp.enrich.orcid.SparkEnrichGraphWithOrcidAuthors$;
import eu.dnetlib.dhp.oa.merge.AuthorMerger;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import java.util.List;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders$;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.slf4j.Logger;
import scala.Function1;
import scala.MatchError;
import scala.Predef$;
import scala.Serializable;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Seq;
import scala.reflect.ScalaSignature;

@ScalaSignature(bytes="\u0006\u0001\u0005=a\u0001B\u0001\u0003\u00015\u0011\u0001e\u00159be.,eN]5dQ\u001e\u0013\u0018\r\u001d5XSRDwJ]2jI\u0006+H\u000f[8sg*\u00111\u0001B\u0001\u0006_J\u001c\u0017\u000e\u001a\u0006\u0003\u000b\u0019\ta!\u001a8sS\u000eD'BA\u0004\t\u0003\r!\u0007\u000e\u001d\u0006\u0003\u0013)\tq\u0001\u001a8fi2L'MC\u0001\f\u0003\t)Wo\u0001\u0001\u0014\u0005\u0001q\u0001CA\b\u0013\u001b\u0005\u0001\"BA\t\u0007\u0003-\t\u0007\u000f\u001d7jG\u0006$\u0018n\u001c8\n\u0005M\u0001\"\u0001G!cgR\u0014\u0018m\u0019;TG\u0006d\u0017-\u00119qY&\u001c\u0017\r^5p]\"IQ\u0003\u0001B\u0001B\u0003%a\u0003I\u0001\raJ|\u0007/\u001a:usB\u000bG\u000f\u001b\t\u0003/uq!\u0001G\u000e\u000e\u0003eQ\u0011AG\u0001\u0006g\u000e\fG.Y\u0005\u00039e\ta\u0001\u0015:fI\u00164\u0017B\u0001\u0010 \u0005\u0019\u0019FO]5oO*\u0011A$G\u0005\u0003+IA\u0011B\t\u0001\u0003\u0002\u0003\u0006Ia\t\u0014\u0002\t\u0005\u0014xm\u001d\t\u00041\u00112\u0012BA\u0013\u001a\u0005\u0015\t%O]1z\u0013\t\u0011#\u0003\u0003\u0005)\u0001\t\u0005\t\u0015!\u0003*\u0003\rawn\u001a\t\u0003U=j\u0011a\u000b\u0006\u0003Y5\nQa\u001d7gi)T\u0011AL\u0001\u0004_J<\u0017B\u0001\u0019,\u0005\u0019aunZ4fe\")!\u0007\u0001C\u0001g\u00051A(\u001b8jiz\"B\u0001\u000e\u001c8qA\u0011Q\u0007A\u0007\u0002\u0005!)Q#\ra\u0001-!)!%\ra\u0001G!)\u0001&\ra\u0001S!)!\b\u0001C!w\u0005\u0019!/\u001e8\u0015\u0003q\u0002\"\u0001G\u001f\n\u0005yJ\"\u0001B+oSRDQ\u0001\u0011\u0001\u0005\n\u0005\u000bA\"\u001a8sS\u000eD'+Z:vYR,\"AQ1\u0015\rq\u001ae\n\u0015-[\u0011\u0015!u\b1\u0001F\u0003\u0015\u0019\b/\u0019:l!\t1E*D\u0001H\u0015\tA\u0015*A\u0002tc2T!\u0001\u0012&\u000b\u0005-k\u0013AB1qC\u000eDW-\u0003\u0002N\u000f\na1\u000b]1sWN+7o]5p]\")qj\u0010a\u0001-\u0005IqM]1qQB\u000bG\u000f\u001b\u0005\u0006#~\u0002\rAU\u0001\u0011_J\u001c\u0017\u000e\u001a)vE2L7-\u0019;j_:\u00042AR*V\u0013\t!vIA\u0004ECR\f7/\u001a;\u0011\u0005\u00193\u0016BA,H\u0005\r\u0011vn\u001e\u0005\u00063~\u0002\rAF\u0001\u000b_V$\b/\u001e;QCRD\u0007\"B.@\u0001\u0004a\u0016aA3oGB\u00
19a)X0\n\u0005y;%aB#oG>$WM\u001d\t\u0003A\u0006d\u0001\u0001B\u0003c\u007f\t\u00071MA\u0001U#\t!w\r\u0005\u0002\u0019K&\u0011a-\u0007\u0002\b\u001d>$\b.\u001b8h!\tAW.D\u0001j\u0015\tQ7.A\u0002pC\u001aT!\u0001\u001c\u0004\u0002\rM\u001c\u0007.Z7b\u0013\tq\u0017N\u0001\u0004SKN,H\u000e\u001e\u0005\u0006a\u0002!I!]\u0001\u0013O\u0016tWM]1uK>\u00138-\u001b3UC\ndW\rF\u0002SeNDQ\u0001R8A\u0002\u0015CQ\u0001^8A\u0002Y\t\u0011\"\u001b8qkR\u0004\u0016\r\u001e5\b\u000bY\u0014\u0001\u0012A<\u0002AM\u0003\u0018M]6F]JL7\r[$sCBDw+\u001b;i\u001fJ\u001c\u0017\u000eZ!vi\"|'o\u001d\t\u0003ka4Q!\u0001\u0002\t\u0002e\u001c\"\u0001\u001f>\u0011\u0005aY\u0018B\u0001?\u001a\u0005\u0019\te.\u001f*fM\")!\u0007\u001fC\u0001}R\tq\u000f\u0003\u0005)q\n\u0007I\u0011AA\u0001+\u0005I\u0003bBA\u0003q\u0002\u0006I!K\u0001\u0005Y><\u0007\u0005C\u0004\u0002\na$\t!a\u0003\u0002\t5\f\u0017N\u001c\u000b\u0004y\u00055\u0001B\u0002\u0012\u0002\b\u0001\u00071\u0005")
public class SparkEnrichGraphWithOrcidAuthors
extends AbstractScalaApplication {
    private final Logger log;

    public static void main(String[] stringArray) {
        SparkEnrichGraphWithOrcidAuthors$.MODULE$.main(stringArray);
    }

    public void run() {
        String graphPath = this.parser().get("graphPath");
        this.log.info(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"graphPath is '", "'"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{graphPath})));
        String orcidPath = this.parser().get("orcidPath");
        this.log.info(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"orcidPath is '", "'"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{orcidPath})));
        String targetPath = this.parser().get("targetPath");
        this.log.info(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"targetPath is '", "'"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{targetPath})));
        org.apache.spark.sql.Dataset<Row> orcidPublication = this.generateOrcidTable(this.spark(), orcidPath);
        this.enrichResult(this.spark(), new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/publication"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{graphPath})), orcidPublication, new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/publication"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{targetPath})), Encoders$.MODULE$.bean(Publication.class));
        this.enrichResult(this.spark(), new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/dataset"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{graphPath})), orcidPublication, new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/dataset"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{targetPath})), Encoders$.MODULE$.bean(Dataset.class));
        this.enrichResult(this.spark(), new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/software"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{graphPath})), orcidPublication, new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/software"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{targetPath})), Encoders$.MODULE$.bean(Software.class));
        this.enrichResult(this.spark(), new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/otherresearchproduct"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{graphPath})), orcidPublication, new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/otherresearchproduct"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{targetPath})), Encoders$.MODULE$.bean(OtherResearchProduct.class));
    }

    private <T extends Result> void enrichResult(SparkSession spark, String graphPath, org.apache.spark.sql.Dataset<Row> orcidPublication, String outputPath, Encoder<T> enc) {
        org.apache.spark.sql.Dataset entities = spark.read().schema(enc.schema()).json(graphPath).select((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("id"), functions$.MODULE$.col("datainfo"), functions$.MODULE$.col("instance")})).where("datainfo.deletedbyinference != true").drop("datainfo").withColumn("instances", functions$.MODULE$.explode(functions$.MODULE$.col("instance"))).withColumn("pids", functions$.MODULE$.explode(functions$.MODULE$.col("instances.pid"))).select((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("pids.qualifier.classid").alias("pid_schema"), functions$.MODULE$.col("pids.value").alias("pid_value"), functions$.MODULE$.col("id").alias("dnet_id")}));
        org.apache.spark.sql.Dataset orcidDnet = orcidPublication.join(entities, functions$.MODULE$.lower(functions$.MODULE$.col("schema")).equalTo((Object)functions$.MODULE$.lower(functions$.MODULE$.col("pid_schema"))).$amp$amp((Object)functions$.MODULE$.lower(functions$.MODULE$.col("value")).equalTo((Object)functions$.MODULE$.lower(functions$.MODULE$.col("pid_value")))), "inner").groupBy((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("dnet_id")})).agg(functions$.MODULE$.collect_set(orcidPublication.apply("author")).alias("orcid_authors"), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[0])).select("dnet_id", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"orcid_authors"})).cache();
        orcidDnet.count();
        org.apache.spark.sql.Dataset result = spark.read().schema(enc.schema()).json(graphPath).as(enc);
        result.joinWith(orcidDnet, result.apply("id").equalTo((Object)orcidDnet.apply("dnet_id")), "left").map((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final T apply(Tuple2<T, Row> x0$1) {
                Tuple2<T, Row> tuple2;
                block4: {
                    Result result;
                    block3: {
                        block2: {
                            tuple2 = x0$1;
                            if (tuple2 == null) break block2;
                            Result r = (Result)tuple2._1();
                            Row row = (Row)tuple2._2();
                            if (r == null) break block2;
                            Result result2 = r;
                            if (row != null) break block2;
                            result = result2;
                            break block3;
                        }
                        if (tuple2 == null) break block4;
                        Result p = (Result)tuple2._1();
                        Row r = (Row)tuple2._2();
                        if (p == null) break block4;
                        Result result3 = p;
                        if (r == null) break block4;
                        Row row = r;
                        result3.setAuthor(AuthorMerger.enrichOrcid((List)result3.getAuthor(), AuthorEnricher$.MODULE$.toOAFAuthor(row)));
                        result = result3;
                    }
                    return (T)result;
                }
                throw new MatchError(tuple2);
            }
        }, enc).write().mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath);
    }

    private org.apache.spark.sql.Dataset<Row> generateOrcidTable(SparkSession spark, String inputPath) {
        org.apache.spark.sql.Dataset orcidAuthors = spark.read().load(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/Authors"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{inputPath}))).select("orcid", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"familyName", "givenName", "creditName", "otherNames"}));
        org.apache.spark.sql.Dataset orcidWorks = spark.read().load(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/Works"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{inputPath}))).select((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("orcid"), functions$.MODULE$.explode(functions$.MODULE$.col("pids")).alias("identifier")})).where("identifier.schema IN('doi','pmid','pmc','arxiv','handle')");
        org.apache.spark.sql.Dataset orcidPublication = orcidAuthors.join(orcidWorks, orcidAuthors.apply("orcid").equalTo((Object)orcidWorks.apply("orcid"))).select((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("identifier.schema").alias("schema"), functions$.MODULE$.col("identifier.value").alias("value"), functions$.MODULE$.struct((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{orcidAuthors.apply("orcid").alias("orcid"), functions$.MODULE$.col("givenName"), functions$.MODULE$.col("familyName")})).alias("author")}));
        return orcidPublication.cache();
    }

    public SparkEnrichGraphWithOrcidAuthors(String propertyPath, String[] args, Logger log) {
        this.log = log;
        super(propertyPath, args, log);
    }
}

