/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.collection.plugin.base;

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.collection.plugin.base.BaseCollectionInfo;
import eu.dnetlib.dhp.collection.plugin.base.BaseCollectorIterator;
import eu.dnetlib.dhp.collection.plugin.base.BaseRecordInfo;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Node;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

/**
 * Exploratory tests around {@link BaseCollectorIterator} and {@link BaseRecordInfo}.
 *
 * <p>The class is {@code @Disabled}: both tests print their findings to standard output and
 * depend on sample resources ({@code base-sample.tar}, {@code record.xml}).
 */
@Disabled
public class BaseCollectorIteratorTest {

    /** XPath selecting every element and every attribute of a document. */
    private static final String ALL_NODES_XPATH = "//*|//@*";

    /**
     * Iterates over all records of {@code base-sample.tar}, gathering per-path occurrence
     * counts, the distinct collections (with their attributes) and the distinct
     * {@code type} values, prints them, and finally checks the number of records read.
     *
     * @throws Exception if a record cannot be parsed or serialized
     */
    @Test
    void testImportFile() throws Exception {
        long count = 0L;
        BaseCollectorIterator iterator = new BaseCollectorIterator("base-sample.tar", new AggregatorReport());
        Map<String, Map<String, String>> collections = new HashMap<String, Map<String, String>>();
        Map<String, AtomicInteger> fields = new HashMap<String, AtomicInteger>();
        HashSet<String> types = new HashSet<String>();

        while (iterator.hasNext()) {
            Document record = DocumentHelper.parseText((String) iterator.next());
            if (++count % 1000L == 0L) {
                System.out.println("#\u00a0Read records: " + count);
            }
            // The selected items are dom4j nodes (elements and attributes); iterate them as
            // Object and cast, exactly as extractInfo does.
            for (Object o : record.selectNodes(ALL_NODES_XPATH)) {
                String path = ((Node) o).getPath();
                fields.computeIfAbsent(path, k -> new AtomicInteger()).incrementAndGet();

                if (!(o instanceof Element)) {
                    continue;
                }
                Element n = (Element) o;
                if ("collection".equals(n.getName())) {
                    String collName = n.getText().trim();
                    // Only the first occurrence of each non-blank collection is recorded.
                    if (StringUtils.isNotBlank(collName) && !collections.containsKey(collName)) {
                        Map<String, String> collAttrs = new HashMap<String, String>();
                        for (Object ao : n.attributes()) {
                            Attribute attr = (Attribute) ao;
                            collAttrs.put(attr.getName(), attr.getValue());
                        }
                        collections.put(collName, collAttrs);
                    }
                } else if ("type".equals(n.getName())) {
                    types.add(n.getText().trim());
                }
            }
        }

        ObjectMapper mapper = new ObjectMapper();
        for (Map.Entry<String, Map<String, String>> e : collections.entrySet()) {
            System.out.println(e.getKey() + ": " + mapper.writeValueAsString(e.getValue()));
        }
        for (Map.Entry<String, AtomicInteger> e : fields.entrySet()) {
            System.out.println(e.getKey() + ": " + e.getValue().get());
        }
        System.out.println("TYPES: ");
        for (String s : types) {
            System.out.println(s);
        }
        Assertions.assertEquals(30000L, count);
    }

    /**
     * Builds ten {@link BaseRecordInfo} beans from the {@code record.xml} resource, turns them
     * into a Spark dataset through the bean encoder and prints its schema and content.
     *
     * @throws Exception if the resource cannot be read
     */
    @Test
    public void testParquet() throws Exception {
        final String xml;
        // Close the resource stream and decode it explicitly instead of relying on the
        // platform default charset. NOTE(review): assumes record.xml is UTF-8 encoded.
        try (InputStream in = this.getClass().getResourceAsStream("record.xml")) {
            Assertions.assertNotNull(in, "Test resource record.xml not found on the classpath");
            xml = IOUtils.toString(in, StandardCharsets.UTF_8);
        }

        SparkSession spark = SparkSession.builder().master("local[*]").getOrCreate();
        try {
            ArrayList<BaseRecordInfo> ls = new ArrayList<BaseRecordInfo>();
            for (int i = 0; i < 10; ++i) {
                ls.add(this.extractInfo(xml));
            }
            JavaRDD<BaseRecordInfo> rdd = JavaSparkContext.fromSparkContext((SparkContext) spark.sparkContext()).parallelize(ls);
            Dataset<BaseRecordInfo> df = spark.createDataset(rdd.rdd(), Encoders.bean(BaseRecordInfo.class));
            df.printSchema();
            df.show(false);
        } finally {
            // Release the local Spark context once the test is done.
            spark.stop();
        }
    }

    /**
     * Parses an XML record and summarises it as a {@link BaseRecordInfo}: the header
     * identifier, the distinct node paths, the {@code type}/{@code typenorm} values
     * (prefixed with {@code "TYPE: "} / {@code "TYPE_NORM: "}) and the non-blank collections
     * with their {@code opendoar_id} and {@code ror_id} attributes.
     *
     * @param s the XML text of a single record
     * @return the populated record info
     * @throws RuntimeException wrapping the {@link DocumentException} if {@code s} is not
     *         well-formed XML
     */
    private BaseRecordInfo extractInfo(String s) {
        try {
            Document record = DocumentHelper.parseText(s);
            BaseRecordInfo info = new BaseRecordInfo();
            // Linked sets keep first-seen order while dropping duplicates.
            LinkedHashSet<String> paths = new LinkedHashSet<String>();
            LinkedHashSet<String> types = new LinkedHashSet<String>();
            ArrayList<BaseCollectionInfo> colls = new ArrayList<BaseCollectionInfo>();

            for (Object o : record.selectNodes(ALL_NODES_XPATH)) {
                paths.add(((Node) o).getPath());
                if (!(o instanceof Element)) {
                    continue;
                }
                Element n = (Element) o;
                String nodeName = n.getName();
                if ("collection".equals(nodeName)) {
                    String collName = n.getText().trim();
                    if (StringUtils.isNotBlank(collName)) {
                        BaseCollectionInfo coll = new BaseCollectionInfo();
                        coll.setId(collName);
                        coll.setOpendoarId(n.valueOf("@opendoar_id").trim());
                        coll.setRorId(n.valueOf("@ror_id").trim());
                        colls.add(coll);
                    }
                } else if ("type".equals(nodeName)) {
                    types.add("TYPE: " + n.getText().trim());
                } else if ("typenorm".equals(nodeName)) {
                    types.add("TYPE_NORM: " + n.getText().trim());
                }
            }

            info.setId(record.valueOf("//*[local-name() = 'header']/*[local-name() = 'identifier']").trim());
            info.getTypes().addAll(types);
            info.getPaths().addAll(paths);
            info.setCollections(colls);
            return info;
        }
        catch (DocumentException e) {
            throw new RuntimeException(e);
        }
    }
}

