/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.pace.utils;

import com.google.common.collect.Lists;
import eu.dnetlib.support.Block;
import java.io.Serializable;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

/**
 * Static helpers that purge or filter an RDD of {@link Block}s before pairwise comparison.
 *
 * <p>All methods trigger Spark actions or transformations on the supplied RDD. The purging
 * methods first collect aggregate statistics to the driver and then filter the RDD with the
 * threshold derived from those statistics.
 *
 * <p>NOTE(review): this class relies on {@code Block.comparisons()} and {@code Block.elements()}
 * returning {@code int}/{@code Integer} and {@code Block.getKey()} returning {@code String};
 * that is what the casts in the previous (decompiled) version implied — confirm against
 * {@code Block}.
 */
public class BlockUtils
implements Serializable {
    /** Tolerance applied when comparing consecutive levels in {@link #getOptimalComparisonNumber}. */
    private static final double SMOOTHING_FACTOR = 1.05;

    /** Fraction of an element's blocks retained by {@link #blockFiltering}. */
    private static final double FILTERING_RATIO = 0.85;

    /**
     * Computes the comparison-count threshold used by {@link #blockPurging2}.
     *
     * <p>Blocks are grouped by their {@code comparisons()} value ("level"). For every level the
     * summed comparisons and summed {@code elements()} are computed, then accumulated over the
     * levels in ascending order. The cumulative arrays are scanned from the highest level
     * downwards; the scan stops at the first level {@code i} for which
     * {@code BC[i] * CC[i+1] < SMOOTHING_FACTOR * CC[i] * BC[i+1]} and the level visited just
     * before it is returned.
     *
     * @param blocks the blocks to analyse
     * @return the selected comparison level; {@code 0.0} when {@code blocks} is empty or holds a
     *         single level; the second-lowest level when the stop condition is never met
     */
    public static double getOptimalComparisonNumber(JavaRDD<Block> blocks) {
        // Totals are carried as long: summing int comparison counts across many blocks can
        // exceed Integer.MAX_VALUE and would silently wrap.
        List<Tuple2<Integer, Tuple2<Long, Long>>> levels = new ArrayList<>(
            blocks
                .mapToPair(b -> new Tuple2<Integer, Tuple2<Long, Long>>(
                    b.comparisons(),
                    new Tuple2<Long, Long>((long) b.comparisons(), (long) b.elements())))
                .reduceByKey((a, b) -> new Tuple2<Long, Long>(a._1() + b._1(), a._2() + b._2()))
                .collect());

        // An empty RDD yields no levels; there is nothing to threshold.
        if (levels.isEmpty()) {
            return 0.0;
        }
        levels.sort(Comparator.comparing((Tuple2<Integer, Tuple2<Long, Long>> level) -> level._1()));

        int levelCount = levels.size();
        double[] blockAssignments = new double[levelCount];
        double[] comparisonsLevel = new double[levelCount];
        double[] totalComparisonsPerLevel = new double[levelCount];

        long totalComparisons = 0L;
        long totalBlockSize = 0L;
        for (int i = 0; i < levelCount; i++) {
            Tuple2<Integer, Tuple2<Long, Long>> level = levels.get(i);
            totalComparisons += level._2()._1();
            totalBlockSize += level._2()._2();
            comparisonsLevel[i] = level._1();
            blockAssignments[i] = totalBlockSize;
            totalComparisonsPerLevel[i] = totalComparisons;
        }

        double currentBC = 0.0;
        double currentCC = 0.0;
        double currentSize = 0.0;
        double previousBC = 0.0;
        double previousCC = 0.0;
        double previousSize = 0.0;
        // Walk from the highest level down. On the first iteration the "previous" values are
        // all zero, so the stop condition (0 < 0) cannot fire there.
        for (int i = levelCount - 1; i >= 0; i--) {
            previousSize = currentSize;
            previousBC = currentBC;
            previousCC = currentCC;
            currentSize = comparisonsLevel[i];
            currentBC = blockAssignments[i];
            currentCC = totalComparisonsPerLevel[i];
            if (currentBC * previousCC < SMOOTHING_FACTOR * currentCC * previousBC) {
                break;
            }
        }
        return previousSize;
    }

    /**
     * Computes the block-size threshold used by {@link #blockPurging}.
     *
     * <p>Block sizes ({@code elements()}) are counted, sorted ascending, and for each distinct
     * size the ratio {@code cumulative total size / cumulative number of comparisons} is
     * computed, where a block of size {@code n} contributes {@code n * (n - 1) / 2} comparisons.
     * Scanning from the largest size downwards, the size whose ratio differs least from its
     * predecessor's (and by less than 1.0) is selected; if no such pair exists the largest
     * size is returned.
     *
     * @param blocks the blocks to analyse
     * @return the selected block size, or {@code 0} when {@code blocks} is empty
     */
    public static int getOptimalBlockSize(JavaRDD<Block> blocks) {
        List<Tuple2<Integer, Integer>> sizeFrequencies = new ArrayList<>(
            blocks
                .mapToPair(b -> new Tuple2<Integer, Integer>(b.elements(), 1))
                .reduceByKey((a, b) -> a + b)
                .sortByKey()
                .collect());

        // An empty RDD yields no sizes; there is nothing to threshold.
        if (sizeFrequencies.isEmpty()) {
            return 0;
        }

        int distinctSizes = sizeFrequencies.size();
        int[] blockSizes = new int[distinctSizes];
        double[] ratios = new double[distinctSizes];

        BigInteger numberOfComparisons = BigInteger.ZERO;
        BigInteger totalSizeOfBlocks = BigInteger.ZERO;
        for (int i = 0; i < distinctSizes; i++) {
            Tuple2<Integer, Integer> entry = sizeFrequencies.get(i);
            int size = entry._1();
            BigInteger blockSize = BigInteger.valueOf(size);
            BigInteger frequency = BigInteger.valueOf(entry._2());

            totalSizeOfBlocks = totalSizeOfBlocks.add(frequency.multiply(blockSize));
            // Pairs inside a block of n elements: n * (n - 1) / 2, hence the right shift.
            BigInteger pairsPerBlock =
                blockSize.multiply(blockSize.subtract(BigInteger.ONE)).shiftRight(1);
            numberOfComparisons = numberOfComparisons.add(frequency.multiply(pairsPerBlock));

            blockSizes[i] = size;
            // While no comparisons have accumulated the division yields Infinity or NaN;
            // such entries never satisfy the "< eps" test below.
            ratios[i] = totalSizeOfBlocks.doubleValue() / numberOfComparisons.doubleValue();
        }

        int optimalBlockSize = blockSizes[distinctSizes - 1];
        double eps = 1.0;
        for (int i = distinctSizes - 1; i >= 1; i--) {
            double delta = Math.abs(ratios[i] - ratios[i - 1]);
            if (delta < eps) {
                eps = delta;
                optimalBlockSize = blockSizes[i];
            }
        }
        return optimalBlockSize;
    }

    /**
     * Drops every block whose element count is not strictly below the size returned by
     * {@link #getOptimalBlockSize}. The chosen size is printed to standard output.
     *
     * @param blocks the blocks to purge
     * @return the blocks with {@code getElements().size() < optimalBlockSize}
     */
    public static JavaRDD<Block> blockPurging(JavaRDD<Block> blocks) {
        int optimalBlockSize = BlockUtils.getOptimalBlockSize(blocks);
        System.out.println("optimalBlockSize = " + optimalBlockSize);
        return blocks.filter(b -> b.getElements().size() < optimalBlockSize);
    }

    /**
     * Drops every block whose comparison count is not strictly below the level returned by
     * {@link #getOptimalComparisonNumber}. The chosen level is printed to standard output.
     *
     * @param blocks the blocks to purge
     * @return the blocks with {@code comparisons() < optimalComparisonNumber}
     */
    public static JavaRDD<Block> blockPurging2(JavaRDD<Block> blocks) {
        double optimalComparisonNumber = BlockUtils.getOptimalComparisonNumber(blocks);
        System.out.println("optimalComparisonNumber = " + optimalComparisonNumber);
        return blocks.filter(b -> (double) b.comparisons() < optimalComparisonNumber);
    }

    /**
     * Rebuilds the blocks so that each element stays only in its cheapest blocks.
     *
     * <p>For every element, the blocks containing it are sorted by ascending
     * {@code comparisons()} and only the first {@code round(count * 0.85)} are kept. The
     * surviving (block key, element) pairs are then regrouped by key into new {@link Block}s.
     * A block whose elements were all filtered out no longer appears in the result.
     *
     * @param blocks the blocks to filter
     * @return blocks rebuilt from the retained (key, element) assignments
     */
    public static JavaRDD<Block> blockFiltering(JavaRDD<Block> blocks) {
        return blocks
            // element -> (key of a block containing it, that block's comparison count)
            .flatMapToPair(b -> b.getElements().stream()
                .map(e -> new Tuple2<>(e, new Tuple2<String, Integer>(b.getKey(), b.comparisons())))
                .iterator())
            .groupByKey()
            // Selecting and re-emitting in one step avoids passing an ArrayList.subList view
            // (which is not Serializable) along as an RDD value.
            .flatMapToPair(es -> {
                List<Tuple2<String, Integer>> candidates = Lists.newArrayList(es._2());
                candidates.sort(Comparator.comparing((Tuple2<String, Integer> c) -> c._2()));
                int limit = (int) Math.round((double) candidates.size() * FILTERING_RATIO);
                return candidates.subList(0, limit).stream()
                    .map(kept -> new Tuple2<>(kept._1(), es._1()))
                    .iterator();
            })
            .groupByKey()
            .map(b -> new Block(b._1(), b._2()));
    }
}

