/*
 * Decompiled with CFR 0.152.
 */
package org.gcube.dataanalysis.executor.nodes.transducers.bionym;

import com.thoughtworks.xstream.XStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.List;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.dataanalysis.ecoengine.configuration.ALG_PROPS;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.datatypes.ColumnType;
import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType;
import org.gcube.dataanalysis.ecoengine.datatypes.InputTable;
import org.gcube.dataanalysis.ecoengine.datatypes.OutputTable;
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType;
import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType;
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
import org.gcube.dataanalysis.ecoengine.interfaces.ActorNode;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
import org.gcube.dataanalysis.ecoengine.utils.Transformations;
import org.gcube.dataanalysis.ecoengine.utils.Tuple;
import org.gcube.dataanalysis.executor.generators.D4ScienceDistributedProcessing;
import org.gcube.dataanalysis.executor.job.management.QueueJobManager;
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.CometMatcherManager;
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.EVBPreprocessing;
import org.gcube.dataanalysis.executor.scripts.OSCommand;
import org.hibernate.SessionFactory;

/**
 * Distributed node implementing the BiOnym taxon-name matching workflow.
 *
 * <p>{@link #setup(AlgorithmConfiguration)} creates the destination table and counts the
 * distinct raw names of the origin table; each {@link #executeNode} invocation then processes
 * a slice of those names (optional preprocessing followed by the Comet matcher) and inserts
 * the resulting {@code (inputname, suggestion, score)} rows into the destination table.
 */
public class BionymWorkflow extends ActorNode {
    protected AlgorithmConfiguration currentconfig;
    /** Session factory used for every query issued by this node; closed in {@link #stop()} and at the end of {@link #executeNode}. */
    protected SessionFactory dbconnection;
    /** DDL template of the output table; {@code %1$s} is replaced by the table name. */
    private static final String createOutputTable = "CREATE TABLE %1$s (inputname character varying(255),   suggestion character varying(255), score real)";
    /** Number of distinct raw names found by {@link #setup(AlgorithmConfiguration)}. */
    int rawnamescount = 0;
    // Values of the shared static job-manager settings saved by setup() and restored by postProcess().
    public int prevbroadcastTimePeriod;
    public int prevmaxNumberOfStages;
    public int prevmaxMessages;
    String destinationTable;
    public static String destinationTableParam = "OutputTable";
    public static String destinationTableLable = "OutputTableLabel";
    String originTable;
    public static String originTableParam = "RawTaxaNamesTable";
    String rawnamesColumn;
    public static String rawnamesColumnParam = "RawNamesColumn";
    String parser;
    public static String parserParam = "Parser";
    String reference;
    public static String referenceParam = "ReferenceDataset";
    String soundexweight;
    public static String soundexweightParam = "SoundexVSEditDist";
    String preprocessor;
    public static String doPreprocessParam = "Preprocess";
    /** Progress indicator: set to 0 when {@link #executeNode} starts and to 1 when it ends. */
    float status;
    public static String maxMatchesParam = "MaxMatches";
    int maxMatches = 10;
    public static String pruningThresholdParam = "PruningThreshold";
    float pruningThreshold = 0.4f;
    /** Column list used when building the INSERT statement. */
    static String headers = "inputname,suggestion,score";
    /** Set by {@link #postProcess}; when still false, {@link #stop()} drops the destination table. */
    boolean haspostprocessed = false;
    /** Tuples accepted so far for the raw name currently being processed. */
    List<Tuple<String>> matchedTuples = new ArrayList<Tuple<String>>();
    /** Tuples of the last filtering pass whose score was not exactly 1. */
    List<Tuple<String>> unmatchedTuples = new ArrayList<Tuple<String>>();

    /**
     * @return the single algorithm property declared by this node
     */
    public ALG_PROPS[] getProperties() {
        return new ALG_PROPS[]{ALG_PROPS.PHENOMENON_VS_PARALLEL_PHENOMENON};
    }

    /**
     * @return the algorithm name, {@code "BIONYM"}
     */
    public String getName() {
        return "BIONYM";
    }

    /**
     * @return the human readable description of the algorithm
     */
    public String getDescription() {
        return "An algorithm implementing BiOnym, a flexible workflow approach to taxon name matching. The workflow allows to activate several taxa names matching algorithms and to get the list of possible transcriptions for a list of input raw species names with possible authorship indication.";
    }

    /**
     * Declares the inputs of the algorithm: origin table and column, destination table name
     * and label, reference dataset, preprocessor, soundex/edit-distance weighting, parser,
     * pruning threshold and maximum number of matches, followed by the default database
     * parameters appended by {@code DatabaseType.addDefaultDBPars}.
     *
     * @return the ordered list of input parameter descriptors
     */
    public List<StatisticalType> getInputParameters() {
        List<TableTemplates> inputTemplates = new ArrayList<TableTemplates>();
        inputTemplates.add(TableTemplates.GENERIC);

        InputTable originTableInput = new InputTable(inputTemplates, originTableParam, "Input table containing raw taxa names that you want to match", "byonym");
        ColumnType rawNamesColumnInput = new ColumnType(originTableParam, rawnamesColumnParam, "The column containing the raw taxa names with or without authoship information", "rawnames", false);
        ServiceType destinationTableInput = new ServiceType(ServiceParameters.RANDOMSTRING, destinationTableParam, "name of the table that will contain the matches", "bion_");
        PrimitiveType destinationLabelInput = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, destinationTableLable, "Name of the table which will contain the matches", "bionout");
        PrimitiveType referenceInput = new PrimitiveType(Enum.class.getName(), (Object) CometMatcherManager.Reference.values(), PrimitiveTypes.ENUMERATED, referenceParam, "The reference dataset to use", CometMatcherManager.Reference.FISHBASE.toString());
        PrimitiveType preprocessorInput = new PrimitiveType(Enum.class.getName(), (Object) EVBPreprocessing.Preprocessors.values(), PrimitiveTypes.ENUMERATED, doPreprocessParam, "Set a preprocessing approach for the raw strings", EVBPreprocessing.Preprocessors.EXPERT_RULES.toString());
        PrimitiveType weightInput = new PrimitiveType(Enum.class.getName(), (Object) CometMatcherManager.Weights.values(), PrimitiveTypes.ENUMERATED, soundexweightParam, "Set the use of soundex vs edit distance approaches to string matching", CometMatcherManager.Weights.EDIT_DISTANCE.toString());
        PrimitiveType parserInput = new PrimitiveType(Enum.class.getName(), (Object) CometMatcherManager.Parsers.values(), PrimitiveTypes.ENUMERATED, parserParam, "Set the genus-species-author parser to use", CometMatcherManager.Parsers.SIMPLE.toString());
        PrimitiveType pruningInput = new PrimitiveType(Float.class.getName(), null, PrimitiveTypes.NUMBER, pruningThresholdParam, "Pruning threshold for the output scores (from 0 to 1)", "0.4");
        PrimitiveType maxMatchesInput = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, maxMatchesParam, "Maximum number of matches to report per raw string", "10");

        // The order below is the order in which the parameters are exposed; the destination
        // table deliberately precedes the raw-names column, as in the original declaration.
        List<StatisticalType> parameters = new ArrayList<StatisticalType>();
        parameters.add(originTableInput);
        parameters.add(destinationTableInput);
        parameters.add(rawNamesColumnInput);
        parameters.add(destinationLabelInput);
        parameters.add(referenceInput);
        parameters.add(preprocessorInput);
        parameters.add(weightInput);
        parameters.add(parserInput);
        parameters.add(pruningInput);
        parameters.add(maxMatchesInput);
        DatabaseType.addDefaultDBPars(parameters);
        return parameters;
    }

    /**
     * @return the descriptor of the generic output table written by this algorithm
     */
    public StatisticalType getOutput() {
        List<TableTemplates> template = new ArrayList<TableTemplates>();
        template.add(TableTemplates.GENERIC);
        return new OutputTable(template, destinationTableLable, this.destinationTable, "Output  table");
    }

    /**
     * No per-node initialisation is required; intentionally empty.
     */
    public void initSingleNode(AlgorithmConfiguration config) {
    }

    /**
     * @return the current value of the progress indicator
     */
    public float getInternalStatus() {
        return this.status;
    }

    /**
     * Splits {@code tuples} by score. Tuples whose third element parses to exactly
     * {@code 1.0} are appended to {@link #matchedTuples}; all others (including tuples with
     * no, null or unparsable score, which count as {@code 0}) replace the content of
     * {@link #unmatchedTuples}.
     *
     * @param tuples tuples to classify; a {@code null} list is treated as empty and
     *               {@code null} entries are skipped
     */
    public void filterMatchedTuples(List<Tuple<String>> tuples) {
        this.unmatchedTuples = new ArrayList<Tuple<String>>();
        if (tuples == null) {
            return;
        }
        for (Tuple<String> tuple : tuples) {
            if (tuple == null) {
                continue;
            }
            if (parseScoreLeniently(tuple) == 1.0) {
                this.matchedTuples.add(tuple);
            } else {
                this.unmatchedTuples.add(tuple);
            }
        }
    }

    /** Reads the score stored at index 2 of a tuple, returning 0 when it is absent or malformed. */
    private static double parseScoreLeniently(Tuple<String> tuple) {
        List<String> elements = tuple.getElements();
        if (elements == null || elements.size() <= 2 || elements.get(2) == null) {
            return 0.0;
        }
        try {
            return Double.parseDouble(elements.get(2));
        }
        catch (NumberFormatException e) {
            System.out.println("WARNING: unparsable score '" + elements.get(2) + "' treated as 0");
            return 0.0;
        }
    }

    /**
     * Runs the matching workflow for one raw species name and renders the surviving
     * suggestions as a comma separated list of SQL value groups
     * {@code ('raw','suggestion','score')}.
     *
     * <p>The per-name tuple lists are reset on entry, so suggestions found for a previously
     * processed name never leak into the result of the current one.
     *
     * @param rawspeciesname   the raw name to match
     * @param sandboxfolder    working folder handed to the preprocessor and the matcher
     * @param preprocessor     name of an {@code EVBPreprocessing.Preprocessors} constant
     * @param enablematchers   matcher switches; index 0 enables the Comet matcher
     * @param soundexweightF   weight handed to the Comet matcher
     * @param maxResults       maximum number of results requested from the matcher
     * @param pruningThreshold minimum score a suggestion needs in order to be reported
     * @return the value groups, empty when nothing reached the threshold
     * @throws Exception if preprocessing or matching fails, or a score is malformed
     */
    public StringBuffer executeBionymWorkflow(String rawspeciesname, String sandboxfolder, String preprocessor, boolean[] enablematchers, float soundexweightF, int maxResults, float pruningThreshold) throws Exception {
        // Start from a clean slate: these are instance fields and this method is invoked
        // once per raw name by executeNode().
        this.matchedTuples = new ArrayList<Tuple<String>>();
        this.unmatchedTuples = new ArrayList<Tuple<String>>();

        List<String> rawnames = new ArrayList<String>();
        rawnames.add(rawspeciesname);

        List<Tuple<String>> preprocessednames;
        if (EVBPreprocessing.Preprocessors.valueOf(preprocessor) == EVBPreprocessing.Preprocessors.EXPERT_RULES) {
            preprocessednames = new EVBPreprocessing().preprocess(this.parser, sandboxfolder, rawnames);
        } else {
            preprocessednames = EVBPreprocessing.populateTuples(rawnames);
        }
        this.filterMatchedTuples(preprocessednames);

        // Index 0 is the only matcher currently wired in; honour its enable flag.
        boolean cometEnabled = enablematchers != null && enablematchers.length > 0 && enablematchers[0];
        if (cometEnabled) {
            CometMatcherManager comet = new CometMatcherManager();
            List<Tuple<String>> cometoutput = comet.match(this.parser, this.reference, sandboxfolder, this.unmatchedTuples, soundexweightF, maxResults);
            this.filterMatchedTuples(cometoutput);
        }
        // Imperfect matches are reported too; the pruning threshold below decides which survive.
        this.matchedTuples.addAll(this.unmatchedTuples);

        StringBuffer sb = new StringBuffer();
        String escapedRawName = escapeSqlLiteral(rawspeciesname);
        for (Tuple<String> tuple : this.matchedTuples) {
            List<String> elements = tuple.getElements();
            if (elements == null || elements.isEmpty()) {
                continue;
            }
            String scoreS = elements.size() > 2 ? elements.get(2) : null;
            float score = scoreS != null ? Float.parseFloat(scoreS) : 0.0f;
            // Written as a negated >= so that a NaN score is pruned as well.
            if (!(score >= pruningThreshold)) {
                continue;
            }
            String spname = elements.get(0);
            String authorname = elements.size() > 1 ? elements.get(1) : null;
            if (authorname != null && authorname.length() > 0) {
                spname = spname + " (" + authorname + ")";
            }
            if (sb.length() > 0) {
                sb.append(",");
            }
            sb.append("('").append(escapedRawName).append("','")
              .append(escapeSqlLiteral(spname)).append("','")
              .append(scoreS != null ? scoreS : "0").append("')");
        }
        return sb;
    }

    /** Doubles single quotes so the value can be embedded in a quoted SQL string literal. */
    private static String escapeSqlLiteral(String value) {
        return String.valueOf(value).replace("'", "''");
    }

    /**
     * Processes one slice of the distinct raw names: restores the configuration from the
     * sandbox, matches every name of the slice and inserts the results into the destination
     * table. The database session is always closed and the status set to 1 on exit.
     *
     * @param leftStartIndex                 unused by this node
     * @param numberOfLeftElementsToProcess  unused by this node
     * @param rightStartIndex                index of the first distinct raw name to process
     * @param numberOfRightElementsToProcess number of raw names to process; the range is
     *                                       clamped to the names actually retrieved
     * @param duplicate                      unused by this node
     * @param sandboxFolder                  folder holding the serialized configuration
     * @param nodeConfigurationFileObject    file name of the serialized configuration
     * @param logfileNameToProduce           unused by this node
     * @return {@code 0} on success, {@code -1} if any step failed
     */
    public int executeNode(int leftStartIndex, int numberOfLeftElementsToProcess, int rightStartIndex, int numberOfRightElementsToProcess, boolean duplicate, String sandboxFolder, String nodeConfigurationFileObject, String logfileNameToProduce) {
        try {
            this.status = 0.0f;
            long t0 = System.currentTimeMillis();
            System.out.println("Restoring configuration");
            AlgorithmConfiguration config = Transformations.restoreConfig(new File(sandboxFolder, nodeConfigurationFileObject).getAbsolutePath());
            config.setConfigPath(sandboxFolder);
            this.dbconnection = DatabaseUtils.initDBSession(config);
            this.readNodeParameters(config);
            this.printNodeParameters();
            float soundexweightF = soundexWeightFor(this.soundexweight);

            System.out.println("Retrieving names to process");
            List<?> rawnames = DatabaseFactory.executeSQLQuery(DatabaseUtils.getDinstictElements(this.originTable, this.rawnamesColumn, ""), this.dbconnection);
            System.out.println("Retrieved a total of " + rawnames.size() + " species");
            // Clamp the requested slice so an over-long range cannot run past the retrieved names.
            int start = Math.max(0, rightStartIndex);
            int end = Math.min(rawnames.size(), rightStartIndex + numberOfRightElementsToProcess);
            System.out.println("Processing from " + start + " to " + end);
            List<String> rawnamesFiltered = new ArrayList<String>();
            for (int i = start; i < end; ++i) {
                rawnamesFiltered.add(String.valueOf(rawnames.get(i)));
            }

            try {
                OSCommand.ExecuteGetLine("chmod +x *", null);
            }
            catch (Exception e) {
                // Best effort only: the run continues even if permissions cannot be changed.
                System.out.println("WARNING: could not change the permissions");
            }

            int rawscounter = 0;
            System.out.println("Processing raw names");
            for (String rawname : rawnamesFiltered) {
                // Quotes are stripped because the name is embedded in a string-built INSERT.
                rawname = rawname.replace("'", "").replace("\"", "");
                System.out.println("Processing species: " + rawname);
                StringBuffer sb = this.executeBionymWorkflow(rawname, sandboxFolder, this.preprocessor, new boolean[]{true}, soundexweightF, this.maxMatches, this.pruningThreshold);
                System.out.println("Processed species: " + rawname);
                if (sb.length() <= 0) {
                    continue;
                }
                ++rawscounter;
                System.out.println("Inserting results onto the table " + this.destinationTable);
                String insertQuery = DatabaseUtils.insertFromBuffer(this.destinationTable, headers, sb);
                System.out.println("Insert Query: " + insertQuery);
                System.out.println("Inserting values for " + rawname);
                DatabaseFactory.executeSQLUpdate(insertQuery, this.dbconnection);
                System.out.println("Successfully Inserted values for " + rawname);
            }
            System.out.println("The procedure finished successfully. Processed " + rawscounter + " species.");
            System.out.println("Elapsed Time " + (System.currentTimeMillis() - t0) + " ms");
            return 0;
        }
        catch (Exception e) {
            e.printStackTrace();
            System.out.println("warning: error in node execution " + e.getLocalizedMessage());
            System.err.println("Error in node execution " + e.getLocalizedMessage());
            return -1;
        }
        finally {
            if (this.dbconnection != null) {
                try {
                    this.dbconnection.close();
                }
                catch (Exception ignored) {
                    // Nothing useful can be done if closing the session fails.
                }
            }
            this.status = 1.0f;
        }
    }

    /** Copies the workflow parameters from the restored configuration into the instance fields. */
    private void readNodeParameters(AlgorithmConfiguration config) {
        this.destinationTable = config.getParam(destinationTableParam);
        this.originTable = config.getParam(originTableParam);
        this.rawnamesColumn = config.getParam(rawnamesColumnParam);
        this.parser = config.getParam(parserParam);
        this.reference = config.getParam(referenceParam);
        this.soundexweight = config.getParam(soundexweightParam);
        this.preprocessor = config.getParam(doPreprocessParam);
        String maxMatchesS = config.getParam(maxMatchesParam);
        this.maxMatches = maxMatchesS == null ? 10 : Integer.parseInt(maxMatchesS);
        String pruningThrS = config.getParam(pruningThresholdParam);
        this.pruningThreshold = pruningThrS == null ? 0.4f : Float.parseFloat(pruningThrS);
    }

    /** Prints the effective workflow parameters to standard output. */
    private void printNodeParameters() {
        System.out.println("Destination Table: " + this.destinationTable);
        System.out.println("Origin Table: " + this.originTable);
        System.out.println("Column of names: " + this.rawnamesColumn);
        System.out.println("Parser to use: " + this.parser);
        System.out.println("Reference Dataset: " + this.reference);
        System.out.println("Soundex Preference: " + this.soundexweight);
        System.out.println("Preprocessor:" + this.preprocessor);
        System.out.println("Pruning threshold:" + this.pruningThreshold);
        System.out.println("Number of Matches:" + this.maxMatches);
    }

    /** Maps a {@code CometMatcherManager.Weights} constant name to the numeric matcher weight. */
    private static float soundexWeightFor(String weightName) {
        switch (CometMatcherManager.Weights.valueOf(weightName)) {
            case SOUNDEX: {
                return 1.0f;
            }
            case EDIT_DISTANCE: {
                return 0.0f;
            }
            case MIXED:
            default: {
                return 0.5f;
            }
        }
    }

    /**
     * Prepares a run: opens the database session, counts the distinct raw names, (re)creates
     * the destination table and overrides the shared static job-manager settings, remembering
     * the previous values so that {@link #postProcess} can restore them.
     *
     * @param config the algorithm configuration holding table names and database parameters
     * @throws Exception if the names cannot be read or the destination table cannot be created
     */
    public void setup(AlgorithmConfiguration config) throws Exception {
        this.haspostprocessed = false;
        AnalysisLogger.getLogger().info("Initializing DB Connection");
        this.dbconnection = DatabaseUtils.initDBSession(config);
        this.destinationTable = config.getParam(destinationTableParam);
        this.originTable = config.getParam(originTableParam);
        this.rawnamesColumn = config.getParam(rawnamesColumnParam);

        List<?> rawnames = DatabaseFactory.executeSQLQuery(DatabaseUtils.getDinstictElements(this.originTable, this.rawnamesColumn, ""), this.dbconnection);
        this.rawnamescount = rawnames.size();

        AnalysisLogger.getLogger().info("Creating Destination Table " + this.destinationTable);
        try {
            DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(this.destinationTable), this.dbconnection);
        }
        catch (Exception e) {
            // A failing drop is taken to mean there was nothing to drop.
            AnalysisLogger.getLogger().info("Table " + this.destinationTable + " did not exist");
        }
        DatabaseFactory.executeSQLUpdate(String.format(createOutputTable, this.destinationTable), this.dbconnection);

        // Save and override the shared static settings for the duration of the run.
        this.prevmaxMessages = D4ScienceDistributedProcessing.maxMessagesAllowedPerJob;
        D4ScienceDistributedProcessing.maxMessagesAllowedPerJob = 1;
        this.prevbroadcastTimePeriod = QueueJobManager.broadcastTimePeriod;
        QueueJobManager.broadcastTimePeriod = 14400000;
        this.prevmaxNumberOfStages = QueueJobManager.maxNumberOfStages;
        QueueJobManager.maxNumberOfStages = 10000;
        AnalysisLogger.getLogger().info("Destination Table Created! Addressing " + this.rawnamescount + " names");
    }

    /**
     * @return the number of distinct raw names counted by {@link #setup}
     */
    public int getNumberOfRightElements() {
        return this.rawnamescount;
    }

    /**
     * @return always {@code 1}
     */
    public int getNumberOfLeftElements() {
        return 1;
    }

    /**
     * Shuts the node down. If {@link #postProcess} has not run, the destination table is
     * dropped first; in every case the database session is closed when one is open.
     */
    public void stop() {
        if (this.haspostprocessed) {
            AnalysisLogger.getLogger().info("The procedure has correctly postprocessed: shutting down the connection!");
        } else {
            try {
                AnalysisLogger.getLogger().info("The procedure did NOT correctly postprocessed ....Removing Table " + this.destinationTable + " because of computation stop!");
                DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(this.destinationTable), this.dbconnection);
            }
            catch (Exception e) {
                AnalysisLogger.getLogger().info("Table " + this.destinationTable + " did not exist");
            }
        }
        if (this.dbconnection != null) {
            try {
                this.dbconnection.close();
            }
            catch (Exception ignored) {
                // Nothing useful can be done if closing the session fails.
            }
        }
    }

    /**
     * Restores the static job-manager settings saved by {@link #setup} and marks the run as
     * post-processed so that {@link #stop()} keeps the destination table.
     *
     * @param manageDuplicates unused by this node
     * @param manageFault      unused by this node
     */
    public void postProcess(boolean manageDuplicates, boolean manageFault) {
        QueueJobManager.broadcastTimePeriod = this.prevbroadcastTimePeriod;
        QueueJobManager.maxNumberOfStages = this.prevmaxNumberOfStages;
        D4ScienceDistributedProcessing.maxMessagesAllowedPerJob = this.prevmaxMessages;
        this.haspostprocessed = true;
    }

    /**
     * Manual smoke test: serializes a sample configuration into the sandbox folder, runs
     * {@link #setup} and then processes the first raw name with {@link #executeNode}.
     *
     * @param args ignored
     * @throws Exception if the configuration cannot be written or the setup fails
     */
    public static void mainTEST(String[] args) throws Exception {
        AlgorithmConfiguration config = new AlgorithmConfiguration();
        config.setConfigPath("./cfg/");
        String sandbox = "./PARALLEL_PROCESSING";
        String configfile = "testconfig.cfg";
        config.setPersistencePath(sandbox);
        config.setParam("DatabaseUserName", "utente");
        config.setParam("DatabasePassword", "d4science");
        config.setParam("DatabaseURL", "jdbc:postgresql://statistical-manager.d.d4science.research-infrastructures.eu/testdb");
        config.setParam(destinationTableParam, "taxamatchoutputlocal");
        config.setParam(destinationTableLable, "taxamatchoutputlabel");
        config.setParam(originTableParam, "taxamatchinput");
        config.setParam(rawnamesColumnParam, "rawstrings");
        config.setParam(parserParam, CometMatcherManager.Parsers.SIMPLE.name());
        config.setParam(referenceParam, CometMatcherManager.Reference.ASFIS.name());
        config.setParam(soundexweightParam, CometMatcherManager.Weights.EDIT_DISTANCE.name());
        config.setParam(doPreprocessParam, EVBPreprocessing.Preprocessors.EXPERT_RULES.name());
        config.setParam(maxMatchesParam, "10");
        AnalysisLogger.setLogger(config.getConfigPath() + AlgorithmConfiguration.defaultLoggerFile);

        BufferedWriter oos = new BufferedWriter(new FileWriter(new File(sandbox, configfile)));
        try {
            oos.write(new XStream().toXML(config));
        }
        finally {
            // Close even when serialization fails so the file handle is not leaked.
            oos.close();
        }
        new BionymWorkflow().setup(config);
        new BionymWorkflow().executeNode(0, 1, 0, 1, false, sandbox, configfile, "test.log");
    }
}

