package eu.dnetlib.data.mapreduce.hbase.propagation.projecttoresult;

import com.google.protobuf.InvalidProtocolBufferException;
import eu.dnetlib.data.mapreduce.hbase.propagation.Value;
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
import eu.dnetlib.data.proto.OafProtos;
import eu.dnetlib.data.proto.TypeProtos;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import java.io.IOException;
import java.util.*;
import java.util.stream.Collectors;

import static eu.dnetlib.data.mapreduce.hbase.propagation.PropagationConstants.*;
import static eu.dnetlib.data.mapreduce.hbase.propagation.Utils.getEntity;


/**
 * Mapper of the project-to-result propagation job.
 *
 * <p>Only rows whose key decodes to the {@code result} type are processed. For every such row
 * that yields a non-null entity and has at least one target in the {@code OUTCOME_PRODUCEDBY}
 * column family, the mapper writes:
 * <ul>
 *   <li>one record per target reached through any of the configured semantic relations, keyed by
 *       the target id and carrying the comma separated list of project ids of the current row
 *       together with the configured trust ({@code Type.fromsemrel});</li>
 *   <li>one record keyed by the id of the current entity and carrying the same project list
 *       ({@code Type.fromresult}), so that the projects already associated to this result are
 *       known downstream.</li>
 * </ul>
 *
 * <p>Output keys are always encoded with {@link Bytes#toBytes(String)} (UTF-8) so that they do not
 * depend on the default charset of the JVM running the task.
 */
public class ProjectToResultMapper extends TableMapper<ImmutableBytesWritable, Text> {

    /** Separator used to serialise the set of project ids into a single string. */
    private static final String SEPARATOR = ",";

    /** Column families (semantic relations) whose targets receive the project list. */
    private String[] semanticRelations;

    /** Trust value handed to {@code Value.newInstance} for records emitted through a semantic relation. */
    private String trust;

    /** Reusable output key. */
    private ImmutableBytesWritable keyOut;

    /** Reusable output value. */
    private Text valueOut;

    /**
     * Allocates the reusable output writables and reads the job configuration.
     *
     * @param context the task context providing the job configuration
     */
    @Override
    protected void setup(final Context context) throws IOException, InterruptedException {
        keyOut = new ImmutableBytesWritable();
        valueOut = new Text();

        semanticRelations = context.getConfiguration()
                .getStrings("propagatetoproject.semanticrelations", DEFAULT_PROJECT_RELATION_SET);
        trust = context.getConfiguration().get("propagatetoproject.trust", "0.85");
    }

    /**
     * Processes a single HBase row; see the class documentation for the records that are written.
     *
     * @param keyIn   the row key, decoded to find out the entity type
     * @param value   the row content
     * @param context the task context used for output and counters
     */
    @Override
    protected void map(final ImmutableBytesWritable keyIn, final Result value, final Context context)
            throws IOException, InterruptedException {
        final TypeProtos.Type type = OafRowKeyDecoder.decode(keyIn.copyBytes()).getType();
        // rows that are not results are not relevant for this job
        if (!type.equals(TypeProtos.Type.result)) {
            return;
        }

        // verify that the entity is valid
        final OafProtos.OafEntity entity = getEntity(value, type);
        if (entity == null) {
            context.getCounter(COUNTER_PROPAGATION, "Del by inference or null body for result").increment(1);
            return;
        }

        context.getCounter(COUNTER_PROPAGATION, "Valid result ").increment(1);

        // selection of all the projects associated to this result
        final String projectIdList = getProjectIdList(value, context);
        if (StringUtils.isBlank(projectIdList)) {
            // no project to propagate: nothing is emitted for this row
            return;
        }

        // selection of all the results linked to this one through any of the allowed semantic relations
        final Set<String> targets = new HashSet<>();
        for (final String semanticRelation : semanticRelations) {
            targets.addAll(getRelationTarget(value, semanticRelation, context));
        }
        if (!targets.isEmpty()) {
            emit(context, targets, projectIdList);
            context.getCounter(COUNTER_PROPAGATION, "emit for semantic relation").increment(targets.size());
        }

        // This record specifies which projects are already associated to this result,
        // so that no update is written for them when they come from a related result.
        keyOut.set(Bytes.toBytes(entity.getId()));
        valueOut.set(Value.newInstance(projectIdList, Type.fromresult).toJson());
        context.write(keyOut, valueOut);
        context.getCounter(COUNTER_PROPAGATION, "emit for result").increment(1);
    }

    /**
     * Writes, for each relation target, a record keyed by the target id whose value carries the
     * project list of the relation source and the configured trust.
     *
     * @param context         the task context used for output
     * @param toemitrelations ids of the relation targets
     * @param projectIdList   comma separated project ids associated to the relation source
     */
    private void emit(final Context context, final Set<String> toemitrelations, final String projectIdList)
            throws IOException, InterruptedException {
        for (final String target : toemitrelations) {
            keyOut.set(Bytes.toBytes(target));
            valueOut.set(Value.newInstance(projectIdList, trust, Type.fromsemrel).toJson());
            context.write(keyOut, valueOut);
        }
    }

    /**
     * Collects the targets of the {@code OUTCOME_PRODUCEDBY} column family of the row.
     *
     * @param value   the row content
     * @param context the task context used for counters
     * @return the targets joined by {@link #SEPARATOR}, or {@code null} when there is none
     */
    private String getProjectIdList(final Result value, final Context context) {
        final Set<String> projectIds = getRelationTarget(value, OUTCOME_PRODUCEDBY, context);
        return projectIds.isEmpty() ? null : String.join(SEPARATOR, projectIds);
    }

    /**
     * Returns the targets of the relations stored in the given column family, skipping the
     * relations that cannot be parsed, are not initialized or are flagged as deleted by inference.
     * The counter named after the column family is incremented by the number of its cells.
     *
     * @param value   the row content
     * @param sem_rel name of the column family (semantic relation) to read
     * @param context the task context used for counters
     * @return the set of target ids, possibly empty, never {@code null}
     */
    private Set<String> getRelationTarget(final Result value, final String sem_rel, final Context context) {
        final Map<byte[], byte[]> relationMap = value.getFamilyMap(Bytes.toBytes(sem_rel));
        if (relationMap == null) {
            // Result#getFamilyMap may return null (e.g. for an empty Result): no relation to follow
            return new HashSet<>();
        }

        context.getCounter(COUNTER_PROPAGATION, sem_rel).increment(relationMap.size());

        // The targets could be read from the qualifiers (the keys of the family map), but the
        // cell bodies are needed anyway to discard the relations deleted by inference.
        return relationMap.values().stream()
                .map(this::asOaf)
                .filter(this::isValid)
                .filter(oaf -> !oaf.getDataInfo().getDeletedbyinference())
                .map(oaf -> oaf.getRel().getTarget())
                .collect(Collectors.toCollection(HashSet::new));
    }

    /**
     * Parses the body of a cell as an {@code Oaf} message.
     *
     * @param r the serialised message
     * @return the parsed message, or {@code null} when the bytes are not a valid message
     */
    private OafProtos.Oaf asOaf(final byte[] r) {
        try {
            return OafProtos.Oaf.parseFrom(r);
        } catch (InvalidProtocolBufferException ignored) {
            // best effort: a malformed relation is skipped by the caller instead of failing the task
            return null;
        }
    }

    /**
     * Tells whether the message can be used.
     *
     * @param oaf the message to check, possibly {@code null}
     * @return {@code true} when the message is not {@code null} and is initialized
     */
    private boolean isValid(final OafProtos.Oaf oaf) {
        return (oaf != null) && oaf.isInitialized();
    }
}
