package eu.dnetlib.data.mapreduce.hbase.dataexport;

import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.tree.DefaultText;

import java.text.ParseException;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;

/**
 * Filters XML records by a year range and by a set of XPath/regex criteria.
 *
 * <p>A record passes the year check when the value selected by {@code yearXpath} yields a year
 * within {@code [fromYear, toYear]} (both bounds inclusive). It passes the criteria check when
 * the text of the nodes selected by the criteria XPaths matches the associated regular
 * expressions, see {@link #matches(Document, boolean)}.
 */
public class RecordFilter {

    /** Maps an XPath expression (key) to the regular expression (value) its node text must match. */
    private final Map<String, String> criteria;

    /** XPath whose string value is the date the year is extracted from. */
    private final String yearXpath;

    /** Lowest accepted year, inclusive. */
    private final int fromYear;

    /** Highest accepted year, inclusive. */
    private final int toYear;

    /**
     * @param criteria  XPath expression to regular expression; the regex is matched
     *                  case-insensitively against the whole trimmed text of each selected node
     * @param yearXpath XPath selecting the date value of a record
     * @param fromYear  lowest accepted year, inclusive
     * @param toYear    highest accepted year, inclusive
     */
    public RecordFilter(Map<String, String> criteria, String yearXpath, int fromYear, int toYear) {
        this.criteria = criteria;
        this.yearXpath = yearXpath;
        this.fromYear = fromYear;
        this.toYear = toYear;
    }

    /**
     * Tells whether the given record satisfies the year range and the criteria.
     *
     * @param record the XML record to test
     * @param strict when {@code true} every criterion must match; when {@code false} a single
     *               matching criterion is enough
     * @return {@code false} when the date or the extracted year is blank, when the year is
     *         outside the configured range, or when there are no criteria at all; otherwise
     *         the outcome of the criteria evaluation described by {@code strict}
     * @throws ParseException declared for the year extraction
     *                        (NOTE(review): presumably raised by
     *                        {@code DnetXsltFunctions.extractYear} - confirm)
     * @throws NumberFormatException if the extracted year is not a parsable integer
     */
    public boolean matches(final Document record, final boolean strict) throws ParseException {

        final String date = record.valueOf(yearXpath);
        if (StringUtils.isBlank(date)) {
            return false;
        }

        final String yyyy = DnetXsltFunctions.extractYear(date);
        if (StringUtils.isBlank(yyyy)) {
            return false;
        }

        final int year = Integer.parseInt(yyyy);
        if (year < fromYear || year > toYear) {
            return false;
        }

        // Without this guard an empty criteria map would be vacuously "all matched" in strict mode.
        if (criteria.isEmpty()) {
            return false;
        }

        for (final Map.Entry<String, String> criterion : criteria.entrySet()) {
            final boolean hit = anyNodeMatches(record, criterion.getKey(), criterion.getValue());
            if (hit && !strict) {
                // Lenient mode: the first satisfied criterion decides.
                return true;
            }
            if (!hit && strict) {
                // Strict mode: one unsatisfied criterion is enough to reject, regardless of
                // the (unspecified) iteration order of the criteria map.
                return false;
            }
        }

        // Loop exhausted: in strict mode every criterion matched, in lenient mode none did.
        return strict;
    }

    /**
     * Checks whether the trimmed text of at least one non-null node selected by {@code xpath}
     * fully matches {@code regex}, ignoring case.
     */
    private boolean anyNodeMatches(final Document record, final String xpath, final String regex) {
        // Case-insensitive flags are used instead of lower-casing the expression: lower-casing
        // the regex text would change its meaning (e.g. \D would become \d, \S would become \s).
        // Compiling here also avoids recompiling the expression for every selected node.
        final Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

        for (final Object node : record.selectNodes(xpath)) {
            if (node != null && pattern.matcher(textOf(node).trim()).matches()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the text of a selected object: the text content for a {@link DefaultText} node,
     * the {@code toString()} representation for anything else.
     */
    private static String textOf(final Object o) {
        if (o instanceof DefaultText) {
            return ((DefaultText) o).getText();
        }
        return o.toString();
    }

}
