/**
 * Note: in the application logs, the output of log.debug(...) is equivalent to the output of log.trace(...).
 * <p>
 * Known bug: with resumptionType 'discover', collecting fails if (resultTotal % resultSizeValue) == 0.
 * Workaround: change the resultSizeValue.
 */
package eu.dnetlib.data.collector.plugins.rest;

import java.io.InputStream;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.net.HttpURLConnection;
import java.util.Iterator;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.*;

import com.google.common.collect.Maps;
import eu.dnetlib.data.collector.plugins.utils.JsonUtils;
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClients;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/**
 * Iterator over the records returned by a paged REST API.
 * <p>
 * Each call to {@link #hasNext()} downloads further pages (HTTP GET) until at least one record is queued or no
 * follow-up query is left. A page is optionally converted from JSON to XML, the entity XPath selects the records,
 * and every record is serialized to an XML string and put into the record queue.
 * <p>
 * The supported resumption types are {@code scan}, {@code count}, {@code discover}, {@code pagination} /
 * {@code page} and {@code deep-cursor}; any other value stops after the first page unless the total-count XPath
 * yields a number that is not smaller than the result size.
 * <p>
 * Note: the record queue is a {@link PriorityBlockingQueue}, so queued records are handed out in natural
 * {@code String} order, not in arrival order.
 *
 * @author Jochen Schirrwagen, Aenne Loehden, Andreas Czerniak, Alessia Bardi, Miriam Baglioni
 * @date 2020-04-09
 */
public class RestIterator implements Iterator<String> {

    // TODO: clean up the comments of replaced source code
    private static final Log log = LogFactory.getLog(RestIterator.class); // NOPMD by marko on 11/24/08 5:02 PM

    private static final String AUTH_BASIC = "basic";
    private static final String AUTH_BEARER = "bearer";

    private static final String XML_HEADER = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
    // XML produced for a result without any content: header followed by an empty wrapper element.
    private static final String EMPTY_XML = XML_HEADER + "<" + JsonUtils.wrapName + "></" + JsonUtils.wrapName + ">";

    // Pseudo query argument carrying the first page number for resumption type 'pagination'.
    private static final Pattern PAGINATION_START = Pattern.compile("paginationStart=([0-9]+)");
    private static final String PAGINATION_START_ARG_REGEX = "&?paginationStart=[0-9]+";

    private final JsonUtils jsonUtils;

    private final String baseUrl;
    private final String resumptionType;
    private final String resumptionParam;
    private final String resultFormatValue;
    private String queryParams = "";
    private final int resultSizeValue;
    private int resumptionInt = 0;            // integer resumption token (first record to harvest)
    private int resultTotal = -1;             // -1 means: total not known yet
    private String resumptionStr = Integer.toString(resumptionInt);  // string resumption token (first record to harvest or token scanned from results)
    private Transformer transformer;
    private XPath xpath;
    private String query;                     // next URL to download; empty when harvesting is finished
    private XPathExpression xprResultTotalPath;
    private XPathExpression xprResumptionPath;
    private XPathExpression xprEntity;
    private final String queryFormat;
    private final String querySize;
    private final String authMethod;
    private final String authToken;
    private final Queue<String> recordQueue = new PriorityBlockingQueue<String>();
    private int discoverResultSize = 0;
    private int pagination = 1;
    /*
    While resultFormatValue is added to the request parameter, this is used to say that the results are retrieved in json.
    useful for cases when the target API expects a resultFormatValue != json, but the results are returned in json.
    An example is the EU Open Data Portal API: resultFormatValue=standard, results are in json format.
     */
    private final String resultOutputFormat;
    /*
    Can be used to set additional request headers, like for content negotiation
     */
    private final Map<String, String> requestHeaders;


    /**
     * Creates the iterator and prepares the first query URL; no request is sent before {@link #hasNext()} is called.
     *
     * @param baseUrl            URL of the REST endpoint, without query string
     * @param resumptionType     paging strategy, see the class description
     * @param resumptionParam    name of the request parameter that carries the resumption value
     * @param resumptionXpath    XPath of the resumption token in a result page; "/" is used when blank
     * @param resultTotalXpath   XPath of the total number of results in a result page
     * @param resultFormatParam  name of the format request parameter; no format argument is sent when blank
     * @param resultFormatValue  value of the format request parameter
     * @param resultSizeParam    name of the page-size request parameter; no size argument is sent when blank
     * @param resultSizeValueStr page size, must be parsable as an integer
     * @param queryParams        additional query string arguments; {@code null} is treated as empty
     * @param entityXpath        XPath selecting the records of a result page
     * @param authMethod         "bearer" or "basic" (case-insensitive) to send an Authorization header, anything else for none
     * @param authToken          token or credentials appended to the Authorization header
     * @param resultOutputFormat "json" if the response body is JSON and has to be converted to XML first
     * @param requestHeaders     additional request headers, may be {@code null}; the map is copied
     * @throws NumberFormatException if {@code resultSizeValueStr} is not an integer
     * @throws IllegalStateException if the XML transformer or one of the XPath expressions cannot be created
     */
    public RestIterator(
            final String baseUrl,
            final String resumptionType,
            final String resumptionParam,
            final String resumptionXpath,
            final String resultTotalXpath,
            final String resultFormatParam,
            final String resultFormatValue,
            final String resultSizeParam,
            final String resultSizeValueStr,
            final String queryParams,
            final String entityXpath,
            final String authMethod,
            final String authToken,
            final String resultOutputFormat,
            final Map<String, String> requestHeaders
    ) {
        this.jsonUtils = new JsonUtils();
        this.baseUrl = baseUrl;
        this.resumptionType = resumptionType;
        this.resumptionParam = resumptionParam;
        this.resultFormatValue = resultFormatValue;
        this.queryParams = queryParams != null ? queryParams : "";
        this.resultSizeValue = Integer.parseInt(resultSizeValueStr);
        this.authMethod = authMethod;
        this.authToken = authToken;
        this.resultOutputFormat = resultOutputFormat;

        // Work on a private copy: the Authorization header is added to this map later on and the caller's map
        // must neither be modified nor be required to be mutable.
        this.requestHeaders = Maps.newHashMap();
        if (requestHeaders != null) {
            this.requestHeaders.putAll(requestHeaders);
        }

        queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue : "";
        querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr : "";

        try {
            initXmlTransformation(resultTotalXpath, resumptionXpath, entityXpath);
        } catch (Exception e) {
            throw new IllegalStateException("xml transformation init failed: " + e.getMessage(), e);
        }
        initQueue();
    }


    /**
     * Creates the transformer used to serialize records and compiles the three XPath expressions.
     */
    private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath)
            throws TransformerConfigurationException, XPathExpressionException {
        transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
        xpath = XPathFactory.newInstance().newXPath();
        xprResultTotalPath = xpath.compile(resultTotalXpath);
        xprResumptionPath = xpath.compile(StringUtils.isBlank(resumptionXpath) ? "/" : resumptionXpath);
        xprEntity = xpath.compile(entityXpath);
    }

    /**
     * Builds the URL of the first request: the bare base URL if there are no arguments at all, otherwise the
     * base URL followed by the query parameters, the size argument and the format argument.
     */
    private void initQueue() {
        final boolean noArguments = queryParams.isEmpty() && querySize.isEmpty() && queryFormat.isEmpty();
        query = noArguments ? baseUrl : baseUrl + "?" + queryParams + querySize + queryFormat;

        log.info("RestIterator.initQueue():: REST calls starting with " + query);
    }

    /**
     * Called when the iteration is finished. Nothing has to be released here: the HTTP response stream of every
     * page is closed right after the page has been read.
     */
    private void disconnect() {
        log.debug("END!!!");
    }

    /**
     * Downloads pages until a record is available or no follow-up query is left.
     *
     * @return {@code true} if at least one record is queued
     * @throws RuntimeException if downloading or parsing a page fails
     * @see java.util.Iterator#hasNext()
     */
    @Override
    public boolean hasNext() {
        synchronized (recordQueue) {
            while (recordQueue.isEmpty() && !query.isEmpty()) {
                try {
                    log.debug("RestIterator.hasNext():: get Query: " + query);
                    query = downloadPage(query);
                    log.debug("RestIterator.hasNext():: next queryURL from downloadPage(): " + query);
                } catch (CollectorServiceException e) {
                    log.debug("RestIterator.hasNext():: CollectorPlugin.next()-Exception: " + e);
                    throw new RuntimeException(e);
                }
            }
            if (recordQueue.isEmpty()) {
                disconnect();
                return false;
            }
            return true;
        }
    }

    /**
     * Returns the next queued record, downloading further pages first if the queue is empty.
     *
     * @return the next record as XML string, never {@code null}
     * @throws java.util.NoSuchElementException if no record is left
     * @see java.util.Iterator#next()
     */
    @Override
    public String next() {
        synchronized (recordQueue) {
            // hasNext() fills the queue if required, so next() also works without a preceding hasNext() call.
            if (!hasNext()) {
                throw new java.util.NoSuchElementException("RestIterator.next():: no more records");
            }
            final String res = recordQueue.poll();
            log.debug(res);
            return res;
        }
    }

    /**
     * Downloads one page, queues its records and computes the follow-up query.
     *
     * @param query URL of the page to download
     * @return the URL of the next page, or an empty string when the resumption counter has passed the result total
     * @throws CollectorServiceException declared for callers; failures inside this method are reported as
     *                                   {@link IllegalStateException}
     * @throws IllegalStateException     if the page cannot be downloaded or evaluated, or the result total is not a number
     */
    private String downloadPage(String query) throws CollectorServiceException {
        final String mode = resumptionType.toLowerCase();
        final String requestUrl = prepareRequestUrl(query, mode);

        Node resultNode = null;
        try {
            final URL qUrl = new URL(requestUrl);
            log.debug("authMethod :" + authMethod);
            addAuthorizationHeader();

            final HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
            conn.setRequestMethod("GET");
            setRequestHeader(conn);

            boolean emptyResult = false;   // true if the (JSON) response was converted to the empty wrapper XML
            int nodeCount = 0;             // number of records found on this page, 0 for an empty result
            try (InputStream httpStream = conn.getInputStream()) {
                InputStream xmlStream = httpStream;
                if ("json".equals(resultOutputFormat)) {
                    final String resultXml = jsonUtils.convertToXML(IOUtils.toString(httpStream, "UTF-8"));
                    emptyResult = isEmptyXml(resultXml);
                    xmlStream = IOUtils.toInputStream(resultXml, "UTF-8");
                }

                if (emptyResult) {
                    log.warn("resultXml is equal with emptyXml");
                } else {
                    resultNode = (Node) xpath.evaluate("/", new InputSource(xmlStream), XPathConstants.NODE);
                    nodeCount = enqueueRecords((NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET));
                }
            }

            resumptionInt += resultSizeValue;

            switch (mode) {
                case "scan":    // read of resumptionToken , evaluate next results, e.g. OAI, iterate over items
                    resumptionStr = xprResumptionPath.evaluate(resultNode);
                    break;

                case "count":   // begin at one step for all records, iterate over items
                    resumptionStr = Integer.toString(resumptionInt);
                    break;

                case "discover":   // size of result items unknown, iterate over items  (for openDOAR - 201808)
                    if (resultSizeValue < 2) {
                        log.debug("RestIterator.downloadPage().discover:: ode: discover, Param 'resultSizeValue' must greater then 1");
                        throw new CollectorServiceException("Mode: discover, Param 'resultSizeValue' must greater then 1");
                    }
                    log.debug("RestIterator.downloadPage().discover:: resumptionInt=" + Integer.toString(resumptionInt) + "; ");
                    logResumptionArgument(qUrl);
                    log.debug("RestIterator.downloadPage().discover:: nodeList.length=" + nodeCount);

                    discoverResultSize += nodeCount;
                    if (nodeCount < resultSizeValue) {
                        // a page that is not completely filled (or empty) is the last one: the total is now known
                        resultTotal = discoverResultSize;
                    } else {
                        // full page: keep the total ahead of the counter so that another page is requested
                        resumptionStr = Integer.toString(resumptionInt);
                        resultTotal = resumptionInt + 1;
                    }
                    log.debug("RestIterator.downloadPage().discover:: discoverResultSize=" + discoverResultSize);
                    break;

                case "pagination":
                case "page":         // pagination, iterate over page numbers
                    pagination += 1;
                    if (emptyResult) {
                        // An empty page ends the harvesting: freeze the total and step past it, otherwise the
                        // "resumptionInt <= resultTotal" check below would request the same page again and again.
                        resultTotal = discoverResultSize;
                        pagination = discoverResultSize + 1;
                    } else {
                        discoverResultSize += nodeCount;
                    }
                    resumptionInt = pagination;
                    resumptionStr = Integer.toString(resumptionInt);

                    log.debug("RestIterator.downloadPage().pagination:: resumptionStr=" + resumptionStr + " ; queryParams=" + queryParams + " ; resultTotal: " + resultTotal + " ; discoverResultSize: " + discoverResultSize);

                    break;

                case "deep-cursor":   // size of result items unknown, iterate over items  (for supporting deep cursor in solr)
                    resumptionStr = encodeValue(xprResumptionPath.evaluate(resultNode));
                    queryParams = queryParams.replace("&cursor=*", "");

                    // The page size was already added before the switch; replace it by the real number of
                    // records, plus one if this page is shorter than the previous one.
                    if (nodeCount < discoverResultSize) {
                        resumptionInt += (nodeCount + 1 - resultSizeValue);
                    } else {
                        resumptionInt += (nodeCount - resultSizeValue);
                    }

                    discoverResultSize = nodeCount;

                    log.debug("RestIterator.downloadPage().deep-cursor:: resumptionStr=" + resumptionStr + " ; queryParams=" + queryParams + " resumptionLengthIncreased: " + resumptionInt);

                    break;

                default:        // otherwise: abort
                    break;
            }

        } catch (Exception e) {
            log.error(e);
            throw new IllegalStateException("collection failed: " + e.getMessage(), e);
        }

        try {
            final String resultTotalXpathEval = xprResultTotalPath.evaluate(resultNode);

            log.debug("downloadPage():: resInt: " + resumptionInt + "; resultTotal: " + resultTotal + " ; resultTotalXpath eval.: " + resultTotalXpathEval);
            if ((resultTotal == -1) && (!resultTotalXpathEval.isEmpty())) {
                resultTotal = Integer.parseInt(resultTotalXpathEval);
                if (mode.equals("page") && !AUTH_BASIC.equalsIgnoreCase(authMethod)) {
                    resultTotal += 1;
                }           // to correct the upper bound
                log.info("resultTotal was -1 is now: " + resultTotal);
            }
        } catch (Exception e) {
            log.error(e);
            throw new IllegalStateException("downloadPage() resultTotal couldn't parse: " + e.getMessage(), e);
        }

        log.debug("resultTotal: " + resultTotal + " ; resInt: " + resumptionInt);
        final String nextQuery;
        if (resumptionInt <= resultTotal) {
            nextQuery = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr + queryFormat;
        } else {
            nextQuery = "";
        }
        log.debug("downloadPage() nextQueryUrl: " + nextQuery);
        return nextQuery;
    }

    /**
     * Adapts the query URL before it is requested: adds the initial {@code cursor=*} for 'deep-cursor' and
     * consumes the {@code paginationStart} pseudo argument for 'pagination'.
     *
     * @param query URL as computed by the previous page (or the initial URL)
     * @param mode  lower-cased resumption type
     * @return the URL to request
     */
    private String prepareRequestUrl(final String query, final String mode) {
        String url = query;

        // check if cursor=* is initial set otherwise add it to the queryParam URL
        if (resumptionType.equalsIgnoreCase("deep-cursor")) {
            log.debug("RestIterator.downloadPage():: check resumptionType deep-cursor and check cursor=*?" + url);
            if (!url.contains("&cursor=")) {
                url += "&cursor=*";
            }
        }

        // find pagination page start number in queryParam and remove before start the first query
        if (mode.equals("pagination") && url.contains("paginationStart=")) {
            final Matcher m = PAGINATION_START.matcher(url);
            if (m.find()) {   // false if the value is not numeric; the argument is left untouched then
                pagination = Integer.parseInt(m.group(1));

                // Remove the start number from the URL and from queryParams; the follow-up queries are built
                // from queryParams and would otherwise reset the page counter on every page.
                url = url.replaceFirst(PAGINATION_START_ARG_REGEX, "");
                queryParams = queryParams.replaceFirst(PAGINATION_START_ARG_REGEX, "");
            }
        }
        return url;
    }

    /**
     * Puts the Authorization header for the configured authentication method into the request headers.
     */
    private void addAuthorizationHeader() {
        if (AUTH_BEARER.equalsIgnoreCase(authMethod)) {
            log.trace("RestIterator.downloadPage():: authMethod before inputStream: " + AUTH_BEARER);
            requestHeaders.put("Authorization", "Bearer " + authToken);
        } else if (AUTH_BASIC.equalsIgnoreCase(authMethod)) {
            log.trace("RestIterator.downloadPage():: authMethod before inputStream: " + AUTH_BASIC);
            requestHeaders.put("Authorization", "Basic " + authToken);
        }
    }

    /**
     * Serializes every node of the list to XML and adds the non-empty results to the record queue.
     *
     * @param nodeList records selected by the entity XPath
     * @return the number of nodes in the list (including those that were not queued)
     */
    private int enqueueRecords(final NodeList nodeList) throws javax.xml.transform.TransformerException {
        final int length = nodeList.getLength();
        log.debug("RestIterator.downloadPage():: nodeList.length=" + length);
        for (int i = 0; i < length; i++) {
            final StringWriter sw = new StringWriter();
            transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
            final String toEnqueue = sw.toString();
            if (StringUtils.isBlank(toEnqueue) || isEmptyXml(toEnqueue)) {
                log.warn("RestIterator.downloadPage():: The following record resulted in empty item for the feeding queue: item #" + i);
            } else {
                recordQueue.add(toEnqueue);
            }
        }
        return length;
    }

    /**
     * Debug aid for mode 'discover': logs the arguments of the requested URL and the value of the resumption argument.
     */
    private void logResumptionArgument(final URL url) {
        final String urlArguments = url.getQuery();
        if (urlArguments == null || resumptionParam == null) {
            return;
        }
        for (String argument : urlArguments.split("&")) {
            log.debug("RestIterator.downloadPage/discover:: " + argument);
            if (!argument.startsWith(resumptionParam)) {
                continue;
            }
            final String[] keyValue = argument.split("=");
            if (keyValue.length < 2) {
                continue;   // argument without a value
            }
            if (isInteger(keyValue[1])) {
                log.debug("RestIterator.downloadPage():discover:: OldResumptionSize from Url (int): " + Integer.parseInt(keyValue[1]));
            } else {
                log.debug("RestIterator.downloadPage().discover:: OldResumptionSize from Url (str): " + keyValue[1]);
            }
        }
    }

    /**
     * @return {@code true} if the string equals (ignoring case) the XML header followed by an empty wrapper element
     */
    private boolean isEmptyXml(String s) {
        return EMPTY_XML.equalsIgnoreCase(s);
    }


    /**
     * @return {@code true} if the string can be parsed by {@link Integer#parseInt(String)}
     */
    private boolean isInteger(String s) {
        try {
            Integer.parseInt(s);
            return true;
        } catch (NumberFormatException ex) {
            // s is not an integer
            return false;
        }
    }

    // Method to encode a string value using `UTF-8` encoding scheme
    private String encodeValue(String value) {
        try {
            return URLEncoder.encode(value, StandardCharsets.UTF_8.toString());
        } catch (UnsupportedEncodingException ex) {
            throw new RuntimeException(ex);
        }
    }

    /**
     * setRequestHeader
     *
     * setRequestProperty: Sets the general request property. If a property with the key already exists, overwrite its value with the new value.
     * @param conn connection that receives all configured request headers
     */
    private void setRequestHeader(HttpURLConnection conn) {
        for (Map.Entry<String, String> header : requestHeaders.entrySet()) {
            conn.setRequestProperty(header.getKey(), header.getValue());
        }
        // only the header names are logged: the values may contain the Authorization credentials
        log.debug("Set Request Header with: " + requestHeaders.keySet());
    }

    /**
     * @return the value of the format request parameter as passed to the constructor
     */
    public String getResultFormatValue() {
        return resultFormatValue;
    }

    /**
     * @return the format of the response body as passed to the constructor
     */
    public String getResultOutputFormat() {
        return resultOutputFormat;
    }

}
