/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.collection.plugin.oai;

import eu.dnetlib.dhp.collection.plugin.utils.XmlCleaner;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpConnector2;
import java.io.IOException;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.URLEncoder;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Queue;
import java.util.concurrent.PriorityBlockingQueue;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Node;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class OaiIterator
implements Iterator<String> {
    private static final Logger log = LoggerFactory.getLogger(OaiIterator.class);
    private static final String REPORT_PREFIX = "oai:";
    public static final String UTF_8 = "UTF-8";
    private final Queue<String> queue = new PriorityBlockingQueue<String>();
    private final String baseUrl;
    private final String set;
    private final String mdFormat;
    private final String fromDate;
    private final String untilDate;
    private String token;
    private boolean started;
    private final HttpConnector2 httpConnector;
    private final AggregatorReport report;

    public OaiIterator(String baseUrl, String mdFormat, String set, String fromDate, String untilDate, HttpConnector2 httpConnector, AggregatorReport report) {
        this.baseUrl = baseUrl;
        this.mdFormat = mdFormat;
        this.set = set;
        this.fromDate = fromDate;
        this.untilDate = untilDate;
        this.started = false;
        this.httpConnector = httpConnector;
        this.report = report;
    }

    private void verifyStarted() {
        if (!this.started) {
            this.started = true;
            try {
                this.token = this.firstPage();
            }
            catch (CollectorException e) {
                throw new IllegalStateException(e);
            }
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    @Override
    public boolean hasNext() {
        Queue<String> queue = this.queue;
        synchronized (queue) {
            this.verifyStarted();
            return !this.queue.isEmpty();
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    @Override
    public String next() {
        Queue<String> queue = this.queue;
        synchronized (queue) {
            this.verifyStarted();
            String res = this.queue.poll();
            while (this.queue.isEmpty() && this.token != null && !this.token.isEmpty()) {
                try {
                    this.token = this.otherPages(this.token);
                }
                catch (CollectorException e) {
                    throw new IllegalStateException(e);
                }
            }
            return res;
        }
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }

    private String firstPage() throws CollectorException {
        try {
            String url = this.baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(this.mdFormat, UTF_8);
            if (this.set != null && !this.set.isEmpty()) {
                url = url + "&set=" + URLEncoder.encode(this.set, UTF_8);
            }
            if (this.fromDate != null && (this.fromDate.matches("\\d{4}-\\d{2}-\\d{2}") || this.fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) {
                url = url + "&from=" + URLEncoder.encode(this.fromDate, UTF_8);
            }
            if (this.untilDate != null && (this.untilDate.matches("\\d{4}-\\d{2}-\\d{2}") || this.untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) {
                url = url + "&until=" + URLEncoder.encode(this.untilDate, UTF_8);
            }
            log.info("Start harvesting using url: {}", (Object)url);
            return this.downloadPage(url);
        }
        catch (UnsupportedEncodingException e) {
            this.report.put((Object)e.getClass().getName(), (Object)e.getMessage());
            throw new CollectorException((Throwable)e);
        }
    }

    private String extractResumptionToken(String xml) {
        String s = StringUtils.substringAfter((String)xml, (String)"<resumptionToken");
        if (s == null) {
            return null;
        }
        String result = StringUtils.substringBetween((String)s, (String)">", (String)"</");
        if (result == null) {
            return null;
        }
        return result.trim();
    }

    private String otherPages(String resumptionToken) throws CollectorException {
        try {
            return this.downloadPage(this.baseUrl + "?verb=ListRecords&resumptionToken=" + URLEncoder.encode(resumptionToken, UTF_8));
        }
        catch (UnsupportedEncodingException e) {
            this.report.put((Object)e.getClass().getName(), (Object)e.getMessage());
            throw new CollectorException((Throwable)e);
        }
    }

    private String downloadPage(String url) throws CollectorException {
        Document doc;
        String xml = this.httpConnector.getInputSource(url, this.report);
        try {
            doc = DocumentHelper.parseText((String)xml);
        }
        catch (DocumentException e) {
            log.warn("Error parsing xml, I try to clean it. {}", (Object)e.getMessage());
            this.report.put((Object)((Object)((Object)e)).getClass().getName(), (Object)e.getMessage());
            String cleaned = XmlCleaner.cleanAllEntities(xml);
            try {
                doc = DocumentHelper.parseText((String)cleaned);
            }
            catch (DocumentException e1) {
                String resumptionToken = this.extractResumptionToken(xml);
                if (resumptionToken == null) {
                    this.report.put((Object)((Object)((Object)e1)).getClass().getName(), (Object)e1.getMessage());
                    throw new CollectorException("Error parsing cleaned document:\n" + cleaned, (Throwable)e1);
                }
                return resumptionToken;
            }
        }
        Node errorNode = doc.selectSingleNode("/*[local-name()='OAI-PMH']/*[local-name()='error']");
        if (errorNode != null) {
            String code = errorNode.valueOf("@code").trim();
            if ("noRecordsMatch".equalsIgnoreCase(code)) {
                String msg = "noRecordsMatch for oai call : " + url;
                log.warn(msg);
                this.report.put((Object)(REPORT_PREFIX + code), (Object)msg);
                return null;
            }
            String msg = code + " - " + errorNode.getText();
            this.report.put((Object)"oai:error", (Object)msg);
            throw new CollectorException(msg);
        }
        for (Object o : doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
            StringWriter sw = new StringWriter();
            XMLWriter writer = new XMLWriter((Writer)sw, OutputFormat.createPrettyPrint());
            try {
                writer.write((Node)o);
                this.queue.add(sw.toString());
            }
            catch (IOException e) {
                this.report.put((Object)e.getClass().getName(), (Object)e.getMessage());
                throw new CollectorException("Error parsing XML record:\n" + ((Node)o).asXML(), (Throwable)e);
            }
        }
        return doc.valueOf("//*[local-name()='resumptionToken']");
    }

    public AggregatorReport getReport() {
        return this.report;
    }
}

