package eu.dnetlib.data.collector.plugins.mediawiki;

import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Queue;
import java.util.concurrent.PriorityBlockingQueue;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

/* loaded from: input_file:eu/dnetlib/data/collector/plugins/mediawiki/MediawikiIterator.class */
public class MediawikiIterator implements Iterator<String> {
    private static final Log log = LogFactory.getLog(MediawikiIterator.class);
    private SAXReader reader;
    private Queue<String> queue;
    private String baseUrl;
    private String apnamespace;
    private String apcontinue;
    private boolean started;

    public MediawikiIterator() {
        this.reader = new SAXReader();
        this.queue = new PriorityBlockingQueue();
    }

    public MediawikiIterator(String str, String str2) {
        this.reader = new SAXReader();
        this.queue = new PriorityBlockingQueue();
        this.baseUrl = str;
        this.apnamespace = str2;
        this.started = false;
    }

    private void verifyStarted() {
        if (this.started) {
            return;
        }
        this.apcontinue = firstPage();
        this.started = true;
    }

    @Override // java.util.Iterator
    public boolean hasNext() {
        boolean z;
        synchronized (this.queue) {
            verifyStarted();
            z = !this.queue.isEmpty();
        }
        return z;
    }

    /* JADX WARN: Can't rename method to resolve collision */
    @Override // java.util.Iterator
    public String next() {
        String poll;
        synchronized (this.queue) {
            verifyStarted();
            poll = this.queue.poll();
            while (this.queue.isEmpty() && this.apcontinue != null && !this.apcontinue.isEmpty()) {
                this.apcontinue = otherPages(this.apcontinue);
            }
        }
        return poll;
    }

    @Override // java.util.Iterator
    public void remove() {
    }

    private String firstPage() {
        String str = this.baseUrl + "?action=query&list=allpages&apnamespace=" + this.apnamespace + "&aplimit=50&format=xml";
        log.info("Downloading first page using url: " + str);
        return downloadPage(str);
    }

    private String otherPages(String str) {
        return downloadPage(this.baseUrl + "?action=query&list=allpages&apnamespace=" + this.apnamespace + "&aplimit=50&format=xml&apcontinue=" + str);
    }

    private String downloadPage(String str) {
        try {
            log.info("HTTP GET: " + str);
            Document read = this.reader.read(new URL(str).openStream());
            String valueOf = read.valueOf("//*[local-name()='query-continue']//*[local-name()='allpages']/@apcontinue");
            String str2 = "?action=wbgetentities&format=xml&ids=";
            Iterator it = read.selectNodes("//*[local-name()='p']").iterator();
            while (it.hasNext()) {
                str2 = str2 + ((Element) it.next()).valueOf("@title").split(":")[1] + "|";
            }
            Iterator it2 = this.reader.read(new URL(this.baseUrl + str2.substring(0, str2.length() - 1)).openStream()).selectNodes("//*[local-name()='entity']").iterator();
            while (it2.hasNext()) {
                this.queue.add(((Node) it2.next()).asXML());
            }
            System.out.println("Done");
            return valueOf;
        } catch (Exception e) {
            throw new RuntimeException("Error processing data from: " + str, e);
        }
    }
}
