/*
 * Decompiled with CFR 0.152.
 */
package org.gcube.resource.discovery.crawler;

import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Vector;
import net.matuschek.http.HttpDocManager;
import net.matuschek.http.HttpException;
import net.matuschek.http.URLLogger;
import net.matuschek.spider.WebRobot;
import org.apache.log4j.Logger;
import org.gcube.resource.discovery.crawler.Configs;
import org.gcube.resource.discovery.urlFilter.UrlFilter;

/**
 * Thin wrapper around a {@link WebRobot} that runs a crawl from a given start
 * URL and returns the URLs that were logged during that crawl.
 *
 * <p>The robot is configured once, at construction time, through
 * {@link Configs}. The same robot instance is reused for every call to
 * {@link #getLinks(String)}.
 */
public class Crawler {
    private static final Logger logger = Logger.getLogger(Crawler.class);

    /** Maximum crawl depth applied by {@link #reconfigureForRetry()}. */
    private static final int RETRY_MAX_DEPTH = 2;

    /** The underlying robot; created eagerly and never replaced. */
    private final WebRobot crawler = new WebRobot();

    /**
     * Creates a crawler configured via {@link Configs#configureCrawler}.
     * Equivalent to {@code new Crawler(false)}.
     *
     * @throws IOException   propagated from the configuration step
     * @throws HttpException propagated from the configuration step
     */
    public Crawler() throws IOException, HttpException {
        this(false);
    }

    /**
     * Creates a crawler using one of the two configurations offered by
     * {@link Configs}.
     *
     * @param isValidator {@code true} to apply
     *                    {@code Configs.configureCrawlerForValidation},
     *                    {@code false} to apply {@code Configs.configureCrawler}
     * @throws IOException   propagated from the configuration step
     * @throws HttpException propagated from the configuration step
     */
    public Crawler(boolean isValidator) throws IOException, HttpException {
        if (isValidator) {
            Configs.configureCrawlerForValidation(this.crawler);
        } else {
            Configs.configureCrawler(this.crawler);
        }
    }

    /**
     * Sets the robot's maximum depth to {@value #RETRY_MAX_DEPTH}. Intended to
     * be called before retrying a crawl.
     */
    public void reconfigureForRetry() {
        this.crawler.setMaxDepth(RETRY_MAX_DEPTH);
    }

    /**
     * Crawls starting at {@code url} and returns the URLs written by a
     * {@link URLLogger} installed as the robot's document manager.
     *
     * <p>The given URL is first passed through
     * {@link UrlFilter#resolveRedirections}; the result becomes the robot's
     * start URL. This call replaces the robot's start URL and document manager.
     *
     * @param url the address to start crawling from
     * @return the logged URLs, one element per non-empty logged line, in the
     *         order they were written; empty (never {@code null}) when nothing
     *         was logged
     * @throws MalformedURLException if the resolved address is not a valid URL
     * @throws IOException           declared by the original contract; kept so
     *                               existing callers compile unchanged
     * @throws InterruptedException  declared by the original contract; kept so
     *                               existing callers compile unchanged
     */
    public Vector<String> getLinks(String url) throws MalformedURLException, IOException, InterruptedException {
        logger.debug("Retrieving links from url " + url);
        this.crawler.setStartURL(new URL(UrlFilter.resolveRedirections(url)));

        // Capture everything the URL logger writes in memory.
        StringWriter sink = new StringWriter();
        this.crawler.setDocManager(new URLLogger(sink));
        this.crawler.run();

        return parseLinks(sink.toString());
    }

    /**
     * Splits the captured logger output into individual links.
     *
     * <p>Empty lines are skipped: {@code "".split(...)} yields a single empty
     * string, which would otherwise show up as a bogus link when nothing was
     * logged. The separator also accepts CRLF so that no stray {@code '\r'}
     * is left at the end of a link.
     * NOTE(review): whether URLLogger ever emits CRLF is not visible here;
     * accepting it is purely defensive.
     */
    private static Vector<String> parseLinks(String logged) {
        Vector<String> result = new Vector<String>();
        for (String line : logged.split("\r?\n")) {
            if (line.length() > 0) {
                result.add(line);
            }
        }
        return result;
    }

    /**
     * Manual smoke test: builds a default crawler, prints its whole-host and
     * whole-domain flags, then prints the links retrieved from a fixed URL.
     * Failures are reported on standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Crawler c;
        try {
            c = new Crawler();
            System.out.println(c.crawler.getAllowWholeHost() + " " + c.crawler.getAllowWholeDomain());
        }
        catch (Exception e) {
            System.err.println("FATAL ERROR: Crawler could not be configured. Please check your robot.xml parameters and try again.");
            System.err.println(e.getLocalizedMessage());
            e.printStackTrace();
            return;
        }
        String url = "http://www.di.uoa.gr/gr";
        try {
            System.out.println(c.getLinks(url));
        }
        catch (Exception e) {
            System.err.println("ERROR: Crawler could not retrieve links from url " + url);
            System.err.println(e.getLocalizedMessage());
            e.printStackTrace();
        }
    }

    /**
     * Exposes the underlying robot.
     *
     * @return the {@link WebRobot} instance used by this crawler
     */
    public WebRobot getCrawler() {
        return this.crawler;
    }
}

