/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.bioschemas.api;

import eu.dnetlib.bioschemas.api.crawl.CrawlRecord;
import eu.dnetlib.bioschemas.api.scraper.BMUSEScraper;
import eu.dnetlib.bioschemas.api.scraper.ScrapeState;
import eu.dnetlib.bioschemas.api.scraper.ScrapeThread;
import eu.dnetlib.bioschemas.api.utils.UrlParser;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Objects;
import java.util.Properties;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Drives one scraping session: reads the crawl settings from
 * {@code application.properties}, resolves the page URLs listed by a sitemap,
 * runs {@link ScrapeThread}s over them and finally writes the N-Quads of the
 * processed pages into a single output file.
 */
public class ServiceScrapeDriver {
    private static final String PROPERTIES_FILE = "application.properties";
    private static final String TIMESTAMP_PATTERN = "yyyy-MM-dd 'at' HH:mm:ss z";
    private static final Log logger = LogFactory.getLog(ServiceScrapeDriver.class);

    private final String sitemapUrl;
    private final String sitemapURLKey;
    private final String maxScrapedPages;
    private final String outputFilename;
    private final String outputFolder;

    // Crawl settings; overwritten by processProperties() on every run.
    private int waitTime = 1;
    private int numberOfPagesToCrawlInALoop;
    private int totalNumberOfPagesToCrawlInASession;
    private int scrapeVersion = 1;

    // Progress counter of the current run, advanced once per crawl loop.
    private int pagesCounter = 0;

    /**
     * @param sitemapUrl      location of the sitemap listing the pages to scrape
     * @param sitemapURLKey   key handed to {@link UrlParser#getSitemapList} together with the sitemap URL
     * @param maxScrapedPages optional upper bound (decimal number) on the sitemap entries to use;
     *                        {@code null} means no limit
     * @param outputFilename  name of the file receiving the scraped N-Quads
     * @param outputFolder    folder in which the output file is created
     */
    public ServiceScrapeDriver(String sitemapUrl, String sitemapURLKey, String maxScrapedPages, String outputFilename, String outputFolder) {
        this.sitemapUrl = sitemapUrl;
        this.sitemapURLKey = sitemapURLKey;
        this.maxScrapedPages = maxScrapedPages;
        this.outputFilename = outputFilename;
        this.outputFolder = outputFolder;
    }

    /**
     * Runs a complete scraping session and stores the result in the output file.
     * <p>
     * If the session is interrupted, or a scrape thread reports that it could not
     * write its output, the method logs the problem and returns without producing
     * the output file.
     *
     * @throws IOException           if the output file cannot be written
     * @throws IllegalStateException if the sitemap yields no pages or the properties cannot be loaded
     */
    public void runScrape() throws IOException {
        this.processProperties();
        // Start counting from zero so that the same driver can be run more than once.
        this.pagesCounter = 0;

        Elements urls = UrlParser.getSitemapList(this.getSitemapUrl(), this.getSitemapURLKey());
        Stream<Element> urlStream = urls.stream();
        if (Objects.nonNull(this.maxScrapedPages)) {
            urlStream = urlStream.limit(Long.parseLong(this.maxScrapedPages.trim()));
        }
        List<Element> sites = urlStream.collect(Collectors.toList());
        logger.info("Pages available for scraping: " + sites.size());

        List<CrawlRecord> pagesToPull = this.generatePagesToPull(sites);
        if (pagesToPull.isEmpty()) {
            logger.error("Cannot retrieve URLs");
            throw new IllegalStateException("No pages found from sitemap");
        }

        ScrapeState scrapeState = new ScrapeState(pagesToPull);
        logger.info("STARTING CRAWL: " + timestamp());
        if (!this.crawl(scrapeState)) {
            return;
        }
        logger.info("ENDING CRAWL: " + timestamp());

        this.writeOutput(scrapeState);
    }

    /**
     * Starts one scrape thread per loop and waits for it, until the page counter
     * reaches the configured session total.
     *
     * @return {@code true} when every loop completed, {@code false} when the crawl
     *         was interrupted or a thread reported that its file was not written
     */
    private boolean crawl(ScrapeState scrapeState) {
        while (this.pagesCounter < this.totalNumberOfPagesToCrawlInASession) {
            logger.debug(this.pagesCounter + " scraped of " + this.totalNumberOfPagesToCrawlInASession);
            ScrapeThread scrape1 = new ScrapeThread(new BMUSEScraper(), scrapeState, this.waitTime, this.scrapeVersion);
            scrape1.setName("S1");
            scrape1.start();
            long startTime = System.nanoTime();
            try {
                scrape1.join();
            } catch (InterruptedException e) {
                // Restore the flag so that callers further up can see the interruption.
                Thread.currentThread().interrupt();
                logger.error("Exception waiting on thread", e);
                return false;
            }
            if (!scrape1.isFileWritten()) {
                logger.error("Could not write output file so shutting down!");
                logger.info("ENDING CRAWL after failure at: " + timestamp());
                return false;
            }
            logger.debug("Value of isFileWritten: " + scrape1.isFileWritten());
            long timeElapsed = System.nanoTime() - startTime;
            logger.debug("Time in s to complete: " + (double) timeElapsed / 1.0E9);
            this.pagesCounter += this.numberOfPagesToCrawlInALoop;
            logger.debug("ENDED loop");
        }
        return true;
    }

    /**
     * Writes the N-Quads of every processed page, one record per line, into the
     * output file, replacing any previous content. Records whose N-Quads are
     * {@code null} are skipped.
     *
     * @throws IOException if the file cannot be opened or written
     */
    private void writeOutput(ScrapeState scrapeState) throws IOException {
        File output = new File(this.outputFolder, this.outputFilename);
        List<CrawlRecord> processed = scrapeState.getPagesProcessed();
        // append=false truncates an existing file; try-with-resources closes (and
        // flushes) the writer even when a write fails.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(output.getAbsoluteFile(), false))) {
            for (CrawlRecord page : processed) {
                String nquads = page.getNquads();
                if (nquads == null) {
                    logger.debug("Skipping record without nquads: " + page.getUrl());
                    continue;
                }
                writer.write(nquads);
                writer.newLine();
            }
        }
        logger.info(" Data stored into  " + output.getAbsolutePath());
    }

    /**
     * Builds one {@link CrawlRecord} per sitemap element, using the element text
     * as the record source and the last path segment of the record URL as its name.
     */
    private List<CrawlRecord> generatePagesToPull(List<Element> sites) {
        return sites.stream().map(site -> {
            CrawlRecord crawlRecord = new CrawlRecord(site.text());
            crawlRecord.setName(getId(crawlRecord.getUrl()));
            return crawlRecord;
        }).collect(Collectors.toList());
    }

    /**
     * Loads the crawl settings from {@code application.properties} on the classpath.
     *
     * @throws IllegalArgumentException if the file or a required property is missing,
     *                                  or {@code numberOfPagesToCrawlInALoop} is not positive
     * @throws NumberFormatException    if a property value is not an integer
     * @throws IllegalStateException    if the file cannot be read
     */
    private void processProperties() {
        ClassLoader classLoader = ServiceScrapeDriver.class.getClassLoader();
        Properties prop = new Properties();
        try (InputStream is = classLoader.getResourceAsStream(PROPERTIES_FILE)) {
            if (is == null) {
                logger.error("     Cannot find application.properties file");
                throw new IllegalArgumentException("application.propertiesfile is not found!");
            }
            prop.load(is);
        } catch (IOException e) {
            logger.error("     Cannot load application.properties", e);
            // Fail the run instead of terminating the whole JVM with a success exit code.
            throw new IllegalStateException("Cannot load " + PROPERTIES_FILE, e);
        }

        this.waitTime = requiredInt(prop, "waitTime");
        logger.info("     waitTime: " + this.waitTime);

        int pagesPerLoop = requiredInt(prop, "numberOfPagesToCrawlInALoop");
        if (pagesPerLoop <= 0) {
            // A non-positive step would keep the crawl loop from ever reaching the session total.
            throw new IllegalArgumentException("numberOfPagesToCrawlInALoop must be positive but was " + pagesPerLoop);
        }
        this.numberOfPagesToCrawlInALoop = pagesPerLoop;
        logger.info("     numberOfPagesToCrawl: " + this.numberOfPagesToCrawlInALoop);

        this.totalNumberOfPagesToCrawlInASession = requiredInt(prop, "totalNumberOfPagesToCrawlInASession");
        logger.info("     totalNumberOfPagesToCrawlInASession: " + this.totalNumberOfPagesToCrawlInASession);

        this.scrapeVersion = requiredInt(prop, "scrapeVersion");
        logger.info("     scrapeVersion: " + this.scrapeVersion);
        logger.info("\n\n\n");
    }

    /**
     * Reads a mandatory integer property, reporting the missing key by name
     * rather than failing with a bare {@code NullPointerException}.
     */
    private static int requiredInt(Properties prop, String key) {
        String value = prop.getProperty(key);
        if (value == null) {
            throw new IllegalArgumentException("Missing required property '" + key + "' in " + PROPERTIES_FILE);
        }
        return Integer.parseInt(value.trim());
    }

    /**
     * Formats the current time for the crawl log. A new formatter is created per
     * call because {@link SimpleDateFormat} instances are not safe to share
     * between threads.
     */
    private static String timestamp() {
        return new SimpleDateFormat(TIMESTAMP_PATTERN).format(new Date(System.currentTimeMillis()));
    }

    /** @return the sitemap URL given at construction time */
    public String getSitemapUrl() {
        return this.sitemapUrl;
    }

    /** @return the sitemap URL key given at construction time */
    public String getSitemapURLKey() {
        return this.sitemapURLKey;
    }

    /**
     * Returns the last {@code /}-separated segment of the given URL, or an empty
     * string when the URL has no segments at all (e.g. it consists only of slashes).
     */
    private static String getId(String pageUrl) {
        String[] parts = pageUrl.split("/");
        return parts.length == 0 ? "" : parts[parts.length - 1];
    }
}

