package eu.dnetlib.data.utility.resource_discovery.plugin.crawler;

import eu.dnetlib.data.utility.resource_discovery.url_filter.UrlFilter;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import net.matuschek.http.AbstractHttpDocManager;
import net.matuschek.http.DocManagerException;
import net.matuschek.http.HttpDoc;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/* loaded from: input_file:eu/dnetlib/data/utility/resource_discovery/plugin/crawler/HttpFilter.class */
public class HttpFilter extends AbstractHttpDocManager {
    private static final Log logger = LogFactory.getLog(HttpFilter.class);
    private ArrayList<ResourceObject> objects = new ArrayList<>();

    public void processDocument(HttpDoc httpDoc) throws DocManagerException {
        try {
            logger.debug("Getting mimeType from " + httpDoc.getURL() + " which is " + httpDoc.getHeaderValue("Content-Type"));
            if (UrlFilter.checkMimeTypeProvided(httpDoc.getHeaderValue("Content-Type"))) {
                logger.debug("Adding to resourceUrls");
                ResourceObject resourceObject = new ResourceObject();
                resourceObject.setUrl(httpDoc.getURL().toString());
                if (httpDoc.getContent().length < 51200) {
                    InputStream openStream = new URL(httpDoc.getURL().toString()).openStream();
                    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
                    while (true) {
                        int read = openStream.read();
                        if (read == -1) {
                            break;
                        } else {
                            byteArrayOutputStream.write(read);
                        }
                    }
                    resourceObject.setContent(byteArrayOutputStream.toByteArray());
                } else {
                    resourceObject.setContent(httpDoc.getContent());
                }
                this.objects.add(resourceObject);
            }
        } catch (Exception e) {
            throw new DocManagerException(e.toString());
        }
    }

    public List<ResourceObject> getResources() {
        return this.objects;
    }
}
