package eu.dnetlib.iis.metadataextraction;

import com.google.common.collect.Sets;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import junit.framework.TestCase;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.xpath.XPath;
import org.junit.Test;
import pl.edu.icm.cermine.ContentExtractor;
import pl.edu.icm.cermine.exception.AnalysisException;

/* loaded from: input_file:eu/dnetlib/iis/metadataextraction/CermineMetadataExtractionTest.class */
/**
 * Runs the CERMINE {@link ContentExtractor} over a sample PDF bundled on the test classpath
 * and checks the NLM content it produces: article title, author names, affiliations,
 * journal title, DOI, publication year, a non-empty body and the number of references.
 */
public class CermineMetadataExtractionTest extends TestCase {

    /** Classpath location of the sample PDF fed to the extractor. */
    private static final String PDF_FILE = "/eu/dnetlib/iis/metadataextraction/example-1.pdf";

    /**
     * Extracts NLM content from {@link #PDF_FILE} and compares selected elements against
     * the expected literal values.
     *
     * @throws AnalysisException declared by the CERMINE extractor calls
     * @throws IOException if reading or closing the PDF stream fails
     * @throws JDOMException if one of the XPath expressions cannot be compiled or evaluated
     */
    @Test
    public void testMetadataExtraction() throws AnalysisException, IOException, JDOMException {
        InputStream pdfStream = CermineMetadataExtractionTest.class.getResourceAsStream(PDF_FILE);
        // Fail with a readable message instead of a NullPointerException when the fixture is missing.
        assertNotNull("Test resource not found on classpath: " + PDF_FILE, pdfStream);

        // The stream is only needed while the extractor runs; try-with-resources closes it
        // exactly once, whether extraction succeeds or throws.
        Element nlmContent;
        try (InputStream in = pdfStream) {
            ContentExtractor extractor = new ContentExtractor();
            extractor.uploadPDF(in);
            nlmContent = extractor.getNLMContent();
        }

        // Attach the root element to a document before evaluating the absolute XPath
        // expressions below (same order as the original test).
        Document document = new Document(nlmContent);

        assertEquals(
                "Video Quality Prediction Models Based on Video Content Dynamics for H.264 Video over UMTS Networks",
                getElementValue(document, "/article/front//article-title"));

        Set<String> expectedAuthors = Sets.newHashSet(
                "Asiya Khan",
                "Lingfen Sun",
                "Emmanuel Ifeachor",
                "Jose-Oscar Fajardo",
                "Fidel Liberal",
                "Harilaos Koumaras");
        Set<String> actualAuthors = new HashSet<>();
        List<?> authorNodes = XPath
                .newInstance("/article/front//contrib-group/contrib[@contrib-type='author']/string-name")
                .selectNodes(nlmContent);
        for (Object node : authorNodes) {
            // Author names are compared using the element's own text (getText()).
            actualAuthors.add(((Element) node).getText());
        }
        assertEquals(expectedAuthors, actualAuthors);

        Set<String> expectedAffiliations = Sets.newHashSet(
                "0Institute of Informatics and Telecommunications, NCSR Demokritos, 15310 Athens, Greece",
                "1Department of Electronics and Telecommunications, University of the Basque Country (UPV/EHU), 48013 Bilbao, Spain",
                "2Centre for Signal Processing and Multimedia Communication, School of Computing, Communications and Electronics, University of Plymouth, Plymouth PL4 8AA, UK");
        Set<String> actualAffiliations = new HashSet<>();
        List<?> affiliationNodes = XPath.newInstance("/article/front//contrib-group/aff").selectNodes(nlmContent);
        for (Object node : affiliationNodes) {
            // Affiliations are compared using getValue(), unlike the author names above.
            actualAffiliations.add(((Element) node).getValue());
        }
        assertEquals(expectedAffiliations, actualAffiliations);

        assertEquals(
                "International Journal of Digital Multimedia Broadcasting",
                getElementValue(document, "/article/front//journal-title"));
        assertEquals(
                "10.1155/2010/608138",
                getElementValue(document, "/article/front//article-id[@pub-id-type='doi']"));
        assertEquals("2010", getElementValue(document, "/article/front//pub-date/year"));

        String body = getElementValue(document, "/article/body");
        assertNotNull(body);
        assertFalse(body.isEmpty());

        assertEquals(32, XPath.newInstance("/article/back/ref-list/ref").selectNodes(nlmContent).size());
    }

    /**
     * Evaluates an XPath expression against the document and returns its string value.
     *
     * @param document document to evaluate the expression against
     * @param xpath XPath expression to evaluate
     * @return the trimmed string value, or {@code null} if the evaluation yields {@code null}
     * @throws JDOMException if the expression cannot be compiled or evaluated
     */
    private String getElementValue(Document document, String xpath) throws JDOMException {
        String value = XPath.newInstance(xpath).valueOf(document);
        return value == null ? null : value.trim();
    }
}
