package eu.dnetlib.iis.metadataextraction;

import eu.dnetlib.iis.common.affiliation.AffiliationBuilder;
import eu.dnetlib.iis.metadataextraction.schemas.Affiliation;
import eu.dnetlib.iis.metadataextraction.schemas.Author;
import eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata;
import eu.dnetlib.iis.metadataextraction.schemas.Range;
import eu.dnetlib.iis.metadataextraction.schemas.ReferenceBasicMetadata;
import eu.dnetlib.iis.metadataextraction.schemas.ReferenceMetadata;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.avro.util.Utf8;
import org.apache.log4j.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.xpath.XPath;
import pl.edu.icm.cermine.bibref.model.BibEntry;
import pl.edu.icm.cermine.bibref.transformers.NLMElementToBibEntryConverter;
import pl.edu.icm.cermine.exception.TransformationException;

/* loaded from: input_file:eu/dnetlib/iis/metadataextraction/NlmToDocumentWithBasicMetadataConverter.class */
/**
 * Static converter turning an NLM XML document (as a JDOM tree) into an
 * {@link ExtractedDocumentMetadata} record: basic article and journal metadata,
 * affiliations, authors and bibliographic references.
 *
 * <p>The class is not instantiable; use {@link #convertFull(String, Document)} and
 * {@link #convertBibEntry(BibEntry)}.
 */
public final class NlmToDocumentWithBasicMetadataConverter {
    private static final Logger log = Logger.getLogger(NlmToDocumentWithBasicMetadataConverter.class);

    /** Page range written as {@code <digits>--<digits>}; group 1 is the start, group 2 the end. */
    private static final Pattern PAGE_RANGE_PATTERN = Pattern.compile("^([0-9]+)--([0-9]+)$");

    /** A single page number consisting of digits only. */
    private static final Pattern SINGLE_PAGE_PATTERN = Pattern.compile("^[0-9]+$");

    private NlmToDocumentWithBasicMetadataConverter() {
    }

    /**
     * Returns the children of {@code parent} named {@code name} as a typed list.
     * Keeps the unchecked conversion from the JDOM list in one place.
     */
    @SuppressWarnings("unchecked")
    private static List<Element> childElements(Element parent, String name) {
        return parent.getChildren(name);
    }

    /**
     * Evaluates {@code xpathExpression} against {@code context} and returns the selected nodes
     * as a typed list. Keeps the unchecked conversion from the XPath result in one place.
     *
     * @throws JDOMException when the expression cannot be compiled or evaluated
     */
    @SuppressWarnings("unchecked")
    private static List<Element> selectElements(String xpathExpression, Element context) throws JDOMException {
        return XPath.newInstance(xpathExpression).selectNodes(context);
    }

    /**
     * Collects the {@code aff} elements found under {@code /article/front//contrib-group}.
     *
     * @param element context element the XPath expression is evaluated against
     * @return affiliations keyed by the {@code id} attribute of their {@code aff} element, in
     *         document order; {@code null} when no affiliation is found or the lookup fails.
     *         Elements sharing the same id (including a missing id) overwrite each other.
     */
    private static Map<String, Affiliation> convertAffiliations(Element element) {
        try {
            List<Element> affNodes = selectElements("/article/front//contrib-group/aff", element);
            if (affNodes == null || affNodes.isEmpty()) {
                return null;
            }
            // Insertion-ordered map: convertFull derives affiliation positions from the
            // iteration order, so they should follow the order of the document.
            Map<String, Affiliation> affiliationsById = new LinkedHashMap<String, Affiliation>();
            for (Element affNode : affNodes) {
                affiliationsById.put(affNode.getAttributeValue("id"), AffiliationBuilder.build(affNode));
            }
            return affiliationsById;
        } catch (JDOMException e) {
            // Best effort: the document is still converted, just without affiliations.
            log.warn("unable to extract affiliations", e);
            return null;
        }
    }

    /**
     * Collects the {@code contrib} elements of type {@code author} found under
     * {@code /article/front//contrib-group}.
     *
     * @param element context element the XPath expression is evaluated against
     * @param affiliationIds affiliation identifiers; the index of an identifier in this list is
     *        the position stored for an author referencing it
     * @return one author per matching element, or {@code null} when none is found or the lookup
     *         fails. An author without any resolvable affiliation reference gets {@code null}
     *         affiliation positions.
     */
    private static List<Author> convertAuthors(Element element, List<String> affiliationIds) {
        try {
            List<Element> contribNodes = selectElements(
                    "/article/front//contrib-group/contrib[@contrib-type='author']", element);
            if (contribNodes == null || contribNodes.isEmpty()) {
                return null;
            }
            List<Author> authors = new ArrayList<Author>();
            for (Element contrib : contribNodes) {
                List<Integer> positions = new ArrayList<Integer>();
                for (Element xref : childElements(contrib, "xref")) {
                    String referencedId = xref.getAttributeValue("rid");
                    if (!"aff".equals(xref.getAttributeValue("ref-type")) || referencedId == null) {
                        continue;
                    }
                    // Single lookup: a negative index means the id is not a known affiliation.
                    int position = affiliationIds.indexOf(referencedId);
                    if (position >= 0) {
                        positions.add(Integer.valueOf(position));
                    }
                }
                authors.add(Author.newBuilder()
                        .setAuthorFullName(contrib.getChildTextNormalize("string-name"))
                        .setAffiliationPositions(positions.isEmpty() ? null : positions)
                        .build());
            }
            return authors;
        } catch (JDOMException e) {
            // Best effort: the document is still converted, just without authors.
            log.warn("unable to extract authors", e);
            return null;
        }
    }

    /**
     * Converts the {@code ref} children of {@code back/ref-list} into reference records.
     *
     * @param str document identifier; currently not used by this method
     * @param element root element of the document
     * @return {@code null} when the document has no {@code back/ref-list}; otherwise the
     *         references that carry a convertible {@code mixed-citation}. Positions are 1-based
     *         and count every {@code ref}, including the ones that are skipped.
     * @throws TransformationException when the citation converter fails
     */
    private static List<ReferenceMetadata> convertReferences(String str, Element element) throws TransformationException {
        Element back = element.getChild("back");
        if (back == null) {
            return null;
        }
        Element refList = back.getChild("ref-list");
        if (refList == null) {
            return null;
        }
        List<ReferenceMetadata> references = new ArrayList<ReferenceMetadata>();
        List<Element> refs = childElements(refList, "ref");
        if (refs == null) {
            return references;
        }
        NLMElementToBibEntryConverter converter = new NLMElementToBibEntryConverter();
        int position = 0;
        for (Element ref : refs) {
            // Incremented for every ref so that positions reflect the place in the ref-list.
            position++;
            Element citation = ref.getChild("mixed-citation");
            if (citation == null) {
                continue;
            }
            BibEntry bibEntry = converter.convert(citation, new Object[0]);
            if (bibEntry == null) {
                log.warn("got null bib-entry from element " + citation.getValue());
                continue;
            }
            ReferenceMetadata.Builder referenceBuilder = ReferenceMetadata.newBuilder();
            referenceBuilder.setPosition(Integer.valueOf(position));
            referenceBuilder.setText(bibEntry.getText());
            referenceBuilder.setBasicMetadata(convertBibEntry(bibEntry));
            references.add(referenceBuilder.build());
        }
        return references;
    }

    /**
     * Converts a bibliographic entry into basic reference metadata. Only fields present in the
     * entry are set on the result.
     *
     * <p>The {@code pages} field is used only when it is either {@code <digits>--<digits>} or a
     * single run of digits; in the latter case start and end are the same value.
     *
     * @param bibEntry entry to convert, may be {@code null}
     * @return converted metadata, or {@code null} when {@code bibEntry} is {@code null}
     */
    public static ReferenceBasicMetadata convertBibEntry(BibEntry bibEntry) {
        if (bibEntry == null) {
            return null;
        }
        ReferenceBasicMetadata.Builder builder = ReferenceBasicMetadata.newBuilder();

        List<?> authorValues = bibEntry.getAllFieldValues("author");
        if (authorValues != null && !authorValues.isEmpty()) {
            List<CharSequence> authors = new ArrayList<CharSequence>(authorValues.size());
            for (Object authorValue : authorValues) {
                authors.add((CharSequence) authorValue);
            }
            builder.setAuthors(authors);
        }

        String pages = bibEntry.getFirstFieldValue("pages");
        if (pages != null) {
            Matcher rangeMatcher = PAGE_RANGE_PATTERN.matcher(pages);
            if (rangeMatcher.matches()) {
                builder.setPages(Range.newBuilder()
                        .setStart(rangeMatcher.group(1))
                        .setEnd(rangeMatcher.group(2))
                        .build());
            } else {
                Matcher singleMatcher = SINGLE_PAGE_PATTERN.matcher(pages);
                if (singleMatcher.matches()) {
                    String page = singleMatcher.group();
                    builder.setPages(Range.newBuilder().setStart(page).setEnd(page).build());
                }
            }
        }

        String journal = bibEntry.getFirstFieldValue("journal");
        if (journal != null) {
            builder.setSource(journal);
        }
        String title = bibEntry.getFirstFieldValue("title");
        if (title != null) {
            builder.setTitle(title);
        }
        String volume = bibEntry.getFirstFieldValue("volume");
        if (volume != null) {
            builder.setVolume(volume);
        }
        String year = bibEntry.getFirstFieldValue("year");
        if (year != null) {
            builder.setYear(year);
        }
        String edition = bibEntry.getFirstFieldValue("edition");
        if (edition != null) {
            builder.setEdition(edition);
        }
        String publisher = bibEntry.getFirstFieldValue("publisher");
        if (publisher != null) {
            builder.setPublisher(publisher);
        }
        String location = bibEntry.getFirstFieldValue("location");
        if (location != null) {
            builder.setLocation(location);
        }
        String series = bibEntry.getFirstFieldValue("series");
        if (series != null) {
            builder.setSeries(series);
        }
        String number = bibEntry.getFirstFieldValue("number");
        if (number != null) {
            builder.setIssue(number);
        }
        // NOTE(review): the field key comes from a constant declared outside this file;
        // presumably it resolves to the entry's URL field name - confirm.
        String url = bibEntry.getFirstFieldValue(AbstractMetadataExtractorMapper.FAULT_SUPPLEMENTARY_DATA_URL);
        if (url != null) {
            builder.setUrl(url);
        }
        return builder.build();
    }

    /**
     * Fills {@code builder} with the metadata found under {@code front/article-meta} and
     * {@code front/journal-meta}.
     *
     * @param str document identifier, used in log messages only
     * @param element root element of the document
     * @param builder builder to fill
     * @return the same {@code builder} instance
     */
    private static ExtractedDocumentMetadata.Builder convertMeta(String str, Element element, ExtractedDocumentMetadata.Builder builder) {
        Element front = element.getChild("front");
        if (front == null) {
            return builder;
        }
        Element articleMeta = front.getChild("article-meta");
        if (articleMeta != null) {
            fillArticleMeta(str, articleMeta, builder);
        }
        Element journalMeta = front.getChild("journal-meta");
        if (journalMeta != null) {
            fillJournalMeta(str, journalMeta, builder);
        }
        return builder;
    }

    /** Sets title, abstract, keywords, external identifiers and year from {@code article-meta}. */
    private static void fillArticleMeta(String documentId, Element articleMeta, ExtractedDocumentMetadata.Builder builder) {
        Element titleGroup = articleMeta.getChild("title-group");
        if (titleGroup != null) {
            List<Element> titles = childElements(titleGroup, "article-title");
            if (titles.size() > 1) {
                log.warn("got multiple titles for document " + documentId + ", storing first title only");
            }
            // The first non-empty title wins.
            for (Element titleElement : titles) {
                String title = titleElement.getTextNormalize();
                if (title != null && !title.isEmpty()) {
                    builder.setTitle(title);
                    break;
                }
            }
        }

        Element abstractElement = articleMeta.getChild("abstract");
        if (abstractElement != null) {
            // Only the first paragraph of the abstract is stored.
            Element firstParagraph = abstractElement.getChild("p");
            if (firstParagraph != null) {
                builder.setAbstract$(firstParagraph.getTextNormalize());
            }
        }

        Element keywordGroup = articleMeta.getChild("kwd-group");
        if (keywordGroup != null) {
            for (Element keywordElement : childElements(keywordGroup, "kwd")) {
                String keyword = keywordElement.getTextNormalize();
                if (keyword != null && !keyword.isEmpty()) {
                    // The keyword list is created lazily, on the first non-empty keyword.
                    if (builder.getKeywords() == null) {
                        builder.setKeywords(new ArrayList<CharSequence>());
                    }
                    builder.getKeywords().add(keyword);
                }
            }
        }

        Map<CharSequence, CharSequence> externalIds = new HashMap<CharSequence, CharSequence>();
        for (Element articleId : childElements(articleMeta, "article-id")) {
            String idType = articleId.getAttributeValue("pub-id-type");
            // Identifiers without a declared type are stored under the "unknown" key.
            externalIds.put(new Utf8(idType != null ? idType : "unknown"), new Utf8(articleId.getTextNormalize()));
        }
        if (!externalIds.isEmpty()) {
            builder.setExternalIdentifiers(externalIds);
        }

        Element pubDate = articleMeta.getChild("pub-date");
        if (pubDate != null) {
            Element yearElement = pubDate.getChild("year");
            if (yearElement != null) {
                String yearText = yearElement.getTextNormalize();
                try {
                    builder.setYear(Integer.valueOf(yearText));
                } catch (NumberFormatException e) {
                    // A non-numeric year is logged and otherwise ignored.
                    log.error("unable to parse year, unsupported format: " + yearText + ", document id" + documentId, e);
                }
            }
        }
    }

    /** Sets journal title and publisher name from {@code journal-meta}. */
    private static void fillJournalMeta(String documentId, Element journalMeta, ExtractedDocumentMetadata.Builder builder) {
        Element journalTitleGroup = journalMeta.getChild("journal-title-group");
        if (journalTitleGroup != null) {
            List<Element> journalTitles = childElements(journalTitleGroup, "journal-title");
            if (!journalTitles.isEmpty()) {
                builder.setJournal(journalTitles.get(0).getTextNormalize());
                if (journalTitles.size() > 1) {
                    log.warn("got multiple journal titles, retrieving first title only. Document id: " + documentId);
                }
            }
        }

        Element publisher = journalMeta.getChild("publisher");
        if (publisher != null) {
            Element publisherNameElement = publisher.getChild("publisher-name");
            if (publisherNameElement != null) {
                String publisherName = publisherNameElement.getTextNormalize();
                if (publisherName != null && !publisherName.isEmpty()) {
                    builder.setPublisher(publisherName);
                }
            }
        }
    }

    /**
     * Converts a whole NLM document into extracted metadata: basic metadata, volume, issue,
     * page range, affiliations, authors and references.
     *
     * @param str document identifier, stored as the record id; must not be {@code null}
     * @param document parsed NLM document; when {@code null} a record holding only the id is
     *        returned
     * @return the built metadata record
     * @throws RuntimeException when {@code str} is {@code null}
     * @throws JDOMException declared for callers; XPath failures inside this class are logged
     *         and handled rather than propagated
     * @throws TransformationException when a reference citation cannot be converted
     */
    public static ExtractedDocumentMetadata convertFull(String str, Document document) throws JDOMException, TransformationException {
        if (str == null) {
            throw new RuntimeException("unable to set null id");
        }
        ExtractedDocumentMetadata.Builder builder = ExtractedDocumentMetadata.newBuilder();
        builder.setId(str);
        if (document == null) {
            return builder.build();
        }
        Element root = document.getRootElement();
        convertMeta(str, root, builder);

        Element front = root.getChild("front");
        Element articleMeta = front != null ? front.getChild("article-meta") : null;
        if (articleMeta != null) {
            Element volume = articleMeta.getChild("volume");
            if (volume != null) {
                builder.setVolume(volume.getTextNormalize());
            }
            Element issue = articleMeta.getChild("issue");
            if (issue != null) {
                builder.setIssue(issue.getTextNormalize());
            }
            Range.Builder pagesBuilder = Range.newBuilder();
            Element firstPage = articleMeta.getChild("fpage");
            if (firstPage != null) {
                pagesBuilder.setStart(firstPage.getTextNormalize());
            }
            Element lastPage = articleMeta.getChild("lpage");
            if (lastPage != null) {
                pagesBuilder.setEnd(lastPage.getTextNormalize());
            }
            // A range is stored as soon as either boundary is known.
            if (pagesBuilder.hasStart() || pagesBuilder.hasEnd()) {
                builder.setPages(pagesBuilder.build());
            }
        }

        // Ids and affiliations are kept in two parallel lists: the index of an id is the
        // affiliation position that authors refer to.
        Map<String, Affiliation> affiliationsById = convertAffiliations(root);
        List<String> affiliationIds = new ArrayList<String>();
        List<Affiliation> affiliations = new ArrayList<Affiliation>();
        if (affiliationsById != null && !affiliationsById.isEmpty()) {
            for (Map.Entry<String, Affiliation> entry : affiliationsById.entrySet()) {
                affiliationIds.add(entry.getKey());
                affiliations.add(entry.getValue());
            }
            builder.setAffiliations(affiliations);
        }

        List<Author> authors = convertAuthors(root, affiliationIds);
        if (authors != null && !authors.isEmpty()) {
            builder.setAuthors(authors);
        }

        List<ReferenceMetadata> references = convertReferences(str, root);
        if (references != null && !references.isEmpty()) {
            builder.setReferences(references);
        }
        return builder.build();
    }
}
