package eu.dnetlib.data.mapreduce.wf.dataimport;

import com.googlecode.sarasvati.Arc;
import com.googlecode.sarasvati.Engine;
import com.googlecode.sarasvati.NodeToken;
import eu.dnetlib.data.information.DataSourceResolver;
import eu.dnetlib.data.mapreduce.wf.HdfsJobNode;
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory;
import java.io.IOException;
import java.io.StringReader;
import java.util.Iterator;
import javax.xml.ws.wsaddressing.W3CEndpointReference;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.dom4j.io.SAXReader;
import org.springframework.beans.factory.annotation.Required;

/* loaded from: input_file:eu/dnetlib/data/mapreduce/wf/dataimport/MDStoreExporter.class */
/**
 * Workflow node that dumps the records of an mdstore into a Hadoop {@link SequenceFile}.
 *
 * <p>The node reads the {@code repoId} and {@code mdstoreDataSource} attributes from the token
 * environment, resolves the data source to a result set, and writes every record that carries an
 * {@code objIdentifier} element to {@code <hdfsDestinationDirectory>/<repoId>.seq}. The record
 * identifier is used as key and the full record text as value. The resulting path is published
 * in the token environment under the {@code sequenceFile} attribute.
 */
public class MDStoreExporter extends HdfsJobNode {
    /** Directory under which the sequence file is created. */
    private String hdfsDestinationDirectory;
    /** Resolves the {@code mdstoreDataSource} attribute to a retrievable data source. */
    private DataSourceResolver dataSourceResolver;
    /** Builds the client used to iterate over the records of the result set. */
    private ResultSetClientFactory resultsetClientFactory;
    private static final Log log = LogFactory.getLog(MDStoreExporter.class);

    /**
     * Exports the mdstore referenced by the token environment and completes the token on the
     * default arc; any failure is reported through {@code failed(...)}.
     *
     * @param engine the workflow engine used to complete the token
     * @param nodeToken the token whose environment supplies the input attributes
     */
    protected void executeAsync(Engine engine, NodeToken nodeToken) {
        String repoId = nodeToken.getFullEnv().getAttribute("repoId");
        String mdstoreDataSource = nodeToken.getFullEnv().getAttribute("mdstoreDataSource");
        Configuration configuration = (Configuration) nodeToken.getEnv().getTransientAttribute("hbaseConf");
        Path path = new Path(getHdfsDestinationDirectory() + "/" + repoId + ".seq");
        nodeToken.getEnv().setAttribute("sequenceFile", path.toString());
        log.info("exporting mdstore of repo " + repoId + " to: " + path.toString());
        try {
            // Remove the output of a previous run before writing the new file.
            deleteHdfsFile(configuration, path);
            write(path, this.dataSourceResolver.resolve(mdstoreDataSource).retrieve());
            engine.complete(nodeToken, Arc.DEFAULT_ARC);
        } catch (Throwable th) {
            // Workflow boundary: every failure is handed to the node's failure handler.
            failed(engine, nodeToken, th);
        }
    }

    /**
     * Writes all valid records of the result set to the sequence file at {@code path}.
     *
     * <p>Records that cannot be parsed, or that have no {@code objIdentifier}, are logged and
     * skipped. Errors raised while appending to the sequence file are NOT skipped: they propagate
     * to the caller so that a broken export is not reported as successful.
     *
     * @param path destination of the sequence file
     * @param w3CEndpointReference reference to the result set holding the records
     * @throws IOException if the sequence file cannot be created, written or closed
     */
    private void write(Path path, W3CEndpointReference w3CEndpointReference) throws IOException {
        int written = 0;
        SAXReader xmlReader = new SAXReader();
        SequenceFile.Writer writer = getSequenceFileWriter(path);
        try {
            Iterator<?> records = this.resultsetClientFactory.getClient(w3CEndpointReference).iterator();
            while (records.hasNext()) {
                String record = (String) records.next();
                String objIdentifier = extractObjIdentifier(xmlReader, record);
                if (objIdentifier == null) {
                    continue;
                }
                // Deliberately outside any catch-all: an I/O failure here must abort the export.
                writer.append(new Text(objIdentifier), new Text(record));
                written++;
            }
        } finally {
            // Always release the writer, also when the result set or the append fails.
            writer.close();
        }
        log.info("written " + written + " records in sequence file: " + path.toString());
    }

    /**
     * Parses a record and extracts the text of its {@code objIdentifier} element.
     *
     * @param xmlReader reader used to parse the record
     * @param record the XML record as text
     * @return the identifier, or {@code null} (after logging a warning) when the record cannot be
     *     parsed or has no non-empty {@code objIdentifier}
     */
    private String extractObjIdentifier(SAXReader xmlReader, String record) {
        try {
            String objIdentifier = xmlReader.read(new StringReader(record)).valueOf("//*[local-name()='objIdentifier']");
            if (objIdentifier == null || objIdentifier.isEmpty()) {
                log.warn("invalid record (missing objIdentifier) !\n" + record);
                return null;
            }
            return objIdentifier;
        } catch (Exception e) {
            // Best effort: a single malformed record must not stop the whole export.
            log.warn("invalid record!\n" + record, e);
            return null;
        }
    }

    /**
     * Creates a block-compressed sequence file writer with {@link Text} keys and values.
     *
     * <p>NOTE(review): this uses a fresh {@link Configuration} rather than the {@code hbaseConf}
     * instance used for the delete step in {@code executeAsync} - confirm both resolve to the
     * same file system.
     *
     * @param path destination of the sequence file
     * @return the writer; the caller is responsible for closing it
     * @throws IOException if the writer cannot be created
     */
    private SequenceFile.Writer getSequenceFileWriter(Path path) throws IOException {
        return SequenceFile.createWriter(
                new Configuration(),
                new SequenceFile.Writer.Option[] {
                    SequenceFile.Writer.file(path),
                    SequenceFile.Writer.keyClass(Text.class),
                    SequenceFile.Writer.valueClass(Text.class),
                    SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK)
                });
    }

    /** @return the directory under which the sequence file is created */
    public String getHdfsDestinationDirectory() {
        return this.hdfsDestinationDirectory;
    }

    /** @param str the directory under which the sequence file is created */
    @Required
    public void setHdfsDestinationDirectory(String str) {
        this.hdfsDestinationDirectory = str;
    }

    /** @return the resolver used to look up the mdstore data source */
    public DataSourceResolver getDataSourceResolver() {
        return this.dataSourceResolver;
    }

    /** @param dataSourceResolver the resolver used to look up the mdstore data source */
    @Required
    public void setDataSourceResolver(DataSourceResolver dataSourceResolver) {
        this.dataSourceResolver = dataSourceResolver;
    }

    /** @return the factory used to create result set clients */
    public ResultSetClientFactory getResultsetClientFactory() {
        return this.resultsetClientFactory;
    }

    /** @param resultSetClientFactory the factory used to create result set clients */
    @Required
    public void setResultsetClientFactory(ResultSetClientFactory resultSetClientFactory) {
        this.resultsetClientFactory = resultSetClientFactory;
    }
}
