/*
 * Decompiled with CFR 0.152.
 */
package org.gcube.textextractor.extractors;

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FilenameUtils;
import org.apache.tika.language.LanguageIdentifier;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.html.HtmlParser;
import org.apache.tika.sax.BodyContentHandler;
import org.gcube.semantic.annotator.AnnotationBase;
import org.gcube.semantic.annotator.utils.ANNOTATIONS;
import org.gcube.textextractor.entities.ExtractedEntity;
import org.gcube.textextractor.entities.ShortenCE4NameResponse;
import org.gcube.textextractor.extractors.InformationExtractor;
import org.gcube.textextractor.helpers.ExtractorHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;

public class HTMLExtractor
extends InformationExtractor {
    /** SLF4J logger shared by all methods of this extractor. */
    private static final Logger logger = LoggerFactory.getLogger(HTMLExtractor.class);

    /**
     * Command-line entry point for a quick manual check: extracts the fields of a
     * single HTML file and prints the resulting map to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the HTML file to process.
     *             When no argument is supplied the previously hard-coded sample path is used.
     * @throws Exception if {@link #extractFieldsFromFile(String)} fails
     */
    public static void main(String[] args) throws Exception {
        // Fall back to the original developer sample so existing invocations keep working.
        String path = args != null && args.length > 0 ? args[0] : "/home/alex/Downloads/244.html";
        HTMLExtractor ex = new HTMLExtractor();
        System.out.println(ex.extractFieldsFromFile(path));
    }

    /**
     * Parses one HTML file and returns the extracted fields as a flat map.
     *
     * <p>The map contains {@code documentID} (the file name as given), {@code text}
     * (body text with empty lines removed), {@code title} (from the parser metadata),
     * {@code language} (detected from the text) and {@code provenance}, followed by
     * everything returned by {@link #customFields(String)}. Because the custom fields
     * are merged with {@code putAll}, any key they share with the entries above
     * replaces the earlier value.
     *
     * @param filename path of the HTML file to process
     * @return the extracted fields, never {@code null}
     * @throws Exception if the file cannot be opened or parsed, or if the custom
     *                   fields could not be read; the failure is logged before being rethrown
     */
    @Override
    public Map<String, String> extractFieldsFromFile(String filename) throws Exception {
        logger.info("Processing file : " + filename);
        long starttime = System.currentTimeMillis();
        try {
            BodyContentHandler handler = new BodyContentHandler();
            Metadata metadata = new Metadata();
            FileInputStream input = new FileInputStream(filename);
            try {
                new HtmlParser().parse((InputStream)input, (ContentHandler)handler, metadata, new ParseContext());
            }
            finally {
                // The parser does not close the stream; without this every processed file leaks a descriptor.
                input.close();
            }
            String text = ExtractorHelper.removeEmptyLines(handler.toString());
            HashMap<String, String> info = new HashMap<String, String>();
            info.put("documentID", filename);
            info.put("text", text);
            info.put("title", metadata.get("title"));
            info.put("language", new LanguageIdentifier(text).getLanguage());
            info.put("provenance", "WIOFish");
            long part_start_time = System.currentTimeMillis();
            Map<String, String> fields = HTMLExtractor.customFields(filename);
            long part_end_time = System.currentTimeMillis();
            if (fields == null) {
                // customFields signals an unreadable file with null; fail with a clear cause
                // instead of the NullPointerException that putAll(null) would raise.
                throw new IOException("could not read custom fields from : " + filename);
            }
            info.putAll(fields);
            logger.info("~> field extraction time  : " + (double)(part_end_time - part_start_time) / 1000.0 + " secs");
            return info;
        }
        catch (Exception e) {
            logger.error("error while extracting fields from  : " + filename, (Throwable)e);
            throw e;
        }
        finally {
            long endtime = System.currentTimeMillis();
            logger.info("time processing file : " + filename + " : " + (double)(endtime - starttime) / 1000.0 + " secs");
        }
    }

    /**
     * Extracts and enriches the records of every file that
     * {@code ExtractorHelper.getFilenames(path)} returns for {@code path}.
     *
     * <p>Each file is processed independently: a failure in extraction or
     * enrichment is logged and that file is skipped, so one bad document does not
     * abort the whole batch.
     *
     * @param path location handed to {@code ExtractorHelper.getFilenames}
     * @return the enriched records of all files that were processed successfully;
     *         the list never contains {@code null} elements
     * @throws FileNotFoundException declared by the overridden method
     */
    @Override
    public List<Map<String, String>> extractInfo(String path) throws FileNotFoundException {
        ArrayList<Map<String, String>> extractedInfo = new ArrayList<Map<String, String>>();
        int cnt = 0;
        List<String> filenames = ExtractorHelper.getFilenames(path);
        for (String filename : filenames) {
            logger.info("Processing file : " + ++cnt + " " + filename);
            try {
                Map<String, String> info = this.extractFieldsFromFile(filename);
                long part_start_time = System.currentTimeMillis();
                Map<String, String> enriched = this.enrichRecord(info, filename);
                long part_end_time = System.currentTimeMillis();
                logger.info("~> field enrichment time  : " + (double)(part_end_time - part_start_time) / 1000.0 + " secs");
                if (enriched == null) {
                    // enrichRecord returns null when it cannot re-read the HTML; adding that
                    // null to the result would only fail later in whoever consumes the list.
                    logger.error("enrichment returned no record for : " + filename + " . will skip this file");
                    continue;
                }
                extractedInfo.add(enriched);
            }
            catch (Exception e) {
                logger.error("error while extracting info from : " + filename + " . will skip this file", (Throwable)e);
            }
        }
        return extractedInfo;
    }

    /**
     * Builds an enriched copy of {@code record} for the document stored in {@code filename}.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Copies {@code record} and replaces {@code documentID} with
     *       {@code http://smartfish.collection/wiofish/} followed by the lower-cased
     *       part of the file name after its last {@code '='}.</li>
     *   <li>Runs {@code ExtractorHelper.enrichSimpleField} / {@code enrichListField}
     *       for each annotation field, each with the matching {@code ExtractorHelper}
     *       query. The shared {@code uris} map is handed to these helpers
     *       (presumably they fill it with the resolved URIs; confirm in ExtractorHelper).</li>
     *   <li>If {@code species_english_name} is non-blank, scans the raw HTML species rows,
     *       sorts the species into threatened / target / by-catch / discard lists, stores
     *       the joined lists in the enriched record and queries URIs for each non-empty list.
     *       Any failure in this step is logged and the step is abandoned; the remaining
     *       steps still run.</li>
     *   <li>Calls {@code annotate} with the document URI and the collected URIs.</li>
     * </ol>
     *
     * @param record   fields previously extracted for the document; not modified here
     * @param filename path of the HTML file the record was extracted from
     * @return the enriched record, or {@code null} if the HTML file could not be re-read
     *         (in that case {@code annotate} is not called)
     */
    @Override
    public Map<String, String> enrichRecord(Map<String, String> record, String filename) {
        HashMap<String, String> enrichedRecord = new HashMap<String, String>();
        HashMap<String, List<String>> uris = new HashMap<String, List<String>>();
        String docName = FilenameUtils.getName((String)filename);
        docName = docName.substring(docName.lastIndexOf("=") + 1).toLowerCase();
        String docURI = "http://smartfish.collection/wiofish/" + docName;
        enrichedRecord.putAll(record);
        enrichedRecord.put("documentID", docURI);
        ExtractorHelper.enrichSimpleField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#country"), new ExtractorHelper.QueryWrapperSimple(){

            @Override
            public String doCall(ExtractedEntity arg) throws Exception {
                return ExtractorHelper.queryCountry(arg);
            }
        });
        ExtractorHelper.enrichSimpleField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#gear"), new ExtractorHelper.QueryWrapperSimple(){

            @Override
            public String doCall(ExtractedEntity arg) throws Exception {
                return ExtractorHelper.queryGear(arg);
            }
        });
        ExtractorHelper.enrichSimpleField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#vessel"), new ExtractorHelper.QueryWrapperSimple(){

            @Override
            public String doCall(ExtractedEntity arg) throws Exception {
                return ExtractorHelper.queryVessel(arg);
            }
        });
        ExtractorHelper.enrichListField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#management"), new ExtractorHelper.QueryWrapperList(){

            @Override
            public String doCall(List<ExtractedEntity> arg) throws Exception {
                return ExtractorHelper.queryManagement(arg);
            }
        });
        ExtractorHelper.enrichListField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#status"), new ExtractorHelper.QueryWrapperList(){

            @Override
            public String doCall(List<ExtractedEntity> arg) throws Exception {
                return ExtractorHelper.queryExploitationStatus(arg);
            }
        });
        ExtractorHelper.enrichListField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#access_control"), new ExtractorHelper.QueryWrapperList(){

            @Override
            public String doCall(List<ExtractedEntity> arg) throws Exception {
                return ExtractorHelper.queryAccessControl(arg);
            }
        });
        ExtractorHelper.enrichListField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#fishing_control"), new ExtractorHelper.QueryWrapperList(){

            @Override
            public String doCall(List<ExtractedEntity> arg) throws Exception {
                return ExtractorHelper.queryFishingControl(arg);
            }
        });
        ExtractorHelper.enrichListField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#enforcement_method"), new ExtractorHelper.QueryWrapperList(){

            @Override
            public String doCall(List<ExtractedEntity> arg) throws Exception {
                return ExtractorHelper.queryEnforcementMethod(arg);
            }
        });
        ExtractorHelper.enrichListField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#sector"), new ExtractorHelper.QueryWrapperList(){

            @Override
            public String doCall(List<ExtractedEntity> arg) throws Exception {
                return ExtractorHelper.querySector(arg);
            }
        });
        ExtractorHelper.enrichListField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#technology"), new ExtractorHelper.QueryWrapperList(){

            @Override
            public String doCall(List<ExtractedEntity> arg) throws Exception {
                return ExtractorHelper.queryTechnologyInUse(arg);
            }
        });
        ExtractorHelper.enrichListField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#owner_of_access_right"), new ExtractorHelper.QueryWrapperList(){

            @Override
            public String doCall(List<ExtractedEntity> arg) throws Exception {
                return ExtractorHelper.queryOwnershipOfAccessRight(arg);
            }
        });
        ExtractorHelper.enrichListField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#other_income_source"), new ExtractorHelper.QueryWrapperList(){

            @Override
            public String doCall(List<ExtractedEntity> arg) throws Exception {
                return ExtractorHelper.queryIncome(arg);
            }
        });
        ExtractorHelper.enrichListField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#market"), new ExtractorHelper.QueryWrapperList(){

            @Override
            public String doCall(List<ExtractedEntity> arg) throws Exception {
                return ExtractorHelper.queryMarkets(arg);
            }
        });
        ExtractorHelper.enrichListField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#seasonality"), new ExtractorHelper.QueryWrapperList(){

            @Override
            public String doCall(List<ExtractedEntity> arg) throws Exception {
                return ExtractorHelper.querySeasonality(arg);
            }
        });
        ExtractorHelper.enrichListField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#post_processing_method"), new ExtractorHelper.QueryWrapperList(){

            @Override
            public String doCall(List<ExtractedEntity> arg) throws Exception {
                return ExtractorHelper.queryPostHarvestingProcess(arg);
            }
        });
        ExtractorHelper.enrichListField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#decision_maker"), new ExtractorHelper.QueryWrapperList(){

            @Override
            public String doCall(List<ExtractedEntity> arg) throws Exception {
                return ExtractorHelper.queryRapresentativeForDecisionMaking(arg);
            }
        });
        ExtractorHelper.enrichListField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#management_indicator"), new ExtractorHelper.QueryWrapperList(){

            @Override
            public String doCall(List<ExtractedEntity> arg) throws Exception {
                return ExtractorHelper.queryManagementIndicators(arg);
            }
        });
        ExtractorHelper.enrichListField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#finance_mgmt_authority"), new ExtractorHelper.QueryWrapperList(){

            @Override
            public String doCall(List<ExtractedEntity> arg) throws Exception {
                return ExtractorHelper.queryFinancingManagement(arg);
            }
        });
        ExtractorHelper.enrichListField(record, enrichedRecord, uris, ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#applicant_for_access_right"), new ExtractorHelper.QueryWrapperList(){

            @Override
            public String doCall(List<ExtractedEntity> arg) throws Exception {
                return ExtractorHelper.queryAccessRightApplicant(arg);
            }
        });
        try {
            String html;
            try {
                html = ExtractorHelper.fileContent(filename);
            }
            catch (IOException e) {
                // Keep the cause in the log; the original message alone gave no hint of what failed.
                logger.error("error while getting html contents", (Throwable)e);
                // Existing contract: an unreadable file yields no record at all.
                return null;
            }
            List<ExtractedEntity> targetSpecies = new ArrayList<ExtractedEntity>();
            List<ExtractedEntity> bycatchSpecies = new ArrayList<ExtractedEntity>();
            List<ExtractedEntity> discardSpecies = new ArrayList<ExtractedEntity>();
            List<ExtractedEntity> threatenedSpecies = new ArrayList<ExtractedEntity>();
            long starttime = System.currentTimeMillis();
            long endtime;
            if (record.get("species_english_name") != null && record.get("species_english_name").trim().length() > 0) {
                // Group 1: the English-name cell of a species row; group 2: the rest of that row.
                Pattern speciesRowPattern = Pattern.compile("<tr id='species_row_.*?'>.*?EnglishNameblock.*?>(.*?)</td>(.*?)</tr>");
                // Group 1: threatened status text; groups 2-4: image names of the target / by-catch / discard cells.
                // Compiled once here instead of once per species row.
                Pattern statusPattern = Pattern.compile("<td align='center'>.*?</td><td align='center'>.*?</td><td align='center'>(.*?)<br></td>.*?<img src=images/(.*?).gif></td><td width=33% align='center'><img src=images/(.*?).gif></td><td width=33% align='center'><img src=images/(.*?).gif></td>");
                Matcher speciesRowMatcher = speciesRowPattern.matcher(html);
                List<ExtractedEntity> allSpecies = ExtractorHelper.covertToStringList(record.get("species_english_name"));
                while (speciesRowMatcher.find()) {
                    String speciesRow = speciesRowMatcher.group(1).trim();
                    String speciesRowRest = speciesRowMatcher.group(2).trim();
                    if (speciesRow.length() == 0) {
                        continue;
                    }
                    Matcher statusMatcher = statusPattern.matcher(speciesRowRest);
                    if (!statusMatcher.find()) {
                        continue;
                    }
                    // The flags describe the whole row, so they apply to every name listed in it.
                    boolean threatened = !statusMatcher.group(1).trim().equalsIgnoreCase("Not Applicable");
                    boolean target = statusMatcher.group(2).trim().equalsIgnoreCase("tick_blue");
                    boolean bycatch = statusMatcher.group(3).trim().equalsIgnoreCase("tick_blue");
                    boolean discard = statusMatcher.group(4).trim().equalsIgnoreCase("tick_blue");
                    for (String rawSpecies : speciesRow.split("\\s*,\\s*")) {
                        String species = rawSpecies.trim();
                        if (!this.containsSpecies(allSpecies, species)) {
                            // Caught by the enclosing handler below, which logs it and abandons species enrichment.
                            throw new IllegalStateException("Error : " + species + " not in allSpecies : " + speciesRow);
                        }
                        if (threatened) {
                            threatenedSpecies.add(new ExtractedEntity(species, ""));
                        }
                        if (target) {
                            targetSpecies.add(new ExtractedEntity(species, ""));
                        }
                        if (bycatch) {
                            bycatchSpecies.add(new ExtractedEntity(species, ""));
                        }
                        if (discard) {
                            discardSpecies.add(new ExtractedEntity(species, ""));
                        }
                    }
                }
                enrichedRecord.put(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#bycatch"), Joiner.on(", ").join(bycatchSpecies));
                enrichedRecord.put(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#target"), Joiner.on(", ").join(targetSpecies));
                enrichedRecord.put(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#thretened"), Joiner.on(", ").join(threatenedSpecies));
                enrichedRecord.put(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#discard"), Joiner.on(", ").join(discardSpecies));
                endtime = System.currentTimeMillis();
                logger.info("extracting threatened,target,by-catch,discard dur : " + (double)(endtime - starttime) / 1000.0 + " sec");
                starttime = System.currentTimeMillis();
                String speciesURIJson = ExtractorHelper.querySpecies(ExtractorHelper.covertToStringList(record.get("species_english_name")));
                endtime = System.currentTimeMillis();
                logger.info("query species dur : " + (double)(endtime - starttime) / 1000.0 + " sec");
                starttime = System.currentTimeMillis();
                if (threatenedSpecies.size() > 0) {
                    String threatenedSpeciesURIJson = ExtractorHelper.querySpecies(threatenedSpecies);
                    uris.put(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#thretened") + "_uris", ShortenCE4NameResponse.getURIFromJSON(threatenedSpeciesURIJson));
                }
                endtime = System.currentTimeMillis();
                logger.info("query threatened species dur : " + (double)(endtime - starttime) / 1000.0 + " sec");
                starttime = System.currentTimeMillis();
                if (targetSpecies.size() > 0) {
                    String targetSpeciesURIJson = ExtractorHelper.querySpecies(targetSpecies);
                    uris.put(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#target") + "_uris", ShortenCE4NameResponse.getURIFromJSON(targetSpeciesURIJson));
                }
                endtime = System.currentTimeMillis();
                logger.info("query target species dur : " + (double)(endtime - starttime) / 1000.0 + " sec");
                starttime = System.currentTimeMillis();
                if (bycatchSpecies.size() > 0) {
                    String bycatchSpeciesURIJson = ExtractorHelper.querySpecies(bycatchSpecies);
                    uris.put(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#bycatch") + "_uris", ShortenCE4NameResponse.getURIFromJSON(bycatchSpeciesURIJson));
                }
                endtime = System.currentTimeMillis();
                logger.info("query by catch species dur : " + (double)(endtime - starttime) / 1000.0 + " sec");
                starttime = System.currentTimeMillis();
                if (discardSpecies.size() > 0) {
                    String discardSpeciesURIJson = ExtractorHelper.querySpecies(discardSpecies);
                    uris.put(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#discard") + "_uris", ShortenCE4NameResponse.getURIFromJSON(discardSpeciesURIJson));
                }
                endtime = System.currentTimeMillis();
                logger.info("query discard species dur : " + (double)(endtime - starttime) / 1000.0 + " sec");
                enrichedRecord.put(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#species") + "_uris", speciesURIJson);
            }
        }
        catch (Exception e) {
            // Species enrichment is best-effort: log and carry on with annotation.
            logger.error("Error processing species : " + record.get("species_english_name"), (Throwable)e);
        }
        try {
            long annotateStart = System.currentTimeMillis();
            this.annotate(docURI, uris);
            long annotateEnd = System.currentTimeMillis();
            logger.info("annotate dur : " + (double)(annotateEnd - annotateStart) / 1000.0 + " sec");
        }
        catch (FileNotFoundException ex) {
            logger.error("file : " + filename + " not found", (Throwable)ex);
        }
        return enrichedRecord;
    }

    /**
     * Extracts the WIOFish-specific fields from the raw HTML of {@code filename}
     * using regular expressions.
     *
     * <p>Most fields are delegated to {@code parseSimpleRow}, {@code parseTable} and
     * {@code parseTickTableMultipleLangs}, which receive the field key, the pattern(s),
     * the HTML and the result map. Handled inline here are:
     * <ul>
     *   <li>the authority: name of the body plus the part of the body type before the
     *       first {@code '/'} (the whole body type when there is no slash);</li>
     *   <li>the seasonality: the names of the months whose table cell holds the blue tick image;</li>
     *   <li>{@code species_scientific_name} and {@code species_english_name}: comma-joined
     *       non-empty values collected from every species row.</li>
     * </ul>
     *
     * @param filename path of the HTML file to read
     * @return the extracted fields, or {@code null} if the file could not be read
     *         (the failure is logged)
     */
    static Map<String, String> customFields(String filename) {
        HashMap<String, String> fields = new HashMap<String, String>();
        String html;
        try {
            html = ExtractorHelper.fileContent(filename);
        }
        catch (IOException e) {
            logger.error("error while parsing the fields from : " + filename, (Throwable)e);
            return null;
        }
        HTMLExtractor.parseSimpleRow(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#country"), "<b>Reporting Area: </b>(.*?)</font>", html, fields);
        HTMLExtractor.parseSimpleRow("title", "<b>Fishery: </b>(.*?)</font>", html, fields);
        HTMLExtractor.parseSimpleRow("fishery_local_name", "<tr><td><b>Local name for this Fishery:</b></td><td>(.*?)</td></tr><tr>", html, fields);
        HTMLExtractor.parseSimpleRow(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#vessel"), "<tr><td><b>Type of vessel</b></td><td>(.*?)</td></tr>", html, fields);
        HTMLExtractor.parseTable(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#applicant_for_access_right"), "<b>Who can apply for access rights</b></td><td>(.*?)</td>", html, fields);
        HTMLExtractor.parseTable(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#owner_of_access_right"), "<b>Ownership of access right</b></td><td>(.*?)</td>", html, fields);
        HTMLExtractor.parseTable(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#other_income_source"), "<b>Alternative Incomes</b></td><td>(.*?)</td>", html, fields);
        HTMLExtractor.parseTable(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#market"), "<b>Markets</b></td></tr><tr><td>(.*?)</td></tr>", html, fields);
        HTMLExtractor.parseTable(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#post_processing_method"), "<b>Post-harvest processing</b></td></tr><tr><td>(.*?)</td></tr>", html, fields);
        HTMLExtractor.parseTable(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#decision_maker"), "<b>Representatives in decision making</b></td><td>(.*?)</td>", html, fields);
        HTMLExtractor.parseTable(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#gear"), "<tr><td><b>Select gear used in this fishery</b></td><td>(.*?)<br></td></tr>", html, fields);
        HTMLExtractor.parseTable(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#status"), "<b>FAO Status:</b></td><td>(.*?)</td>", html, fields);
        HTMLExtractor.parseTable(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#management_indicator"), "<td><b>Management Indicators</b>(.*?)</td>", html, fields);
        HTMLExtractor.parseTable(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#finance_mgmt_authority"), "<b>Financing management</b></td><td>(.*?)</td>", html, fields);
        HTMLExtractor.parseTable(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#access_control"), "<b>Access controls used in management</b></td><td>(.*?)</td>", html, fields);
        HTMLExtractor.parseTable(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#fishing_control"), "<b>Fishing controls used in management</b></td><td>(.*?)</td>", html, fields);
        HTMLExtractor.parseTable(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#enforcement_method"), "<b>Enforcement methods used</b></td><td>(.*?)</td>", html, fields);
        HTMLExtractor.parseTickTableMultipleLangs(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#sector"), "<b>Sector</b></td></tr><tr><td valign='top' colspan=2><table bgcolor='#eae8e8' class='table' cellspacing=1 width=100%><tr bgcolor='#ffffff'><td><table width=100% class='table' cellpadding=5><tr><td width=50% valign='top'>(.*?)</td><td valign='top' width=50%>", "<img src=images/tick_blue.gif>(.*?)<br>", html, fields);
        HTMLExtractor.parseTickTableMultipleLangs(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#technology"), "<a name='technology'>Technology Used</a>.*?</table>(.*?)</table>", "<img src=images/tick_blue.gif>(.*?)<br>", html, fields);
        HTMLExtractor.parseTickTableMultipleLangs(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#management"), "<b>Management Type</b>(.*?)</tr>", "<img src=images/tick_blue.gif>(.*?)<br>", html, fields);

        // Authority: first decision-making body row -> "<name>, <body type before the slash>".
        Matcher authorityMatcher = Pattern.compile("<tr id='dmtbl_row_0'><td>(.*?)</td><td>(.*?)<br></td>").matcher(html);
        if (authorityMatcher.find()) {
            String nameOfBody = authorityMatcher.group(1).trim();
            String bodyType = authorityMatcher.group(2).trim();
            // Take the text before the first '/' without assuming a slash is present:
            // indexing split("/") threw ArrayIndexOutOfBoundsException for values
            // with no slash (index 1) and for a lone "/" (index 0).
            int slash = bodyType.indexOf('/');
            String bodyTypeEng = (slash >= 0 ? bodyType.substring(0, slash) : bodyType).trim();
            if (bodyTypeEng.length() > 0) {
                fields.put(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#authority"), Joiner.on(", ").join(nameOfBody, bodyTypeEng));
            }
        }

        // Seasonality: one capture group per month cell of the Jan..Dec table.
        Matcher seasonMatcher = Pattern.compile("<tr><td><b>Jan</b></td><td><b>Feb</b></td><td><b>Mar</b></td><td><b>Apr</b></td><td><b>May</b></td><td><b>Jun</b></td><td><b>Jul</b></td><td><b>Aug</b></td><td><b>Sep</b></td><td><b>Oct</b></td><td><b>Nov</b></td><td><b>Dec</b></td></tr><tr><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td></tr><tr>").matcher(html);
        if (seasonMatcher.find()) {
            String[] months = new String[]{"January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"};
            List<String> seasonality = Lists.newArrayList();
            for (int i = 1; i <= seasonMatcher.groupCount(); ++i) {
                // Inspect the i-th month's own cell; reading group(1) every time made
                // all twelve months mirror January.
                String img = seasonMatcher.group(i).trim();
                if (img.equalsIgnoreCase("<img src=images/tick_blue.gif>")) {
                    seasonality.add(months[i - 1]);
                }
            }
            if (seasonality.size() > 0) {
                fields.put(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#seasonality"), Joiner.on(", ").join(seasonality));
            }
        }

        // Species: group 1 is the first cell of a species row (stored as the scientific name),
        // group 2 the English-name cell.
        Matcher speciesMatcher = Pattern.compile("<tr id='species_row_.*?'><td.*?>(.*?)</td>.*?EnglishNameblock.*?>(.*?)</td>").matcher(html);
        List<String> scientificNames = Lists.newArrayList();
        List<String> englishNames = Lists.newArrayList();
        while (speciesMatcher.find()) {
            String scientificName = speciesMatcher.group(1).trim();
            if (scientificName.length() > 0) {
                scientificNames.add(scientificName);
            }
            String englishName = speciesMatcher.group(2).trim();
            if (englishName.length() > 0) {
                englishNames.add(englishName);
            }
        }
        if (scientificNames.size() > 0) {
            fields.put("species_scientific_name", Joiner.on(", ").join(scientificNames));
        }
        if (englishNames.size() > 0) {
            fields.put("species_english_name", Joiner.on(", ").join(englishNames));
        }
        return fields;
    }

    /**
     * Serialises an extracted record by delegating to
     * {@code ExtractorHelper.createRowseFromFields}.
     *
     * <p>The record's {@code documentID} and {@code language} entries are passed
     * alongside the fixed identifiers {@code "faoCollection"} and {@code "SmartfishFT"},
     * followed by the full field map.
     *
     * @param info the record to convert
     * @return whatever {@code ExtractorHelper.createRowseFromFields} produces for the record
     */
    @Override
    public String convertInfoToRowset(Map<String, String> info) {
        String documentId = info.get("documentID");
        String language = info.get("language");
        return ExtractorHelper.createRowseFromFields(documentId, "faoCollection", "SmartfishFT", language, info);
    }

    private void annotate(String filename, Map<String, List<String>> uris) throws FileNotFoundException {
        AnnotationBase annotator = AnnotationBase.getInstance();
        Set<Map.Entry<String, List<String>>> entrySet = uris.entrySet();
        for (Map.Entry<String, List<String>> entry : entrySet) {
            List<String> uris_;
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#country") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_country(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#vessel") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_vessel(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#gear") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_gear(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#target") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_targetSpecies(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#bycatch") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_bycatchSpecies(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#discard") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_discardSpecies(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#thretened") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_thretenedSpecies(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#management") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_management(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#sector") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_sector(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#seasonality") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_seasonality(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#status") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_status(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#access_control") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_access_control(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#fishing_control") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_fishing_control(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#enforcement_method") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_enforcement_method(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#applicant_for_access_right") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_accessRightApplicant(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#owner_of_access_right") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_ownershipOfAccessRight(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#other_income_source") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_alternativeIncomeSource(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#market") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_market(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#post_processing_method") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_postHarvestProcessing(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#decision_maker") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_representativesInDecisionMaking(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#management_indicator") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_managementIndicator(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#finance_mgmt_authority") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_financingManagement(filename, uri_);
                }
                continue;
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#technology") + "_uris")) {
                uris_ = entry.getValue();
                for (String uri_ : uris_) {
                    annotator.WIOFISH_technologiesInUse(filename, uri_);
                }
                continue;
            }
            System.out.println("=> annotation uri not found: " + entry.getKey() + " all entries are : " + entrySet);
        }
    }

    /**
     * Extracts a single value from {@code html} and stores it under {@code fieldName}.
     *
     * <p>The first match of {@code pattern} is located and its capture group 1 is used as
     * the value. Every literal {@code <br>} is removed and the result is trimmed. Nothing
     * is stored when the pattern does not match or group 1 did not participate in the match.
     *
     * @param fieldName key under which the value is stored in {@code fields}
     * @param pattern   regular expression; must declare at least one capture group
     * @param html      text to search
     * @param fields    destination map, modified in place
     */
    static void parseSimpleRow(String fieldName, String pattern, String html, Map<String, String> fields) {
        Matcher m = Pattern.compile(pattern).matcher(html);
        if (!m.find()) {
            return;
        }
        String captured = m.group(1);
        if (captured == null) {
            // Group 1 is optional in the pattern and was not part of this match.
            return;
        }
        // Strip the markup first and trim afterwards; trimming first would leave behind
        // the whitespace that sat between the text and a trailing <br>.
        fields.put(fieldName, captured.replace("<br>", "").trim());
    }

    /**
     * Collects the entries of a bilingual tick table and stores them as comma-separated lists.
     *
     * <p>{@code pattern1} locates the table (capture group 1) inside {@code html};
     * {@code pattern2} is then matched repeatedly against that table and each match's
     * group 1 is split on {@code " / "}. A one-part entry contributes only to the first
     * (stored under {@code fieldName}) list; a two-part entry contributes its first part to
     * that list and its second part to the list stored under {@code fieldName + "_fr"}.
     *
     * <p>Scanning stops at the first entry that has neither one nor two parts (a warning is
     * logged and the values gathered so far are kept) and at the first entry whose first
     * part is "None" or "Unknown" (case-insensitive), in which case both lists are
     * discarded. A list is only stored when it is non-empty.
     *
     * @param fieldName key for the first-part list; the second-part list uses the
     *                  {@code "_fr"} suffix
     * @param pattern1  regular expression selecting the table; needs a capture group
     * @param pattern2  regular expression selecting one entry; needs a capture group
     * @param html      text to search
     * @param fields    destination map, modified in place
     */
    static void parseTickTableMultipleLangs(String fieldName, String pattern1, String pattern2, String html, Map<String, String> fields) {
        Matcher tableMatcher = Pattern.compile(pattern1).matcher(html);
        if (!tableMatcher.find()) {
            return;
        }
        String table = tableMatcher.group(1).trim();
        Matcher entryMatcher = Pattern.compile(pattern2).matcher(table);
        List<String> eng = Lists.newArrayList();
        List<String> fr = Lists.newArrayList();
        while (entryMatcher.find()) {
            List<String> values = Splitter.on(" / ").trimResults().omitEmptyStrings().splitToList(entryMatcher.group(1).trim());
            if (values.size() != 1 && values.size() != 2) {
                logger.warn("bad data for " + fieldName + " : " + values);
                break;
            }
            String valueEng = values.get(0);
            if (valueEng.equalsIgnoreCase("None") || valueEng.equalsIgnoreCase("Unknown")) {
                // The sentinel invalidates the whole table. Clear both lists so that a
                // "_fr" entry is never emitted without its counterpart.
                eng.clear();
                fr.clear();
                break;
            }
            eng.add(valueEng.trim());
            if (values.size() == 2) {
                fr.add(values.get(1).trim());
            }
        }
        if (!eng.isEmpty()) {
            fields.put(fieldName, Joiner.on(", ").join(eng));
        }
        if (!fr.isEmpty()) {
            fields.put(fieldName + "_fr", Joiner.on(", ").join(fr));
        }
    }

    /**
     * Collects the entries of a tick table and stores them as one comma-separated string.
     *
     * <p>{@code pattern1} locates the table (capture group 1) inside {@code html};
     * {@code pattern2} is then matched repeatedly against that table and the trimmed
     * group 1 of each match is collected. If an entry equals "None" or "Unknown"
     * (case-insensitive), everything collected is discarded and scanning stops. The joined
     * list is stored under {@code fieldName} only when it is non-empty.
     *
     * @param fieldName key under which the joined list is stored
     * @param pattern1  regular expression selecting the table; needs a capture group
     * @param pattern2  regular expression selecting one entry; needs a capture group
     * @param html      text to search
     * @param fields    destination map, modified in place
     */
    static void parseTickTable(String fieldName, String pattern1, String pattern2, String html, Map<String, String> fields) {
        Matcher tableMatcher = Pattern.compile(pattern1).matcher(html);
        if (!tableMatcher.find()) {
            return;
        }
        String tableText = tableMatcher.group(1).trim();
        Matcher entryMatcher = Pattern.compile(pattern2).matcher(tableText);
        List<String> ticked = Lists.newArrayList();
        while (entryMatcher.find()) {
            String entry = entryMatcher.group(1).trim();
            boolean sentinel = entry.equalsIgnoreCase("None") || entry.equalsIgnoreCase("Unknown");
            if (sentinel) {
                ticked.clear();
                break;
            }
            ticked.add(entry);
        }
        if (ticked.isEmpty()) {
            return;
        }
        fields.put(fieldName, Joiner.on(", ").join(ticked));
    }

    /**
     * Parses a {@code <br>}-separated table of bilingual rows and stores both columns.
     *
     * <p>{@code pattern} locates the table (capture group 1) inside {@code html}. The table
     * is split on {@code <br>} into rows and each row is split on {@code " / "}. A row with
     * exactly two parts contributes its first part to the list stored under
     * {@code fieldName} and its second part to the list stored under
     * {@code fieldName + "_fr"}; any other row is logged as bad data and skipped.
     *
     * <p>When {@code fieldName} equals the local name of the {@code #status} annotation,
     * every {@code -} is removed from both parts. A row whose first part is "None" or
     * "Unknown" (case-insensitive) discards everything collected so far; later rows are
     * still processed. Each list is stored only when it is non-empty.
     *
     * @param fieldName key for the first-column list; the second-column list uses the
     *                  {@code "_fr"} suffix
     * @param pattern   regular expression selecting the table; needs a capture group
     * @param html      text to search
     * @param fields    destination map, modified in place
     */
    static void parseTable(String fieldName, String pattern, String html, Map<String, String> fields) {
        Matcher m = Pattern.compile(pattern).matcher(html);
        if (!m.find()) {
            return;
        }
        String table = m.group(1);
        // Typed lists: iterating a raw List as String does not compile.
        List<String> rows = Splitter.on("<br>").trimResults().omitEmptyStrings().splitToList(table);
        List<String> engValues = Lists.newArrayList();
        List<String> frValues = Lists.newArrayList();
        for (String row : rows) {
            List<String> values = Splitter.on(" / ").trimResults().omitEmptyStrings().splitToList(row);
            if (values.size() != 2) {
                logger.warn("bad data for " + fieldName + " : " + values + " for row : " + row);
                continue;
            }
            String dmEng = values.get(0);
            String dmFr = values.get(1);
            if (fieldName.equals(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#status"))) {
                dmEng = dmEng.replace("-", "");
                dmFr = dmFr.replace("-", "");
            }
            if (dmEng.equalsIgnoreCase("None") || dmEng.equalsIgnoreCase("Unknown")) {
                // Sentinel row: drop what was gathered so far, but keep scanning, so rows
                // after the sentinel are still collected.
                engValues.clear();
                frValues.clear();
                continue;
            }
            engValues.add(dmEng);
            frValues.add(dmFr);
        }
        if (!engValues.isEmpty()) {
            fields.put(fieldName, Joiner.on(", ").join(engValues));
        }
        if (!frValues.isEmpty()) {
            fields.put(fieldName + "_fr", Joiner.on(", ").join(frValues));
        }
    }

    /**
     * Reports whether any entity in {@code allSpecies} has an {@code en_name} equal to
     * {@code species}, ignoring case.
     *
     * @param allSpecies entities to search
     * @param species    name to look for
     * @return {@code true} on the first case-insensitive match, {@code false} if none
     */
    private boolean containsSpecies(List<ExtractedEntity> allSpecies, String species) {
        for (ExtractedEntity candidate : allSpecies) {
            if (candidate.en_name.equalsIgnoreCase(species)) {
                return true;
            }
        }
        return false;
    }
}

