package org.gcube.textextractor.extractors;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FilenameUtils;
import org.apache.tika.language.LanguageIdentifier;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.html.HtmlParser;
import org.apache.tika.sax.BodyContentHandler;
import org.gcube.semantic.annotator.AnnotationBase;
import org.gcube.textextractor.entities.ShortenCE4NameResponse;
import org.gcube.textextractor.helpers.ExtractorHelper;

/* loaded from: input_file:org/gcube/textextractor/extractors/HTMLExtractor.class */
public class HTMLExtractor extends InformationExtractor {
    /**
     * Creates an HTML extractor by delegating to the {@link InformationExtractor} constructor.
     *
     * @param str value handed unchanged to the superclass constructor; presumably the
     *            location of the input files (TODO confirm against InformationExtractor)
     */
    public HTMLExtractor(String str) {
        super(str);
    }

    /**
     * Builds one record per file returned by {@code getFilenames()}.
     *
     * <p>Each file is parsed with Tika's {@link HtmlParser}; the body text (with empty lines
     * removed), the Tika {@code title} metadata, the detected language and a fixed
     * {@code provenance} of {@code "WIOFish"} are stored, then overlaid with the result of
     * {@code customFields} and passed through {@code enrichRecord}.
     *
     * <p>Processing is best-effort: a file that fails at any step is logged and skipped, so the
     * returned list contains only successfully built, non-null records.
     *
     * @return the records that could be built, in iteration order of {@code getFilenames()}
     * @throws FileNotFoundException declared by the overridden signature; failures raised while
     *         processing an individual file are caught and logged inside the loop
     */
    @Override // org.gcube.textextractor.extractors.InformationExtractor
    List<Map<String, String>> extractInfo() throws FileNotFoundException {
        List<Map<String, String>> records = new ArrayList<>();
        Logger logger = Logger.getLogger(HTMLExtractor.class.getName());
        int fileIndex = 0;
        for (String filename : getFilenames()) {
            fileIndex++;
            System.out.println("Processing file : " + fileIndex + " " + filename);
            try {
                BodyContentHandler bodyHandler = new BodyContentHandler();
                Metadata metadata = new Metadata();
                // Close the stream as soon as parsing is done so handles do not pile up
                // over a long list of files.
                try (FileInputStream input = new FileInputStream(filename)) {
                    new HtmlParser().parse(input, bodyHandler, metadata, new ParseContext());
                }
                String text = ExtractorHelper.removeEmptyLines(bodyHandler.toString());

                Map<String, String> fields = new HashMap<>();
                fields.put("documentID", filename);
                fields.put("text", text);
                fields.put("title", metadata.get("title"));
                fields.put("language", new LanguageIdentifier(text).getLanguage());
                fields.put("provenance", "WIOFish");

                // customFields signals an unreadable file with null; skip explicitly instead
                // of relying on putAll(null) throwing.
                Map<String, String> custom = customFields(filename);
                if (custom == null) {
                    logger.log(Level.WARNING, "Skipping file, custom fields could not be read: " + filename);
                    continue;
                }
                fields.putAll(custom);

                // enrichRecord may return null; never hand a null record to callers.
                Map<String, String> enriched = enrichRecord(fields, filename);
                if (enriched == null) {
                    logger.log(Level.WARNING, "Skipping file, record could not be enriched: " + filename);
                    continue;
                }
                records.add(enriched);
            } catch (Exception e) {
                // Boundary catch: one broken file must not abort the whole batch.
                logger.log(Level.SEVERE, "Failed to process file " + filename, e);
            }
        }
        return records;
    }

    /**
     * Copies {@code map} into a new record, replaces its {@code documentID} with a
     * {@code http://smartfish.collection/wiofish/...} URI derived from the file name, and adds
     * the raw query responses ({@code *_uri} entries) for every field that is present.
     *
     * <p>The URIs extracted from each response are collected in a separate map and passed to
     * {@code annotate}. Every lookup is best-effort: a failing query is reported on standard
     * output and the remaining fields are still processed. Durations of the individual steps
     * are printed to standard output.
     *
     * @param map fields extracted so far; read only, never modified
     * @param str path of the source file; its name yields the document URI and its content is
     *            re-read to classify species rows
     * @return the enriched copy of {@code map}, or {@code null} if an {@link IOException}
     *         occurs while the species section is processed
     */
    @Override // org.gcube.textextractor.extractors.InformationExtractor
    Map<String, String> enrichRecord(Map<String, String> map, String str) {
        Map<String, String> record = new HashMap<>();
        Map<String, List<String>> uris = new HashMap<>();
        String name = FilenameUtils.getName(str);
        String documentUri = "http://smartfish.collection/wiofish/" + name.substring(name.lastIndexOf("=") + 1).toLowerCase();
        record.putAll(map);
        record.put("documentID", documentUri);

        long start = System.currentTimeMillis();
        try {
            if (map.get("country") != null) {
                String json = ExtractorHelper.queryCountry(map.get("country"));
                uris.put("country_uris", ShortenCE4NameResponse.getURIFromJSON(json));
                record.put("country_uri", json);
            }
        } catch (Exception e) {
            System.out.println("Error processing country : " + map.get("country"));
            e.printStackTrace();
        }
        printDuration("query country", start);

        start = System.currentTimeMillis();
        try {
            if (map.get("gear_used") != null) {
                String json = ExtractorHelper.queryGear(map.get("gear_used"));
                uris.put("gear_uris", ShortenCE4NameResponse.getURIFromJSON(json));
                record.put("gear_uri", json);
            }
        } catch (Exception e) {
            System.out.println("Error processing gear : " + map.get("gear_used"));
            e.printStackTrace();
        }
        printDuration("query gear used", start);

        start = System.currentTimeMillis();
        try {
            if (map.get("type_of_vessel") != null) {
                String json = ExtractorHelper.queryVessel(map.get("type_of_vessel"));
                uris.put("vessel_uris", ShortenCE4NameResponse.getURIFromJSON(json));
                record.put("vessel_uri", json);
            }
        } catch (Exception e) {
            System.out.println("Error processing type_of_vessel : " + map.get("type_of_vessel"));
            e.printStackTrace();
        }
        printDuration("query type of vessel", start);

        start = System.currentTimeMillis();
        try {
            if (map.get("management") != null) {
                String json = ExtractorHelper.queryManagement(ExtractorHelper.covertToStringList(map.get("management")));
                uris.put("management_uris", ShortenCE4NameResponse.getURIFromJSON(json));
                record.put("management_uri", json);
            }
        } catch (Exception e) {
            System.out.println("Error processing management : " + map.get("management"));
            e.printStackTrace();
        }
        printDuration("query management", start);

        start = System.currentTimeMillis();
        try {
            if (map.get("exploitation_status") != null) {
                String json = ExtractorHelper.queryExploitationStatus(ExtractorHelper.covertToStringList(map.get("exploitation_status")));
                uris.put("exploitation_status_uris", ShortenCE4NameResponse.getURIFromJSON(json));
                record.put("exploitation_status_uri", json);
            }
        } catch (Exception e) {
            System.out.println("Error processing exploitation_status : " + map.get("exploitation_status"));
            e.printStackTrace();
        }
        printDuration("query exploitation status", start);

        start = System.currentTimeMillis();
        try {
            if (map.get("access_control") != null) {
                String json = ExtractorHelper.queryAccessControl(ExtractorHelper.covertToStringList(map.get("access_control")));
                uris.put("access_control_uris", ShortenCE4NameResponse.getURIFromJSON(json));
                record.put("access_control_uri", json);
            }
        } catch (Exception e) {
            // Report the value that was actually queried (the key read above).
            System.out.println("Error processing access_control : " + map.get("access_control"));
            e.printStackTrace();
        }
        printDuration("query access control", start);

        start = System.currentTimeMillis();
        try {
            if (map.get("fishing_control") != null) {
                String json = ExtractorHelper.queryFishingControl(ExtractorHelper.covertToStringList(map.get("fishing_control")));
                uris.put("fishing_control_uris", ShortenCE4NameResponse.getURIFromJSON(json));
                record.put("fishing_control_uri", json);
            }
        } catch (Exception e) {
            // Report the value that was actually queried (the key read above).
            System.out.println("Error processing fishing_control : " + map.get("fishing_control"));
            e.printStackTrace();
        }
        printDuration("query fishing control", start);

        start = System.currentTimeMillis();
        try {
            if (map.get("enforcement_method") != null) {
                String json = ExtractorHelper.queryEnforcementMethod(ExtractorHelper.covertToStringList(map.get("enforcement_method")));
                uris.put("enforcement_method_uris", ShortenCE4NameResponse.getURIFromJSON(json));
                record.put("enforcement_method_uri", json);
            }
        } catch (Exception e) {
            System.out.println("Error processing enforcement_method : " + map.get("enforcement_method"));
            e.printStackTrace();
        }
        printDuration("query enforcement method", start);

        start = System.currentTimeMillis();
        try {
            if (map.get("sector") != null) {
                String json = ExtractorHelper.querySector(ExtractorHelper.covertToStringList(map.get("sector")));
                uris.put("sector_uris", ShortenCE4NameResponse.getURIFromJSON(json));
                record.put("sector_uri", json);
            }
        } catch (Exception e) {
            System.out.println("Error processing sector : " + map.get("sector"));
            e.printStackTrace();
        }
        printDuration("query sector", start);

        try {
            String fileContent = ExtractorHelper.fileContent(str);
            String speciesNames = map.get("species_english_name");
            // A record without species is valid; skip the section instead of failing on null.
            if (speciesNames != null && speciesNames.trim().length() > 0) {
                List<String> targetSpecies = new ArrayList<>();
                List<String> bycatchSpecies = new ArrayList<>();
                List<String> discardSpecies = new ArrayList<>();
                List<String> threatenedSpecies = new ArrayList<>();

                start = System.currentTimeMillis();
                Matcher rowMatcher = Pattern.compile("<tr id='species_row_.*?'>.*?EnglishNameblock.*?>(.*?)</td>(.*?)</tr>").matcher(fileContent);
                // Compiled once; it is applied to every species row.
                Pattern detailPattern = Pattern.compile("<td align='center'>.*?</td><td align='center'>.*?</td><td align='center'>(.*?)<br></td>.*?<img src=images/(.*?).gif></td><td width=33% align='center'><img src=images/(.*?).gif></td><td width=33% align='center'><img src=images/(.*?).gif></td>");
                List<String> allSpecies = ExtractorHelper.covertToStringList(speciesNames);
                while (rowMatcher.find()) {
                    String rowNames = rowMatcher.group(1).trim();
                    String rowDetails = rowMatcher.group(2).trim();
                    if (rowNames.length() == 0) {
                        continue;
                    }
                    Matcher detail = detailPattern.matcher(rowDetails);
                    if (!detail.find()) {
                        continue;
                    }
                    // The four markers apply to the whole row, i.e. to every name listed in it.
                    boolean threatened = !detail.group(1).trim().equalsIgnoreCase("Not Applicable");
                    boolean target = detail.group(2).trim().equalsIgnoreCase("tick_blue");
                    boolean bycatch = detail.group(3).trim().equalsIgnoreCase("tick_blue");
                    boolean discard = detail.group(4).trim().equalsIgnoreCase("tick_blue");
                    for (String rawName : rowNames.split("\\s*,\\s*")) {
                        String speciesName = rawName.trim();
                        if (!allSpecies.contains(speciesName)) {
                            // Aborts the whole species section; handled by the catch below.
                            System.out.println("Error : " + speciesName + " not in allSpecies : " + rowNames);
                            throw new IllegalStateException("Error : " + speciesName + " not in allSpecies : " + rowNames);
                        }
                        if (threatened) {
                            threatenedSpecies.add(speciesName);
                        }
                        if (target) {
                            targetSpecies.add(speciesName);
                        }
                        if (bycatch) {
                            bycatchSpecies.add(speciesName);
                        }
                        if (discard) {
                            discardSpecies.add(speciesName);
                        }
                    }
                }
                System.out.println("threatenedSpecies : " + threatenedSpecies);
                System.out.println("targetSpecies : " + targetSpecies);
                System.out.println("bycatchSpecies : " + bycatchSpecies);
                System.out.println("discardSpecies : " + discardSpecies);
                printDuration("extracting threatened,target,by-catch,discard", start);

                start = System.currentTimeMillis();
                String speciesJson = ExtractorHelper.querySpecies(allSpecies);
                uris.put("species_uris", ShortenCE4NameResponse.getURIFromJSON(speciesJson));
                printDuration("query species", start);

                querySpeciesSubset("threatened_species", "query threatened species", threatenedSpecies, uris);
                querySpeciesSubset("target_species", "query target species", targetSpecies, uris);
                querySpeciesSubset("bycatch_species", "query by catch species", bycatchSpecies, uris);
                querySpeciesSubset("discard_species", "query discard species", discardSpecies, uris);

                // Stored last, so it is only present when every subset query succeeded.
                record.put("species_uri", speciesJson);
            }
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } catch (Exception e) {
            System.out.println("Error processing species : " + map.get("species_english_name"));
            e.printStackTrace();
        }

        try {
            start = System.currentTimeMillis();
            annotate(documentUri, uris);
            printDuration("annotate", start);
        } catch (FileNotFoundException e) {
            Logger.getLogger(HTMLExtractor.class.getName()).log(Level.SEVERE, "Annotation failed for " + documentUri, e);
        }
        return record;
    }

    /** Queries the URIs for one species subset (if non-empty), stores them under {@code key} and prints the elapsed time. */
    private static void querySpeciesSubset(String key, String label, List<String> names, Map<String, List<String>> uris) throws Exception {
        long start = System.currentTimeMillis();
        if (!names.isEmpty()) {
            uris.put(key, ShortenCE4NameResponse.getURIFromJSON(ExtractorHelper.querySpecies(names)));
        }
        printDuration(label, start);
    }

    /** Prints {@code "<label> dur : <seconds> sec"} for the time elapsed since {@code startMillis}. */
    private static void printDuration(String label, long startMillis) {
        System.out.println(label + " dur : " + ((System.currentTimeMillis() - startMillis) / 1000.0d) + " sec");
    }

    /**
     * Extracts the WIOFish-specific fields from the raw HTML of one file using regular
     * expressions.
     *
     * <p>Keys that may be set: {@code country}, {@code title}, {@code fishery_local_name},
     * {@code type_of_vessel}, {@code gear_used}, {@code exploitation_status},
     * {@code access_control}, {@code fishing_control}, {@code enforcement_method},
     * {@code authorities}, {@code seasonality}, {@code sector}, {@code technology_used},
     * {@code management}. {@code species_scientific_name} and {@code species_english_name} are
     * always set (possibly to the empty string). For values written as {@code "a / b"} only the
     * part before the slash is kept.
     *
     * @param str path of the HTML file to read
     * @return the extracted fields, or {@code null} if the file content cannot be read
     */
    private Map<String, String> customFields(String str) {
        String fileContent;
        try {
            fileContent = ExtractorHelper.fileContent(str);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }

        Map<String, String> fields = new HashMap<>();
        String tickedEntryRegex = "<img src=images/tick_blue.gif>(.*?)<br>";

        putFirstGroup(fields, "country", fileContent, "<b>Reporting Area: </b>(.*?)</font>");
        putFirstGroup(fields, "title", fileContent, "<b>Fishery: </b>(.*?)</font>");
        putFirstGroup(fields, "fishery_local_name", fileContent, "<tr><td><b>Local Name:</b></td><td>(.*?)</td></tr><tr>");
        putFirstGroup(fields, "type_of_vessel", fileContent, "<tr><td><b>Type of vessel</b></td><td>(.*?)<br></td></tr>");
        putFirstGroup(fields, "gear_used", fileContent, "<tr><td><b>Select gear used in this fishery</b></td><td>(.*?)<br></td></tr>");

        Matcher status = Pattern.compile("<b>FAO Status:</b></td><td>(.*?)</td>").matcher(fileContent);
        if (status.find()) {
            String value = status.group(1).trim().replace("<br>", "");
            String[] parts = value.split(" / ");
            if (parts.length == 2) {
                String primary = parts[0].trim();
                if (!isNoneOrUnknown(primary) && primary.length() > 0) {
                    fields.put("exploitation_status", primary);
                }
            } else {
                System.out.println("bad data for exploitation_status : " + value);
            }
        }

        putIfNotNull(fields, "access_control", collectPrimaryValues(fileContent,
                "<b>Access controls used in management</b></td><td>(.*?)</td>", "bad data access control : "));
        putIfNotNull(fields, "fishing_control", collectPrimaryValues(fileContent,
                "<b>Fishing controls used in management</b></td><td>(.*?)</td>", "bad data fishing control : "));
        putIfNotNull(fields, "enforcement_method", collectPrimaryValues(fileContent,
                "<b>Enforcement methods used</b></td><td>(.*?)</td>", "bad data enforcement methods : "));

        Matcher authority = Pattern.compile("<tr id='dmtbl_row_0'><td>(.*?)</td><td>(.*?)<br></td>").matcher(fileContent);
        if (authority.find()) {
            String authorityName = authority.group(1).trim();
            // Only the part before the first '/' is used; a value without '/' must not fail.
            String[] parts = authority.group(2).trim().split("/");
            String primary = parts.length > 0 ? parts[0].trim() : "";
            if (primary.length() > 0) {
                fields.put("authorities", authorityName + " ," + primary);
            }
        }

        Matcher months = Pattern.compile("<tr><td><b>Jan</b></td><td><b>Feb</b></td><td><b>Mar</b></td><td><b>Apr</b></td><td><b>May</b></td><td><b>Jun</b></td><td><b>Jul</b></td><td><b>Aug</b></td><td><b>Sep</b></td><td><b>Oct</b></td><td><b>Nov</b></td><td><b>Dec</b></td></tr><tr><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td></tr><tr>").matcher(fileContent);
        if (months.find()) {
            String[] monthNames = {"January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"};
            List<String> tickedMonths = new ArrayList<>();
            for (int i = 1; i <= months.groupCount(); i++) {
                // Group i holds the cell of month i; each month is checked against its own cell.
                if (months.group(i).trim().equalsIgnoreCase("<img src=images/tick_blue.gif>")) {
                    tickedMonths.add(monthNames[i - 1]);
                }
            }
            String seasonality = joinWithCommas(tickedMonths);
            if (seasonality.trim().length() > 0) {
                fields.put("seasonality", seasonality);
            }
        }

        Matcher sectorBlock = Pattern.compile("<b>Sector</b></td></tr><tr><td valign='top' colspan=2><table bgcolor='#eae8e8' class='table' cellspacing=1 width=100%><tr bgcolor='#ffffff'><td><table width=100% class='table' cellpadding=5><tr><td width=50% valign='top'>(.*?)</td><td valign='top' width=50%>").matcher(fileContent);
        if (sectorBlock.find()) {
            Matcher ticked = Pattern.compile(tickedEntryRegex).matcher(sectorBlock.group(1).trim());
            List<String> sectors = new ArrayList<>();
            boolean excluded = false;
            while (ticked.find()) {
                String entry = ticked.group(1).trim();
                String[] parts = entry.split(" / ");
                if (parts.length != 2) {
                    // NOTE(review): malformed entry stops the scan but keeps what was collected
                    // so far; confirm this is the intended handling.
                    System.out.println("bad data in sector : " + entry);
                    break;
                }
                String primary = parts[0].trim();
                if (isNoneOrUnknown(primary)) {
                    // Same rule as the control/enforcement fields: None/Unknown drops the field.
                    excluded = true;
                    break;
                }
                sectors.add(primary);
            }
            String sector = joinWithCommas(sectors);
            if (!excluded && sector.trim().length() > 0) {
                fields.put("sector", sector);
            }
        }

        Matcher technologyBlock = Pattern.compile("<a name='technology'>Technology Used</a>.*?</table>(.*?)</table>").matcher(fileContent);
        if (technologyBlock.find()) {
            Matcher ticked = Pattern.compile(tickedEntryRegex).matcher(technologyBlock.group(1).trim());
            List<String> technologies = new ArrayList<>();
            while (ticked.find()) {
                technologies.add(ticked.group(1).trim());
            }
            String technology = joinWithCommas(technologies).trim();
            if (technology.length() > 0) {
                fields.put("technology_used", technology);
            }
        }

        Matcher managementBlock = Pattern.compile("<b>Management Type</b>(.*?)</tr>").matcher(fileContent);
        if (managementBlock.find()) {
            Matcher ticked = Pattern.compile(tickedEntryRegex).matcher(managementBlock.group(1).trim());
            List<String> managementTypes = new ArrayList<>();
            while (ticked.find()) {
                String[] parts = ticked.group(1).trim().split("/");
                if (parts.length == 0) {
                    continue;
                }
                // Trim before comparing: "None / x" splits into "None " with a trailing space.
                String primary = parts[0].trim();
                if (!isNoneOrUnknown(primary)) {
                    managementTypes.add(primary);
                }
            }
            String management = joinWithCommas(managementTypes).trim();
            if (management.length() > 0) {
                fields.put("management", management);
            }
        }

        Matcher speciesRows = Pattern.compile("<tr id='species_row_.*?'><td.*?>(.*?)</td>.*?EnglishNameblock.*?>(.*?)</td>").matcher(fileContent);
        List<String> scientificNames = new ArrayList<>();
        List<String> englishNames = new ArrayList<>();
        while (speciesRows.find()) {
            String scientific = speciesRows.group(1).trim();
            if (scientific.length() > 0) {
                scientificNames.add(scientific);
            }
            String english = speciesRows.group(2).trim();
            if (english.length() > 0) {
                englishNames.add(english);
            }
        }
        String englishJoined = joinWithCommas(englishNames);
        System.out.println(englishJoined);
        fields.put("species_scientific_name", joinWithCommas(scientificNames));
        fields.put("species_english_name", englishJoined);
        System.out.println(fields);
        return fields;
    }

    /** Stores the trimmed first capture group of {@code regex} under {@code key} when the pattern is found. */
    private static void putFirstGroup(Map<String, String> fields, String key, String content, String regex) {
        Matcher matcher = Pattern.compile(regex).matcher(content);
        if (matcher.find()) {
            fields.put(key, matcher.group(1).trim());
        }
    }

    /** Stores {@code value} under {@code key} unless it is {@code null}. */
    private static void putIfNotNull(Map<String, String> fields, String key, String value) {
        if (value != null) {
            fields.put(key, value);
        }
    }

    /**
     * Splits the first capture group of {@code regex} on {@code <br>} and joins the part before
     * {@code " / "} of every well-formed entry. Malformed entries are reported with
     * {@code badDataPrefix} and skipped. Returns {@code null} when the pattern is not found,
     * when an entry is None/Unknown, or when nothing usable remains.
     */
    private static String collectPrimaryValues(String content, String regex, String badDataPrefix) {
        Matcher matcher = Pattern.compile(regex).matcher(content);
        if (!matcher.find()) {
            return null;
        }
        List<String> values = new ArrayList<>();
        for (String entry : matcher.group(1).trim().split("<br>")) {
            String[] parts = entry.split(" / ");
            if (parts.length != 2) {
                System.out.println(badDataPrefix + entry);
                continue;
            }
            String primary = parts[0].trim();
            if (isNoneOrUnknown(primary)) {
                return null;
            }
            values.add(primary);
        }
        String joined = joinWithCommas(values);
        return joined.trim().length() > 0 ? joined : null;
    }

    /** Returns whether {@code value} is the placeholder "None" or "Unknown", ignoring case. */
    private static boolean isNoneOrUnknown(String value) {
        return value.equalsIgnoreCase("None") || value.equalsIgnoreCase("Unknown");
    }

    /** Joins {@code values} with {@code ", "}; an empty list yields the empty string. */
    private static String joinWithCommas(List<String> values) {
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < values.size(); i++) {
            if (i > 0) {
                joined.append(", ");
            }
            joined.append(values.get(i));
        }
        return joined.toString();
    }

    /**
     * Builds the rowset for one record by delegating to
     * {@code ExtractorHelper.createRowseFromFields}.
     *
     * @param map record fields; {@code documentID} and {@code language} are read from it and
     *            the whole map is passed along as the field set
     * @return whatever {@code ExtractorHelper.createRowseFromFields} produces for this record
     */
    @Override // org.gcube.textextractor.extractors.InformationExtractor
    public String createCustomRowset(Map<String, String> map) {
        String documentId = map.get("documentID");
        String language = map.get("language");
        return ExtractorHelper.createRowseFromFields(
                documentId,
                InformationExtractor.collectionID,
                InformationExtractor.idxType,
                language,
                map);
    }

    /**
     * Sends every collected URI to the matching {@code WIOFISH_*} method of the shared
     * {@link AnnotationBase} instance.
     *
     * <p>The map key selects the annotation method; keys that have no mapping and entries
     * whose URI list is {@code null} are ignored. Threatened species are accepted under both
     * {@code threatened_species} and {@code threatened_species_uris}, because the other
     * species categories use keys without the {@code _uris} suffix.
     *
     * @param str document URI the annotations are attached to
     * @param map URI lists keyed by category name
     * @throws FileNotFoundException propagated from the {@code AnnotationBase} calls
     */
    private void annotate(String str, Map<String, List<String>> map) throws FileNotFoundException {
        AnnotationBase annotationBase = AnnotationBase.getInstance();
        for (Map.Entry<String, List<String>> entry : map.entrySet()) {
            List<String> uris = entry.getValue();
            if (uris == null) {
                // A category without a URI list has nothing to annotate.
                continue;
            }
            String category = entry.getKey();
            for (String uri : uris) {
                switch (category) {
                    case "country_uris":
                        annotationBase.WIOFISH_country(str, uri);
                        break;
                    case "vessel_uris":
                        annotationBase.WIOFISH_vessel(str, uri);
                        break;
                    case "gear_uris":
                        annotationBase.WIOFISH_gear(str, uri);
                        break;
                    case "target_species":
                        annotationBase.WIOFISH_targetSpecies(str, uri);
                        break;
                    case "bycatch_species":
                        annotationBase.WIOFISH_bycatchSpecies(str, uri);
                        break;
                    case "discard_species":
                        annotationBase.WIOFISH_discardSpecies(str, uri);
                        break;
                    case "threatened_species":
                    case "threatened_species_uris":
                        annotationBase.WIOFISH_thretenedSpecies(str, uri);
                        break;
                    case "management_uris":
                        annotationBase.WIOFISH_management(str, uri);
                        break;
                    case "sector_uris":
                        annotationBase.WIOFISH_sector(str, uri);
                        break;
                    case "seasonality_uris":
                        annotationBase.WIOFISH_seasonality(str, uri);
                        break;
                    case "exploitation_status_uris":
                        annotationBase.WIOFISH_status(str, uri);
                        break;
                    case "access_control_uris":
                        annotationBase.WIOFISH_access_control(str, uri);
                        break;
                    case "fishing_control_uris":
                        annotationBase.WIOFISH_fishing_control(str, uri);
                        break;
                    case "enforcement_method_uris":
                        annotationBase.WIOFISH_enforcement_method(str, uri);
                        break;
                    default:
                        // Categories without an annotation method (e.g. species_uris) are skipped.
                        break;
                }
            }
        }
    }
}
