/*
 * Decompiled with CFR 0.152.
 */
package crawlercommons.robots;

import crawlercommons.robots.BaseRobotRules;
import crawlercommons.robots.BaseRobotsParser;
import crawlercommons.robots.SimpleRobotRules;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLDecoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SimpleRobotRulesParser
extends BaseRobotsParser {
    /** Logger shared by all parser instances. */
    private static final Logger LOGGER = LoggerFactory.getLogger(SimpleRobotRulesParser.class);
    /**
     * Lower-case directive prefix (canonical names plus the alternative
     * spellings registered in the static initializer) mapped to its directive.
     * The reference is never reassigned; the map is only filled by the static
     * initializer.
     */
    private static final Map<String, RobotDirective> DIRECTIVE_PREFIX = new HashMap<>();
    /** Optional blanks, a colon, optional blanks, then the captured value. */
    private static final Pattern COLON_DIRECTIVE_DELIMITER;
    /** At least one blank or tab followed by the captured value. */
    private static final Pattern BLANK_DIRECTIVE_DELIMITER;
    /** Skips the rest of a directive name (up to colon/blank) and captures what follows. */
    private static final Pattern DIRECTIVE_SUFFIX_PATTERN;
    /** Separator between robot names: a comma with optional whitespace, or whitespace. */
    private static final Pattern ROBOT_NAMES_SPLIT;
    /** Detects an opening html, head or body tag (case-insensitive). */
    private static final Pattern SIMPLE_HTML_PATTERN;
    /** Detects the text "user-agent:" (case-insensitive). */
    private static final Pattern USER_AGENT_PATTERN;
    /** One or more ASCII letters, underscores or hyphens. */
    protected static final Pattern USER_AGENT_PRODUCT_TOKEN_MATCHER;
    /** Value used for the warning limit by the no-argument constructor. */
    public static final int DEFAULT_MAX_WARNINGS = 5;
    /**
     * Value used for the crawl-delay limit by the no-argument constructor. It is
     * compared against the parsed Crawl-delay value multiplied by 1000
     * (presumably milliseconds - confirm against SimpleRobotRules).
     */
    public static final long DEFAULT_MAX_CRAWL_DELAY = 300000L;
    /** Per-thread number of warnings counted by the most recent completed parse. */
    private final ThreadLocal<Integer> _numWarningsDuringLastParse = new ThreadLocal<>();
    /** Warnings are logged in detail only while the running count stays below this value. */
    private int _maxWarnings;
    /** Rules whose crawl delay exceeds this value are replaced by "allow none". */
    private long _maxCrawlDelay;
    /** Whether user-agent lines are matched exactly; set to true by the constructor. */
    private boolean _exactUserAgentMatching;

    /**
     * Splits one (already trimmed, comment-free) robots.txt line into a
     * directive and its value.
     *
     * <p>Every registered prefix the lower-cased line starts with is tried. A
     * prefix is accepted when the text after it (for prefix-style directives:
     * after the remaining part of the directive name) matches the colon
     * delimiter or, failing that, the blank delimiter. The value is taken from
     * the original, not the lower-cased, line.
     *
     * @param line the line to tokenize
     * @return the recognised directive with its trimmed value; otherwise an
     *         {@code UNKNOWN} token when the whole line matches the colon
     *         delimiter pattern, else a {@code MISSING} token - both carrying
     *         the complete line as data
     */
    private static RobotToken tokenize(String line) {
        final String lowered = line.toLowerCase(Locale.ROOT);
        for (Map.Entry<String, RobotDirective> candidate : DIRECTIVE_PREFIX.entrySet()) {
            final String prefix = candidate.getKey();
            if (!lowered.startsWith(prefix)) {
                continue;
            }
            final RobotDirective directive = candidate.getValue();
            String remainder = line.substring(prefix.length());
            if (directive.isPrefix()) {
                // Prefix-style directive: drop the rest of the directive name first.
                Matcher suffix = DIRECTIVE_SUFFIX_PATTERN.matcher(remainder);
                if (!suffix.matches()) {
                    continue;
                }
                remainder = suffix.group(1);
            }
            Matcher delimiter = COLON_DIRECTIVE_DELIMITER.matcher(remainder);
            boolean delimited = delimiter.matches();
            if (!delimited) {
                delimiter = BLANK_DIRECTIVE_DELIMITER.matcher(remainder);
                delimited = delimiter.matches();
            }
            if (delimited) {
                return new RobotToken(directive, delimiter.group(1).trim());
            }
        }
        boolean colonLine = COLON_DIRECTIVE_DELIMITER.matcher(lowered).matches();
        RobotDirective fallback = colonLine ? RobotDirective.UNKNOWN : RobotDirective.MISSING;
        return new RobotToken(fallback, line);
    }

    /**
     * Creates a parser using {@link #DEFAULT_MAX_CRAWL_DELAY} and
     * {@link #DEFAULT_MAX_WARNINGS}.
     */
    public SimpleRobotRulesParser() {
        this(DEFAULT_MAX_CRAWL_DELAY, DEFAULT_MAX_WARNINGS);
    }

    /**
     * Creates a parser with explicit limits. Exact user-agent matching is
     * switched on.
     *
     * @param maxCrawlDelay crawl-delay limit above which parsed rules are
     *            replaced by "allow none"
     * @param maxWarnings limit for detailed warning logging
     */
    public SimpleRobotRulesParser(long maxCrawlDelay, int maxWarnings) {
        _exactUserAgentMatching = true;
        _maxWarnings = maxWarnings;
        _maxCrawlDelay = maxCrawlDelay;
    }

    /**
     * Checks whether a user-agent name consists solely of the characters
     * accepted by {@code USER_AGENT_PRODUCT_TOKEN_MATCHER}.
     *
     * @param userAgent the name to check, may be {@code null}
     * @return {@code true} if the name is non-null and matches the pattern as a
     *         whole
     */
    protected static boolean isValidUserAgentToObey(String userAgent) {
        if (userAgent == null) {
            return false;
        }
        return USER_AGENT_PRODUCT_TOKEN_MATCHER.matcher(userAgent).matches();
    }

    /**
     * Builds the rules to apply when fetching robots.txt did not succeed.
     *
     * @param httpStatusCode the HTTP status code of the failed fetch
     * @return "allow all" rules for 4xx codes; for every other non-2xx code
     *         "allow none" rules with the defer-visits flag set
     * @throws IllegalStateException if the code is in the 2xx range
     */
    @Override
    public SimpleRobotRules failedFetch(int httpStatusCode) {
        final boolean success = httpStatusCode >= 200 && httpStatusCode < 300;
        if (success) {
            throw new IllegalStateException("Can't use status code constructor with 2xx response");
        }
        final boolean clientError = httpStatusCode >= 400 && httpStatusCode < 500;
        if (clientError) {
            return new SimpleRobotRules(SimpleRobotRules.RobotRulesMode.ALLOW_ALL);
        }
        // 3xx and all remaining codes are handled the same way: block and defer.
        SimpleRobotRules deferred = new SimpleRobotRules(SimpleRobotRules.RobotRulesMode.ALLOW_NONE);
        deferred.setDeferVisits(true);
        return deferred;
    }

    /**
     * Parses robots.txt content for robot names given as a single string.
     *
     * <p>The string is lower-cased and split by {@link #splitRobotNames(String)};
     * duplicates are removed while the order of first occurrence is kept.
     *
     * @param url URL the content was fetched from
     * @param content raw robots.txt bytes
     * @param contentType content type reported for the content, may be null
     * @param robotNames robot names separated by commas and/or whitespace
     * @return the parsed rules
     * @deprecated use the overload taking a collection of robot names
     */
    @Override
    @Deprecated
    public SimpleRobotRules parseContent(String url, byte[] content, String contentType, String robotNames) {
        String[] names = this.splitRobotNames(robotNames);
        Collection<String> uniqueNames = new LinkedHashSet<String>(Arrays.asList(names));
        return this.parseContent(url, content, contentType, uniqueNames, false);
    }

    /**
     * Lower-cases and trims the given string, then splits it at commas and/or
     * whitespace.
     *
     * @param robotNames robot names in a single string
     * @return the individual names
     */
    protected String[] splitRobotNames(String robotNames) {
        String normalized = robotNames.toLowerCase(Locale.ROOT).trim();
        return ROBOT_NAMES_SPLIT.split(normalized);
    }

    /**
     * Prepares robot names for use with exact user-agent matching: names
     * containing ASCII upper-case letters are lower-cased and the wildcard
     * {@code "*"} is dropped. A warning is logged for each such name and for
     * names that are not valid product tokens.
     *
     * @param robotNames the names to sanitize
     * @return the given collection itself if nothing had to be changed,
     *         otherwise a new set holding the sanitized names
     */
    public static Collection<String> sanitizeRobotNames(Collection<String> robotNames) {
        boolean mustRebuild = false;
        for (String name : robotNames) {
            if (!isValidUserAgentToObey(name)) {
                LOGGER.warn("User-agent product token sanitization: '{}' is not a valid user-agent token following RFC 9309, matching the user-agent may fail", name);
            }
            boolean hasUpperCase = name.chars().anyMatch(c -> c >= 'A' && c <= 'Z');
            if (hasUpperCase) {
                LOGGER.warn("User-agent product token sanitization: lower-casing {}", name);
                mustRebuild = true;
            }
            if ("*".equals(name)) {
                LOGGER.warn("User-agent product token sanitization: removing wildcard user-agent");
                mustRebuild = true;
            }
        }
        if (!mustRebuild) {
            return robotNames;
        }
        return robotNames.stream()
                .filter(name -> !"*".equals(name))
                .map(name -> name.toLowerCase(Locale.ROOT))
                .collect(Collectors.toSet());
    }

    /**
     * Parses robots.txt content for the given robot names.
     *
     * <p>When exact user-agent matching is enabled the names are checked first:
     * a name that is not a valid product token only triggers a warning, while
     * ASCII upper-case letters or the wildcard name are rejected.
     *
     * @param url URL the content was fetched from
     * @param content raw robots.txt bytes
     * @param contentType content type reported for the content, may be null
     * @param robotNames names of the robot to select rules for
     * @return the parsed rules
     * @throws IllegalArgumentException if exact matching is enabled and a name
     *             contains an ASCII upper-case letter or equals {@code "*"}
     */
    @Override
    public SimpleRobotRules parseContent(String url, byte[] content, String contentType, Collection<String> robotNames) {
        final boolean exactMatching = this.isExactUserAgentMatching();
        if (exactMatching) {
            for (String name : robotNames) {
                if (!isValidUserAgentToObey(name)) {
                    LOGGER.warn("User-agent product token sanitization: '{}' is not a valid user-agent token following RFC 9309", name);
                }
                boolean hasUpperCase = name.chars().anyMatch(c -> c >= 'A' && c <= 'Z');
                if (hasUpperCase) {
                    throw new IllegalArgumentException("Parameter 'robotNames': user-agent tokens must be lower-case");
                }
                if ("*".equals(name)) {
                    throw new IllegalArgumentException("Parameter 'robotNames': the wildcard user-agent ('*') should not be included");
                }
            }
        }
        return this.parseContent(url, content, contentType, robotNames, exactMatching);
    }

    /**
     * Core robots.txt parsing routine shared by the public overloads.
     *
     * <p>Steps: decode the bytes (UTF-8 unless a UTF-16 byte order mark is
     * present, a leading BOM is skipped), detect HTML pages, split the text
     * into lines, strip comments and dispatch every non-empty line to the
     * handler of its directive. Directives without a handler are ignored.
     *
     * <p>The per-thread warning counter read by {@code getNumWarnings()} is
     * reset on entry, so it is also well defined when this method returns
     * early for empty content or for an HTML page without a user-agent line.
     *
     * @param url URL the content was fetched from; used in log messages and as
     *            base when resolving sitemap URLs
     * @param content raw robots.txt bytes, may be null or empty
     * @param contentType reported content type, may be null
     * @param robotNames robot names to select rules for
     * @param exactUserAgentMatching NOTE(review): not referenced by this method;
     *            the user-agent handler consults
     *            {@code isExactUserAgentMatching()} instead - confirm whether
     *            callers passing {@code false} expect prefix matching
     * @return "allow all" for null/empty content and for HTML without a
     *         user-agent line, "allow none" if the parsed crawl delay exceeds
     *         the configured maximum, otherwise the parsed and sorted rules
     */
    private SimpleRobotRules parseContent(String url, byte[] content, String contentType, Collection<String> robotNames, boolean exactUserAgentMatching) {
        // Reset first: the early returns below never reach the final set() call.
        this._numWarningsDuringLastParse.set(0);
        if (content == null || content.length == 0) {
            return new SimpleRobotRules(SimpleRobotRules.RobotRulesMode.ALLOW_ALL);
        }
        int bytesLen = content.length;
        int offset = 0;
        Charset encoding = StandardCharsets.UTF_8;
        if (bytesLen >= 3 && content[0] == (byte) 0xEF && content[1] == (byte) 0xBB && content[2] == (byte) 0xBF) {
            // UTF-8 byte order mark
            offset = 3;
            bytesLen -= 3;
        } else if (bytesLen >= 2 && content[0] == (byte) 0xFF && content[1] == (byte) 0xFE) {
            // UTF-16 little-endian byte order mark
            offset = 2;
            bytesLen -= 2;
            encoding = StandardCharsets.UTF_16LE;
        } else if (bytesLen >= 2 && content[0] == (byte) 0xFE && content[1] == (byte) 0xFF) {
            // UTF-16 big-endian byte order mark
            offset = 2;
            bytesLen -= 2;
            encoding = StandardCharsets.UTF_16BE;
        }
        String contentAsStr = new String(content, offset, bytesLen, encoding);
        boolean isHtmlType = contentType != null && contentType.toLowerCase(Locale.ROOT).startsWith("text/html");
        boolean hasHTML = false;
        if (isHtmlType || SIMPLE_HTML_PATTERN.matcher(contentAsStr).find()) {
            if (!USER_AGENT_PATTERN.matcher(contentAsStr).find()) {
                LOGGER.trace("Found non-robots.txt HTML file: {}", (Object)url);
                return new SimpleRobotRules(SimpleRobotRules.RobotRulesMode.ALLOW_ALL);
            }
            if (isHtmlType) {
                LOGGER.debug("HTML content type returned for robots.txt file: {}", (Object)url);
            } else {
                LOGGER.debug("Found HTML in robots.txt file: {}", (Object)url);
            }
            hasHTML = true;
        }
        // Compiled once instead of once per line via String.replaceAll.
        Pattern htmlTag = hasHTML ? Pattern.compile("<[^>]+>") : null;
        StringTokenizer lineParser = new StringTokenizer(contentAsStr, "\n\r\u0085\u2028\u2029");
        ParseState parseState = new ParseState(url, robotNames);
        while (lineParser.hasMoreTokens()) {
            String line = lineParser.nextToken();
            if (htmlTag != null) {
                line = htmlTag.matcher(line).replaceAll("");
            }
            int hashPos = line.indexOf("#");
            if (hashPos >= 0) {
                // Everything from the first '#' on is a comment.
                line = line.substring(0, hashPos);
            }
            line = line.trim();
            if (line.length() == 0) {
                continue;
            }
            RobotToken token = SimpleRobotRulesParser.tokenize(line);
            switch (token.getDirective()) {
                case USER_AGENT: {
                    this.handleUserAgent(parseState, token);
                    break;
                }
                case DISALLOW: {
                    parseState.setFinishedAgentFields(true);
                    this.handleDisallow(parseState, token);
                    break;
                }
                case ALLOW: {
                    parseState.setFinishedAgentFields(true);
                    this.handleAllow(parseState, token);
                    break;
                }
                case CRAWL_DELAY: {
                    this.handleCrawlDelay(parseState, token);
                    parseState.setAddingCrawlDelay(false);
                    break;
                }
                case SITEMAP: {
                    this.handleSitemap(parseState, token);
                    break;
                }
                case HTTP: {
                    this.handleHttp(parseState, token);
                    break;
                }
                case UNKNOWN: {
                    this.reportWarning(parseState, "Unknown directive in robots.txt file: {}", line);
                    break;
                }
                case MISSING: {
                    this.reportWarning(parseState, "Unknown line in robots.txt file (size {}): {}", content.length, line);
                    break;
                }
                default: {
                    // Recognised directive without a handler: ignored.
                    break;
                }
            }
        }
        this._numWarningsDuringLastParse.set(parseState._numWarnings);
        SimpleRobotRules result = parseState.getRobotRules();
        if (result.getCrawlDelay() > this._maxCrawlDelay) {
            LOGGER.debug("Crawl delay exceeds max value - so disallowing all URLs: {}", (Object)url);
            return new SimpleRobotRules(SimpleRobotRules.RobotRulesMode.ALLOW_NONE);
        }
        result.sortRules();
        return result;
    }

    /**
     * Counts a parse warning and logs it unless the limit has been reached.
     *
     * <p>The first warning of a parse additionally logs the robots.txt URL. A
     * warning is logged in detail only while the running count is below the
     * configured maximum. String arguments longer than 1024 characters are
     * shortened in place before logging.
     *
     * @param state parse state holding the warning counter and the URL
     * @param msg log message format, logged with a leading tab
     * @param args2 arguments for the message format
     */
    private void reportWarning(ParseState state, String msg, Object ... args2) {
        state._numWarnings += 1;
        if (state._numWarnings == 1) {
            LOGGER.warn("Problem processing robots.txt for {}", state._url);
        }
        if (state._numWarnings >= this._maxWarnings) {
            return;
        }
        for (int i = 0; i < args2.length; ++i) {
            Object arg = args2[i];
            if (arg instanceof String) {
                String text = (String) arg;
                if (text.length() > 1024) {
                    args2[i] = text.substring(0, 1024) + " ...";
                }
            }
        }
        LOGGER.warn("\t " + msg, args2);
    }

    /**
     * Checks whether the leading product token of an agent name is one of the
     * target tokens.
     *
     * @param agentName agent name taken from a user-agent line
     * @param targetTokens tokens to look the leading token up in
     * @return {@code true} if the name starts with a run of product-token
     *         characters and that run is contained in {@code targetTokens}
     */
    protected boolean userAgentProductTokenPartialMatch(String agentName, Collection<String> targetTokens) {
        Matcher leadingToken = USER_AGENT_PRODUCT_TOKEN_MATCHER.matcher(agentName);
        if (!leadingToken.lookingAt()) {
            return false;
        }
        return targetTokens.contains(leadingToken.group());
    }

    /**
     * Processes a user-agent line and decides whether the rules that follow
     * apply to one of the target robot names.
     *
     * <p>A user-agent line seen after rule lines starts a new group, so rule
     * and crawl-delay collection is switched off first. Then:
     * <ul>
     * <li>an empty agent name is ignored in both matching modes (an empty
     * string would otherwise be a prefix of every target name);</li>
     * <li>{@code "*"} starts collecting wildcard rules unless a specific name
     * has matched before;</li>
     * <li>exact mode: the lower-cased name must be one of the target names, or
     * - if it is not a valid product token - its leading product token must
     * be;</li>
     * <li>prefix mode: the leading product token must be a target name, or one
     * of the comma/whitespace separated names must be a prefix of a target
     * name.</li>
     * </ul>
     * On the first specific match after wildcard rules were collected, those
     * rules and the crawl delay are discarded.
     *
     * @param state the current parse state
     * @param token the user-agent token; its data is the agent name
     */
    private void handleUserAgent(ParseState state, RobotToken token) {
        if (state.isAddingRules() && state.isFinishedAgentFields()) {
            state.setAddingRules(false);
            state.setAddingCrawlDelay(false);
        }
        state.setFinishedAgentFields(false);

        String agentNameFull = token.getData().trim().toLowerCase(Locale.ROOT);
        if (agentNameFull.isEmpty()) {
            return;
        }
        if (agentNameFull.equals("*") && !state.isMatchedRealName()) {
            state.setMatchedWildcard(true);
            state.setAddingRules(true);
            state.setAddingCrawlDelay(true);
            return;
        }

        Collection<String> targetNames = state.getTargetNames();
        boolean matched = false;
        if (this.isExactUserAgentMatching()) {
            matched = targetNames.contains(agentNameFull)
                    || (!SimpleRobotRulesParser.isValidUserAgentToObey(agentNameFull) && this.userAgentProductTokenPartialMatch(agentNameFull, targetNames));
        } else if (this.userAgentProductTokenPartialMatch(agentNameFull, targetNames)) {
            matched = true;
        } else {
            String[] agentNames = ROBOT_NAMES_SPLIT.split(agentNameFull);
            if (agentNames.length > 1) {
                LOGGER.debug("Multiple agent names in user-agent line: {}", (Object)token.getData());
            }
            for (String agentName : agentNames) {
                if (agentName.isEmpty()) {
                    // Splitting can yield empty names (e.g. a leading comma);
                    // startsWith("") is always true, so skip them.
                    continue;
                }
                for (String targetName : targetNames) {
                    LOGGER.debug(targetName);
                    if (targetName.startsWith(agentName)) {
                        matched = true;
                        break;
                    }
                }
            }
        }

        if (matched) {
            if (state.isMatchedWildcard()) {
                // A specific group overrides previously collected wildcard rules.
                state.clearRules();
                state.clearCrawlDelay();
            }
            state.setMatchedRealName(true);
            state.setAddingRules(true);
            state.setAddingCrawlDelay(true);
            state.setMatchedWildcard(false);
        }
    }

    /**
     * Trims a rule path and passes it to {@code SimpleRobotRules.escapePath}
     * with {@code null} as second argument.
     *
     * @param path path value of an allow/disallow line
     * @return the result of {@code escapePath} for the trimmed path
     */
    private String normalizePathDirective(String path) {
        String trimmed = path.trim();
        return SimpleRobotRules.escapePath(trimmed, null);
    }

    /**
     * Adds a disallow rule for the normalized path of the token, provided
     * rules are currently being collected and the path is not empty. Any
     * exception raised while normalizing or adding is reported as a warning
     * and no rule is added.
     *
     * @param state the current parse state
     * @param token the disallow token; its data is the path
     */
    private void handleDisallow(ParseState state, RobotToken token) {
        if (state.isAddingRules()) {
            String path = token.getData();
            try {
                path = this.normalizePathDirective(path);
                if (!path.isEmpty()) {
                    state.addRule(path, false);
                }
            }
            catch (Exception e) {
                this.reportWarning(state, "Error parsing robots rules - can't decode path: {}", path);
            }
        }
    }

    /**
     * Adds an allow rule for the path of the token, provided rules are
     * currently being collected and the path is not empty.
     *
     * <p>If normalizing the path fails, a warning is reported and the path as
     * given in the token is used for the rule.
     *
     * @param state the current parse state
     * @param token the allow token; its data is the path
     */
    private void handleAllow(ParseState state, RobotToken token) {
        if (state.isAddingRules()) {
            String path = token.getData();
            try {
                path = this.normalizePathDirective(path);
            }
            catch (Exception e) {
                this.reportWarning(state, "Error parsing robots rules - can't decode path: {}", path);
            }
            if (!path.isEmpty()) {
                state.addRule(path, true);
            }
        }
    }

    /**
     * Parses a crawl-delay value and stores it, multiplied by 1000, in the
     * parse state. Values containing a dot are parsed as double and rounded,
     * all others as int. Unparsable values are reported as a warning. Does
     * nothing unless crawl delays are currently being collected; afterwards
     * collection is switched off.
     *
     * @param state the current parse state
     * @param token the crawl-delay token; its data is the delay value
     */
    private void handleCrawlDelay(ParseState state, RobotToken token) {
        if (!state.isAddingCrawlDelay()) {
            return;
        }
        String delayString = token.getData();
        if (!delayString.isEmpty()) {
            try {
                long scaledDelay;
                if (delayString.indexOf('.') >= 0) {
                    scaledDelay = Math.round(Double.parseDouble(delayString) * 1000.0);
                } else {
                    scaledDelay = (long) Integer.parseInt(delayString) * 1000L;
                }
                state.setCrawlDelay(scaledDelay);
            }
            catch (Exception e) {
                this.reportWarning(state, "Error parsing robots rules - can't decode crawl delay: {}", delayString);
            }
        }
        state.setAddingCrawlDelay(false);
    }

    /**
     * Registers the sitemap URL of a sitemap line.
     *
     * <p>The value is resolved against the robots.txt URL when that URL can be
     * converted to a {@link URL}; otherwise the value is used on its own. The
     * sitemap is only added if the resulting URL has a non-empty host. Any
     * failure is reported as a warning.
     *
     * @param state the current parse state
     * @param token the sitemap token; its data is the sitemap URL
     */
    private void handleSitemap(ParseState state, RobotToken token) {
        String sitemap = token.getData();
        try {
            URL base;
            try {
                base = new URI(state.getUrl()).toURL();
            }
            catch (IllegalArgumentException | MalformedURLException | URISyntaxException ignored) {
                // Unusable robots.txt URL: fall back to the sitemap value alone.
                base = null;
            }
            URL sitemapUrl;
            if (base == null) {
                sitemapUrl = new URI(sitemap).toURL();
            } else {
                sitemapUrl = base.toURI().resolve(sitemap).toURL();
            }
            String hostname = sitemapUrl.getHost();
            if (hostname != null && !hostname.isEmpty()) {
                state.addSitemap(sitemapUrl.toExternalForm());
            }
        }
        catch (Exception e) {
            this.reportWarning(state, "Invalid URL with sitemap directive:  {}", sitemap);
        }
    }

    /**
     * Handles a line tokenized as the {@code HTTP} directive, i.e. a bare URL.
     * If the value contains "sitemap" it is treated as a sitemap line with
     * "http:" put in front of the value; otherwise a warning is reported.
     *
     * @param state the current parse state
     * @param token the token; its data is the URL part after the scheme
     */
    private void handleHttp(ParseState state, RobotToken token) {
        String urlFragment = token.getData();
        if (!urlFragment.contains("sitemap")) {
            this.reportWarning(state, "Found raw non-sitemap URL: http:{}", urlFragment);
            return;
        }
        this.handleSitemap(state, new RobotToken(RobotDirective.SITEMAP, "http:" + urlFragment));
    }

    /**
     * Returns the number of warnings counted during the last parse that
     * stored a count for the calling thread.
     *
     * @return the stored warning count, or 0 if no count has been stored for
     *         this thread yet (the thread-local has no initial value, so
     *         unboxing it directly would throw a NullPointerException)
     */
    public int getNumWarnings() {
        Integer numWarnings = this._numWarningsDuringLastParse.get();
        return numWarnings == null ? 0 : numWarnings;
    }

    /**
     * @return the configured limit for detailed warning logging
     */
    public int getMaxWarnings() {
        return _maxWarnings;
    }

    /**
     * @param maxWarnings new limit for detailed warning logging
     */
    public void setMaxWarnings(int maxWarnings) {
        _maxWarnings = maxWarnings;
    }

    /**
     * @return the crawl-delay limit above which parsed rules are replaced by
     *         "allow none"
     */
    public long getMaxCrawlDelay() {
        return _maxCrawlDelay;
    }

    /**
     * @param maxCrawlDelay new crawl-delay limit
     */
    public void setMaxCrawlDelay(long maxCrawlDelay) {
        _maxCrawlDelay = maxCrawlDelay;
    }

    /**
     * @param exactMatching {@code true} to match user-agent lines exactly,
     *            {@code false} to use prefix matching
     */
    public void setExactUserAgentMatching(boolean exactMatching) {
        _exactUserAgentMatching = exactMatching;
    }

    /**
     * @return whether user-agent lines are matched exactly
     */
    public boolean isExactUserAgentMatching() {
        return _exactUserAgentMatching;
    }

    /**
     * Command-line entry point: fetches a robots.txt file, parses it and
     * either prints the rules or checks the given URLs against them.
     *
     * <p>For HTTP(S) URLs up to five redirects (301, 302, 303) are followed
     * manually; any other non-200 status yields the rules of
     * {@link #failedFetch(int)}. Content from any other kind of URL (e.g.
     * {@code file://}) is parsed as if it had been fetched from
     * {@code http://example.com/robots.txt}, as stated in the usage text.
     *
     * @param args2 {@code <robots.txt> [[<agentname>] <URL>...]}
     * @throws IOException if fetching the robots.txt fails
     * @throws URISyntaxException if the robots.txt URL or a redirect target is
     *             not a valid URI
     */
    public static void main(String[] args2) throws IOException, URISyntaxException {
        if (args2.length < 1) {
            System.err.println("SimpleRobotRulesParser <robots.txt> [[<agentname>] <URL>...]");
            System.err.println();
            System.err.println("Parse a robots.txt file");
            System.err.println("  <robots.txt>\tURL pointing to robots.txt file.");
            System.err.println("              \tMax. five HTTP redirects are followed.");
            System.err.println("              \tTo read a local file use a file:// URL");
            System.err.println("              \t(parsed as http://example.com/robots.txt)");
            System.err.println("  <agentname> \tuser agent name to check for exclusion rules,");
            System.err.println("              \ta single 'product token' as per RFC 9309.");
            System.err.println("              \tIf not defined check with '*'");
            System.err.println("  <URL>       \tcheck URL whether allowed or forbidden.");
            System.err.println("              \tIf no URL is given show the robots.txt rules.");
            System.exit(1);
        }
        String url = args2[0];
        // Set<String>, so it can be handed to parseContent(..., Collection<String>).
        Set<String> agentNames = Set.of();
        String agentName = "*";
        if (args2.length >= 2) {
            agentName = args2[1].trim().toLowerCase(Locale.ROOT);
            agentNames = Set.of(agentName);
        }
        SimpleRobotRulesParser parser = new SimpleRobotRulesParser();
        BaseRobotRules rules = null;
        URL u = new URI(url).toURL();
        URLConnection connection = u.openConnection();
        if (!agentNames.isEmpty()) {
            connection.setRequestProperty("User-Agent", agentName);
        }
        try {
            if (connection instanceof HttpURLConnection) {
                HttpURLConnection httpConnection = (HttpURLConnection)connection;
                final int maxRedirects = 5;
                // Every non-redirect outcome assigns 'rules' and thereby ends the loop.
                for (int redirects = 0; rules == null && redirects <= maxRedirects; ++redirects) {
                    // Redirects are followed manually so that they can be counted and reported.
                    httpConnection.setInstanceFollowRedirects(false);
                    int code = httpConnection.getResponseCode();
                    switch (code) {
                        case 200: {
                            System.out.println("Successfully fetched robots.txt");
                            byte[] content = IOUtils.toByteArray(httpConnection);
                            rules = parser.parseContent(url, content, httpConnection.getContentType(), agentNames);
                            break;
                        }
                        case 301:
                        case 302:
                        case 303: {
                            String location = httpConnection.getHeaderField("Location");
                            if (location == null) {
                                System.out.println("Redirect without Location header");
                                rules = parser.failedFetch(code);
                                break;
                            }
                            location = URLDecoder.decode(location, "UTF-8");
                            u = u.toURI().resolve(location).toURL();
                            if (redirects == maxRedirects) {
                                System.out.println("Reached maximum of " + maxRedirects + " redirects, not following redirect to " + u.toString());
                                rules = parser.failedFetch(code);
                                break;
                            }
                            System.out.println("Following redirect to " + u.toString());
                            httpConnection = (HttpURLConnection)u.openConnection();
                            if (!agentNames.isEmpty()) {
                                httpConnection.setRequestProperty("User-Agent", agentName);
                            }
                            break;
                        }
                        default: {
                            System.out.println("Fetch of " + url + " failed with HTTP status code " + code);
                            rules = parser.failedFetch(code);
                            break;
                        }
                    }
                }
            } else {
                byte[] content = IOUtils.toByteArray(connection);
                // Parse under an artificial HTTP URL (see usage text) instead of the
                // original, e.g. file://, URL.
                rules = parser.parseContent("http://example.com/robots.txt", content, "text/plain", agentNames);
            }
        }
        catch (IOException e) {
            System.out.println("Fetch of " + url + " failed with: " + e.getMessage());
            throw e;
        }
        if (args2.length < 3) {
            System.out.println("Robot rules for user agentname '" + agentName + "':");
            System.out.println(rules.toString());
        } else {
            System.out.println("Checking URLs:");
            for (int i = 2; i < args2.length; ++i) {
                System.out.println((rules.isAllowed(args2[i]) ? "allowed  " : "forbidden") + "\t" + args2[i]);
            }
        }
    }

    static {
        // Canonical prefixes: the lower-cased enum constant names with '_'
        // replaced by '-', for all directives not flagged as special.
        for (RobotDirective directive : RobotDirective.values()) {
            if (!directive.isSpecial()) {
                DIRECTIVE_PREFIX.put(directive.name().toLowerCase(Locale.ROOT).replace('_', '-'), directive);
            }
        }

        // Alternative spellings accepted for the user-agent directive.
        for (String variant : new String[] {"useragent", "useg-agent", "ser-agent", "users-agent", "user agent", "user-agnet", "user-agents"}) {
            DIRECTIVE_PREFIX.put(variant, RobotDirective.USER_AGENT);
        }
        // Alternative spellings accepted for the disallow directive.
        for (String variant : new String[] {"desallow", "dissallow", "dissalow", "disalow", "dssalow", "dsallow", "diasllow", "disallaw", "diallow", "disallows", "disllow"}) {
            DIRECTIVE_PREFIX.put(variant, RobotDirective.DISALLOW);
        }
        // Alternative spellings accepted for the crawl-delay directive.
        for (String variant : new String[] {"crawl delay", "clawl-delay", "craw-delay", "crawl-deley"}) {
            DIRECTIVE_PREFIX.put(variant, RobotDirective.CRAWL_DELAY);
        }
        DIRECTIVE_PREFIX.put("sitemaps", RobotDirective.SITEMAP);
        DIRECTIVE_PREFIX.put("https", RobotDirective.HTTP);

        // Delimiters between a directive name and its value.
        COLON_DIRECTIVE_DELIMITER = Pattern.compile("[ \t]*:[ \t]*(.*)");
        BLANK_DIRECTIVE_DELIMITER = Pattern.compile("[ \t]+(.*)");
        // Remaining part of a prefix-style directive name, followed by the rest.
        DIRECTIVE_SUFFIX_PATTERN = Pattern.compile("[^: \t]+(.*)");
        // Robot names are separated by commas and/or whitespace.
        ROBOT_NAMES_SPLIT = Pattern.compile("\\s*,\\s*|\\s+");
        // Content sniffing: HTML markup and the presence of a user-agent line.
        SIMPLE_HTML_PATTERN = Pattern.compile("(?is)<(html|head|body)\\s*>");
        USER_AGENT_PATTERN = Pattern.compile("(?i)user-agent:");
        // Characters accepted in a user-agent product token.
        USER_AGENT_PRODUCT_TOKEN_MATCHER = Pattern.compile("[a-zA-Z_-]+");
    }

    /**
     * Result of tokenizing a single robots.txt line: the directive that was
     * recognised and the data that goes with it.
     */
    private static class RobotToken {
        private final RobotDirective _directive;
        private final String _data;

        /**
         * @param directive the directive of the line
         * @param data the data belonging to the directive
         */
        public RobotToken(RobotDirective directive, String data) {
            _directive = directive;
            _data = data;
        }

        /** @return the directive of the line */
        public RobotDirective getDirective() {
            return _directive;
        }

        /** @return the data belonging to the directive */
        public String getData() {
            return _data;
        }
    }

    /**
     * Mutable state of a single robots.txt parse: which kind of user-agent
     * group has matched, whether rules and crawl delays are currently being
     * collected, the warning counter and the rules collected so far.
     */
    private static class ParseState {
        /** A user-agent line matched one of the target names. */
        private boolean _matchedRealName;
        /** The wildcard user-agent group has been selected. */
        private boolean _matchedWildcard;
        /** Allow/disallow lines are currently added to the rules. */
        private boolean _addingRules;
        /** Crawl-delay lines are currently taken into account. */
        private boolean _addingCrawlDelay;
        /** A rule line has been seen since the last user-agent line. */
        private boolean _finishedAgentFields;
        /** A crawl delay was stored while a target name was matched. */
        private boolean _crawlDelaySetRealName;
        /** Number of warnings reported so far. */
        private int _numWarnings;
        private final String _url;
        private final Collection<String> _targetNames;
        private final SimpleRobotRules _curRules;

        /**
         * @param url URL of the robots.txt being parsed
         * @param targetNames robot names to select rules for
         */
        public ParseState(String url, Collection<String> targetNames) {
            _url = url;
            _targetNames = targetNames;
            _curRules = new SimpleRobotRules();
        }

        public String getUrl() {
            return _url;
        }

        public Collection<String> getTargetNames() {
            return _targetNames;
        }

        public SimpleRobotRules getRobotRules() {
            return _curRules;
        }

        public boolean isMatchedRealName() {
            return _matchedRealName;
        }

        public void setMatchedRealName(boolean matchedRealName) {
            _matchedRealName = matchedRealName;
        }

        public boolean isMatchedWildcard() {
            return _matchedWildcard;
        }

        /** Updates the flag here and on the collected rules. */
        public void setMatchedWildcard(boolean matchedWildcard) {
            _matchedWildcard = matchedWildcard;
            _curRules.setMatchedWildcard(matchedWildcard);
        }

        public boolean isAddingRules() {
            return _addingRules;
        }

        public void setAddingRules(boolean addingRules) {
            _addingRules = addingRules;
        }

        public boolean isAddingCrawlDelay() {
            return _addingCrawlDelay;
        }

        public void setAddingCrawlDelay(boolean addingCrawlDelay) {
            _addingCrawlDelay = addingCrawlDelay;
        }

        public boolean isFinishedAgentFields() {
            return _finishedAgentFields;
        }

        public void setFinishedAgentFields(boolean finishedAgentFields) {
            _finishedAgentFields = finishedAgentFields;
        }

        public void addRule(String prefix, boolean allow) {
            _curRules.addRule(prefix, allow);
        }

        public void clearRules() {
            _curRules.clearRules();
        }

        public void addSitemap(String sitemap) {
            _curRules.addSitemap(sitemap);
        }

        /**
         * Stores a crawl delay.
         *
         * <p>While a target name is matched, the first delay replaces whatever
         * was stored; later delays (while crawl delays are being collected)
         * only replace a larger stored value. Without a matched target name a
         * delay is stored if none is set yet ({@code Long.MIN_VALUE}) or if it
         * is smaller than the stored one.
         *
         * @param delay the delay to store
         */
        public void setCrawlDelay(long delay) {
            if (_matchedRealName) {
                boolean keepSmallest = _crawlDelaySetRealName && _addingCrawlDelay;
                if (!keepSmallest || _curRules.getCrawlDelay() > delay) {
                    _curRules.setCrawlDelay(delay);
                }
                _crawlDelaySetRealName = true;
            } else {
                long current = _curRules.getCrawlDelay();
                if (current == Long.MIN_VALUE || current > delay) {
                    _curRules.setCrawlDelay(delay);
                }
            }
        }

        /** Resets the stored crawl delay to {@code Long.MIN_VALUE}. */
        public void clearCrawlDelay() {
            _curRules.setCrawlDelay(Long.MIN_VALUE);
        }
    }

    /**
     * Directives distinguished by the tokenizer. Each constant carries two
     * flags: "prefix" (the constant name is only the beginning of the actual
     * directive name) and "special" (the constant is not registered as a
     * directive prefix).
     */
    private static enum RobotDirective {
        USER_AGENT,
        DISALLOW,
        ALLOW,
        CRAWL_DELAY,
        SITEMAP,
        HOST,
        NO_INDEX,
        ACAP_(true, false),
        REQUEST_RATE,
        VISIT_TIME,
        ROBOT_VERSION,
        COMMENT,
        HTTP,
        UNKNOWN(false, true),
        MISSING(false, true);

        private final boolean _prefix;
        private final boolean _special;

        /** Regular directive: neither prefix nor special. */
        RobotDirective() {
            this(false, false);
        }

        RobotDirective(boolean isPrefix, boolean isSpecial) {
            _prefix = isPrefix;
            _special = isSpecial;
        }

        /** @return whether the constant name is only a prefix of the directive name */
        public boolean isPrefix() {
            return _prefix;
        }

        /** @return whether the constant is excluded from the directive prefixes */
        public boolean isSpecial() {
            return _special;
        }
    }
}

