/*
 * Decompiled with CFR 0.152.
 */
package com.rapidminer.operator.loganalysis;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DataRowReader;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.table.ListDataRowReader;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.loganalysis.LogEntryFilter;
import com.rapidminer.operator.loganalysis.RegularExpressionMatcher;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDirectory;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeString;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.jdom.JDOMException;
import org.polliwog.WeblogException;
import org.polliwog.data.Hit;
import org.polliwog.data.LogEntry;
import org.polliwog.data.LogEntryFormat;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Operator that reads all web server log files found in a directory and turns
 * every accepted log entry (hit) into one row of an {@link ExampleSet}.
 *
 * <p>Each row has nine attributes: session, ip, agent, uri, referer, time,
 * os_name, browser and language. Hits can be dropped by optional filters
 * (user-agent patterns, request-path patterns, HTTP status 200 only). Hits of
 * the same visitor (host name plus user agent) are grouped into sessions; a new
 * session starts when the gap to the visitor's previous hit exceeds the
 * {@code session_timeout} parameter.
 */
public class LogFileSource
extends Operator {
    /** Number of columns per data row; must equal the number of attributes built in {@link #createAttributes()}. */
    private static final int NUM_ATTRIBUTES = 9;
    private Attribute ip;
    private Attribute agent;
    private Attribute uri;
    private Attribute time;
    private Attribute referer;
    private Attribute language;
    private Attribute browser;
    private Attribute os;
    private Attribute sessionId;
    /** Optional matchers applied to the user agent string; {@code null} when the parameter is not set. */
    private RegularExpressionMatcher osMatcher;
    private RegularExpressionMatcher browserMatcher;
    private RegularExpressionMatcher languageMatcher;
    /** Filters a hit has to pass (all of them) to be turned into a data row. */
    private List<LogEntryFilter> filters;
    /** Maps a visitor key to the number of the visitor's current session. */
    private Map<String, Integer> sessionMap = null;
    /** Maps a visitor key to the most recent hit seen for that visitor. */
    private Map<String, Hit> visitorMap = null;
    private int currentSession = 0;
    /** Maximum gap between two hits of one visitor within a session, compared against {@code Date.getTime()} differences (milliseconds). */
    private int sessionTimeout;
    private boolean reverseDNSLookup;
    /** Cache of reverse DNS results, keyed by the looked-up address; kept across calls of {@link #apply()}. */
    Map<String, String> dnsMap = new HashMap<String, String>();

    public LogFileSource(OperatorDescription description) {
        super(description);
    }

    /**
     * Reads every regular file in the {@code log_dir} directory, parses it with
     * the format given by {@code config_file} and returns the accepted hits as
     * a single example set.
     *
     * @return an array holding exactly one {@link ExampleSet}
     * @throws OperatorException if a filter or format file cannot be read, or
     *         if {@code log_dir} cannot be listed
     */
    public IOObject[] apply() throws OperatorException {
        this.browserMatcher = this.createListMatcher("browser_matcher");
        this.osMatcher = this.createListMatcher("os_matcher");
        this.languageMatcher = this.createListMatcher("language_matcher");
        this.filters = this.createFilters();
        this.reverseDNSLookup = this.getParameterAsBoolean("dns_lookup");

        MemoryExampleTable exampleTable = new MemoryExampleTable(this.createAttributes());
        List<DataRow> dataRows = new LinkedList<DataRow>();
        LogEntryFormat format = this.loadLogEntryFormat(this.getParameterAsFile("config_file"));

        // Session bookkeeping starts from scratch on every run.
        this.currentSession = 0;
        this.sessionMap = new HashMap<String, Integer>();
        this.visitorMap = new HashMap<String, Hit>();
        this.sessionTimeout = this.getParameterAsInt("session_timeout");

        File baseDir = this.getParameterAsFile("log_dir");
        File[] logFiles = baseDir.listFiles(new FileFilter(){

            public boolean accept(File f) {
                return f.isFile();
            }
        });
        if (logFiles == null) {
            // listFiles() yields null when the path is not a directory or cannot be read;
            // report that with the same error code this operator uses for unreadable files.
            throw new UserError(this, 302, new Object[]{baseDir, "not a readable directory"});
        }
        for (File logFile : logFiles) {
            this.readLogFile(logFile, format, dataRows);
        }

        exampleTable.readExamples(new ListDataRowReader(dataRows.iterator()));
        ExampleSet exampleSet = exampleTable.createExampleSet();
        return new IOObject[]{exampleSet};
    }

    /**
     * Builds a matcher from a list parameter.
     *
     * @param parameterName key of the list parameter
     * @return the matcher, or {@code null} if the parameter is not set
     */
    private RegularExpressionMatcher createListMatcher(String parameterName) throws OperatorException {
        if (!this.isParameterSet(parameterName)) {
            return null;
        }
        return new RegularExpressionMatcher(this.getParameterList(parameterName));
    }

    /**
     * Creates the hit filters selected by the {@code robot_filter},
     * {@code filetype_filter} and {@code only_HTTP_200} parameters.
     *
     * @return the filters in the order robot, file type, status code
     * @throws OperatorException if a filter definition cannot be read
     */
    private List<LogEntryFilter> createFilters() throws OperatorException {
        List<LogEntryFilter> result = new LinkedList<LogEntryFilter>();
        if (this.isParameterSet("robot_filter")) {
            try {
                // NOTE(review): this FileReader is never closed here; whether the matcher
                // consumes it fully in its constructor is not visible from this file.
                final RegularExpressionMatcher robotMatcher = new RegularExpressionMatcher(new FileReader(this.getParameterAsFile("robot_filter")), false);
                result.add(new LogEntryFilter(){

                    public boolean accept(Hit le) {
                        return !robotMatcher.isSubstringMatch(le.getUserAgent());
                    }
                });
            }
            catch (IOException e) {
                throw new UserError(this, 302, new Object[]{this.getParameterAsFile("robot_filter"), e});
            }
        }
        if (this.isParameterSet("filetype_filter")) {
            try {
                final RegularExpressionMatcher fileTypeMatcher = new RegularExpressionMatcher(new StringReader(this.getParameterAsString("filetype_filter")), false);
                result.add(new LogEntryFilter(){

                    public boolean accept(Hit le) {
                        return !fileTypeMatcher.isSubstringMatch(le.getRequestURI().getPath());
                    }
                });
            }
            catch (IOException e) {
                // filetype_filter is a string parameter, so report its string value rather than a file.
                throw new UserError(this, 302, new Object[]{this.getParameterAsString("filetype_filter"), e});
            }
        }
        if (this.getParameterAsBoolean("only_HTTP_200")) {
            result.add(new LogEntryFilter(){

                public boolean accept(Hit le) {
                    return le.getStatus() == 200;
                }
            });
        }
        return result;
    }

    /**
     * Creates the nine attributes of the result table and stores them in the
     * corresponding fields.
     *
     * @return the attributes in table column order
     */
    private List<Attribute> createAttributes() {
        // Value type 1 is used for every attribute filled through a string mapping below,
        // value type 2 for the numeric time column (presumably the nominal / numerical
        // ontology codes -- confirm against the ontology constants).
        this.ip = AttributeFactory.createAttribute("ip", 1);
        this.agent = AttributeFactory.createAttribute("agent", 1);
        this.uri = AttributeFactory.createAttribute("uri", 1);
        this.referer = AttributeFactory.createAttribute("referer", 1);
        this.os = AttributeFactory.createAttribute("os_name", 1);
        this.language = AttributeFactory.createAttribute("language", 1);
        this.browser = AttributeFactory.createAttribute("browser", 1);
        this.time = AttributeFactory.createAttribute("time", 2);
        this.sessionId = AttributeFactory.createAttribute("session", 1);

        List<Attribute> attributes = new LinkedList<Attribute>();
        attributes.add(this.sessionId);
        attributes.add(this.ip);
        attributes.add(this.agent);
        attributes.add(this.uri);
        attributes.add(this.referer);
        attributes.add(this.time);
        attributes.add(this.os);
        attributes.add(this.browser);
        attributes.add(this.language);
        return attributes;
    }

    /**
     * Loads the log entry format description.
     *
     * @param formatConfig the format configuration file
     * @return the parsed format
     * @throws UserError (code 302) if the file cannot be read or parsed
     */
    private LogEntryFormat loadLogEntryFormat(File formatConfig) throws UserError {
        try {
            return new LogEntryFormat(formatConfig, ".gz");
        }
        catch (IOException e) {
            throw new UserError(this, 302, new Object[]{formatConfig.getAbsolutePath(), e});
        }
        catch (JDOMException e) {
            throw new UserError(this, 302, new Object[]{formatConfig.getAbsolutePath(), e});
        }
        catch (WeblogException e) {
            throw new UserError(this, 302, new Object[]{formatConfig.getAbsolutePath(), e});
        }
    }

    /**
     * Parses one log file line by line and appends a data row for every hit
     * that passes all filters. Blank lines and lines starting with {@code #}
     * are skipped. Problems with a single file are logged as warnings and do
     * not abort the run; rows collected before the problem are kept.
     *
     * @param logFile  the file to read
     * @param format   the format used to parse each line
     * @param dataRows receives the generated rows
     */
    private void readLogFile(File logFile, LogEntryFormat format, List<DataRow> dataRows) {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(logFile));
            int parsedLines = 0;
            int errorLines = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0 || line.startsWith("#")) {
                    continue;
                }
                ++parsedLines;
                Hit hit;
                try {
                    hit = (Hit)format.createEntry(line, Hit.class);
                }
                catch (WeblogException e) {
                    // An unparsable line is counted and skipped; the summary is logged once below.
                    ++errorLines;
                    continue;
                }
                if (hit != null && this.passesFilters(hit)) {
                    dataRows.add(this.processEntry(hit));
                }
            }
            if (errorLines > 0) {
                // Logged once per file, after the last line, instead of once per remaining line.
                this.getProcess().getLog().logWarning(logFile.getAbsolutePath() + ": Could not read " + errorLines + " lines out of " + parsedLines);
            }
        }
        catch (IOException e) {
            this.getProcess().getLog().logWarning(logFile.getAbsolutePath() + ": Could not read this file. Ignoring it");
        }
        catch (RuntimeException e) {
            // Still best effort (the remaining files are processed), but the failure is
            // reported through the process log instead of only on stderr.
            this.getProcess().getLog().logWarning(logFile.getAbsolutePath() + ": Unexpected error while processing this file, skipping the rest of it: " + e);
        }
        finally {
            if (reader != null) {
                try {
                    reader.close();
                }
                catch (IOException ignored) {
                    // Nothing useful can be done if closing fails; the data has been read already.
                }
            }
        }
    }

    /**
     * @param hit the hit to check
     * @return {@code true} if every configured filter accepts the hit
     */
    private boolean passesFilters(Hit hit) {
        for (LogEntryFilter filter : this.filters) {
            if (!filter.accept(hit)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Assigns the hit to a session and converts it into a data row.
     *
     * <p>A visitor is identified by host name and user agent. The hit opens a
     * new session if the visitor has not been seen before or if more than
     * {@code sessionTimeout} lies between this hit and the visitor's previous
     * one; otherwise it joins the visitor's current session.
     *
     * @param le the hit to convert
     * @return a row with {@link #NUM_ATTRIBUTES} values
     */
    private DataRow processEntry(Hit le) {
        String visitorKey = le.getHostname() + ":" + le.getUserAgent();
        Hit previousHit = this.visitorMap.get(visitorKey);
        this.visitorMap.put(visitorKey, le);

        boolean newSession = previousHit == null
            || le.getDate().getTime() - previousHit.getDate().getTime() > (long)this.sessionTimeout;
        int session;
        if (newSession) {
            ++this.currentSession;
            session = this.currentSession;
            this.sessionMap.put(visitorKey, this.currentSession);
        } else {
            session = this.sessionMap.get(visitorKey);
        }

        DoubleArrayDataRow result = new DoubleArrayDataRow(new double[NUM_ATTRIBUTES]);
        String clientHostName = le.getHostname();
        if (this.reverseDNSLookup) {
            clientHostName = this.reverseDNSLookUp(clientHostName);
        }
        result.set(this.ip, this.ip.getMapping().mapString(clientHostName));
        result.set(this.agent, this.agent.getMapping().mapString(le.getUserAgent()));
        result.set(this.uri, this.uri.getMapping().mapString(le.getRequestURI().toString()));
        if (le.getRefererURI() != null) {
            result.set(this.referer, this.referer.getMapping().mapString(le.getRefererURI().toString()));
        } else {
            result.set(this.referer, Double.NaN);
        }

        // The time column holds whole minutes since the epoch.
        Date date = le.getDate();
        int minutes = (int)(date.getTime() / 60000L);
        result.set(this.time, minutes);

        // All three matchers, including the language one, are evaluated on the user agent string.
        String userAgent = le.getUserAgent();
        this.setMatchedValue(result, this.browser, this.browserMatcher, userAgent);
        this.setMatchedValue(result, this.os, this.osMatcher, userAgent);
        this.setMatchedValue(result, this.language, this.languageMatcher, userAgent);

        result.set(this.sessionId, this.sessionId.getMapping().mapString("s" + session));
        return result;
    }

    /**
     * Stores the matcher's result for {@code text} in the given column:
     * the matched name, {@code "other"} if nothing matches, or a missing value
     * ({@code NaN}) if there is no matcher.
     *
     * @param row       the row to fill
     * @param attribute the column to set
     * @param matcher   the matcher to apply, may be {@code null}
     * @param text      the text handed to the matcher
     */
    private void setMatchedValue(DoubleArrayDataRow row, Attribute attribute, RegularExpressionMatcher matcher, String text) {
        if (matcher == null) {
            row.set(attribute, Double.NaN);
            return;
        }
        String name = matcher.getMatch(text);
        if (name == null) {
            name = "other";
        }
        row.set(attribute, attribute.getMapping().mapString(name));
    }

    /**
     * Resolves an address to a host name, caching the outcome.
     *
     * @param ip the address (or host name) taken from the log entry
     * @return the resolved host name, or {@code ip} itself if it cannot be resolved
     */
    private String reverseDNSLookUp(String ip) {
        String cached = this.dnsMap.get(ip);
        if (cached != null) {
            return cached;
        }
        String resolved;
        try {
            resolved = InetAddress.getByName(ip).getHostName();
        }
        catch (UnknownHostException e) {
            resolved = null;
        }
        if (resolved == null) {
            // Cache the fallback as well, so a failing lookup is not repeated for every hit.
            resolved = ip;
        }
        this.dnsMap.put(ip, resolved);
        return resolved;
    }

    /** This operator consumes no input objects. */
    public Class<?>[] getInputClasses() {
        return new Class[0];
    }

    /** This operator delivers one {@link ExampleSet}. */
    public Class<?>[] getOutputClasses() {
        return new Class[]{ExampleSet.class};
    }

    /**
     * Declares the parameters of this operator in addition to the inherited ones.
     *
     * @return the inherited parameter types followed by this operator's own
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        ParameterTypeFile configFile = new ParameterTypeFile("config_file", "the format configuration file", "xml", false);
        configFile.setExpert(false);
        types.add(configFile);
        ParameterTypeDirectory logDir = new ParameterTypeDirectory("log_dir", "the directory containing the log files", false);
        logDir.setExpert(false);
        types.add(logDir);
        types.add(new ParameterTypeBoolean("dns_lookup", "Perform reverse dns lookup on the client ip", false));
        types.add(new ParameterTypeFile("robot_filter", "file that contains regular expressions on user agents that should be filtered out. Each line must contain exactly one regular expression.", ".txt", true));
        types.add(new ParameterTypeString("filetype_filter", "file that contains regular expressions on files that should be filtered out. Each line must contain exactly one regular expression.", true));
        types.add(new ParameterTypeBoolean("only_HTTP_200", "Consider only entries with HTTP Response code 200", false));
        types.add(new ParameterTypeList("browser_matcher", "file that contains regular expressions to match browser types. Each line must contain exactly an expression of the form <name>:<regular expression>.", new ParameterTypeString("regular_expression", "matches browser types", false)));
        types.add(new ParameterTypeList("os_matcher", "file that contains regular expressions to match os types. Each line must contain exactly an expression of the form <name>:<regular expression>.", new ParameterTypeString("regular_expression", "matches os types", false)));
        types.add(new ParameterTypeList("language_matcher", "file that contains regular expressions to match languages. Each line must contain exactly an expression of the form <name>:<regular expression>.", new ParameterTypeString("regular_expression", "matches_languages", false)));
        types.add(new ParameterTypeInt("session_timeout", "Time between two requests from the same source, such that the second request can be assumed to be a new session", 0, Integer.MAX_VALUE, 400000));
        return types;
    }
}

