/*
 * Decompiled with CFR 0.152.
 */
package org.openimaj.text.nlp.namedentity;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Filters out tokens that should be ignored: entries of the bundled
 * ignore lists for a language, numeric literals, and numbers written out in
 * English words (up to "nine hundred and ninety-nine").
 */
public class IgnoreTokenStripper {
    /** Alternation of the written units 1-9. */
    private static final String UNITS = "one|two|three|four|five|six|seven|eight|nine";
    /** Alternation of the written multiples of ten, 20-90. */
    private static final String TENS = "twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety";
    /** Alternation of the written numbers 10-19. */
    private static final String TEENS = "ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen";
    /** Optional separator between number words: whitespace and/or a single hyphen. */
    private static final String SEP = "\\s*-?\\s*";
    /** The joining word "and", with optional separators on either side. */
    private static final String AND = SEP + "and" + SEP;
    /**
     * Numbers 1-99: a tens word optionally followed by a unit
     * ("twenty", "twenty-one"), a teen, or a bare unit.
     * Groups are non-capturing; character classes must NOT be used here, as
     * they would match any string made of the letters of these words.
     */
    private static final String TO_NN = "(?:(?:" + TENS + ")(?:" + SEP + "(?:" + UNITS + "))?|" + TEENS + "|" + UNITS + ")";
    /**
     * Numbers 1-999: "&lt;unit&gt; hundred", optionally followed by
     * ("and")? and a 1-99 number, or a plain 1-99 number.
     */
    private static final String TO_NNN = "(?:(?:" + UNITS + ")" + SEP + "hundred(?:(?:" + AND + "|" + SEP + ")" + TO_NN + ")?|" + TO_NN + ")";
    /** Compiled once; case-insensitive so no locale-dependent lowercasing is needed. */
    private static final Pattern WRITTEN_NUMBERS = Pattern.compile(TO_NNN, Pattern.CASE_INSENSITIVE);

    /** Classpath locations of the English ignore lists. */
    private static final String[] ENGLISH_LISTS = {
        "/org/openimaj/text/stopwords/en_stopwords.txt",
        "/org/openimaj/text/stopwords/en_nouns.txt",
        "/org/openimaj/text/stopwords/en_countries.txt"
    };

    /** Trimmed lines read from the ignore lists; looked up case-sensitively. */
    private final HashSet<String> ignoreTokens = new HashSet<String>();

    /**
     * Builds a stripper and loads every ignore list available for the language.
     * Loading is best-effort: a missing or unreadable list is reported on
     * {@code System.err} and skipped.
     *
     * @param language the language whose ignore lists should be loaded
     */
    public IgnoreTokenStripper(Language language) {
        for (InputStream fstream : this.getListStreams(language)) {
            this.addToIgnoreSet(fstream);
        }
    }

    /**
     * Reads the stream line by line, adding each trimmed line to the ignore
     * set, and always closes the stream afterwards.
     *
     * @param fstream the list to read; {@code null} is silently skipped
     */
    private void addToIgnoreSet(InputStream fstream) {
        if (fstream == null) {
            return;
        }
        try {
            // Explicit charset so decoding does not depend on the platform default.
            BufferedReader br = new BufferedReader(new InputStreamReader(fstream, "UTF-8"));
            String strLine;
            while ((strLine = br.readLine()) != null) {
                this.ignoreTokens.add(strLine.trim());
            }
        } catch (IOException e) {
            System.err.println("Error: " + e.getMessage());
        } finally {
            try {
                fstream.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing a read-only stream fails.
            }
        }
    }

    /**
     * Opens the ignore-list resources for the given language.
     *
     * @param language the requested language
     * @return the streams that could be opened; empty (never {@code null})
     *         when the language has no lists or none could be found
     */
    private List<InputStream> getListStreams(Language language) {
        ArrayList<InputStream> res = new ArrayList<InputStream>();
        if (language == Language.English) {
            for (String path : ENGLISH_LISTS) {
                InputStream stream = this.getClass().getResourceAsStream(path);
                if (stream == null) {
                    System.err.println("Error: ignore-token list not found: " + path);
                    continue;
                }
                res.add(stream);
            }
        }
        return res;
    }

    /**
     * Returns the tokens that are not ignore tokens, in their original order.
     *
     * @param intokens the tokens to filter
     * @return a new list holding every token for which
     *         {@link #isIgnoreToken(String)} is {@code false}
     */
    public ArrayList<String> getNonStopWords(List<String> intokens) {
        ArrayList<String> result = new ArrayList<String>();
        for (String token : intokens) {
            if (!this.isIgnoreToken(token)) {
                result.add(token);
            }
        }
        return result;
    }

    /**
     * Tells whether a token should be ignored.
     *
     * @param token the token to test; must not be {@code null}
     * @return {@code true} if the token is in a loaded ignore list, is
     *         accepted by {@link Double#parseDouble(String)}, or is a number
     *         written in English words (case-insensitive)
     * @throws NullPointerException if {@code token} is {@code null}
     */
    public boolean isIgnoreToken(String token) {
        if (this.ignoreTokens.contains(token)) {
            return true;
        }
        try {
            Double.parseDouble(token);
            return true;
        } catch (NumberFormatException notNumeric) {
            // Not a numeric literal; fall back to the written-number grammar.
            return WRITTEN_NUMBERS.matcher(token).matches();
        }
    }

    /** Languages for which ignore lists are bundled. */
    public static enum Language {
        English;

    }
}

