/*
 * Decompiled with CFR 0.152.
 */
package org.semanticdesktop.aperture.util;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;

/**
 * Heuristically extracts readable text from an arbitrary byte stream.
 *
 * <p>The stream is read byte by byte. Consecutive bytes accepted by
 * {@link #isTextCharacter(int)} are collected into a candidate line; any other
 * byte terminates the current candidate. Each candidate is trimmed and kept
 * only if it contains at least one token accepted by
 * {@link #isNormalWord(String)} and passes {@link #isValidLine(String)}.
 * Subclasses can tune the heuristics by overriding the protected methods.
 */
public class StringExtractor {
    /** Separator appended after every extracted line. */
    private static final String END_OF_LINE = System.getProperty("line.separator", "\n");

    /**
     * Lower-case font names; a line whose lower-cased form starts with one of
     * these is rejected by the default {@link #isValidLine(String)}.
     */
    public static final String[] COMMON_FONT_NAMES = new String[]{
        "albertus medium", "albertus extra bold", "algerian", "antique olive", "arial",
        "book antiqua", "bookman old style", "braggadocio", "britannic bold", "brush script mt",
        "century gothic", "century schoolbook", "cg omega", "cg times", "clarendon condensed",
        "colonna mt", "coronet", "courier", "courier new", "desdemona",
        "footlight mt light", "garamond", "helvetica", "impact", "kino mt",
        "line printer", "marigold", "matura mt script capitals", "modern", "monotype corsiva",
        "letter gothic", "playbill", "roman", "script", "symbol",
        "tahoma", "times new roman", "times roman", "univers", "univers condensed",
        "verdana", "wide latin", "wingdings"
    };

    /**
     * Reads the stream until end-of-file and returns the lines that look like
     * readable text, each followed by the platform line separator.
     *
     * <p>The stream is not closed by this method.
     *
     * @param stream the stream to read from; must not be {@code null}
     * @return the extracted text, or an empty string when nothing qualified
     * @throws IOException if reading from the stream fails
     */
    public String extract(InputStream stream) throws IOException {
        StringBuilder lineBuffer = new StringBuilder(512);
        StringBuilder textBuffer = new StringBuilder(65536);
        int b;
        while ((b = stream.read()) != -1) {
            if (this.isTextCharacter(b)) {
                lineBuffer.append((char) b);
            } else {
                this.flushLine(lineBuffer, textBuffer);
            }
        }
        // The stream may end in the middle of a text run; without this final
        // flush the last line would be silently lost.
        this.flushLine(lineBuffer, textBuffer);
        return textBuffer.toString();
    }

    /**
     * Evaluates the candidate line held in {@code lineBuffer}, appends it to
     * {@code textBuffer} if it qualifies, and empties {@code lineBuffer}.
     */
    private void flushLine(StringBuilder lineBuffer, StringBuilder textBuffer) {
        if (lineBuffer.length() == 0) {
            return;
        }
        String line = lineBuffer.toString();
        lineBuffer.setLength(0);
        line = this.postProcessLine(line);
        if (line == null) {
            return;
        }
        // Locale.ROOT keeps the comparison against the ASCII font names stable
        // regardless of the default locale (e.g. Turkish dotless i).
        String lineLowerCase = line.toLowerCase(Locale.ROOT);
        if (this.isStartLine(lineLowerCase)) {
            // A start line discards everything collected so far (and itself).
            textBuffer.setLength(0);
            return;
        }
        if (this.isValidLine(lineLowerCase)) {
            textBuffer.append(line);
            textBuffer.append(END_OF_LINE);
        }
    }

    /**
     * Tells whether the given line marks the start of the real text. When this
     * returns {@code true}, {@link #extract(InputStream)} discards all text
     * collected before (and including) this line. The default implementation
     * never reports a start line.
     *
     * @param lineLowerCase the post-processed line, lower-cased
     * @return {@code true} if previously collected text should be dropped
     */
    protected boolean isStartLine(String lineLowerCase) {
        return false;
    }

    /**
     * Tells whether the given line should be included in the output. The
     * default implementation rejects lines that start with one of the
     * {@link #COMMON_FONT_NAMES}.
     *
     * @param lineLowerCase the post-processed line, lower-cased
     * @return {@code false} if the line must be skipped
     */
    protected boolean isValidLine(String lineLowerCase) {
        for (String fontName : COMMON_FONT_NAMES) {
            if (lineLowerCase.startsWith(fontName)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether a byte value belongs to a text run: the ASCII range
     * 32..126 inclusive, or the tab character (9).
     *
     * @param charNumber the value returned by {@link InputStream#read()}
     * @return {@code true} if the value is part of a candidate line
     */
    protected boolean isTextCharacter(int charNumber) {
        return charNumber >= 32 && charNumber <= 126 || charNumber == 9;
    }

    /**
     * Trims a candidate line and decides whether it is worth keeping.
     *
     * @param line the raw candidate line
     * @return the trimmed line, or {@code null} when it is at most two
     *         characters long or none of its space-separated tokens is a
     *         normal word according to {@link #isNormalWord(String)}
     */
    protected String postProcessLine(String line) {
        String trimmed = line.trim();
        if (trimmed.length() <= 2) {
            return null;
        }
        StringTokenizer st = new StringTokenizer(trimmed, " ");
        while (st.hasMoreTokens()) {
            if (this.isNormalWord(st.nextToken())) {
                return trimmed;
            }
        }
        return null;
    }

    /**
     * Tells whether a token looks like a natural-language word. One trailing
     * {@code '.'} or {@code ','} is ignored. The remaining characters must:
     * <ul>
     *   <li>number at least three,</li>
     *   <li>all be letters,</li>
     *   <li>be cased as all lower-case, all upper-case, or capitalised
     *       (upper-case first letter only),</li>
     *   <li>not contain any single character (case-sensitive) that makes up
     *       half or more of the word.</li>
     * </ul>
     *
     * @param word the token to test; must not be {@code null}
     * @return {@code true} if the token passes all checks
     */
    protected boolean isNormalWord(String word) {
        int wordLength = word.length();
        if (wordLength > 0) {
            char lastChar = word.charAt(wordLength - 1);
            if (lastChar == '.' || lastChar == ',') {
                --wordLength;
            }
        }
        if (wordLength < 3) {
            return false;
        }

        for (int i = 0; i < wordLength; ++i) {
            if (!Character.isLetter(word.charAt(i))) {
                return false;
            }
        }

        // Casing: "WORD" requires every remaining letter to be upper-case;
        // "Word" and "word" require every remaining letter to be non-upper-case.
        boolean firstUpper = Character.isUpperCase(word.charAt(0));
        boolean expectUpper = firstUpper && Character.isUpperCase(word.charAt(1));
        for (int i = firstUpper ? 2 : 0; i < wordLength; ++i) {
            if (Character.isUpperCase(word.charAt(i)) != expectUpper) {
                return false;
            }
        }

        // Reject words dominated by one character. Comparing count * 2 against
        // the length avoids the truncation of wordLength / 2, which used to
        // reject every three-letter word and was too strict for odd lengths.
        Map<Character, Integer> charFreq = new HashMap<Character, Integer>(32);
        for (int i = 0; i < wordLength; ++i) {
            Character c = Character.valueOf(word.charAt(i));
            Integer seen = charFreq.get(c);
            int count = seen == null ? 1 : seen.intValue() + 1;
            if (count * 2 >= wordLength) {
                return false;
            }
            charFreq.put(c, Integer.valueOf(count));
        }
        return true;
    }
}

