/*
 * Decompiled with CFR 0.152.
 */
package simpletree.textprocessing.corpus.buffer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import simpletree.textprocessing.corpus.Corpus;
import simpletree.textprocessing.processing.Ngram;
import simpletree.textprocessing.processing.TermExtractor;

public class BufferCorpus
extends Corpus {
    public List<String> docs = new ArrayList<String>();
    public Map<Integer, ArrayList<Ngram>> ngramsMap = new HashMap<Integer, ArrayList<Ngram>>();

    public BufferCorpus(List<String> docs, List<Integer> ids, int nrGrams) {
        super(nrGrams);
        this.docs = docs;
        this.ids = new ArrayList<Integer>(ids);
        this.cdata = new float[ids.size()];
        Arrays.fill(this.cdata, 0.0f);
        this.run();
    }

    @Override
    public String getFullContent(int id) throws IOException {
        int idIndex = this.ids.indexOf(id);
        if (idIndex != -1) {
            return this.docs.get(idIndex);
        }
        return null;
    }

    @Override
    public String getFullContent(String itemUrl) throws IOException {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    @Override
    public String getFilteredContent(int id) throws IOException {
        return this.getFullContent(id);
    }

    @Override
    public String getViewContent(int id) throws IOException {
        return this.getFullContent(id);
    }

    @Override
    public String getViewContent(String itemUrl) throws IOException {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    @Override
    public String getSearchContent(int id) throws IOException {
        return this.getFullContent(id);
    }

    @Override
    public ArrayList<Ngram> getNgrams(int id) throws IOException {
        if (this.ngramsMap.isEmpty() || !this.ngramsMap.containsKey(id)) {
            return null;
        }
        return this.ngramsMap.get(id);
    }

    @Override
    public ArrayList<Ngram> getCorpusNgrams() throws IOException {
        if (this.ngramsMap.isEmpty()) {
            return null;
        }
        ArrayList<Ngram> corpusNgrams = new ArrayList<Ngram>();
        for (ArrayList<Ngram> docNgrams : this.ngramsMap.values()) {
            if (docNgrams == null) continue;
            corpusNgrams.addAll(docNgrams);
        }
        return corpusNgrams;
    }

    @Override
    protected void run() {
        if (this.docs.isEmpty()) {
            throw new UnsupportedOperationException("Document list empty. Feed it with documents first...");
        }
        try {
            for (int i = 0; i < this.docs.size(); ++i) {
                String doc = this.docs.get(i);
                int docId = (Integer)this.ids.get(i);
                ArrayList<Ngram> docNgrams = this.getNgramsFromDocument(doc);
                this.ngramsMap.put(docId, docNgrams);
            }
        }
        catch (IOException ex) {
            Logger.getLogger(this.getClass().getName()).log(Level.SEVERE, "Error getting ngrams from documents");
        }
    }

    private ArrayList<Ngram> getNgramsFromDocument(String document) throws IOException {
        HashMap<String, Integer> ngramsTable = new HashMap<String, Integer>();
        Pattern pattern = Pattern.compile(TermExtractor.getRegularExpression());
        if (document != null) {
            Matcher matcher = pattern.matcher(document);
            String[] ngram = new String[this.nrGrams];
            int i = 0;
            int count = 0;
            while (count < this.nrGrams && matcher.find()) {
                String word;
                String term = matcher.group();
                if (term.length() > 0 && (word = term.toLowerCase()).trim().length() > 0) {
                    ngram[count] = word;
                    ++count;
                }
                ++i;
            }
            StringBuilder sb = new StringBuilder();
            for (int j = 0; j < ngram.length - 1; ++j) {
                sb.append(ngram[j]).append("<>");
            }
            sb.append(ngram[ngram.length - 1]);
            ngramsTable.put(sb.toString(), 1);
            while (matcher.find()) {
                String word;
                String term = matcher.group();
                if (term.trim().length() > 0 && (word = term.toLowerCase()).trim().length() > 0) {
                    String ng = this.addNextWord(ngram, word);
                    if (ngramsTable.containsKey(ng)) {
                        ngramsTable.put(ng, (Integer)ngramsTable.get(ng) + 1);
                    } else {
                        ngramsTable.put(ng, 1);
                    }
                }
                ++i;
            }
        }
        ArrayList<Ngram> ngrams = new ArrayList<Ngram>();
        for (String n : ngramsTable.keySet()) {
            ngrams.add(new Ngram(n, (Integer)ngramsTable.get(n)));
        }
        return ngrams;
    }

    private String addNextWord(String[] ngram, String word) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < ngram.length - 1; ++i) {
            ngram[i] = ngram[i + 1];
            sb.append(ngram[i]).append("<>");
        }
        ngram[ngram.length - 1] = word;
        sb.append(word);
        return sb.toString();
    }
}

