/*
 * Decompiled with CFR 0.152.
 */
package simpletree.textprocessing.corpus.zip;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InvalidClassException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
import simpletree.textprocessing.corpus.Corpus;
import simpletree.textprocessing.corpus.Encoding;
import simpletree.textprocessing.corpus.zip.ZipCorpus;
import simpletree.textprocessing.processing.Ngram;
import simpletree.textprocessing.processing.TermExtractor;

/**
 * N-gram index of a {@link ZipCorpus}, persisted as a zip archive.
 *
 * <p>The archive written by this class contains:
 * <ul>
 *   <li>{@code inverted.properties} - the {@code number.grams} and
 *       {@code char.encoding} values the index was built with;</li>
 *   <li>{@code inv/<id>} - one entry per corpus document id, holding a
 *       Java-serialized, sorted {@code ArrayList<Ngram>} for that document;</li>
 *   <li>{@code corpusNgrams.txt} - a Java-serialized, sorted
 *       {@code ArrayList<Ngram>} with the frequencies summed over all
 *       documents (despite the extension, this is not a text file).</li>
 * </ul>
 *
 * <p>The archive is opened lazily on the first read and stays open until
 * {@link #dispose()} is called. Nothing in this class synchronizes access to
 * that handle.
 */
public class InvertedZipCorpus {
    private static final Logger LOGGER = Logger.getLogger(InvertedZipCorpus.class.getName());

    /** Prefix of the per-document entries inside the archive. */
    private static final String invDir = "inv/";
    /** Entry holding the corpus-wide n-gram totals. */
    private static final String CORPUS_NGRAMS_ENTRY = "corpusNgrams.txt";
    /** Entry holding the build parameters of the index. */
    private static final String PROPERTIES_ENTRY = "inverted.properties";
    private static final String PROP_NUMBER_GRAMS = "number.grams";
    private static final String PROP_ENCODING = "char.encoding";
    /** Separator placed between the words of one n-gram key. */
    private static final String NGRAM_SEPARATOR = "<>";

    private final String invFilename;
    private ZipFile zip;
    private final ZipCorpus corpus;
    private final int nrGrams;

    /**
     * Binds to the index file {@code invFilename}, (re)building it from
     * {@code corpus} when it is missing or was built with different settings.
     *
     * <p>I/O failures during the build are logged, not thrown. A build that
     * does not complete leaves no index file behind, so a later construction
     * starts from scratch instead of trusting a truncated archive.
     *
     * @param corpus      corpus to index
     * @param nrGrams     n-gram size recorded in the index properties
     * @param invFilename path of the index archive
     */
    public InvertedZipCorpus(ZipCorpus corpus, int nrGrams, String invFilename) {
        this.invFilename = invFilename;
        this.corpus = corpus;
        this.nrGrams = nrGrams;
        if (!this.isIndexUpToDate(corpus)) {
            this.removeFile();
            boolean built = false;
            try {
                this.processCorpus(corpus, nrGrams, Corpus.getEncoding());
                built = true;
            } catch (IOException ex) {
                LOGGER.log(Level.SEVERE, null, ex);
            } finally {
                this.dispose();
                if (!built) {
                    // The properties entry is written first, so a partial
                    // archive would otherwise look valid on the next run.
                    this.removeFile();
                }
            }
        }
    }

    /**
     * Deletes the index file if it exists. A failed deletion is logged.
     */
    public void removeFile() {
        File f = new File(this.invFilename);
        if (f.exists() && !f.delete()) {
            LOGGER.log(Level.WARNING, "Could not delete inverted index file {0}", this.invFilename);
        }
    }

    /**
     * Reads the n-grams stored for one document.
     *
     * @param id document id, as used when the index was built
     * @return the stored list, or {@code null} when the index has no entry for
     *         {@code id} or the entry could not be deserialized
     * @throws IOException if the archive cannot be opened or read
     */
    public ArrayList<Ngram> getNgrams(int id) throws IOException {
        return this.readNgramList(invDir + id);
    }

    /**
     * Reads the corpus-wide n-gram totals.
     *
     * @return the stored list, or {@code null} when the entry is missing or
     *         could not be deserialized
     * @throws IOException if the archive cannot be opened or read
     */
    public ArrayList<Ngram> getCorpusNgrams() throws IOException {
        return this.readNgramList(CORPUS_NGRAMS_ENTRY);
    }

    /**
     * Closes the lazily opened archive, if any. A failure to close is logged;
     * the handle is dropped either way so the next read reopens the file.
     */
    public void dispose() {
        if (this.zip != null) {
            try {
                this.zip.close();
            } catch (IOException ex) {
                LOGGER.log(Level.SEVERE, null, ex);
            } finally {
                this.zip = null;
            }
        }
    }

    /**
     * @return the path of the index archive
     */
    public String getInvFilename() {
        return this.invFilename;
    }

    /**
     * Deserializes the n-gram list stored under {@code entryName}, opening the
     * archive on first use.
     *
     * <p>An {@link InvalidClassException} means the stored data no longer
     * matches the current classes; the archive is then closed and deleted.
     *
     * @return the list, or {@code null} if the entry is absent or unreadable
     * @throws IOException on any other I/O failure
     */
    private ArrayList<Ngram> readNgramList(String entryName) throws IOException {
        try {
            if (this.zip == null) {
                this.zip = new ZipFile(this.invFilename);
            }
            ZipEntry entry = this.zip.getEntry(entryName);
            if (entry == null) {
                return null;
            }
            // Streams are closed before the catch blocks run, i.e. before the
            // archive is disposed and deleted.
            try (BufferedInputStream bis = new BufferedInputStream(this.zip.getInputStream(entry));
                    ObjectInputStream ois = new ObjectInputStream(bis)) {
                @SuppressWarnings("unchecked") // entries are written by addFile as ArrayList<Ngram>
                ArrayList<Ngram> ngrams = (ArrayList<Ngram>) ois.readObject();
                return ngrams;
            }
        } catch (InvalidClassException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            this.dispose();
            this.removeFile();
        } catch (ClassNotFoundException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
        return null;
    }

    /**
     * Tells whether the index file exists and was built with the corpus'
     * current n-gram size and the current corpus encoding.
     */
    private boolean isIndexUpToDate(ZipCorpus corpus) {
        if (!new File(this.invFilename).exists()) {
            return false;
        }
        Properties stored = this.loadIndexProperties();
        // NOTE(review): compares against corpus.getNumberGrams(), not the
        // nrGrams constructor argument that is written to the properties.
        return corpus.getNumberGrams() == parseNumberGrams(stored)
                && Corpus.getEncoding().toString().equals(stored.getProperty(PROP_ENCODING));
    }

    /**
     * Loads {@code inverted.properties} from the index archive.
     *
     * @return the stored properties; empty when the archive or the entry
     *         cannot be read (the failure is logged)
     */
    private Properties loadIndexProperties() {
        Properties stored = new Properties();
        try (ZipFile index = new ZipFile(this.invFilename)) {
            ZipEntry entry = index.getEntry(PROPERTIES_ENTRY);
            if (entry != null) {
                try (BufferedInputStream in = new BufferedInputStream(index.getInputStream(entry))) {
                    stored.load(in);
                }
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
        return stored;
    }

    /**
     * Extracts {@code number.grams} from the stored properties.
     *
     * @return the stored value, or {@code -1} when absent or not an integer
     */
    private static int parseNumberGrams(Properties stored) {
        try {
            return Integer.parseInt(stored.getProperty(PROP_NUMBER_GRAMS));
        } catch (NumberFormatException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            return -1;
        }
    }

    /**
     * Builds the index archive: properties first, then one entry per document,
     * then the corpus-wide totals.
     *
     * @throws IOException if the archive cannot be created or written; the
     *         caller is responsible for discarding the partial file
     */
    private void processCorpus(ZipCorpus corpus, int nrGrams, Encoding encoding) throws IOException {
        HashMap<String, Integer> corpusTotals = new HashMap<>();
        try (ZipOutputStream zout = new ZipOutputStream(
                new BufferedOutputStream(new FileOutputStream(this.invFilename)))) {
            zout.setMethod(ZipOutputStream.DEFLATED);
            zout.setLevel(1); // fastest deflate level

            zout.putNextEntry(new ZipEntry(PROPERTIES_ENTRY));
            String header = PROP_NUMBER_GRAMS + "=" + nrGrams + "\n"
                    + PROP_ENCODING + "=" + encoding.toString() + "\n";
            // Properties.load(InputStream) decodes ISO-8859-1, so write that.
            zout.write(header.getBytes(StandardCharsets.ISO_8859_1));

            for (int i = 0; i < corpus.getIds().size(); ++i) {
                int id = corpus.getIds().get(i);
                ArrayList<Ngram> documentNgrams = this.getNgramsFromFile(corpus, id);
                this.addFile(zout, documentNgrams, invDir + id);
                for (Ngram n : documentNgrams) {
                    Integer soFar = corpusTotals.get(n.ngram);
                    if (soFar == null) {
                        corpusTotals.put(n.ngram, n.frequency);
                    } else {
                        corpusTotals.put(n.ngram, soFar + n.frequency);
                    }
                }
            }

            ArrayList<Ngram> allNgrams = new ArrayList<>(corpusTotals.size());
            for (Map.Entry<String, Integer> total : corpusTotals.entrySet()) {
                allNgrams.add(new Ngram(total.getKey(), total.getValue()));
            }
            // addFile sorts the list before writing it.
            this.addFile(zout, allNgrams, CORPUS_NGRAMS_ENTRY);
        }
    }

    /**
     * Sorts {@code ngrams} in place and writes the list, Java-serialized, as
     * a new archive entry named {@code filename}.
     */
    private void addFile(ZipOutputStream zout, ArrayList<Ngram> ngrams, String filename) throws IOException {
        Collections.sort(ngrams);
        zout.putNextEntry(new ZipEntry(filename));
        // Not closed on purpose: closing would close the shared zip stream.
        ObjectOutputStream oos = new ObjectOutputStream(zout);
        oos.writeObject(ngrams);
        oos.flush();
    }

    /**
     * Counts the n-grams of one document.
     *
     * <p>Tokens are the matches of {@code TermExtractor.getRegularExpression()},
     * lower-cased; blank matches are skipped. The n-gram size is
     * {@code corpus.getNumberGrams()}. A document with fewer than n tokens
     * (or no content) yields an empty list.
     *
     * @return one {@code Ngram} per distinct n-gram, in no particular order
     */
    private ArrayList<Ngram> getNgramsFromFile(ZipCorpus corpus, int id) throws IOException {
        HashMap<String, Integer> counts = new HashMap<>();
        Pattern pattern = Pattern.compile(TermExtractor.getRegularExpression());
        String content = corpus.getFullContent(id);
        int size = corpus.getNumberGrams();
        if (content != null && size > 0) {
            Matcher matcher = pattern.matcher(content);
            String[] window = new String[size];

            // Fill the first window.
            int filled = 0;
            while (filled < size && matcher.find()) {
                String word = matcher.group().toLowerCase();
                if (word.trim().length() > 0) {
                    window[filled++] = word;
                }
            }

            // Only a completely filled window is a real n-gram.
            if (filled == size) {
                counts.put(joinWindow(window), 1);
                // Slide the window one token at a time.
                while (matcher.find()) {
                    String word = matcher.group().toLowerCase();
                    if (word.trim().length() > 0) {
                        String key = this.addNextWord(window, word);
                        Integer soFar = counts.get(key);
                        counts.put(key, soFar == null ? 1 : soFar + 1);
                    }
                }
            }
        }
        ArrayList<Ngram> ngrams = new ArrayList<>(counts.size());
        for (Map.Entry<String, Integer> count : counts.entrySet()) {
            ngrams.add(new Ngram(count.getKey(), count.getValue()));
        }
        return ngrams;
    }

    /** Joins the words of a window with the {@code <>} separator. */
    private static String joinWindow(String[] window) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < window.length; ++i) {
            if (i > 0) {
                sb.append(NGRAM_SEPARATOR);
            }
            sb.append(window[i]);
        }
        return sb.toString();
    }

    /**
     * Shifts {@code ngram} left by one position, appends {@code word} as the
     * last element, and returns the resulting n-gram key.
     */
    private String addNextWord(String[] ngram, String word) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < ngram.length - 1; ++i) {
            ngram[i] = ngram[i + 1];
            sb.append(ngram[i]).append(NGRAM_SEPARATOR);
        }
        ngram[ngram.length - 1] = word;
        sb.append(word);
        return sb.toString();
    }
}

