/*
 * Decompiled with CFR 0.152.
 */
package simpletree.textprocessing.corpus.database;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectOutputStream;
import java.sql.Date;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import simpletree.textprocessing.corpus.Encoding;
import simpletree.textprocessing.corpus.database.ConnectionManager;
import simpletree.textprocessing.corpus.database.SqlManager;
import simpletree.textprocessing.processing.Ngram;
import simpletree.textprocessing.processing.TermExtractor;

/**
 * Imports the files of a zip archive into the corpus database as a new collection.
 *
 * <p>Every non-directory entry of the archive becomes one document row (title, full
 * content, date, class label and serialized n-gram list). Once all entries are stored,
 * the n-gram frequencies summed over the whole archive are written to the collection row.
 *
 * <p>An instance keeps the list of class labels seen so far, so it is not meant to be
 * shared between threads.
 */
public class ZipFileImporter {
    private static final Logger LOGGER = Logger.getLogger(ZipFileImporter.class.getName());

    /** Separator placed between the words of an n-gram key. */
    private static final String NGRAM_SEPARATOR = "<>";

    /** Prefix of the line that carries the year of a document. */
    private static final String DATE_MARKER = "#:";

    /** Path of the zip archive to import. */
    private final String filename;

    /** Two-character file-name prefixes in order of first appearance; the index is the class label. */
    private final ArrayList<String> klasses;

    /**
     * Creates an importer for the given archive. The archive is not opened until
     * {@link #execute(String, int, int, Encoding)} is called.
     *
     * @param filename path of the zip archive to import
     */
    public ZipFileImporter(String filename) {
        this.filename = filename;
        this.klasses = new ArrayList<String>();
    }

    /**
     * Imports the archive as a new collection.
     *
     * @param collection name of the collection to create; must not exist yet
     * @param nrLines    number of non-blank leading lines of each file used as its title
     * @param nrGrams    number of words per n-gram; must be at least 1
     * @param encoding   character encoding of the archived files
     * @throws IOException              if the collection name is already taken, the archive
     *                                  cannot be read, or a database operation fails (the
     *                                  {@link SQLException} is kept as the cause)
     * @throws IllegalArgumentException if {@code nrGrams} is smaller than 1
     */
    public void execute(String collection, int nrLines, int nrGrams, Encoding encoding) throws IOException {
        if (nrGrams < 1) {
            throw new IllegalArgumentException("nrGrams must be at least 1 but was " + nrGrams);
        }
        if (!this.uniqueName(collection)) {
            throw new IOException("A collection intitled \"" + collection + "\" already exists. Please choose another name.");
        }
        // Open the archive before inserting anything: a missing or corrupt zip must not
        // leave behind an empty collection row that blocks a retry under the same name.
        ZipFile zip = new ZipFile(this.filename);
        try {
            int idCollection = this.getNewCollectionId();
            this.insertCollection(idCollection, collection, nrGrams);
            HashMap<String, Integer> corpusNgrams = this.importDocuments(zip, idCollection, nrLines, nrGrams, encoding);
            this.saveCollectionNgrams(idCollection, corpusNgrams);
        }
        finally {
            zip.close();
        }
        ConnectionManager.getInstance().dispose();
    }

    /** Inserts the row describing the new collection. */
    private void insertCollection(int idCollection, String collection, int nrGrams) throws IOException {
        PreparedStatement stmt = null;
        try {
            stmt = SqlManager.getInstance().getSqlStatement("INSERT.COLLECTION");
            stmt.setInt(1, idCollection);
            stmt.setString(2, collection);
            stmt.setInt(3, nrGrams);
            stmt.executeUpdate();
        }
        catch (SQLException ex) {
            throw ZipFileImporter.logAndWrap(ex);
        }
        finally {
            ZipFileImporter.closeStatement(stmt);
        }
    }

    /**
     * Stores every file of the archive as a document and returns the n-gram frequencies
     * summed over all of them.
     */
    private HashMap<String, Integer> importDocuments(ZipFile zip, int idCollection, int nrLines, int nrGrams, Encoding encoding) throws IOException {
        HashMap<String, Integer> corpusNgrams = new HashMap<String, Integer>();
        Enumeration<? extends ZipEntry> entries = zip.entries();
        // The document id is the position of the entry inside the archive. Directory
        // entries consume a position as well, so the ids of a collection may have gaps.
        for (int position = 0; entries.hasMoreElements(); ++position) {
            ZipEntry entry = entries.nextElement();
            if (entry == null || entry.isDirectory()) {
                continue;
            }
            String content = this.getFileContent(zip, entry, encoding);
            ArrayList<Ngram> fileNgrams = this.getNgramsFromFile(content, nrGrams);
            String title = this.getFileTitle(zip, entry, nrLines, encoding);
            float klass = this.getKlass(entry.getName());
            Date date = this.getFileDate(zip, entry, encoding);
            this.saveToDataBase(position, idCollection, title, content, date, fileNgrams, klass);
            for (Ngram n : fileNgrams) {
                ZipFileImporter.addFrequency(corpusNgrams, n.ngram, n.frequency);
            }
        }
        return corpusNgrams;
    }

    /** Writes the sorted, serialized corpus-wide n-gram list to the collection row. */
    private void saveCollectionNgrams(int idCollection, HashMap<String, Integer> corpusNgrams) throws IOException {
        byte[] blob = ZipFileImporter.serialize(ZipFileImporter.toSortedNgrams(corpusNgrams));
        PreparedStatement stmt = null;
        try {
            stmt = SqlManager.getInstance().getSqlStatement("UPDATE.NGRAMS.COLLECTION");
            stmt.setBytes(1, blob);
            stmt.setInt(2, idCollection);
            stmt.executeUpdate();
        }
        catch (SQLException ex) {
            throw ZipFileImporter.logAndWrap(ex);
        }
        finally {
            ZipFileImporter.closeStatement(stmt);
        }
    }

    /**
     * Tells whether no collection with the given name exists yet.
     *
     * @return {@code true} when the lookup by name yields no row
     */
    private boolean uniqueName(String collection) throws IOException {
        PreparedStatement stmt = null;
        try {
            stmt = SqlManager.getInstance().getSqlStatement("SELECT.COLLECTION.BY.NAME");
            stmt.setString(1, collection);
            // The result set is released together with its statement in the finally block.
            ResultSet rs = stmt.executeQuery();
            return !rs.next();
        }
        catch (SQLException ex) {
            throw ZipFileImporter.logAndWrap(ex);
        }
        finally {
            ZipFileImporter.closeStatement(stmt);
        }
    }

    /**
     * Computes the id for a new collection: the value returned by the
     * {@code SELECT.COLLECTION.ID} query plus one, or 0 when the query yields no row.
     * NOTE(review): the query presumably returns the highest existing id - confirm
     * against its SQL definition.
     */
    private int getNewCollectionId() throws IOException {
        PreparedStatement stmt = null;
        try {
            stmt = SqlManager.getInstance().getSqlStatement("SELECT.COLLECTION.ID");
            // The result set is released together with its statement in the finally block.
            ResultSet rs = stmt.executeQuery();
            return rs.next() ? rs.getInt(1) + 1 : 0;
        }
        catch (SQLException ex) {
            throw ZipFileImporter.logAndWrap(ex);
        }
        finally {
            ZipFileImporter.closeStatement(stmt);
        }
    }

    /**
     * Inserts one document row.
     *
     * @param id            document id
     * @param id_collection id of the owning collection
     * @param title         document title
     * @param content       full document text
     * @param date          document date
     * @param ngrams        n-grams of the document; stored in Java-serialized form
     * @param klass         class label of the document
     * @throws IOException if serialization or the database insert fails
     */
    private void saveToDataBase(int id, int id_collection, String title, String content, Date date, ArrayList<Ngram> ngrams, float klass) throws IOException {
        byte[] blob = ZipFileImporter.serialize(ngrams);
        // Must be a PreparedStatement: the parameter setters below do not exist on Statement.
        PreparedStatement stmt = null;
        try {
            stmt = SqlManager.getInstance().getSqlStatement("INSERT.DOCUMENT");
            stmt.setInt(1, id);
            stmt.setInt(2, id_collection);
            stmt.setString(3, title);
            stmt.setString(4, content);
            stmt.setBytes(5, blob);
            stmt.setFloat(6, klass);
            stmt.setDate(7, date);
            stmt.executeUpdate();
        }
        catch (SQLException ex) {
            throw ZipFileImporter.logAndWrap(ex);
        }
        finally {
            ZipFileImporter.closeStatement(stmt);
        }
    }

    /**
     * Determines the date of a file. The first line starting with {@code #:} is expected
     * to hold a year; the date is then the 1st of January of that year. If there is no
     * such line, or it does not hold a valid year, the current date is returned.
     */
    private Date getFileDate(ZipFile zip, ZipEntry entry, Encoding encoding) throws IOException {
        Date date = new Date(System.currentTimeMillis());
        BufferedReader in = this.openEntry(zip, entry, encoding);
        try {
            String line;
            while ((line = in.readLine()) != null) {
                if (!line.trim().startsWith(DATE_MARKER)) {
                    continue;
                }
                String year = line.replaceAll(DATE_MARKER, "").trim();
                if (this.isInteger(year)) {
                    try {
                        int parsedYear = Integer.parseInt(year);
                        GregorianCalendar gc = new GregorianCalendar();
                        gc.set(GregorianCalendar.YEAR, parsedYear);
                        gc.set(GregorianCalendar.MONTH, GregorianCalendar.JANUARY);
                        gc.set(GregorianCalendar.DAY_OF_MONTH, 1);
                        date = new Date(gc.getTimeInMillis());
                    }
                    catch (NumberFormatException ignored) {
                        // Digit run too long for an int: keep the current date as fallback.
                    }
                }
                // Only the first marker line is considered.
                break;
            }
        }
        finally {
            in.close();
        }
        return date;
    }

    /** Tells whether the string consists of one or more decimal digits. */
    private boolean isInteger(String s) {
        return s.matches("[0-9]+");
    }

    /**
     * Builds the title of a file from its first {@code nrLines} non-blank lines, each
     * followed by a single space. Blank lines are skipped and do not count.
     */
    private String getFileTitle(ZipFile zip, ZipEntry entry, int nrLines, Encoding encoding) throws IOException {
        StringBuilder text = new StringBuilder();
        BufferedReader in = this.openEntry(zip, entry, encoding);
        try {
            int kept = 0;
            String line;
            while (kept < nrLines && (line = in.readLine()) != null) {
                if (line.trim().length() > 0) {
                    text.append(line).append(" ");
                    ++kept;
                }
            }
        }
        finally {
            in.close();
        }
        return text.toString();
    }

    /** Reads the whole file, terminating every line with {@code "\r\n"}. */
    private String getFileContent(ZipFile zip, ZipEntry entry, Encoding encoding) throws IOException {
        StringBuilder text = new StringBuilder();
        BufferedReader in = this.openEntry(zip, entry, encoding);
        try {
            String line;
            while ((line = in.readLine()) != null) {
                text.append(line).append("\r\n");
            }
        }
        finally {
            in.close();
        }
        return text.toString();
    }

    /** Opens a buffered reader on a zip entry, decoding it with the given encoding. */
    private BufferedReader openEntry(ZipFile zip, ZipEntry entry, Encoding encoding) throws IOException {
        return new BufferedReader(new InputStreamReader(zip.getInputStream(entry), encoding.toString()));
    }

    /**
     * Counts the n-grams of a text. Words are the non-blank matches of the
     * {@link TermExtractor} regular expression, lower-cased; an n-gram is {@code nrGrams}
     * consecutive words joined with {@code "<>"}.
     *
     * @return the n-grams with their frequencies, sorted by the natural order of
     *         {@link Ngram}; empty when {@code content} is {@code null}, {@code nrGrams}
     *         is smaller than 1, or the text has fewer than {@code nrGrams} words
     */
    private ArrayList<Ngram> getNgramsFromFile(String content, int nrGrams) throws IOException {
        HashMap<String, Integer> ngramsTable = new HashMap<String, Integer>();
        if (content != null && nrGrams > 0) {
            Matcher matcher = Pattern.compile(TermExtractor.getRegularExpression()).matcher(content);
            String[] window = new String[nrGrams];
            int filled = 0;
            // Fill the sliding window with the first nrGrams words.
            while (filled < nrGrams && matcher.find()) {
                String word = matcher.group().toLowerCase();
                if (word.trim().length() > 0) {
                    window[filled++] = word;
                }
            }
            // A partially filled window still holds nulls; counting it would record a
            // bogus n-gram containing the text "null".
            if (filled == nrGrams) {
                ngramsTable.put(ZipFileImporter.joinWords(window), 1);
                while (matcher.find()) {
                    String word = matcher.group().toLowerCase();
                    if (word.trim().length() > 0) {
                        ZipFileImporter.addFrequency(ngramsTable, this.addNextWord(window, word), 1);
                    }
                }
            }
        }
        return ZipFileImporter.toSortedNgrams(ngramsTable);
    }

    /**
     * Slides the n-gram window by one word: drops the oldest word, appends {@code word}
     * and returns the key of the resulting n-gram. The array is modified in place.
     */
    private String addNextWord(String[] ngram, String word) {
        System.arraycopy(ngram, 1, ngram, 0, ngram.length - 1);
        ngram[ngram.length - 1] = word;
        return ZipFileImporter.joinWords(ngram);
    }

    /** Joins the words of a non-empty window with {@code "<>"}. */
    private static String joinWords(String[] words) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < words.length - 1; ++i) {
            sb.append(words[i]).append(NGRAM_SEPARATOR);
        }
        sb.append(words[words.length - 1]);
        return sb.toString();
    }

    /** Adds {@code amount} to the frequency stored under {@code key}, starting from zero. */
    private static void addFrequency(HashMap<String, Integer> table, String key, int amount) {
        Integer current = table.get(key);
        table.put(key, current == null ? amount : current + amount);
    }

    /** Converts a frequency table into a list of {@link Ngram}s sorted by their natural order. */
    private static ArrayList<Ngram> toSortedNgrams(HashMap<String, Integer> frequencies) {
        ArrayList<Ngram> ngrams = new ArrayList<Ngram>(frequencies.size());
        for (String key : frequencies.keySet()) {
            ngrams.add(new Ngram(key, frequencies.get(key)));
        }
        Collections.sort(ngrams);
        return ngrams;
    }

    /** Serializes an n-gram list with Java object serialization. */
    private static byte[] serialize(ArrayList<Ngram> ngrams) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ObjectOutputStream oos = new ObjectOutputStream(baos);
        try {
            oos.writeObject(ngrams);
            oos.flush();
        }
        finally {
            oos.close();
        }
        return baos.toByteArray();
    }

    /** Logs a database failure and converts it into an {@link IOException} that keeps the cause. */
    private static IOException logAndWrap(SQLException ex) {
        LOGGER.log(Level.SEVERE, null, ex);
        return new IOException(ex.getMessage(), ex);
    }

    /** Closes a statement if it was created; a failure to close is logged and rethrown. */
    private static void closeStatement(Statement stmt) throws IOException {
        if (stmt == null) {
            return;
        }
        try {
            stmt.close();
        }
        catch (SQLException ex) {
            throw ZipFileImporter.logAndWrap(ex);
        }
    }

    /**
     * Derives the class label of a file from its name. The label is the index, in order
     * of first appearance, of the first two characters of the base name (the whole base
     * name when it has at most two characters).
     *
     * @param filename entry name, possibly containing {@code /} or {@code \} separators
     * @return the index of the prefix in the list of prefixes seen so far
     */
    private float getKlass(String filename) {
        // Strip the directory part; a forward slash takes precedence over a backslash.
        int separator = filename.lastIndexOf("/");
        if (separator < 0) {
            separator = filename.lastIndexOf("\\");
        }
        String baseName = separator > -1 ? filename.substring(separator + 1) : filename;
        String prefix = baseName.length() > 2 ? baseName.substring(0, 2) : baseName;
        int index = this.klasses.indexOf(prefix);
        if (index < 0) {
            this.klasses.add(prefix);
            index = this.klasses.size() - 1;
        }
        return index;
    }

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the path of the zip archive and
     *             {@code args[1]} the collection name; the historical defaults are used
     *             for missing arguments
     */
    public static void main(String[] args) {
        String filename = args.length > 0 ? args[0] : "G:\\User\\users\\Documents\\FERNANDO\\Codigo\\java\\ExtractArticles\\ExtractArticles.zip";
        String collection = args.length > 1 ? args[1] : "Infovis 2004 contest IV";
        try {
            ZipFileImporter zfi = new ZipFileImporter(filename);
            zfi.execute(collection, 1, 1, Encoding.ASCII);
        }
        catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }
}

