/*
 * Decompiled with CFR 0.152.
 */
package simpletree.io.topic;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import simpletree.datamining.clustering.Clustering;
import simpletree.distance.dissimilarity.AbstractDissimilarity;
import simpletree.distance.dissimilarity.DissimilarityFactory;
import simpletree.io.cluster.TreeCluster;
import simpletree.io.cluster.TreeMultilevelClustering;
import simpletree.io.topic.CovarianceTopic;
import simpletree.io.topic.StopwordsMatrixFilter;
import simpletree.io.topic.TreeTopic;
import simpletree.io.topic.TreeTopicFile;
import simpletree.io.util.Pair;
import simpletree.matrix.AbstractMatrix;
import simpletree.matrix.MatrixFactory;

/**
 * Extracts a {@link TreeTopic} (a list of weighted terms) for clusters of a
 * {@link TreeCluster} hierarchy.
 *
 * <p>For each cluster the rows belonging to it are taken from the data matrix via
 * {@code TreeCluster.getSubMatrix}, optionally passed through
 * {@link StopwordsMatrixFilter}, and handed to {@link CovarianceTopic} to obtain
 * the terms.
 */
public class TreeTopicExtractor {
    /** Value passed to the {@link CovarianceTopic} constructor for every extraction. */
    private final int numTerms;
    /** Full data matrix from which each cluster's sub-matrix is taken. */
    private final AbstractMatrix mat;
    /** Stopwords handed to the matrix filter; never {@code null}, may be empty. */
    private final List<String> stopwords;

    /**
     * Creates an extractor that applies no stopword filtering.
     *
     * @param numTerms value forwarded to {@link CovarianceTopic}
     * @param mat      data matrix the clusters refer to
     */
    public TreeTopicExtractor(int numTerms, AbstractMatrix mat) {
        this(numTerms, mat, new ArrayList<String>());
    }

    /**
     * Creates an extractor that filters the cluster matrix with the given stopwords.
     *
     * @param numTerms  value forwarded to {@link CovarianceTopic}
     * @param mat       data matrix the clusters refer to
     * @param stopwords stopword list; {@code null} is treated as an empty list. The list is
     *                  stored by reference, not copied.
     */
    public TreeTopicExtractor(int numTerms, AbstractMatrix mat, List<String> stopwords) {
        this.numTerms = numTerms;
        this.mat = mat;
        // A null list previously caused a NullPointerException in extract();
        // treat it as "no stopwords" instead.
        this.stopwords = stopwords != null ? stopwords : new ArrayList<String>();
    }

    /**
     * Extracts the topic of a single cluster (sub-clusters are not visited).
     *
     * <p>Clusters whose size is 2 or less are skipped: the returned topic then only
     * has its {@code id} assigned and {@code words} is left as {@link TreeTopic}
     * initialises it. Progress information is printed to standard output.
     *
     * @param treeCluster cluster to extract the topic from
     * @return a topic carrying the cluster id and, for clusters larger than 2, its terms
     * @throws IOException declared for the underlying matrix/topic operations
     */
    public TreeTopic extract(TreeCluster treeCluster) throws IOException {
        TreeTopic treeTopic = new TreeTopic();
        treeTopic.id = treeCluster.getId();
        if (treeCluster.getSize() <= 2) {
            return treeTopic;
        }

        System.out.println("#####");
        System.out.println("Extracting topic from ClusterId " + treeCluster.getId());
        System.out.println("- Size: " + treeCluster.getSize());
        System.out.println("- Elements: ");
        for (Integer index : treeCluster.getItemList()) {
            System.out.print(index + ", ");
        }
        System.out.println();

        AbstractMatrix clusterMatrix = treeCluster.getSubMatrix(this.mat);
        if (!this.stopwords.isEmpty()) {
            clusterMatrix = StopwordsMatrixFilter.filter(clusterMatrix, this.stopwords);
        }

        CovarianceTopic covAlgorithm = new CovarianceTopic(this.numTerms);
        treeTopic.words = covAlgorithm.getTerms(clusterMatrix);
        return treeTopic;
    }

    /**
     * Extracts topics for a cluster and, recursively, for all of its sub-clusters.
     *
     * <p>The result is in pre-order: the topic of {@code treeCluster} comes first,
     * followed by the topics of each sub-cluster's subtree in list order.
     *
     * @param treeCluster root of the (sub-)hierarchy to process
     * @return one topic per visited cluster
     * @throws IOException if extracting any cluster's topic fails with an I/O error
     */
    public List<TreeTopic> extractFromHierarchy(TreeCluster treeCluster) throws IOException {
        List<TreeTopic> topics = new ArrayList<TreeTopic>();
        topics.add(this.extract(treeCluster));
        // Iterating an empty list is a no-op, so no separate emptiness check is needed.
        for (TreeCluster subCluster : treeCluster.getSubClusterList()) {
            topics.addAll(this.extractFromHierarchy(subCluster));
        }
        return topics;
    }

    /**
     * Ad-hoc driver: clusters a hard-coded data file, extracts topics for the whole
     * hierarchy, saves them to a hard-coded output file and reads them back.
     *
     * @param args ignored
     * @throws IOException if reading the data or writing/reading the topic file fails
     */
    public static void main(String[] args) throws IOException {
        String topicsPath = "/home/renato/corel1000Topics.bin";

        AbstractMatrix mat = MatrixFactory.getInstance("/home/renato/AP_BBC_CNN_Reuters_nosource_nodate_novo.data");
        AbstractDissimilarity diss = DissimilarityFactory.getInstance(DissimilarityFactory.DissimilarityType.COSINE_BASED);
        TreeMultilevelClustering clus = new TreeMultilevelClustering();
        Clustering technique = TreeMultilevelClustering.getClusteringTechnique(0, "20", diss, mat);
        TreeCluster rootCluster = clus.clusterize(mat, technique, diss, 50, false, 10);

        TreeTopicExtractor topicExtractor = new TreeTopicExtractor(20, mat);
        List<TreeTopic> topics = topicExtractor.extractFromHierarchy(rootCluster);

        // Remove any previous output; report (but do not abort on) a failed delete
        // instead of silently ignoring it.
        File f = new File(topicsPath);
        if (f.exists() && !f.delete()) {
            System.err.println("Warning: could not delete existing file " + topicsPath);
        }

        TreeTopicFile file = new TreeTopicFile(topicsPath);
        file.setParameters("Max dimensions = 2000, num words = 20");
        file.setTechnique("Covariance");
        file.save(topics);

        // Re-open the file and read it back; the results are not used further.
        file = new TreeTopicFile(topicsPath);
        file.loadAll();
        file.load(2912);
        System.out.println("END!");
    }
}

