/*
 * Decompiled with CFR 0.152.
 */
package pt.sapo.hp24.db.tools;

import com.aliasi.spell.TfIdfDistance;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import pt.sapo.hp24.api.NewsItem;
import pt.sapo.hp24.api.Term;

/**
 * Static helpers that pick the highest-scoring terms of a news article's text
 * and expose them as that article's keywords.
 */
public class Keywords {
    /**
     * Extracts keywords for every article in {@code articles} and stores them on
     * the article via {@link NewsItem#setKeywords}, joined by single spaces.
     *
     * @param tfidf       source of the per-term IDF value
     * @param articles    articles to process; each one is modified in place
     * @param numkeywords maximum number of keywords kept per article
     */
    public static void extract(TfIdfDistance tfidf, List<NewsItem> articles, int numkeywords) {
        for (NewsItem ni : articles) {
            List<String> keywords = Keywords.extract(tfidf, ni, numkeywords);
            ni.setKeywords(StringUtils.join(keywords, ' '));
        }
    }

    /**
     * Returns up to {@code numkeywords} distinct whitespace-separated terms of the
     * article text, ordered by descending {@link Term#getTfIdf()}.
     *
     * @param tfidf       source of the per-term IDF value
     * @param ni          article whose text is analysed
     * @param numkeywords maximum number of keywords to return; values below zero
     *                    are treated as zero
     * @return a new mutable list of keyword strings; empty when the article has
     *         no text
     */
    public static List<String> extract(TfIdfDistance tfidf, NewsItem ni, int numkeywords) {
        // Cast retained from the decompiled source: the declared return type of
        // getText() is not visible from this file.
        String[] terms = StringUtils.split((String) ni.getText());
        if (terms == null) {
            // StringUtils.split(null) yields null; treat a missing text as "no keywords".
            return new ArrayList<>();
        }

        // One Term per distinct token; repeated tokens only bump the occurrence count.
        HashMap<String, Term> uniqueTerms = new HashMap<>();
        for (String term : terms) {
            Term known = uniqueTerms.get(term);
            if (known != null) {
                known.incrementCount();
            } else {
                uniqueTerms.put(term, new Term(term, tfidf.idf(term)));
            }
        }

        // Highest score first. Double.compare gives a consistent total order even
        // if a score is NaN, which hand-written > / < checks do not.
        List<Term> ranked = new ArrayList<>(uniqueTerms.values());
        ranked.sort((a, b) -> Double.compare(b.getTfIdf(), a.getTfIdf()));

        // Clamp so that a negative request cannot produce an invalid subList range.
        int limit = Math.max(0, Math.min(numkeywords, ranked.size()));
        List<String> skeywords = new ArrayList<>(limit);
        for (Term t : ranked.subList(0, limit)) {
            skeywords.add(t.getWord());
        }
        return skeywords;
    }
}

