/*
 * Decompiled with CFR 0.152.
 */
package pt.sapo.hp24.db;

import com.aliasi.spell.TfIdfDistance;
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
import com.aliasi.tokenizer.TokenizerFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.Group;
import org.apache.solr.client.solrj.response.GroupCommand;
import org.apache.solr.client.solrj.response.GroupResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.caudexorigo.ErrorAnalyser;
import org.caudexorigo.Shutdown;
import org.caudexorigo.concurrent.CustomExecutors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pt.sapo.hp24.api.NewsDb;
import pt.sapo.hp24.api.NewsItem;
import pt.sapo.hp24.api.RelatedVideo;
import pt.sapo.hp24.classifier.NaiveClassifier;
import pt.sapo.hp24.classifier.SVMTool;
import pt.sapo.hp24.db.DbBuilder;
import pt.sapo.hp24.db.tools.Deduper;
import pt.sapo.hp24.db.tools.Keywords;
import pt.sapo.hp24.db.tools.NewsItemBuilder;
import pt.sapo.hp24.db.tools.NewsSolrSearch;
import pt.sapo.hp24.db.tools.SocialNetworkShareCount;
import pt.sapo.hp24.db.tools.Solr;
import pt.sapo.hp24.db.tools.Videos;
import pt.sapo.hp24.tools.Host;
import pt.sapo.hp24.tools.Text;

public class NewsDbBuilder {
    private static final Logger log = LoggerFactory.getLogger(NewsDbBuilder.class);

    /**
     * Command-line entry point: runs {@link #build()} once.
     * <p>
     * Whatever the outcome, an "End processing." line is logged and
     * {@code Shutdown.now()} is invoked afterwards.
     *
     * @param args not used
     */
    public static void main(String[] args) {
        try {
            NewsDbBuilder.build();
        }
        catch (Throwable failure) {
            // Anything escaping the build is handed to the shutdown helper.
            Shutdown.now(failure);
        }
        finally {
            log.info("End processing.");
            Shutdown.now();
        }
    }

    /**
     * Runs one full build of the news database.
     * <p>
     * The sequence is order-sensitive: {@code excludeUrls} is shared by every
     * {@code buildNewsItemList} call and by {@code fetchHeadlines}; each list build adds
     * the URLs it processes, so a URL consumed by an earlier list is skipped by later ones.
     * <p>
     * Main and tech articles hosted on {@code www.publico.pt} whose change date is not within
     * the last 24 hours get their body replaced by a 250-character summary; opinion items
     * from that host are dropped altogether.
     * <p>
     * Any {@link Throwable} raised along the way is handed to {@code Shutdown.now(Throwable)}.
     */
    public static void build() {
        try {
            SolrDocumentList lusaNews = NewsDbBuilder.fetchLusaNews(11);
            SolrDocumentList sapo24BlogNews = NewsDbBuilder.fetchSapo24BlogNews(50);
            SolrDocumentList lastNews = NewsDbBuilder.fetchLast24News(2000);
            HashSet<String> excludeUrls = new HashSet<String>();
            excludeUrls.addAll(Host.getExcludeurls());
            NewsDbBuilder.buildClassifierModel();

            // Editorial goes first so its URLs are excluded from the headline query below.
            SolrDocumentList protoEditorial = NewsDbBuilder.fetchEditorial(3);
            List<NewsItem> editorial = NewsDbBuilder.buildNewsItemList(protoEditorial, Collections.emptySet(), excludeUrls);
            SolrDocumentList protoHeadlines = NewsDbBuilder.fetchHeadlines(100, excludeUrls);
            SolrDocumentList news = NewsDbBuilder.fetchMainNews(400);
            SolrDocumentList newsTech = NewsDbBuilder.fetchTechNews(40);
            SolrDocumentList opinions = NewsDbBuilder.fetchOpinionNews(80);
            List<NewsItem> headlines = NewsDbBuilder.buildNewsItemList(protoHeadlines, Collections.emptySet(), excludeUrls);
            List<NewsItem> sapo24Blogs = NewsDbBuilder.buildNewsItemList(sapo24BlogNews, Collections.emptySet(), excludeUrls);
            List<NewsItem> lusa = NewsDbBuilder.buildNewsItemList(lusaNews, Collections.emptySet(), excludeUrls);

            HashSet<String> opinionCategory = new HashSet<String>();
            opinionCategory.add("opini\u00e3o");

            Date now = new Date();
            Date lastDay = new Date(now.getTime() - 86400000L);

            List<NewsItem> articles = NewsDbBuilder.buildNewsItemList(news, Collections.emptySet(), excludeUrls);
            NewsDbBuilder.summarizeStalePublicoBodies(articles, lastDay);

            List<NewsItem> articlesTech = NewsDbBuilder.buildNewsItemList(newsTech, Collections.emptySet(), excludeUrls);
            NewsDbBuilder.summarizeStalePublicoBodies(articlesTech, lastDay);
            articles.addAll(articlesTech);

            // Opinion pieces: tagged with the opinion category, publico ones left out.
            List<NewsItem> opinionItems = NewsDbBuilder.buildNewsItemList(opinions, opinionCategory, excludeUrls);
            for (NewsItem opinion : opinionItems) {
                if (opinion.getHost().startsWith("www.publico.pt")) {
                    continue;
                }
                articles.add(opinion);
            }
            articles.addAll(sapo24Blogs);

            // The TF-IDF corpus is trained on everything gathered so far.
            ArrayList<NewsItem> corpus = new ArrayList<NewsItem>();
            corpus.addAll(editorial);
            corpus.addAll(headlines);
            corpus.addAll(articles);
            TfIdfDistance tfidf_articles = NewsDbBuilder.buildTfIdfCorpus(corpus);

            NewsDbBuilder.rankHeadLinesByHost(headlines);
            NewsDbBuilder.aggregateSimilarItems(tfidf_articles, articles);
            NewsDbBuilder.populateSharedCount(articles);
            NewsDbBuilder.getStats(articles);
            NewsDbBuilder.rankArticles(articles);

            // Rebuilt after deduplication and ranking, which operate on the article list.
            ArrayList<NewsItem> all = new ArrayList<NewsItem>();
            all.addAll(editorial);
            all.addAll(headlines);
            all.addAll(articles);
            NewsDbBuilder.classify(all, true);
            NewsDbBuilder.similiarVideos(tfidf_articles, all);

            HashSet<String> noDupsHosts = new HashSet<String>();
            HashMap<String, NewsItem> itemBySlug = new HashMap<String, NewsItem>();
            NewsDbBuilder.linkArticles(all, noDupsHosts, itemBySlug);

            // The "latest news" lists use their own exclusion set, seeded from the host configuration only.
            HashSet<String> excludeLastestNewsUrls = new HashSet<String>();
            excludeLastestNewsUrls.addAll(Host.getExcludeurls());
            HashMap<String, List<NewsItem>> lNews = NewsDbBuilder.buildLastestNewsItemList(lastNews, Collections.emptySet(), excludeLastestNewsUrls);
            NewsDbBuilder.reduceNumberLastArticelesBySource(lNews);
            NewsDbBuilder.linkArticles(lNews, articles, noDupsHosts, itemBySlug);
            NewsDbBuilder.linkArticles(lusa, articles, noDupsHosts, itemBySlug);
            NewsDbBuilder.linkArticles(sapo24Blogs, articles, noDupsHosts, itemBySlug);
            NewsDbBuilder.classify(lusa, true);
            NewsDbBuilder.similiarVideos(tfidf_articles, lusa);
            NewsDbBuilder.classify(sapo24Blogs, true);
            NewsDbBuilder.similiarVideos(tfidf_articles, sapo24Blogs);
            NewsDb.build(editorial, headlines, articles, lNews, lusa, sapo24Blogs);
            System.out.println();
        }
        catch (Throwable t) {
            Shutdown.now(t);
        }
    }

    /**
     * Replaces, in place, the body of every {@code www.publico.pt} item whose change date
     * is not after {@code cutoff} with a 250-character summary. All other items are left
     * untouched; the list itself is not reordered or resized.
     *
     * @param items  items to inspect
     * @param cutoff items changed after this instant keep their full body
     */
    private static void summarizeStalePublicoBodies(List<NewsItem> items, Date cutoff) {
        for (NewsItem item : items) {
            if (!item.getHost().startsWith("www.publico.pt")) {
                continue;
            }
            if (item.getChangeDate().after(cutoff)) {
                continue;
            }
            item.setBody(Text.getSummary(item.getBody(), 250, true));
        }
    }

    /**
     * Rebuilds only the editorial section.
     * <p>
     * Up to three editorial items are fetched and chained by position (item 0 points to
     * item 1, item 1 to item 2). The last chained item is pointed at the first entry
     * returned by {@code NewsDb.getAllHeadlines()} when there is one. The resulting list is
     * then passed to {@code NewsDb.build(editorial)}.
     * <p>
     * When no editorial item is available the linking step is skipped; previously this
     * case failed with an {@link IndexOutOfBoundsException} on {@code editorial.get(0)}.
     * A {@link Throwable} thrown by the build call is handed to {@code Shutdown.now(Throwable)}.
     */
    public static void buildEditorial() {
        SolrDocumentList protoEditorial = NewsDbBuilder.fetchEditorial(3);
        HashSet<String> excludeUrls = new HashSet<String>();
        List<NewsItem> editorial = NewsDbBuilder.buildNewsItemList(protoEditorial, Collections.emptySet(), excludeUrls);

        if (editorial != null && !editorial.isEmpty()) {
            // Index of the last editorial item taking part in the chain (at most the third one).
            int last = Math.min(2, editorial.size() - 1);
            for (int i = 0; i < last; ++i) {
                editorial.get(i).setNextByPosition(editorial.get(i + 1).getSlug());
            }
            List<?> headlines = NewsDb.getAllHeadlines();
            if (headlines != null && !headlines.isEmpty()) {
                editorial.get(last).setNextByPosition(((NewsItem) headlines.get(0)).getSlug());
            }
        }

        try {
            log.info("before build");
            NewsDb.build(editorial);
            log.info("after build");
        }
        catch (Throwable t) {
            Shutdown.now(t);
        }
    }

    /**
     * Caps every per-source list in {@code lNews} at six items, keeping the first six.
     * Lists that are already short enough are left as they are; longer ones are replaced
     * by a fresh {@link ArrayList} copy of their head.
     *
     * @param lNews map from source key to its list of items; modified in place
     */
    private static void reduceNumberLastArticelesBySource(HashMap<String, List<NewsItem>> lNews) {
        final int maxPerSource = 6;
        for (Map.Entry<String, List<NewsItem>> bySource : lNews.entrySet()) {
            List<NewsItem> items = bySource.getValue();
            if (items.size() > maxPerSource) {
                // Writing through the entry replaces the value for the existing key.
                bySource.setValue(new ArrayList<NewsItem>(items.subList(0, maxPerSource)));
            }
        }
    }

    /**
     * Trains the section classifier model and writes it to {@code ./classifier.model}.
     * <p>
     * Four pages of 1000 documents each are fetched (offsets 0, 1000, 2000, 3000), turned
     * into news items, enriched with 20 keywords each and classified without the model.
     * Items that ended up with at least one section and whose section is not
     * {@code "actualidade"} form the training set.
     * <p>
     * Any {@link Throwable} is handed to {@code Shutdown.now(Throwable)}.
     */
    private static void buildClassifierModel() {
        try {
            final int pageSize = 1000;
            SolrDocumentList rawDocs = NewsDbBuilder.fetchClassifierNews(pageSize, 0);
            for (int offset = pageSize; offset <= 3000; offset += pageSize) {
                rawDocs.addAll(NewsDbBuilder.fetchClassifierNews(pageSize, offset));
            }

            List<NewsItem> items = NewsDbBuilder.buildNewsItemList(rawDocs, Collections.emptySet(), new HashSet<String>());
            TfIdfDistance corpus = NewsDbBuilder.buildTfIdfCorpus(items);
            NewsDbBuilder.extractKeywords(corpus, items, 20);
            NewsDbBuilder.classify(items, false);

            // Keep only items that received a section other than the catch-all one.
            List<NewsItem> trainingSet = new ArrayList<NewsItem>();
            for (NewsItem candidate : items) {
                if (candidate.getSections().isEmpty()) {
                    continue;
                }
                if (candidate.getSection().equals("actualidade")) {
                    continue;
                }
                trainingSet.add(candidate);
            }

            System.out.println("NUmber of articles for model training: " + trainingSet.size());
            SVMTool.createModel(trainingSet, "./classifier.model");
        }
        catch (Throwable failure) {
            Shutdown.now(failure);
        }
    }

    /**
     * Delegates to {@code Deduper.aggregateSimilar} and logs how long the call took.
     *
     * @param tfidf        TF-IDF model handed to the deduper
     * @param lst_articles articles handed to the deduper
     */
    private static void aggregateSimilarItems(TfIdfDistance tfidf, List<NewsItem> lst_articles) {
        final long startedAt = System.currentTimeMillis();
        log.info("Start Deduplication");

        Deduper.aggregateSimilar(tfidf, lst_articles);

        long elapsedMillis = System.currentTimeMillis() - startedAt;
        double seconds = (double) elapsedMillis / 1000.0;
        log.info(String.format("Deduplication took: %.2f seconds", seconds));
    }

    /**
     * Converts Solr documents into news items grouped by host.
     * <p>
     * Documents whose {@code Url} is already in {@code exclude} are skipped; every other
     * URL is added to {@code exclude} before the item is built, so the set grows as a side
     * effect. Items that report themselves invalid are logged and dropped. Within a host,
     * items keep encounter order and set semantics ({@link LinkedHashSet}).
     *
     * @param solr_results documents to convert
     * @param categories   categories added to every item when the set is non-empty
     * @param exclude      URLs to skip; mutated with each newly processed URL
     * @return map from host to the list of valid items for that host
     */
    private static HashMap<String, List<NewsItem>> buildLastestNewsItemList(SolrDocumentList solr_results, Set<String> categories, Set<String> exclude) {
        final long startedAt = System.currentTimeMillis();
        log.info("Start Article creation");

        HashMap<String, LinkedHashSet<NewsItem>> itemsByHost = new HashMap<>();
        for (SolrDocument doc : solr_results) {
            String url = doc.getFieldValue("Url").toString();
            if (exclude.contains(url)) {
                log.info("Skip alread processed url: {}", url);
                continue;
            }
            exclude.add(url);

            NewsItem item = new NewsItemBuilder(doc).get();
            if (categories.size() > 0) {
                item.addCategories(categories);
            }
            if (!item.isValid()) {
                log.info("Discard invalid url: {}", url);
                continue;
            }
            itemsByHost.computeIfAbsent(item.getHost(), host -> new LinkedHashSet<>()).add(item);
        }

        HashMap<String, List<NewsItem>> result = new HashMap<>();
        for (Map.Entry<String, LinkedHashSet<NewsItem>> perHost : itemsByHost.entrySet()) {
            // toString() on the key is kept: it fails on a null host key exactly as before.
            result.put(perHost.getKey().toString(), new ArrayList<NewsItem>(perHost.getValue()));
        }

        double seconds = (double) (System.currentTimeMillis() - startedAt) / 1000.0;
        log.info(String.format("Article creation took: %.2f seconds", seconds));
        return result;
    }

    /**
     * Converts Solr documents into a list of valid news items.
     * <p>
     * Documents whose {@code Url} is already in {@code exclude} are skipped; every other
     * URL is added to {@code exclude} before the item is built, so the set grows as a side
     * effect. Items that report themselves invalid are logged and dropped. Items are
     * collected through a {@link LinkedHashSet}, so the result keeps encounter order and
     * holds no two equal items.
     *
     * @param solr_results documents to convert
     * @param categories   categories added to every item when the set is non-empty
     * @param exclude      URLs to skip; mutated with each newly processed URL
     * @return a new mutable list of the valid items
     */
    private static List<NewsItem> buildNewsItemList(SolrDocumentList solr_results, Set<String> categories, Set<String> exclude) {
        final long startedAt = System.currentTimeMillis();
        log.info("Start Article creation");

        LinkedHashSet<NewsItem> uniqueItems = new LinkedHashSet<>();
        for (SolrDocument doc : solr_results) {
            String url = doc.getFieldValue("Url").toString();
            if (exclude.contains(url)) {
                log.info("Skip alread processed url: {}", url);
                continue;
            }
            exclude.add(url);

            NewsItem item = new NewsItemBuilder(doc).get();
            if (categories.size() > 0) {
                item.addCategories(categories);
            }
            if (!item.isValid()) {
                log.info("Discard invalid url: {}", url);
                continue;
            }
            uniqueItems.add(item);
        }

        ArrayList<NewsItem> result = new ArrayList<NewsItem>(uniqueItems);
        double seconds = (double) (System.currentTimeMillis() - startedAt) / 1000.0;
        log.info(String.format("Article creation took: %.2f seconds", seconds));
        return result;
    }

    /**
     * Builds a TF-IDF model from the text of the given articles.
     * <p>
     * Only articles whose host contains {@code ".pt"} are fed to the model; the rest are
     * ignored. The elapsed time is logged.
     *
     * @param articles articles supplying the training text
     * @return the trained {@link TfIdfDistance}
     */
    private static TfIdfDistance buildTfIdfCorpus(List<NewsItem> articles) {
        final long startedAt = System.currentTimeMillis();
        log.info("Start BuildTfIdfCorpus operation");

        TokenizerFactory tokenizer = new IndoEuropeanTokenizerFactory();
        TfIdfDistance model = new TfIdfDistance(tokenizer);
        for (NewsItem article : articles) {
            if (article.getHost().contains(".pt")) {
                CharSequence text = article.getText();
                model.handle(text);
            }
        }

        double seconds = (double) (System.currentTimeMillis() - startedAt) / 1000.0;
        log.info(String.format("BuildTfIdfCorpus operation took: %.2f seconds", seconds));
        return model;
    }

    /**
     * Assigns sections to every article, in list order, via
     * {@code NaiveClassifier.assignSections}.
     *
     * @param articles articles to classify
     * @param useModel forwarded unchanged to the classifier
     */
    private static void classify(List<NewsItem> articles, boolean useModel) {
        for (NewsItem article : articles) {
            NaiveClassifier.assignSections(article, useModel);
        }
    }

    /**
     * Delegates to {@code Keywords.extract} and logs how long the call took.
     *
     * @param tfidf       TF-IDF model handed to the extractor
     * @param articles    articles handed to the extractor
     * @param numkeywords keyword count handed to the extractor
     */
    private static void extractKeywords(TfIdfDistance tfidf, List<NewsItem> articles, int numkeywords) {
        final long startedAt = System.currentTimeMillis();
        log.info("Start ExtractKeywords operation");

        Keywords.extract(tfidf, articles, numkeywords);

        long elapsedMillis = System.currentTimeMillis() - startedAt;
        double seconds = (double) elapsedMillis / 1000.0;
        log.info(String.format("ExtractKeywords operation took: %.2f seconds", seconds));
    }

    /**
     * Fetches the current editorial documents.
     * <p>
     * The query selects active {@code Backoffice} documents in {@code Position:1} that have
     * a slug, grouped by {@code ItemPosition} (groups in ascending order, documents inside
     * a group by {@code ChangedDate} descending). The first document of each group is kept,
     * provided it has a non-empty {@code Images} field or a {@code HighlightImages} value.
     *
     * @param i maximum number of rows requested from Solr
     * @return the selected documents, in group order
     */
    private static SolrDocumentList fetchEditorial(int i) {
        long start = System.currentTimeMillis();
        log.info("fetchEditorial from Search API");
        SolrQuery squery = new SolrQuery("*:*");
        squery.setFields(Solr.FIELDS);
        squery.addFilterQuery("Slug:[* TO *]");
        squery.addFilterQuery("SourceType:Backoffice");
        squery.addFilterQuery("Position:1");
        squery.addFilterQuery("IsActive:true");
        squery.setRows(i);
        squery.setSort(SolrQuery.SortClause.asc("ItemPosition"));
        squery.set("group", true);
        squery.set("group.field", "ItemPosition");
        squery.set("group.sort", "ChangedDate desc");

        GroupResponse gres = NewsSolrSearch.query(squery).getGroupResponse();
        // Typed lists: the raw-List form left by the decompiler cannot be iterated as GroupCommand/Group.
        List<GroupCommand> groupCommands = gres.getValues();
        SolrDocumentList docs = new SolrDocumentList();
        for (GroupCommand gcmd : groupCommands) {
            List<Group> groups = gcmd.getValues();
            for (Group group : groups) {
                SolrDocumentList groupDocs = group.getResult();
                if (groupDocs == null || groupDocs.isEmpty()) {
                    // Nothing to pick from this group.
                    continue;
                }
                SolrDocument sd = groupDocs.get(0);
                Collection<?> imgs = sd.getFieldValues("Images");
                boolean hasImages = imgs != null && imgs.size() > 0;
                if (!hasImages && sd.getFieldValue("HighlightImages") == null) {
                    // Editorial entries without any image are not usable.
                    continue;
                }
                docs.add(sd);
            }
        }
        double duration = (double)(System.currentTimeMillis() - start) / 1000.0;
        log.info(String.format("fetchEditorial took: %.2f seconds, %s items", duration, docs.size()));
        return docs;
    }

    /**
     * Fetches one headline candidate per host.
     * <p>
     * The query selects non-{@code Backoffice} documents in {@code Position:1} that have a
     * slug, were published in the last 128 hours, are not in category "Outros Mundos",
     * belong to a fixed list of hosts and whose URL is not in {@code excludeUrls}. Results
     * are grouped by {@code Hostname}, sorted by {@code PublishDate} descending, and the
     * first document of each group is returned.
     *
     * @param i           maximum number of rows requested from Solr
     * @param excludeUrls URLs to filter out of the query; may be {@code null} or empty
     * @return the first document of every host group
     */
    private static SolrDocumentList fetchHeadlines(int i, Set<String> excludeUrls) {
        long start = System.currentTimeMillis();
        log.info("fetchHeadlines from Search API");
        SolrQuery squery = new SolrQuery("*:*");
        squery.setFields(Solr.FIELDS);
        squery.addFilterQuery("Slug:[* TO *]");
        squery.addFilterQuery("PublishDate:[NOW/HOUR-128HOUR TO NOW]");
        squery.addFilterQuery("Position:1");
        squery.addFilterQuery("-SourceType:Backoffice");
        squery.addFilterQuery("-Categories:\"Outros Mundos\"");
        if (excludeUrls != null && excludeUrls.size() > 0) {
            for (String url : excludeUrls) {
                // Escape backslash and double quote so the URL cannot break out of the quoted phrase.
                String escaped = url.replace("\\", "\\\\").replace("\"", "\\\"");
                squery.addFilterQuery(String.format("-Url:\"%s\"", escaped));
            }
        }
        squery.addFilterQuery("Hostname:economico.sapo.pt OR Hostname:sapo24.blogs.sapo.pt OR (Hostname:lusa.sapo.pt AND -Categories:\"Internacional\") OR Hostname:www.theguardian.com OR Hostname:publico.pt OR Hostname:www.publico.pt OR Hostname:mag.sapo.pt OR Hostname:www.rtp.pt OR Hostname:rr.sapo.pt OR Hostname:www.bbc.com OR Hostname:www.cnn.com OR Hostname:edition.cnn.com OR Hostname:www.ft.com OR Hostname:abola.pt OR Hostname:www.abola.pt OR Hostname:desporto.sapo.pt OR Hostname:elpais.com");
        squery.set("group", true);
        squery.set("group.field", "Hostname");
        squery.setRows(i);
        squery.setSort(SolrQuery.SortClause.desc("PublishDate"));

        GroupResponse gres = NewsSolrSearch.query(squery).getGroupResponse();
        // Typed lists: the raw-List form left by the decompiler cannot be iterated as GroupCommand/Group.
        List<GroupCommand> groupCommands = gres.getValues();
        SolrDocumentList docs = new SolrDocumentList();
        for (GroupCommand gcmd : groupCommands) {
            List<Group> groups = gcmd.getValues();
            for (Group group : groups) {
                SolrDocumentList groupDocs = group.getResult();
                if (groupDocs == null || groupDocs.isEmpty()) {
                    // Nothing to pick from this group.
                    continue;
                }
                docs.add(groupDocs.get(0));
            }
        }
        double duration = (double)(System.currentTimeMillis() - start) / 1000.0;
        log.info(String.format("fetchHeadlines took: %.2f seconds, %s items", duration, docs.size()));
        return docs;
    }

    /**
     * Fetches the most recent non-{@code Backoffice} documents of the last 24 hours.
     * <p>
     * Documents must have a slug. A fixed list of hosts is excluded; in the
     * {@code "production"} environment {@code comboios150.blogs.sapo.pt} is excluded too.
     * Results are sorted by {@code PublishDate} descending.
     *
     * @param i maximum number of rows requested from Solr
     * @return the matching documents
     */
    private static SolrDocumentList fetchLast24News(int i) {
        long start = System.currentTimeMillis();
        // Log label corrected: it used to read "fetchHeadlines" (copy-paste).
        log.info("fetchLast24News from Search API");
        SolrQuery squery = new SolrQuery("*:*");
        squery.setFields(Solr.FIELDS);
        squery.addFilterQuery("Slug:[* TO *]");
        squery.addFilterQuery("PublishDate:[NOW/HOUR-24HOUR TO NOW]");
        squery.addFilterQuery("-SourceType:Backoffice");
        // Constant-first comparison so an unset environment does not throw.
        if ("production".equals(DbBuilder.environment)) {
            squery.addFilterQuery("-Hostname:comboios150.blogs.sapo.pt");
        }
        String[] excludedHosts = {
            "sicnoticias.sapo.pt",
            "visao.sapo.pt",
            "expresso.sapo.pt",
            "www.jornaldenegocios.pt",
            "www.cmjornal.xl.pt",
            "www.record.xl.pt",
            "cmtv.sapo.pt",
            "www.ojogo.pt",
            "www.tsf.pt",
            "www.jn.pt",
            "www.dn.pt",
            "observador.pt",
            "www.ionline.pt",
            "www.sol.pt",
            "www.tvi24.iol.pt"
        };
        for (String host : excludedHosts) {
            squery.addFilterQuery("-Hostname:" + host);
        }
        squery.setRows(i);
        squery.setSort(SolrQuery.SortClause.desc("PublishDate"));
        SolrDocumentList docs = NewsSolrSearch.query(squery).getResults();
        double duration = (double)(System.currentTimeMillis() - start) / 1000.0;
        log.info(String.format("fetchLast24News took: %.2f seconds, %s items", duration, docs.size()));
        return docs;
    }

    /**
     * Fetches the most recent documents from {@code lusa.sapo.pt}.
     * <p>
     * Documents must have a slug, be published in the last 24 hours, not be
     * {@code Backoffice} items and not be in categories "Outros Mundos" or "Internacional".
     * Results are sorted by {@code PublishDate} descending.
     *
     * @param i maximum number of rows requested from Solr
     * @return the matching documents
     */
    private static SolrDocumentList fetchLusaNews(int i) {
        long start = System.currentTimeMillis();
        // Log label corrected: it used to read "fetchHeadlines" (copy-paste).
        log.info("fetchLusaNews from Search API");
        SolrQuery squery = new SolrQuery("*:*");
        squery.setFields(Solr.FIELDS);
        squery.addFilterQuery("Slug:[* TO *]");
        squery.addFilterQuery("PublishDate:[NOW/HOUR-24HOUR TO NOW]");
        squery.addFilterQuery("-SourceType:Backoffice");
        squery.addFilterQuery("-Categories:\"Outros Mundos\"");
        squery.addFilterQuery("-Categories:\"Internacional\"");
        squery.setRows(i);
        squery.setSort(SolrQuery.SortClause.desc("PublishDate"));
        squery.addFilterQuery("Hostname:lusa.sapo.pt");
        SolrDocumentList docs = NewsSolrSearch.query(squery).getResults();
        double duration = (double)(System.currentTimeMillis() - start) / 1000.0;
        log.info(String.format("fetchLusaNews took: %.2f seconds, %s items", duration, docs.size()));
        return docs;
    }

    /**
     * Fetches the most recent documents from {@code sapo24.blogs.sapo.pt}.
     * <p>
     * Documents must have a slug, not be {@code Backoffice} items and not be in category
     * "Outros Mundos". No publish-date window is applied. Results are sorted by
     * {@code PublishDate} descending.
     *
     * @param i maximum number of rows requested from Solr
     * @return the matching documents
     */
    private static SolrDocumentList fetchSapo24BlogNews(int i) {
        long start = System.currentTimeMillis();
        // Log label corrected: it used to read "fetchHeadlines" (copy-paste).
        log.info("fetchSapo24BlogNews from Search API");
        SolrQuery squery = new SolrQuery("*:*");
        squery.setFields(Solr.FIELDS);
        squery.addFilterQuery("Slug:[* TO *]");
        squery.addFilterQuery("-SourceType:Backoffice");
        squery.addFilterQuery("-Categories:\"Outros Mundos\"");
        // The earlier setRows(6) was dead code: it was always overwritten by this call.
        squery.setRows(i);
        squery.setSort(SolrQuery.SortClause.desc("PublishDate"));
        squery.addFilterQuery("Hostname:sapo24.blogs.sapo.pt");
        SolrDocumentList docs = NewsSolrSearch.query(squery).getResults();
        double duration = (double)(System.currentTimeMillis() - start) / 1000.0;
        log.info(String.format("fetchSapo24BlogNews took: %.2f seconds, %s items", duration, docs.size()));
        return docs;
    }

    /**
     * Fetches one page of documents used to train the classifier.
     * <p>
     * The query matches hosts of the form {@code *.pt*} with a slug, published in the last
     * 144 hours and not in category "Outros Mundos", sorted by {@code PublishDate}
     * descending.
     *
     * @param numberOfDocs page size
     * @param start_from   offset of the first row to return
     * @return the requested page of documents
     * @throws SolrServerException declared for callers; presumably raised by the search call
     */
    private static SolrDocumentList fetchClassifierNews(int numberOfDocs, int start_from) throws SolrServerException {
        final long startedAt = System.currentTimeMillis();
        log.info("fetchClassifierNews from Search API");

        SolrQuery query = new SolrQuery("Hostname:*.pt*");
        query.setFields(Solr.FIELDS);
        String[] filters = {
            "Slug:[* TO *]",
            "PublishDate:[NOW/HOUR-144HOUR TO NOW]",
            "-Categories:\"Outros Mundos\""
        };
        for (String filter : filters) {
            query.addFilterQuery(filter);
        }
        query.setStart(start_from);
        query.setRows(numberOfDocs);
        query.setSort(SolrQuery.SortClause.desc("PublishDate"));

        SolrDocumentList found = NewsSolrSearch.query(query).getResults();
        double seconds = (double) (System.currentTimeMillis() - startedAt) / 1000.0;
        log.info(String.format("fetchClassifierNews took: %.2f seconds, %s items", seconds, found.size()));
        return found;
    }

    /**
     * Fetches the main news documents of the last 48 hours.
     * <p>
     * Documents must have a slug and a host of the form {@code *.pt*}. A fixed list of
     * hosts is excluded (plus {@code comboios150.blogs.sapo.pt} in the
     * {@code "production"} environment), as are opinion-like categories and tags. Results
     * are sorted by {@code PublishDate} descending.
     *
     * @param numberOfDocs maximum number of rows requested from Solr
     * @return the matching documents
     * @throws SolrServerException declared for callers; presumably raised by the search call
     */
    private static SolrDocumentList fetchMainNews(int numberOfDocs) throws SolrServerException {
        final long startedAt = System.currentTimeMillis();
        log.info("fetchMainNews from Search API");

        SolrQuery query = new SolrQuery("*:*");
        query.setFields(Solr.FIELDS);
        query.addFilterQuery("Slug:[* TO *]");
        query.addFilterQuery("Hostname:*.pt*");
        if (DbBuilder.environment.equals("production")) {
            query.addFilterQuery("-Hostname:comboios150.blogs.sapo.pt");
        }

        // Remaining filters, added in this exact order.
        String[] filters = {
            "-Hostname:sicnoticias.sapo.pt",
            "-Hostname:visao.sapo.pt",
            "-Hostname:expresso.sapo.pt",
            "-Hostname:www.jornaldenegocios.pt",
            "-Hostname:www.cmjornal.xl.pt",
            "-Hostname:www.record.xl.pt",
            "-Hostname:cmtv.sapo.pt",
            "-Hostname:www.ojogo.pt",
            "-Hostname:www.tsf.pt",
            "-Hostname:www.jn.pt",
            "-Hostname:www.dn.pt",
            "-Hostname:observador.pt",
            "-Hostname:www.tvi24.iol.pt",
            "-Hostname:www.ionline.pt",
            "-Hostname:www.sol.pt",
            "PublishDate:[NOW/HOUR-48HOUR TO NOW]",
            "-Categories:opini\u00e3o",
            "-Categories:Opini\u00e3o",
            "-Categories:Editorial",
            "-Categories:Coment\u00e1rio",
            "-Categories:Cr\u00f3nica",
            "-Tags:opini\u00e3o",
            "-Tags:cr\u00f3nica",
            "-Tags:editorial",
            "-Tags:coment\u00e1rio"
        };
        for (String filter : filters) {
            query.addFilterQuery(filter);
        }

        query.setRows(numberOfDocs);
        query.setSort(SolrQuery.SortClause.desc("PublishDate"));

        SolrDocumentList found = NewsSolrSearch.query(query).getResults();
        double seconds = (double) (System.currentTimeMillis() - startedAt) / 1000.0;
        log.info(String.format("fetchMainNews took: %.2f seconds, %s items", seconds, found.size()));
        return found;
    }

    /**
     * Fetches technology documents of the last seven days.
     * <p>
     * A document matches when its category or tag is {@code tecnologia} or its host is
     * {@code tek.sapo.pt}; it must also have a slug. Results are sorted by
     * {@code PublishDate} descending.
     *
     * @param numberOfDocs maximum number of rows requested from Solr
     * @return the matching documents
     * @throws SolrServerException declared for callers; presumably raised by the search call
     */
    private static SolrDocumentList fetchTechNews(int numberOfDocs) throws SolrServerException {
        long start = System.currentTimeMillis();
        // Log labels corrected: both used to read "fetchOpinionNews" (copy-paste).
        log.info("fetchTechNews from Search API");
        SolrQuery squery = new SolrQuery("Categories:tecnologia OR Tags:tecnologia OR Hostname:tek.sapo.pt");
        squery.setFields(Solr.FIELDS);
        squery.addFilterQuery("Slug:[* TO *]");
        squery.addFilterQuery("PublishDate:[NOW/DAY-7DAY TO NOW]");
        squery.setRows(numberOfDocs);
        squery.setSort(SolrQuery.SortClause.desc("PublishDate"));
        SolrDocumentList docs = NewsSolrSearch.query(squery).getResults();
        double duration = (double)(System.currentTimeMillis() - start) / 1000.0;
        log.info(String.format("fetchTechNews took: %.2f seconds, %s items", duration, docs.size()));
        return docs;
    }

    /**
     * Fetches opinion-like documents of the last seven days.
     * <p>
     * A document matches when one of its categories or tags is an opinion, editorial,
     * commentary or chronicle marker; it must also have a slug. A fixed list of hosts is
     * excluded (plus {@code comboios150.blogs.sapo.pt} in the {@code "production"}
     * environment). Results are sorted by {@code PublishDate} descending.
     *
     * @param numberOfDocs maximum number of rows requested from Solr
     * @return the matching documents
     * @throws SolrServerException declared for callers; presumably raised by the search call
     */
    private static SolrDocumentList fetchOpinionNews(int numberOfDocs) throws SolrServerException {
        long start = System.currentTimeMillis();
        log.info("fetchOpinionNews from Search API");
        // "Tags:editorial" was previously spelled "Tags:editORial", which does not agree with
        // the "-Tags:editorial" exclusion used for main news; the repeated "Tags:opinião"
        // clause was dropped.
        String opinionClauses = String.join(" OR ",
            "Categories:opiniao",
            "Categories:opini\u00e3o",
            "Categories:Opini\u00e3o",
            "Categories:Editorial",
            "Categories:Coment\u00e1rio",
            "Categories:Cr\u00f3nica",
            "Tags:opini\u00e3o",
            "Tags:editorial",
            "Tags:coment\u00e1rio",
            "Tags:cr\u00f3nica");
        SolrQuery squery = new SolrQuery(opinionClauses);
        squery.setFields(Solr.FIELDS);
        // Constant-first comparison so an unset environment does not throw.
        if ("production".equals(DbBuilder.environment)) {
            squery.addFilterQuery("-Hostname:comboios150.blogs.sapo.pt");
        }
        String[] excludedHosts = {
            "sicnoticias.sapo.pt",
            "visao.sapo.pt",
            "expresso.sapo.pt",
            "www.jornaldenegocios.pt",
            "www.cmjornal.xl.pt",
            "www.record.xl.pt",
            "cmtv.sapo.pt",
            "www.ojogo.pt",
            "www.tsf.pt",
            "www.jn.pt",
            "www.dn.pt",
            "observador.pt",
            "www.tvi24.iol.pt",
            "www.ionline.pt",
            "www.sol.pt"
        };
        for (String host : excludedHosts) {
            squery.addFilterQuery("-Hostname:" + host);
        }
        squery.addFilterQuery("Slug:[* TO *]");
        squery.addFilterQuery("PublishDate:[NOW/DAY-7DAY TO NOW]");
        squery.setRows(numberOfDocs);
        squery.setSort(SolrQuery.SortClause.desc("PublishDate"));
        SolrDocumentList docs = NewsSolrSearch.query(squery).getResults();
        double duration = (double)(System.currentTimeMillis() - start) / 1000.0;
        log.info(String.format("fetchOpinionNews took: %.2f seconds, %s items", duration, docs.size()));
        return docs;
    }

    /**
     * Computes and stores a ranking score on every article.
     * <p>
     * The score is the cube root of the product of three factors:
     * <ul>
     *   <li>the article's share of all social actions in the list,</li>
     *   <li>its content count divided by the number of articles,</li>
     *   <li>an age decay {@code e^(-ageInHours / 2)} based on the publication date.</li>
     * </ul>
     * When the list carries no social actions at all, the social share is taken as 0
     * instead of dividing 0 by 0, so scores stay finite rather than becoming {@code NaN}.
     *
     * @param articles articles to score; each one is updated through {@code setScore}
     */
    private static void getStats(List<NewsItem> articles) {
        double totalSocialActions = 0.0;
        double totalContentCount = articles.size();
        double now = System.currentTimeMillis();
        for (NewsItem ni : articles) {
            totalSocialActions += (double)ni.getSocialActionCount();
        }
        for (NewsItem ni : articles) {
            // Guard the 0/0 case: with no social actions anywhere, every share is zero.
            double socialscore = totalSocialActions > 0.0
                ? (double)ni.getSocialActionCount() / totalSocialActions
                : 0.0;
            double similarscore = (double)ni.getContentCount() / totalContentCount;
            // Age in hours (3,600,000 ms per hour).
            double age = Math.abs(now - (double)ni.getPubDate().getTime()) / 3600000.0;
            double agescore = Math.pow(Math.E, -1.0 * age / 2.0);
            double score = Math.pow(socialscore * similarscore * agescore, 1.0 / 3.0);
            ni.setScore(score);
        }
    }

    /**
     * Links articles to their successors, both within a section and by list position.
     * <p>
     * First pass: for every known section, each article of that section that has no
     * next-by-category link yet is pointed at the slug of the next article of the same
     * section further down the list, and that successor's host is added to
     * {@code noDupsHosts}. The search now includes the last article of the list; it
     * previously stopped one element short, so the final article could never be a successor.
     * <p>
     * Second pass: the first occurrence of each slug gets a position (its index minus the
     * number of repeated slugs seen so far), is registered in {@code itemBySlug} under its
     * URL, is pointed at the slug of the following list element, and has its host added to
     * {@code noDupsHosts}. Later occurrences of a slug are skipped.
     *
     * @param articles    articles to link, in display order
     * @param noDupsHosts collects the hosts of linked articles
     * @param itemBySlug  receives the processed articles; note the key is the article URL
     */
    private static void linkArticles(List<NewsItem> articles, Collection<String> noDupsHosts, Map<String, NewsItem> itemBySlug) {
        long start = System.currentTimeMillis();
        log.info("Start LinkArticles operation");
        String[] sections = new String[]{"vida", "sociedade", "tecnologia", "economia", "desporto", "opiniao", "actualidade"};
        for (String section : sections) {
            // The last article has no successor, hence size() - 1 for the outer loop only.
            for (int i = 0; i < articles.size() - 1; ++i) {
                NewsItem current = articles.get(i);
                if (!section.equals(current.getSection()) || !StringUtils.isBlank(current.getNextByCategory())) {
                    continue;
                }
                for (int j = i + 1; j < articles.size(); ++j) {
                    NewsItem candidate = articles.get(j);
                    if (!section.equals(candidate.getSection())) {
                        continue;
                    }
                    current.setNextByCategory(candidate.getSlug());
                    noDupsHosts.add(candidate.getHost());
                    break;
                }
            }
        }

        HashSet<String> seenSlugs = new HashSet<String>();
        int duplicates = 0;
        for (int i = 0; i < articles.size(); ++i) {
            NewsItem item = articles.get(i);
            // Set.add returns false when the slug was already seen.
            if (!seenSlugs.add(item.getSlug())) {
                ++duplicates;
                continue;
            }
            item.setPosition(i - duplicates);
            itemBySlug.put(item.getUrl(), item);
            if (i + 1 < articles.size()) {
                // NOTE(review): the following element may itself be a repeated slug that is skipped above — confirm this is intended.
                item.setNextByPosition(articles.get(i + 1).getSlug());
            }
            noDupsHosts.add(item.getHost());
        }
        double duration = (double)(System.currentTimeMillis() - start) / 1000.0;
        log.info(String.format("LinkArticles operation took: %.2f seconds", duration));
    }

    /**
     * Chains the {@code lusa} items after the ranked articles and links them to each other.
     *
     * <p>The last entry of {@code articles} gets {@code nextByPosition} pointing at the
     * first {@code lusa} item. Then every {@code lusa} item except the last one is replaced
     * (in the list) by the instance already registered in {@code itemBySlug} under the same
     * URL, if any, and gets both {@code nextByLatestHost} and {@code nextByPosition} set to
     * the slug of the following {@code lusa} item.
     *
     * @param lusa        the items to chain; modified in place
     * @param articles    the already linked articles; may be empty
     * @param noDupsHosts not used by this overload
     * @param itemBySlug  lookup of already known items, queried by {@code getUrl()}
     */
    private static void linkArticles(List<NewsItem> lusa, List<NewsItem> articles, Collection<String> noDupsHosts, Map<String, NewsItem> itemBySlug) {
        // Guard against an empty article list: get(size - 1) would otherwise throw.
        if (!articles.isEmpty() && !lusa.isEmpty()) {
            NewsItem lastArticle = articles.get(articles.size() - 1);
            NewsItem firstLusa = lusa.get(0);
            if (lastArticle != null && firstLusa != null) {
                lastArticle.setNextByPosition(firstLusa.getSlug());
            }
        }
        int total = lusa.size();
        for (int i = 0; i < total - 1; ++i) {
            NewsItem current = lusa.get(i);
            if (itemBySlug.containsKey(current.getUrl())) {
                // Prefer the instance that is already linked elsewhere.
                current = itemBySlug.get(current.getUrl());
                lusa.set(i, current);
            }
            String nextSlug = lusa.get(i + 1).getSlug();
            current.setNextByLatestHost(nextSlug);
            current.setNextByPosition(nextSlug);
        }
    }

    /**
     * Links the latest articles of each host to each other and to the same-host chain in
     * {@code all}.
     *
     * <p>First pass: for every host in {@code lNews} with at least two articles, each
     * article except the last is swapped for the instance registered in
     * {@code itemBySlug} under the same URL (if any) and gets {@code nextByLatestHost}
     * set to the slug of the following article. The rebuilt list replaces the map value.
     * Every host key is added to {@code noDupsHosts}.
     *
     * <p>Second pass: for every host in {@code noDupsHosts}, the last latest article of
     * that host (if any) gets {@code nextByHost} set to the first article of that host in
     * {@code all}, and every article of that host in {@code all} gets {@code nextByHost}
     * set to the next article of the same host further down the list.
     *
     * @param lNews       latest articles grouped by host; values may be replaced
     * @param all         the complete article list
     * @param noDupsHosts hosts to process; the hosts of {@code lNews} are added to it
     * @param itemBySlug  lookup of already known items, queried by {@code getUrl()}
     */
    private static void linkArticles(HashMap<String, List<NewsItem>> lNews, List<NewsItem> all, Collection<String> noDupsHosts, Map<String, NewsItem> itemBySlug) {
        long start = System.currentTimeMillis();
        log.info("Start LinkLatestArticles operation");
        for (Map.Entry<String, List<NewsItem>> entry : lNews.entrySet()) {
            List<NewsItem> articles = entry.getValue();
            noDupsHosts.add(entry.getKey());
            if (articles.size() < 2) {
                // Nothing to chain; the original list is kept untouched.
                continue;
            }
            List<NewsItem> linked = new ArrayList<NewsItem>(articles.size());
            for (int i = 0; i < articles.size() - 1; ++i) {
                NewsItem current = articles.get(i);
                if (itemBySlug.containsKey(current.getUrl())) {
                    current = itemBySlug.get(current.getUrl());
                }
                current.setNextByLatestHost(articles.get(i + 1).getSlug());
                linked.add(current);
            }
            linked.add(articles.get(articles.size() - 1));
            // setValue writes through to the map without a structural modification.
            entry.setValue(linked);
        }
        for (String host : noDupsHosts) {
            NewsItem lastLatest = null;
            List<NewsItem> latest = lNews.get(host);
            // An empty list would otherwise fail on get(size - 1).
            if (latest != null && !latest.isEmpty()) {
                lastLatest = latest.get(latest.size() - 1);
            }
            boolean linkLatest = lastLatest != null;
            // Both scans cover the whole list: stopping one element short would make the
            // last article of 'all' unreachable as a link target.
            for (int i = 0; i < all.size(); ++i) {
                NewsItem current = all.get(i);
                if (!current.getHost().equalsIgnoreCase(host)) {
                    continue;
                }
                if (linkLatest) {
                    linkLatest = false;
                    lastLatest.setNextByHost(current.getSlug());
                }
                for (int j = i + 1; j < all.size(); ++j) {
                    NewsItem candidate = all.get(j);
                    if (!candidate.getHost().equalsIgnoreCase(host)) {
                        continue;
                    }
                    current.setNextByHost(candidate.getSlug());
                    break;
                }
            }
        }
        double duration = (double)(System.currentTimeMillis() - start) / 1000.0;
        log.info(String.format("LinkLatestArticles operation took: %.2f seconds", duration));
    }

    /**
     * Fetches the social network share counts of every article and stores them on the
     * article via {@code setSocialShares}.
     *
     * <p>The lookups run on a pool of 5 threads; this method blocks until all of them have
     * finished or the calling thread is interrupted (in which case the interrupt flag is
     * restored and the method returns early).
     *
     * @param articles the articles to populate
     */
    private static void populateSharedCount(List<NewsItem> articles) {
        long start = System.currentTimeMillis();
        log.info("Start SocialNetworkShareCount operation");
        ThreadPoolExecutor exec = CustomExecutors.newThreadPool(5, "exec");
        try {
            final CountDownLatch latch = new CountDownLatch(articles.size());
            for (final NewsItem ni : articles) {
                exec.execute(() -> {
                    try {
                        Map<String, Integer> socialShares = SocialNetworkShareCount.getShareCounts(ni.getUrl());
                        ni.setSocialShares(socialShares);
                    }
                    finally {
                        // Always count down so a failed lookup cannot block the caller forever.
                        latch.countDown();
                    }
                });
            }
            try {
                latch.await();
            }
            catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
        finally {
            // The pool is created per call; without a shutdown its worker threads leak.
            // shutdown() still lets already submitted lookups finish.
            exec.shutdown();
        }
        double duration = (double)(System.currentTimeMillis() - start) / 1000.0;
        log.info(String.format("SharedCount operation took: %.2f seconds", duration));
    }

    /**
     * Swaps every article that has at least one similar item with the first entry of its
     * similar-items list: the similar item takes the article's slot in
     * {@code lst_articles} and the article takes the first slot of the similar-items list.
     * Each swap and the total number of swaps are logged.
     *
     * <p>NOTE(review): the swap is unconditional; no "full content" check is visible
     * here - confirm this matches the intent suggested by the method name.
     *
     * @param lst_articles the articles to process; modified in place
     */
    private static void promoteFullContentArticles(List<NewsItem> lst_articles) {
        long start = System.currentTimeMillis();
        log.info("Start PromoteFullContentArticles");
        int swapped = 0;
        for (int index = 0; index < lst_articles.size(); index++) {
            NewsItem article = lst_articles.get(index);
            List similar = article.getSimilarItems();
            if (similar.isEmpty()) {
                continue;
            }
            NewsItem promoted = (NewsItem)similar.get(0);
            similar.set(0, article);
            lst_articles.set(index, promoted);
            log.info("Swap '{}' with '{}'", article.getUrl(), promoted.getUrl());
            swapped++;
        }
        log.info("Total swapped news: {}", swapped);
        double duration = (double)(System.currentTimeMillis() - start) / 1000.0;
        log.info(String.format("PromoteFullContentArticles took: %.2f seconds", duration));
    }

    /**
     * Sorts the articles in place by descending score; articles whose scores compare
     * neither greater nor smaller are treated as equal.
     *
     * @param articles the articles to sort
     */
    private static void rankArticles(List<NewsItem> articles) {
        Collections.sort(articles, (left, right) -> {
            double leftScore = left.getScore();
            double rightScore = right.getScore();
            if (leftScore > rightScore) {
                return -1;
            }
            return leftScore < rightScore ? 1 : 0;
        });
    }

    /**
     * Sorts the headlines in place by ascending {@code Host.getHeadlineOrder} of their
     * host.
     *
     * @param headlines the headlines to sort
     */
    private static void rankHeadLinesByHost(List<NewsItem> headlines) {
        Collections.sort(headlines, (left, right) -> {
            if (Host.getHeadlineOrder(left.getHost()) > Host.getHeadlineOrder(right.getHost())) {
                return 1;
            }
            return Host.getHeadlineOrder(left.getHost()) < Host.getHeadlineOrder(right.getHost()) ? -1 : 0;
        });
    }

    /**
     * Attaches related videos to every article.
     *
     * <p>Keywords are extracted first (limit 20). Then, per article, the related videos are
     * fetched; when the article has a video of its own, related videos whose URL (with
     * {@code "/mov/1"} removed) equals the article's video URL are dropped. The result is
     * stored on the article and the article is flagged as video-pre-processed. A failure
     * for one article is logged with its root-cause message and does not stop the loop.
     *
     * @param tfidf_articles the TF-IDF model passed on to keyword extraction
     * @param lst_ni         the articles to process
     */
    private static void similiarVideos(TfIdfDistance tfidf_articles, List<NewsItem> lst_ni) {
        long start = System.currentTimeMillis();
        log.info("Start SimiliarVideos operation");
        NewsDbBuilder.extractKeywords(tfidf_articles, lst_ni, 20);
        int relatedVideoCount = 0;
        int articlesWithVideos = 0;
        for (NewsItem ni : lst_ni) {
            try {
                List<RelatedVideo> related = Videos.fetchRelatedVideos(ni);
                if (ni.getHasVideos()) {
                    String ownVideoUrl = ni.getVideoUrl();
                    // Keep only the videos that are not the article's own video.
                    related = related.stream()
                            .filter(video -> !ownVideoUrl.equals(StringUtils.replace(video.getVideoUrl(), "/mov/1", "")))
                            .collect(Collectors.toCollection(ArrayList::new));
                }
                ni.setRelatedVideos(related);
                ni.setVideoPreProcessed(true);
                relatedVideoCount += related.size();
                if (!related.isEmpty()) {
                    articlesWithVideos++;
                }
            }
            catch (Throwable t) {
                Throwable rootCause = ErrorAnalyser.findRootCause(t);
                log.error("Could not fetch related videos for '{}', reason: {}", ni.getUrl(), rootCause.getMessage());
            }
        }
        double duration = (double)(System.currentTimeMillis() - start) / 1000.0;
        log.info(String.format("Total number of related videos: %d", relatedVideoCount));
        log.info(String.format("Total number of articles with related videos: %d", articlesWithVideos));
        log.info(String.format("SimiliarVideos operation took: %.2f seconds", duration));
    }
}

