package pt.sapo.hp24.db;

import com.aliasi.spell.TfIdfDistance;
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.Group;
import org.apache.solr.client.solrj.response.GroupCommand;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.caudexorigo.ErrorAnalyser;
import org.caudexorigo.Shutdown;
import org.caudexorigo.concurrent.CustomExecutors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pt.sapo.hp24.api.NewsDb;
import pt.sapo.hp24.api.NewsItem;
import pt.sapo.hp24.api.RelatedVideo;
import pt.sapo.hp24.classifier.NaiveClassifier;
import pt.sapo.hp24.classifier.SVMTool;
import pt.sapo.hp24.db.tools.Deduper;
import pt.sapo.hp24.db.tools.Keywords;
import pt.sapo.hp24.db.tools.NewsItemBuilder;
import pt.sapo.hp24.db.tools.NewsSolrSearch;
import pt.sapo.hp24.db.tools.SocialNetworkShareCount;
import pt.sapo.hp24.db.tools.Solr;
import pt.sapo.hp24.db.tools.Videos;
import pt.sapo.hp24.tools.Host;
import pt.sapo.hp24.tools.Text;

/* loaded from: input_file:pt/sapo/hp24/db/NewsDbBuilder.class */
public class NewsDbBuilder {
    private static final Logger log = LoggerFactory.getLogger(NewsDbBuilder.class);

    /**
     * Command-line entry point: runs one {@link #build()} pass, then logs completion and calls
     * {@code Shutdown.now()}.
     *
     * @param strArr command-line arguments (not used)
     */
    public static void main(String[] strArr) {
        try {
            build();
        } catch (Throwable th) {
            // Hand the failure to the shutdown helper; the finally block still runs afterwards.
            Shutdown.now(th);
        } finally {
            // Single copy of the cleanup that used to be repeated on every exit path.
            log.info("End processing.");
            Shutdown.now();
        }
    }

    /**
     * Runs the complete database build: fetches every feed from Solr, converts the documents into
     * {@link NewsItem}s, deduplicates, scores, ranks, classifies and links them, and finally hands
     * all resulting lists to {@code NewsDb.build}.
     * <p>
     * Statement order matters here: the URL sets passed to {@code buildNewsItemList} are mutated by
     * every call, so an article URL consumed by an earlier list is skipped by the later ones that
     * share the same set.
     * <p>
     * Any {@link Throwable} raised along the way is passed to {@code Shutdown.now(Throwable)}.
     */
    public static void build() {
        try {
            List<NewsItem> lusaItems = buildNewsItemList(fetchLusaNews(10), Collections.emptySet(), new HashSet<>());
            List<NewsItem> blogItems = buildNewsItemList(fetchSapo24BlogNews(10), Collections.emptySet(), new HashSet<>());
            SolrDocumentList latestDocs = fetchLast24News(2000);

            // Shared by the editorial, main, tech and opinion lists so a URL lands in only one of them.
            HashSet<String> seenUrls = new HashSet<>();
            seenUrls.addAll(Host.getExcludeurls());

            buildClassifierModel();

            SolrDocumentList editorialDocs = fetchEditorial(3);
            SolrDocumentList headlineDocs = fetchHeadlines(100);
            SolrDocumentList mainDocs = fetchMainNews(400);
            SolrDocumentList techDocs = fetchTechNews(40);
            SolrDocumentList opinionDocs = fetchOpinionNews(80);

            List<NewsItem> editorialItems = buildNewsItemList(editorialDocs, Collections.emptySet(), seenUrls);

            // Headlines are deduplicated independently of the other lists.
            HashSet<String> headlineSeenUrls = new HashSet<>();
            headlineSeenUrls.addAll(Host.getExcludeurls());
            List<NewsItem> headlineItems = buildNewsItemList(headlineDocs, Collections.emptySet(), headlineSeenUrls);

            HashSet<String> opinionCategories = new HashSet<>();
            opinionCategories.add("opinião");

            // www.publico.pt items whose change date is not after this instant (24h ago) get a shortened body.
            Date staleCutoff = new Date(new Date().getTime() - 86400000);

            ArrayList<NewsItem> mainItems = new ArrayList<>(buildNewsItemList(mainDocs, Collections.emptySet(), seenUrls));
            summarizeStalePublicoBodies(mainItems, staleCutoff);

            List<NewsItem> techItems = buildNewsItemList(techDocs, Collections.emptySet(), seenUrls);
            summarizeStalePublicoBodies(techItems, staleCutoff);
            mainItems.addAll(techItems);

            // Opinion pieces hosted on www.publico.pt are dropped entirely.
            for (NewsItem opinion : buildNewsItemList(opinionDocs, opinionCategories, seenUrls)) {
                if (!opinion.getHost().startsWith("www.publico.pt")) {
                    mainItems.add(opinion);
                }
            }

            ArrayList<NewsItem> corpusItems = new ArrayList<>();
            corpusItems.addAll(editorialItems);
            corpusItems.addAll(headlineItems);
            corpusItems.addAll(mainItems);
            TfIdfDistance corpus = buildTfIdfCorpus(corpusItems);

            rankHeadLinesByHost(headlineItems);
            aggregateSimilarItems(corpus, mainItems);
            populateSharedCount(mainItems);
            getStats(mainItems);
            rankArticles(mainItems);

            // Built again after the steps above (presumably they can reorder or shrink mainItems — TODO confirm).
            ArrayList<NewsItem> allItems = new ArrayList<>();
            allItems.addAll(editorialItems);
            allItems.addAll(headlineItems);
            allItems.addAll(mainItems);
            classify(allItems, true);
            similiarVideos(corpus, allItems);

            HashSet<String> linkedHosts = new HashSet<>();
            HashMap<String, NewsItem> itemsByUrl = new HashMap<>();
            linkArticles(allItems, linkedHosts, itemsByUrl);

            HashSet<String> latestSeenUrls = new HashSet<>();
            latestSeenUrls.addAll(Host.getExcludeurls());
            HashMap<String, List<NewsItem>> latestByHost = buildLastestNewsItemList(latestDocs, Collections.emptySet(), latestSeenUrls);
            reduceNumberLastArticelesBySource(latestByHost);

            linkArticles(latestByHost, mainItems, linkedHosts, itemsByUrl);
            linkArticles(lusaItems, mainItems, linkedHosts, itemsByUrl);
            linkArticles(blogItems, mainItems, linkedHosts, itemsByUrl);

            NewsDb.build(editorialItems, headlineItems, mainItems, latestByHost, lusaItems, blogItems);
            // Kept from the original: emits a blank line on stdout once the build is done.
            System.out.println();
        } catch (Throwable th) {
            Shutdown.now(th);
        }
    }

    /**
     * Replaces, in place, the body of every item hosted on www.publico.pt whose change date is not
     * after {@code cutoff} with {@code Text.getSummary(body, 250, true)}. All other items are untouched.
     *
     * @param items  items to inspect; elements are modified in place
     * @param cutoff items changed after this instant keep their full body
     */
    private static void summarizeStalePublicoBodies(List<NewsItem> items, Date cutoff) {
        for (NewsItem item : items) {
            if (item.getHost().startsWith("www.publico.pt") && !item.getChangeDate().after(cutoff)) {
                item.setBody(Text.getSummary(item.getBody(), 250, true));
            }
        }
    }

    /**
     * Rebuilds only the editorial list: fetches up to three editorial documents, chains the first
     * items together by position, points the last chained item at the first current headline (when
     * there is one) and passes the list to {@code NewsDb.build}.
     * <p>
     * An empty editorial list is no longer an error: linking is skipped and the (empty) list is
     * still handed to {@code NewsDb.build}. Failures inside {@code NewsDb.build} are passed to
     * {@code Shutdown.now(Throwable)}.
     */
    public static void buildEditorial() {
        List<NewsItem> editorialItems = buildNewsItemList(fetchEditorial(3), Collections.emptySet(), new HashSet<>());

        // Index of the last editorial item that takes part in the chain (at most the third one).
        int lastLinked = Math.max(0, Math.min(2, editorialItems.size() - 1));
        for (int pos = 0; pos < lastLinked; pos++) {
            editorialItems.get(pos).setNextByPosition(editorialItems.get(pos + 1).getSlug());
        }

        List<?> headlines = NewsDb.getAllHeadlines();
        if (editorialItems.isEmpty()) {
            // Previously this case threw IndexOutOfBoundsException as soon as headlines existed.
            log.warn("No editorial items available; skipping editorial linking");
        } else if (headlines != null && !headlines.isEmpty()) {
            editorialItems.get(lastLinked).setNextByPosition(((NewsItem) headlines.get(0)).getSlug());
        }

        try {
            log.info("before build");
            NewsDb.build(editorialItems);
            log.info("after build");
        } catch (Throwable th) {
            Shutdown.now(th);
        }
    }

    /**
     * Caps every per-source list in the map at its first six items. Lists that are already short
     * enough are left as they are; longer ones are replaced by a fresh copy of their first six elements.
     *
     * @param hashMap source key to article list; modified in place
     */
    private static void reduceNumberLastArticelesBySource(HashMap<String, List<NewsItem>> hashMap) {
        final int maxPerSource = 6;
        for (Map.Entry<String, List<NewsItem>> bySource : hashMap.entrySet()) {
            List<NewsItem> articles = bySource.getValue();
            if (articles.size() <= maxPerSource) {
                continue;
            }
            // Writes through to the map without changing its key set.
            bySource.setValue(new ArrayList<>(articles.subList(0, maxPerSource)));
        }
    }

    /**
     * Trains the section classifier model: fetches four pages of 1000 classifier documents,
     * builds items from them, extracts keywords, runs the naive classifier and passes every item that
     * has at least one section and whose section is not "actualidade" to {@code SVMTool.createModel},
     * writing to {@code ./classifier.model}.
     * <p>
     * Any {@link Throwable} is passed to {@code Shutdown.now(Throwable)}.
     */
    private static void buildClassifierModel() {
        try {
            final int pageSize = 1000;
            SolrDocumentList trainingDocs = fetchClassifierNews(pageSize, 0);
            for (int offset = pageSize; offset <= 3 * pageSize; offset += pageSize) {
                trainingDocs.addAll(fetchClassifierNews(pageSize, offset));
            }

            List<NewsItem> candidates = buildNewsItemList(trainingDocs, Collections.emptySet(), new HashSet<>());
            TfIdfDistance corpus = buildTfIdfCorpus(candidates);
            extractKeywords(corpus, candidates, 20);
            classify(candidates, false);

            // Keep only items that received a section other than the catch-all "actualidade".
            List<NewsItem> trainingSet = new ArrayList<>();
            for (NewsItem candidate : candidates) {
                if (!candidate.getSections().isEmpty() && !candidate.getSection().equals("actualidade")) {
                    trainingSet.add(candidate);
                }
            }

            System.out.println("NUmber of articles for model training: " + trainingSet.size());
            SVMTool.createModel(trainingSet, "./classifier.model");
        } catch (Throwable th) {
            Shutdown.now(th);
        }
    }

    /**
     * Delegates to {@code Deduper.aggregateSimilar} and logs how long the call took.
     *
     * @param tfIdfDistance corpus passed through to the deduper
     * @param list          items passed through to the deduper
     */
    private static void aggregateSimilarItems(TfIdfDistance tfIdfDistance, List<NewsItem> list) {
        final long startedAt = System.currentTimeMillis();
        log.info("Start Deduplication");

        Deduper.aggregateSimilar(tfIdfDistance, list);

        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("Deduplication took: %.2f seconds", elapsedSeconds));
    }

    /**
     * Converts Solr documents into valid {@link NewsItem}s grouped by host.
     * <p>
     * A document whose "Url" value is already in {@code set2} is skipped; otherwise the URL is added
     * to {@code set2}, the item is built, the categories in {@code set} (if any) are added to it, and
     * it is kept only when {@code isValid()} is true. Within a host, items keep first-seen order and
     * duplicates (per {@code NewsItem} equality) are collapsed.
     *
     * @param solrDocumentList documents to convert, in order
     * @param set              categories to add to every item; may be empty
     * @param set2             URLs already processed; updated in place
     * @return a new map from host to the list of valid items for that host
     */
    private static HashMap<String, List<NewsItem>> buildLastestNewsItemList(SolrDocumentList solrDocumentList, Set<String> set, Set<String> set2) {
        final long startedAt = System.currentTimeMillis();
        log.info("Start Article creation");

        Map<String, Set<NewsItem>> uniqueByHost = new HashMap<>();
        for (SolrDocument document : solrDocumentList) {
            String url = document.getFieldValue("Url").toString();
            // add() returns false when the URL was already recorded.
            if (!set2.add(url)) {
                log.info("Skip alread processed url: {}", url);
                continue;
            }

            NewsItem item = new NewsItemBuilder(document).get();
            if (!set.isEmpty()) {
                item.addCategories(set);
            }
            if (!item.isValid()) {
                log.info("Discard invalid url: {}", url);
                continue;
            }
            uniqueByHost.computeIfAbsent(item.getHost(), host -> new LinkedHashSet<>()).add(item);
        }

        HashMap<String, List<NewsItem>> itemsByHost = new HashMap<>();
        for (Map.Entry<String, Set<NewsItem>> hostEntry : uniqueByHost.entrySet()) {
            itemsByHost.put(hostEntry.getKey(), new ArrayList<>(hostEntry.getValue()));
        }

        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("Article creation took: %.2f seconds", elapsedSeconds));
        return itemsByHost;
    }

    /**
     * Converts Solr documents into a list of valid {@link NewsItem}s.
     * <p>
     * A document whose "Url" value is already in {@code set2} is skipped; otherwise the URL is added
     * to {@code set2}, the item is built, the categories in {@code set} (if any) are added to it, and
     * it is kept only when {@code isValid()} is true. The result keeps first-seen order and holds
     * no duplicates (per {@code NewsItem} equality).
     *
     * @param solrDocumentList documents to convert, in order
     * @param set              categories to add to every item; may be empty
     * @param set2             URLs already processed; updated in place
     * @return a new mutable list of the valid items
     */
    private static List<NewsItem> buildNewsItemList(SolrDocumentList solrDocumentList, Set<String> set, Set<String> set2) {
        final long startedAt = System.currentTimeMillis();
        log.info("Start Article creation");

        Set<NewsItem> uniqueItems = new LinkedHashSet<>();
        for (SolrDocument document : solrDocumentList) {
            String url = document.getFieldValue("Url").toString();
            // add() returns false when the URL was already recorded.
            if (!set2.add(url)) {
                log.info("Skip alread processed url: {}", url);
                continue;
            }

            NewsItem item = new NewsItemBuilder(document).get();
            if (!set.isEmpty()) {
                item.addCategories(set);
            }
            if (item.isValid()) {
                uniqueItems.add(item);
            } else {
                log.info("Discard invalid url: {}", url);
            }
        }

        List<NewsItem> items = new ArrayList<>(uniqueItems);
        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("Article creation took: %.2f seconds", elapsedSeconds));
        return items;
    }

    /**
     * Builds a TF-IDF corpus from the text of every item whose host contains ".pt".
     *
     * @param list items to feed into the corpus, in order
     * @return a new {@link TfIdfDistance} trained on the matching items
     */
    private static TfIdfDistance buildTfIdfCorpus(List<NewsItem> list) {
        final long startedAt = System.currentTimeMillis();
        log.info("Start BuildTfIdfCorpus operation");

        final TfIdfDistance corpus = new TfIdfDistance(new IndoEuropeanTokenizerFactory());
        list.stream()
                .filter(item -> item.getHost().contains(".pt"))
                .forEach(item -> corpus.handle(item.getText()));

        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("BuildTfIdfCorpus operation took: %.2f seconds", elapsedSeconds));
        return corpus;
    }

    /**
     * Calls {@code NaiveClassifier.assignSections(item, z)} for every item, in list order.
     *
     * @param list items to classify
     * @param z    flag forwarded unchanged to the classifier
     */
    private static void classify(List<NewsItem> list, boolean z) {
        for (NewsItem item : list) {
            NaiveClassifier.assignSections(item, z);
        }
    }

    /**
     * Delegates to {@code Keywords.extract} and logs how long the call took.
     *
     * @param tfIdfDistance corpus forwarded to the extractor
     * @param list          items forwarded to the extractor
     * @param i             numeric argument forwarded unchanged to the extractor
     */
    private static void extractKeywords(TfIdfDistance tfIdfDistance, List<NewsItem> list, int i) {
        final long startedAt = System.currentTimeMillis();
        log.info("Start ExtractKeywords operation");

        Keywords.extract(tfIdfDistance, list, i);

        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("ExtractKeywords operation took: %.2f seconds", elapsedSeconds));
    }

    /**
     * Fetches the editorial documents: active Backoffice entries at Position 1 that have a slug,
     * grouped by ItemPosition (ascending), each group sorted by ChangedDate descending.
     * <p>
     * Only the first document of each group is considered, and it is kept only when it has at least
     * one "Images" value or a non-null "HighlightImages" value.
     *
     * @param i number of rows requested from Solr
     * @return the selected documents, in group order
     */
    private static SolrDocumentList fetchEditorial(int i) {
        final long startedAt = System.currentTimeMillis();
        log.info("fetchEditorial from Search API");

        final SolrQuery query = new SolrQuery("*:*");
        query.setFields(Solr.FIELDS);
        final String[] filters = {
            "Slug:[* TO *]",
            "SourceType:Backoffice",
            "Position:1",
            "IsActive:true",
        };
        for (String filter : filters) {
            query.addFilterQuery(filter);
        }
        query.setRows(i);
        query.setSort(SolrQuery.SortClause.asc("ItemPosition"));
        query.set("group", true);
        query.set("group.field", "ItemPosition");
        query.set("group.sort", "ChangedDate desc");

        final List<GroupCommand> commands = NewsSolrSearch.query(query).getGroupResponse().getValues();
        final SolrDocumentList selected = new SolrDocumentList();
        for (GroupCommand command : commands) {
            for (Group group : command.getValues()) {
                SolrDocument top = group.getResult().get(0);
                Collection<?> images = top.getFieldValues("Images");
                boolean hasImages = images != null && !images.isEmpty();
                if (hasImages || top.getFieldValue("HighlightImages") != null) {
                    selected.add(top);
                }
            }
        }

        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("fetchEditorial took: %.2f seconds, %s items", elapsedSeconds, selected.size()));
        return selected;
    }

    /**
     * Fetches headline documents: non-Backoffice entries at Position 1 with a slug, published in
     * the last 128 hours, outside the "Outros Mundos" category and from a fixed list of hostnames.
     * Results are grouped by Hostname and sorted by PublishDate descending; the first document of
     * each group is returned.
     *
     * @param i number of rows requested from Solr
     * @return the first document of every host group, in group order
     */
    private static SolrDocumentList fetchHeadlines(int i) {
        final long startedAt = System.currentTimeMillis();
        log.info("fetchHeadlines from Search API");

        final String allowedHosts =
                "Hostname:economico.sapo.pt OR Hostname:tek.sapo.pt OR Hostname:lusa.sapo.pt"
                + " OR Hostname:www.theguardian.com OR Hostname:publico.pt OR Hostname:www.publico.pt"
                + " OR Hostname:mag.sapo.pt OR Hostname:www.rtp.pt OR Hostname:rr.sapo.pt"
                + " OR Hostname:www.bbc.com OR Hostname:www.cnn.com OR Hostname:edition.cnn.com"
                + " OR Hostname:www.ft.com OR Hostname:abola.pt OR Hostname:www.abola.pt"
                + " OR Hostname:desporto.sapo.pt OR Hostname:elpais.com";

        final SolrQuery query = new SolrQuery("*:*");
        query.setFields(Solr.FIELDS);
        final String[] filters = {
            "Slug:[* TO *]",
            "PublishDate:[NOW/HOUR-128HOUR TO NOW]",
            "Position:1",
            "-SourceType:Backoffice",
            "-Categories:\"Outros Mundos\"",
            allowedHosts,
        };
        for (String filter : filters) {
            query.addFilterQuery(filter);
        }
        query.set("group", true);
        query.set("group.field", "Hostname");
        query.setRows(i);
        query.setSort(SolrQuery.SortClause.desc("PublishDate"));

        final List<GroupCommand> commands = NewsSolrSearch.query(query).getGroupResponse().getValues();
        final SolrDocumentList headlines = new SolrDocumentList();
        for (GroupCommand command : commands) {
            for (Group group : command.getValues()) {
                headlines.add(group.getResult().get(0));
            }
        }

        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("fetchHeadlines took: %.2f seconds, %s items", elapsedSeconds, headlines.size()));
        return headlines;
    }

    /**
     * Fetches the newest non-Backoffice documents with a slug published in the last 24 hours,
     * excluding a fixed list of hostnames (plus two blog hosts when
     * {@code DbBuilder.environment} is "production"), sorted by PublishDate descending.
     * <p>
     * The log messages now name this method; they previously said "fetchHeadlines".
     *
     * @param i number of rows requested from Solr
     * @return the matching documents
     */
    private static SolrDocumentList fetchLast24News(int i) {
        final long startedAt = System.currentTimeMillis();
        log.info("fetchLast24News from Search API");

        final SolrQuery query = new SolrQuery("*:*");
        query.setFields(Solr.FIELDS);
        query.addFilterQuery("Slug:[* TO *]");
        query.addFilterQuery("PublishDate:[NOW/HOUR-24HOUR TO NOW]");
        query.addFilterQuery("-SourceType:Backoffice");
        if (DbBuilder.environment.equals("production")) {
            query.addFilterQuery("-Hostname:sapo24.blogs.sapo.pt");
            query.addFilterQuery("-Hostname:comboios150.blogs.sapo.pt");
        }
        final String[] excludedHosts = {
            "sicnoticias.sapo.pt",
            "visao.sapo.pt",
            "expresso.sapo.pt",
            "www.jornaldenegocios.pt",
            "www.cmjornal.xl.pt",
            "www.record.xl.pt",
            "cmtv.sapo.pt",
            "www.ojogo.pt",
            "www.tsf.pt",
            "www.jn.pt",
            "www.dn.pt",
            "observador.pt",
            "www.ionline.pt",
            "www.sol.pt",
            "www.tvi24.iol.pt",
        };
        for (String host : excludedHosts) {
            query.addFilterQuery("-Hostname:" + host);
        }
        query.setRows(i);
        query.setSort(SolrQuery.SortClause.desc("PublishDate"));

        final SolrDocumentList results = NewsSolrSearch.query(query).getResults();
        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("fetchLast24News took: %.2f seconds, %s items", elapsedSeconds, results.size()));
        return results;
    }

    /**
     * Fetches the newest lusa.sapo.pt documents: non-Backoffice entries with a slug, published in
     * the last 24 hours and outside the "Outros Mundos" category, sorted by PublishDate descending.
     * <p>
     * The start log message now names this method; it previously said "fetchHeadlines".
     *
     * @param i number of rows requested from Solr
     * @return the matching documents
     */
    private static SolrDocumentList fetchLusaNews(int i) {
        final long startedAt = System.currentTimeMillis();
        log.info("fetchLusaNews from Search API");

        final SolrQuery query = new SolrQuery("*:*");
        query.setFields(Solr.FIELDS);
        query.addFilterQuery("Slug:[* TO *]");
        query.addFilterQuery("PublishDate:[NOW/HOUR-24HOUR TO NOW]");
        query.addFilterQuery("-SourceType:Backoffice");
        query.addFilterQuery("-Categories:\"Outros Mundos\"");
        query.setRows(i);
        query.setSort(SolrQuery.SortClause.desc("PublishDate"));
        query.addFilterQuery("Hostname:lusa.sapo.pt");

        final SolrDocumentList results = NewsSolrSearch.query(query).getResults();
        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("fetchLusaNews took: %.2f seconds, %s items", elapsedSeconds, results.size()));
        return results;
    }

    /**
     * Fetches the newest sapo24.blogs.sapo.pt documents: non-Backoffice entries with a slug and
     * outside the "Outros Mundos" category, sorted by PublishDate descending. No date window is applied.
     * <p>
     * The start log message now names this method (it previously said "fetchHeadlines"), and the
     * dead {@code setRows(6)} call, which was always overridden by {@code setRows(i)}, is gone.
     *
     * @param i number of rows requested from Solr
     * @return the matching documents
     */
    private static SolrDocumentList fetchSapo24BlogNews(int i) {
        final long startedAt = System.currentTimeMillis();
        log.info("fetchSapo24BlogNews from Search API");

        final SolrQuery query = new SolrQuery("*:*");
        query.setFields(Solr.FIELDS);
        query.addFilterQuery("Slug:[* TO *]");
        query.addFilterQuery("-SourceType:Backoffice");
        query.addFilterQuery("-Categories:\"Outros Mundos\"");
        query.setRows(i);
        query.setSort(SolrQuery.SortClause.desc("PublishDate"));
        query.addFilterQuery("Hostname:sapo24.blogs.sapo.pt");

        final SolrDocumentList results = NewsSolrSearch.query(query).getResults();
        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("fetchSapo24BlogNews took: %.2f seconds, %s items", elapsedSeconds, results.size()));
        return results;
    }

    /**
     * Fetches one page of documents used for classifier training: hostnames matching
     * {@code *.pt*}, with a slug, published in the last 144 hours and outside the "Outros Mundos"
     * category, sorted by PublishDate descending.
     *
     * @param i  number of rows requested from Solr
     * @param i2 offset of the first row
     * @return the matching page of documents
     * @throws SolrServerException declared for callers; raised by the search layer on failure
     */
    private static SolrDocumentList fetchClassifierNews(int i, int i2) throws SolrServerException {
        final long startedAt = System.currentTimeMillis();
        log.info("fetchClassifierNews from Search API");

        final SolrQuery query = new SolrQuery("Hostname:*.pt*");
        query.setFields(Solr.FIELDS);
        final String[] filters = {
            "Slug:[* TO *]",
            "PublishDate:[NOW/HOUR-144HOUR TO NOW]",
            "-Categories:\"Outros Mundos\"",
        };
        for (String filter : filters) {
            query.addFilterQuery(filter);
        }
        query.setStart(i2);
        query.setRows(i);
        query.setSort(SolrQuery.SortClause.desc("PublishDate"));

        final SolrDocumentList page = NewsSolrSearch.query(query).getResults();
        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("fetchClassifierNews took: %.2f seconds, %s items", elapsedSeconds, page.size()));
        return page;
    }

    /**
     * Fetches the main news documents: hostnames matching {@code *.pt*}, with a slug, published in
     * the last 48 hours, excluding a fixed list of hostnames (plus two blog hosts when
     * {@code DbBuilder.environment} is "production") and excluding opinion-like categories and tags.
     * Sorted by PublishDate descending.
     *
     * @param i number of rows requested from Solr
     * @return the matching documents
     * @throws SolrServerException declared for callers; raised by the search layer on failure
     */
    private static SolrDocumentList fetchMainNews(int i) throws SolrServerException {
        final long startedAt = System.currentTimeMillis();
        log.info("fetchMainNews from Search API");

        final SolrQuery query = new SolrQuery("*:*");
        query.setFields(Solr.FIELDS);
        query.addFilterQuery("Slug:[* TO *]");
        query.addFilterQuery("Hostname:*.pt*");
        if (DbBuilder.environment.equals("production")) {
            query.addFilterQuery("-Hostname:sapo24.blogs.sapo.pt");
            query.addFilterQuery("-Hostname:comboios150.blogs.sapo.pt");
        }

        final String[] excludedHosts = {
            "sicnoticias.sapo.pt",
            "visao.sapo.pt",
            "expresso.sapo.pt",
            "www.jornaldenegocios.pt",
            "www.cmjornal.xl.pt",
            "www.record.xl.pt",
            "cmtv.sapo.pt",
            "www.ojogo.pt",
            "www.tsf.pt",
            "www.jn.pt",
            "www.dn.pt",
            "observador.pt",
            "www.tvi24.iol.pt",
            "www.ionline.pt",
            "www.sol.pt",
        };
        for (String host : excludedHosts) {
            query.addFilterQuery("-Hostname:" + host);
        }

        query.addFilterQuery("PublishDate:[NOW/HOUR-48HOUR TO NOW]");

        // Opinion-like content is fetched separately, so it is filtered out here.
        final String[] excludedCategories = {"opinião", "Opinião", "Editorial", "Comentário", "Crónica"};
        for (String category : excludedCategories) {
            query.addFilterQuery("-Categories:" + category);
        }
        final String[] excludedTags = {"opinião", "crónica", "editorial", "comentário"};
        for (String tag : excludedTags) {
            query.addFilterQuery("-Tags:" + tag);
        }

        query.setRows(i);
        query.setSort(SolrQuery.SortClause.desc("PublishDate"));

        final SolrDocumentList results = NewsSolrSearch.query(query).getResults();
        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("fetchMainNews took: %.2f seconds, %s items", elapsedSeconds, results.size()));
        return results;
    }

    /**
     * Fetches technology documents: category or tag "tecnologia", or hostname tek.sapo.pt, with a
     * slug and published in the last 7 days, sorted by PublishDate descending.
     * <p>
     * The log messages now name this method; they previously said "fetchOpinionNews".
     *
     * @param i number of rows requested from Solr
     * @return the matching documents
     * @throws SolrServerException declared for callers; raised by the search layer on failure
     */
    private static SolrDocumentList fetchTechNews(int i) throws SolrServerException {
        final long startedAt = System.currentTimeMillis();
        log.info("fetchTechNews from Search API");

        final SolrQuery query = new SolrQuery("Categories:tecnologia OR Tags:tecnologia OR Hostname:tek.sapo.pt");
        query.setFields(Solr.FIELDS);
        query.addFilterQuery("Slug:[* TO *]");
        query.addFilterQuery("PublishDate:[NOW/DAY-7DAY TO NOW]");
        query.setRows(i);
        query.setSort(SolrQuery.SortClause.desc("PublishDate"));

        final SolrDocumentList results = NewsSolrSearch.query(query).getResults();
        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("fetchTechNews took: %.2f seconds, %s items", elapsedSeconds, results.size()));
        return results;
    }

    /**
     * Fetches opinion-like documents: those whose category or tag marks them as opinion, editorial,
     * commentary or chronicle, with a slug, published in the last 7 days, excluding a fixed list of
     * hostnames (plus two blog hosts when {@code DbBuilder.environment} is "production").
     * Sorted by PublishDate descending.
     * <p>
     * The query previously contained {@code Tags:editORial} (a mangled "editorial", which does not
     * match the lower-case tag that {@code fetchMainNews} excludes) and a duplicated
     * {@code Tags:opinião} clause; both are corrected here.
     *
     * @param i number of rows requested from Solr
     * @return the matching documents
     * @throws SolrServerException declared for callers; raised by the search layer on failure
     */
    private static SolrDocumentList fetchOpinionNews(int i) throws SolrServerException {
        final long startedAt = System.currentTimeMillis();
        log.info("fetchOpinionNews from Search API");

        final SolrQuery query = new SolrQuery(
                "Categories:opinião OR Categories:Opinião OR Categories:Editorial OR Categories:Comentário"
                + " OR Categories:Crónica OR Tags:opinião OR Tags:editorial OR Tags:comentário OR Tags:crónica");
        query.setFields(Solr.FIELDS);
        if (DbBuilder.environment.equals("production")) {
            query.addFilterQuery("-Hostname:sapo24.blogs.sapo.pt");
            query.addFilterQuery("-Hostname:comboios150.blogs.sapo.pt");
        }

        final String[] excludedHosts = {
            "sicnoticias.sapo.pt",
            "visao.sapo.pt",
            "expresso.sapo.pt",
            "www.jornaldenegocios.pt",
            "www.cmjornal.xl.pt",
            "www.record.xl.pt",
            "cmtv.sapo.pt",
            "www.ojogo.pt",
            "www.tsf.pt",
            "www.jn.pt",
            "www.dn.pt",
            "observador.pt",
            "www.tvi24.iol.pt",
            "www.ionline.pt",
            "www.sol.pt",
        };
        for (String host : excludedHosts) {
            query.addFilterQuery("-Hostname:" + host);
        }

        query.addFilterQuery("Slug:[* TO *]");
        query.addFilterQuery("PublishDate:[NOW/DAY-7DAY TO NOW]");
        query.setRows(i);
        query.setSort(SolrQuery.SortClause.desc("PublishDate"));

        final SolrDocumentList results = NewsSolrSearch.query(query).getResults();
        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("fetchOpinionNews took: %.2f seconds, %s items", elapsedSeconds, results.size()));
        return results;
    }

    /**
     * Assigns a score to every item in the list.
     * <p>
     * The score is the cube root of the product of three factors: the item's share of all social
     * actions in the list, its content count divided by the list size, and an exponential decay
     * {@code e^(-ageHours / 2)} based on the distance between now and the publication date.
     * <p>
     * The summing loop was decompiler residue that created a fresh iterator on every check and
     * referenced an undefined variable; it is reconstructed as a plain pass over the list. When
     * no item has any social action the social share is taken as 0 rather than 0/0, so scores are
     * 0 rather than NaN.
     *
     * @param list items to score; each item's score is set in place
     */
    private static void getStats(List<NewsItem> list) {
        final double itemCount = list.size();
        final double now = System.currentTimeMillis();

        double totalSocialActions = 0.0d;
        for (NewsItem item : list) {
            totalSocialActions += item.getSocialActionCount();
        }

        for (NewsItem item : list) {
            double socialShare = totalSocialActions > 0.0d
                    ? item.getSocialActionCount() / totalSocialActions
                    : 0.0d;
            double contentShare = item.getContentCount() / itemCount;
            double ageHours = Math.abs(now - item.getPubDate().getTime()) / 3600000.0d;
            double freshness = Math.pow(Math.E, (-1.0d * ageHours) / 2.0d);
            item.setScore(Math.pow(socialShare * contentShare * freshness, 1.0d / 3.0d));
        }
    }

    /**
     * Links the items of one ordered list to each other and registers them by URL.
     * <p>
     * For every item except the last whose section is one of the known sections and whose
     * next-by-category link is still blank: the slug of the next later item with the same section
     * becomes its next-by-category link (when one is found), the slug of the immediately following
     * item becomes its next-by-position link, and the hosts involved are added to {@code collection}.
     * Afterwards every item gets its list index as position and is stored in {@code map} under its URL.
     *
     * @param list       items in display order; modified in place
     * @param collection receives the hosts of linked items
     * @param map        receives every item keyed by its URL
     */
    private static void linkArticles(List<NewsItem> list, Collection<String> collection, Map<String, NewsItem> map) {
        final long startedAt = System.currentTimeMillis();
        log.info("Start LinkArticles operation");

        final String[] linkableSections = {"vida", "sociedade", "tecnologia", "economia", "desporto", "opiniao", "actualidade"};
        final int lastIndex = list.size() - 1;

        for (String section : linkableSections) {
            // The last item has no successor, so it is never a link source.
            for (int pos = 0; pos < lastIndex; pos++) {
                NewsItem current = list.get(pos);
                if (!section.equals(current.getSection()) || !StringUtils.isBlank(current.getNextByCategory())) {
                    continue;
                }

                // NOTE(review): the search stops before the last item, so the final element can never
                // become a next-by-category target — possibly an off-by-one; preserved as-is.
                for (int probe = pos + 1; probe < lastIndex; probe++) {
                    NewsItem candidate = list.get(probe);
                    if (current.getSection().equals(candidate.getSection())) {
                        current.setNextByCategory(candidate.getSlug());
                        collection.add(candidate.getHost());
                        break;
                    }
                }

                current.setNextByPosition(list.get(pos + 1).getSlug());
                collection.add(current.getHost());
            }
        }

        int position = 0;
        for (NewsItem item : list) {
            item.setPosition(position);
            map.put(item.getUrl(), item);
            position++;
        }

        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("LinkArticles operation took: %.2f seconds", elapsedSeconds));
    }

    /**
     * Chains the articles of {@code list} behind the last article of {@code list2}.
     *
     * <p>The last element of {@code list2} is pointed at the first element of {@code list}.
     * Then every element of {@code list} except the last is replaced by the instance
     * registered in {@code map} under the same URL (when there is one) and is given the
     * slug of its successor as both next-by-latest-host and next-by-position.
     *
     * @param list       articles to chain; entries may be replaced in place
     * @param list2      articles whose last element precedes {@code list}; must not be empty
     * @param collection not used by this overload
     * @param map        already known articles keyed by URL
     */
    private static void linkArticles(List<NewsItem> list, List<NewsItem> list2, Collection<String> collection, Map<String, NewsItem> map) {
        final NewsItem tail = list2.get(list2.size() - 1);
        if (tail != null && !list.isEmpty()) {
            final NewsItem head = list.get(0);
            if (head != null) {
                tail.setNextByPosition(head.getSlug());
            }
        }
        final int count = list.size();
        // The loop bound already excludes lists with fewer than two elements.
        for (int idx = 0; idx + 1 < count; idx++) {
            NewsItem current = list.get(idx);
            final String url = current.getUrl();
            if (map.containsKey(url)) {
                // Prefer the instance that is already indexed so links land on the shared object.
                current = map.get(url);
                list.set(idx, current);
            }
            final String successorSlug = list.get(idx + 1).getSlug();
            current.setNextByLatestHost(successorSlug);
            current.setNextByPosition(successorSlug);
        }
    }

    /**
     * Links the latest articles of every host to each other and to the main article list.
     *
     * <p>First pass: every host key of {@code hashMap} is added to {@code collection}. For
     * hosts with at least two articles, each article but the last is replaced by the
     * instance registered in {@code map} under the same URL (when present), receives the
     * slug of its successor as next-by-latest-host, and the rebuilt list replaces the map
     * value.
     *
     * <p>Second pass: for every host in {@code collection}, the last latest article of that
     * host is pointed at the first article of the same host found in {@code list}, and each
     * such article in {@code list} is pointed at the next one of the same host.
     *
     * @param hashMap    latest articles per host; values may be replaced
     * @param list       main article list
     * @param collection hosts to process; host keys of {@code hashMap} are added to it
     * @param map        already known articles keyed by URL
     */
    private static void linkArticles(HashMap<String, List<NewsItem>> hashMap, List<NewsItem> list, Collection<String> collection, Map<String, NewsItem> map) {
        long startedAt = System.currentTimeMillis();
        log.info("Start LinkLatestArticles operation");
        for (Map.Entry<String, List<NewsItem>> entry : hashMap.entrySet()) {
            List<NewsItem> hostItems = entry.getValue();
            collection.add(entry.getKey());
            if (hostItems.size() < 2) {
                continue;
            }
            List<NewsItem> linked = new ArrayList<>(hostItems.size());
            for (int i = 0; i < hostItems.size() - 1; i++) {
                NewsItem item = hostItems.get(i);
                if (map.containsKey(item.getUrl())) {
                    item = map.get(item.getUrl());
                }
                item.setNextByLatestHost(hostItems.get(i + 1).getSlug());
                linked.add(item);
            }
            linked.add(hostItems.get(hostItems.size() - 1));
            // setValue updates the entry in place and is safe while iterating the entry set.
            entry.setValue(linked);
        }
        for (String host : collection) {
            // Last latest article of this host that still has to be pointed at the main list.
            NewsItem pendingTail = null;
            List<NewsItem> latest = hashMap.get(host);
            if (latest != null && !latest.isEmpty()) {
                pendingTail = latest.get(latest.size() - 1);
            }
            for (int i = 0; i < list.size() - 1; i++) {
                NewsItem current = list.get(i);
                if (!current.getHost().equalsIgnoreCase(host)) {
                    continue;
                }
                if (pendingTail != null) {
                    pendingTail.setNextByHost(current.getSlug());
                    pendingTail = null;
                }
                // Bounded scan: the previous unbounded loop never terminated when no later
                // article of the same host existed.
                for (int j = i + 1; j < list.size() - 1; j++) {
                    NewsItem candidate = list.get(j);
                    if (candidate.getHost().equalsIgnoreCase(host)) {
                        current.setNextByHost(candidate.getSlug());
                        break;
                    }
                }
            }
        }
        log.info(String.format("LinkLatestArticles operation took: %.2f seconds", Double.valueOf((System.currentTimeMillis() - startedAt) / 1000.0d)));
    }

    /**
     * Fetches the social share counts of every article in parallel and stores them on the article.
     *
     * <p>One task per article is submitted to a pool of five threads; the method blocks until
     * all tasks have finished or the calling thread is interrupted (the interrupt flag is
     * restored in that case). The pool is shut down before returning so its worker threads
     * do not outlive the call.
     *
     * @param list articles whose share counts are to be populated
     */
    private static void populateSharedCount(List<NewsItem> list) {
        long startedAt = System.currentTimeMillis();
        log.info("Start SocialNetworkShareCount operation");
        ThreadPoolExecutor pool = CustomExecutors.newThreadPool(5, "exec");
        try {
            final CountDownLatch pending = new CountDownLatch(list.size());
            for (final NewsItem newsItem : list) {
                pool.execute(() -> {
                    try {
                        newsItem.setSocialShares(SocialNetworkShareCount.getShareCounts(newsItem.getUrl()));
                    } finally {
                        // Count down even on failure so the waiting thread is always released.
                        pending.countDown();
                    }
                });
            }
            try {
                pending.await();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        } finally {
            // Already submitted tasks still run to completion; idle workers are released.
            pool.shutdown();
        }
        log.info(String.format("SharedCount operation took: %.2f seconds", Double.valueOf((System.currentTimeMillis() - startedAt) / 1000.0d)));
    }

    /**
     * Swaps every article that has similar items with the first of those similar items.
     *
     * <p>The first similar item takes the article's slot in {@code list}, and the article
     * takes the first slot of its own similar-items list. Each swap is logged, followed by
     * the total number of swaps and the elapsed time.
     *
     * @param list articles to process; entries are replaced in place
     */
    private static void promoteFullContentArticles(List<NewsItem> list) {
        final long startedAt = System.currentTimeMillis();
        log.info("Start PromoteFullContentArticles");
        int swapped = 0;
        for (int idx = 0; idx < list.size(); idx++) {
            final NewsItem demoted = list.get(idx);
            // Kept as a raw list: the declared element type of getSimilarItems() is not visible here.
            final List similar = demoted.getSimilarItems();
            if (similar.size() <= 0) {
                continue;
            }
            final NewsItem promoted = (NewsItem) similar.get(0);
            similar.set(0, demoted);
            list.set(idx, promoted);
            log.info("Swap '{}' with '{}'", demoted.getUrl(), promoted.getUrl());
            swapped++;
        }
        log.info("Total swapped news: {}", Integer.valueOf(swapped));
        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("PromoteFullContentArticles took: %.2f seconds", elapsedSeconds));
    }

    /**
     * Sorts {@code list} in place by score, highest score first.
     *
     * <p>Articles whose scores are neither greater nor smaller than each other compare as equal.
     *
     * @param list articles to sort
     */
    private static void rankArticles(List<NewsItem> list) {
        list.sort((left, right) -> {
            final double leftScore = left.getScore();
            final double rightScore = right.getScore();
            if (leftScore > rightScore) {
                return -1;
            }
            if (leftScore < rightScore) {
                return 1;
            }
            return 0;
        });
    }

    /**
     * Sorts {@code list} in place by the headline order of each article's host, lowest order first.
     *
     * @param list articles to sort
     */
    private static void rankHeadLinesByHost(List<NewsItem> list) {
        list.sort((left, right) -> {
            if (Host.getHeadlineOrder(left.getHost()) > Host.getHeadlineOrder(right.getHost())) {
                return 1;
            }
            if (Host.getHeadlineOrder(left.getHost()) < Host.getHeadlineOrder(right.getHost())) {
                return -1;
            }
            return 0;
        });
    }

    /**
     * Attaches related videos to every article of {@code list}.
     *
     * <p>Keywords are extracted first. For each article the related videos are fetched; when
     * the article has a video of its own, related videos whose URL (with {@code "/mov/1"}
     * removed) equals the article's video URL are dropped. The result is stored on the
     * article, which is then flagged as video-pre-processed. A failure for one article is
     * logged and does not stop the processing of the others.
     *
     * @param tfIdfDistance distance model handed to the keyword extraction
     * @param list          articles to enrich
     */
    private static void similiarVideos(TfIdfDistance tfIdfDistance, List<NewsItem> list) {
        final long startedAt = System.currentTimeMillis();
        log.info("Start SimiliarVideos operation");
        extractKeywords(tfIdfDistance, list, 20);
        int totalVideos = 0;
        int articlesWithVideos = 0;
        for (NewsItem newsItem : list) {
            try {
                List<RelatedVideo> related = Videos.fetchRelatedVideos(newsItem);
                if (newsItem.getHasVideos()) {
                    final String ownVideoUrl = newsItem.getVideoUrl();
                    final List<RelatedVideo> others = new ArrayList<>();
                    for (RelatedVideo candidate : related) {
                        final String candidateUrl = StringUtils.replace(candidate.getVideoUrl(), "/mov/1", "");
                        if (ownVideoUrl.equals(candidateUrl)) {
                            // Same video as the article's own one; not a "related" video.
                            continue;
                        }
                        others.add(candidate);
                    }
                    related = others;
                }
                newsItem.setRelatedVideos(related);
                newsItem.setVideoPreProcessed(true);
                final int found = related.size();
                totalVideos += found;
                if (found > 0) {
                    articlesWithVideos++;
                }
            } catch (Throwable th) {
                // Best effort: one failing article must not abort the whole batch.
                log.error("Could not fetch related videos for '{}', reason: {}", newsItem.getUrl(), ErrorAnalyser.findRootCause(th).getMessage());
            }
        }
        log.info(String.format("Total number of related videos: %d", totalVideos));
        log.info(String.format("Total number of articles with related videos: %d", articlesWithVideos));
        final double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info(String.format("SimiliarVideos operation took: %.2f seconds", elapsedSeconds));
    }
}
