/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.document;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.SortedSet;
import net.yacy.cora.document.WordCache;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.EnhancedTextProfileSignature;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.solr.Ranking;
import net.yacy.cora.language.synonyms.AutotaggingLibrary;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.document.DateDetection;
import net.yacy.document.Document;
import net.yacy.document.LibraryProvider;
import net.yacy.document.SentenceReader;
import net.yacy.document.Tokenizer;
import net.yacy.document.VocabularyScraper;
import net.yacy.document.WordTokenizer;
import net.yacy.document.language.Identificator;
import net.yacy.document.parser.html.ImageEntry;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.util.Bitfield;
import net.yacy.kelondro.util.SetTools;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;

public final class Condenser
extends Tokenizer {
    private long fuzzy_signature = 0L;
    private long exact_signature = 0L;
    private String fuzzy_signature_text = null;
    private final Identificator languageIdentificator;
    public LinkedHashSet<Date> dates_in_content;

    public Condenser(Document document, VocabularyScraper scraper, boolean indexText, boolean indexMedia, WordCache meaningLib, boolean doAutotagging, boolean findDatesInContent, int timezoneOffset) {
        super(document.dc_source(), indexText ? document.getTextString() : "", meaningLib, doAutotagging, scraper);
        String text;
        String initialThreadName = Thread.currentThread().getName();
        Thread.currentThread().setName("condenser-" + document.dc_identifier());
        this.dates_in_content = new LinkedHashSet();
        Classification.ContentDomain contentDomain = document.getContentDomain();
        if (contentDomain == Classification.ContentDomain.IMAGE || !document.getImages().isEmpty()) {
            this.RESULT_FLAGS.set(20, true);
        }
        if (contentDomain == Classification.ContentDomain.AUDIO || !document.getAudiolinks().isEmpty()) {
            this.RESULT_FLAGS.set(21, true);
        }
        if (contentDomain == Classification.ContentDomain.VIDEO || !document.getVideolinks().isEmpty()) {
            this.RESULT_FLAGS.set(22, true);
        }
        if (contentDomain == Classification.ContentDomain.APP || !document.getApplinks().isEmpty()) {
            this.RESULT_FLAGS.set(23, true);
        }
        if (document.lat() != 0.0 && document.lon() != 0.0) {
            this.RESULT_FLAGS.set(19, true);
        }
        this.languageIdentificator = new Identificator();
        this.insertTextToWords(new SentenceReader(document.dc_source().toTokens()), 0, 28, this.RESULT_FLAGS, false, meaningLib);
        if (indexText) {
            text = document.getTextString();
            if (findDatesInContent) {
                this.dates_in_content = DateDetection.parse(text, timezoneOffset);
            }
            this.insertTextToWords(new SentenceReader(document.dc_title()), 1, 25, this.RESULT_FLAGS, true, meaningLib);
            for (String description : document.dc_description()) {
                this.insertTextToWords(new SentenceReader(description), 3, 24, this.RESULT_FLAGS, true, meaningLib);
            }
            this.insertTextToWords(new SentenceReader(document.dc_creator()), 4, 26, this.RESULT_FLAGS, true, meaningLib);
            this.insertTextToWords(new SentenceReader(document.dc_publisher()), 5, 26, this.RESULT_FLAGS, true, meaningLib);
            this.insertTextToWords(new SentenceReader(document.dc_subject(' ')), 6, 24, this.RESULT_FLAGS, true, meaningLib);
            String[] titles = document.getSectionTitles();
            for (int i = 0; i < titles.length; ++i) {
                this.insertTextToWords(new SentenceReader(titles[i]), i + 10, 29, this.RESULT_FLAGS, true, meaningLib);
            }
        } else {
            this.RESULT_NUMB_WORDS = 0;
            this.RESULT_NUMB_SENTENCES = 0;
        }
        if (indexMedia) {
            for (Map.Entry<AnchorURL, String> entry2 : document.getAudiolinks().entrySet()) {
                this.insertTextToWords(new SentenceReader(entry2.getKey().toNormalform(true)), 99, 21, this.RESULT_FLAGS, false, meaningLib);
                this.insertTextToWords(new SentenceReader(entry2.getValue()), 99, 21, this.RESULT_FLAGS, true, meaningLib);
            }
            for (Map.Entry<AnchorURL, String> entry2 : document.getVideolinks().entrySet()) {
                this.insertTextToWords(new SentenceReader(entry2.getKey().toNormalform(true)), 99, 22, this.RESULT_FLAGS, false, meaningLib);
                this.insertTextToWords(new SentenceReader(entry2.getValue()), 99, 22, this.RESULT_FLAGS, true, meaningLib);
            }
            for (Map.Entry<AnchorURL, String> entry2 : document.getApplinks().entrySet()) {
                this.insertTextToWords(new SentenceReader(entry2.getKey().toNormalform(true)), 99, 23, this.RESULT_FLAGS, false, meaningLib);
                this.insertTextToWords(new SentenceReader(entry2.getValue()), 99, 23, this.RESULT_FLAGS, true, meaningLib);
            }
            for (ImageEntry ientry : document.getImages().values()) {
                DigestURL url = ientry.url();
                if (url == null) continue;
                this.insertTextToWords(new SentenceReader(url.toNormalform(true)), 99, 20, this.RESULT_FLAGS, false, meaningLib);
                this.insertTextToWords(new SentenceReader(ientry.alt()), 99, 20, this.RESULT_FLAGS, true, meaningLib);
            }
            for (Map.Entry we : this.words.entrySet()) {
                Word wprop = (Word)we.getValue();
                if (wprop.flags != null) continue;
                wprop.flags = this.RESULT_FLAGS.clone();
                this.words.put(((String)we.getKey()).toLowerCase(), wprop);
            }
        }
        if (doAutotagging) {
            this.extractAutoTagsFromLinkedDataTypes(document.getLinkedDataTypes(), LibraryProvider.autotagging);
        }
        if (!this.tags.isEmpty()) {
            document.addMetatags(this.tags);
        }
        text = document.getTextString();
        this.languageIdentificator.add(text);
        EnhancedTextProfileSignature fuzzySignatureFactory = new EnhancedTextProfileSignature();
        HashMap<String, String> sp = new HashMap<String, String>();
        sp.put("quantRate", Float.toString(Ranking.getQuantRate()));
        sp.put("minTokenLen", Integer.toString(Ranking.getMinTokenLen()));
        fuzzySignatureFactory.init((SolrParams)new MapSolrParams(sp));
        fuzzySignatureFactory.add(text);
        this.fuzzy_signature = EnhancedTextProfileSignature.getSignatureLong(fuzzySignatureFactory);
        this.fuzzy_signature_text = fuzzySignatureFactory.getSignatureText().toString();
        this.exact_signature = EnhancedTextProfileSignature.getSignatureLong(text);
        Thread.currentThread().setName(initialThreadName);
    }

    protected void extractAutoTagsFromLinkedDataTypes(Set<DigestURL> linkedDataTypes, AutotaggingLibrary tagLibrary) {
        if (linkedDataTypes == null || tagLibrary == null) {
            return;
        }
        for (DigestURL linkedDataType : linkedDataTypes) {
            Set<Tagging.Metatag> tags = tagLibrary.getTagsFromTermURL(linkedDataType);
            for (Tagging.Metatag tag : tags) {
                String navigatorName = tag.getVocabularyName();
                HashSet<Tagging.Metatag> tagset = (HashSet<Tagging.Metatag>)this.tags.get(navigatorName);
                if (tagset == null) {
                    tagset = new HashSet<Tagging.Metatag>();
                    this.tags.put(navigatorName, tagset);
                }
                tagset.add(tag);
            }
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    private void insertTextToWords(SentenceReader text, int phrase, int flagpos, Bitfield flagstemplate, boolean useForLanguageIdentification, WordCache meaningLib) {
        if (text == null) {
            return;
        }
        WordTokenizer wordenum = new WordTokenizer(text, meaningLib);
        try {
            int pip = 0;
            while (wordenum.hasMoreElements()) {
                String word = wordenum.nextElement().toString();
                if (useForLanguageIdentification) {
                    this.languageIdentificator.add(word);
                }
                if (word.length() < 2) continue;
                Word wprop = (Word)this.words.get(word = word.toLowerCase(Locale.ENGLISH));
                if (wprop == null) {
                    wprop = new Word(0, pip, phrase);
                }
                if (wprop.flags == null) {
                    wprop.flags = flagstemplate.clone();
                }
                wprop.flags.set(flagpos, true);
                this.words.put(word, wprop);
                ++pip;
                ++this.RESULT_NUMB_WORDS;
            }
        }
        finally {
            wordenum.close();
            wordenum = null;
        }
    }

    public int excludeWords(SortedSet<String> stopwords) {
        int oldsize = this.words.size();
        SetTools.excludeDestructive(this.words, stopwords);
        return oldsize - this.words.size();
    }

    public long fuzzySignature() {
        return this.fuzzy_signature;
    }

    public String fuzzySignatureText() {
        return this.fuzzy_signature_text;
    }

    public long exactSignature() {
        return this.exact_signature;
    }

    public String language() {
        return this.languageIdentificator.getLanguage();
    }

    public double languageProbability() {
        return this.languageIdentificator.getProbability();
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    public static void main(String[] args) {
        FileInputStream inStream = null;
        try {
            File f = new File(args[0]);
            Properties p = new Properties();
            inStream = new FileInputStream(f);
            p.load(inStream);
            StringBuilder sb = new StringBuilder();
            sb.append("{\n");
            for (int i = 0; i <= 15; ++i) {
                String[] l;
                sb.append('\"');
                String s = p.getProperty("keywords" + i);
                for (String element : l = CommonPattern.COMMA.split(s)) {
                    sb.append(ASCII.String(Word.word2hash(element)));
                }
                if (i >= 15) continue;
                sb.append(",\n");
            }
            sb.append("}\n");
            System.out.println(sb.toString());
        }
        catch (FileNotFoundException e) {
            ConcurrentLog.logException(e);
        }
        catch (IOException e) {
            ConcurrentLog.logException(e);
        }
        finally {
            if (inStream != null) {
                try {
                    inStream.close();
                }
                catch (IOException e) {
                    ConcurrentLog.logException(e);
                }
            }
        }
    }
}

