/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.search.query;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import net.yacy.cora.document.WordCache;
import net.yacy.cora.order.NaturalOrder;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.storage.HandleSet;
import net.yacy.document.parser.html.AbstractScraper;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.util.SetTools;
import net.yacy.search.index.Segment;
import net.yacy.search.query.QueryModifier;
import net.yacy.search.schema.CollectionSchema;

/**
 * Holds the parsed form of a search query: the phrases and single words that
 * must be present ("include") and those that must be absent ("exclude"),
 * together with lazily computed word-hash sets for both sides.
 *
 * <p>An instance is created either from a raw query string, which is
 * tokenized by {@link #parseQuery(String, Collection, Collection)}, or
 * directly from two pre-computed hash sets, in which case no strings or
 * words are known.</p>
 */
public class QueryGoal {
    private static final char SPACE = ' ';
    private static final char SQ = '\'';
    private static final char DQ = '"';
    /** Characters that are replaced by a space before the query is tokenized. */
    private static final String SEPS = ":;#*`!$%()=?^<>/&_";
    /** Include string that marks a "match everything" query. */
    private static final String CATCHALL_STRING = "yacyall";
    /** Solr query that matches every document. */
    private static final String MATCH_ALL_QUERY = "*:*";

    /** The query exactly as handed to the constructor; {@code null} for hash-based goals. */
    public String query_original;
    private HandleSet include_hashes;
    private HandleSet exclude_hashes;
    private final NormalizedWords include_words;
    private final NormalizedWords exclude_words;
    private final ArrayList<String> include_strings;
    private final ArrayList<String> exclude_strings;

    /**
     * Creates a goal from pre-computed word hashes. No original query, words
     * or strings are available on such an instance.
     *
     * @param include_hashes hashes of the words that must be present
     * @param exclude_hashes hashes of the words that must be absent
     */
    public QueryGoal(HandleSet include_hashes, HandleSet exclude_hashes) {
        this.query_original = null;
        this.include_words = new NormalizedWords();
        this.exclude_words = new NormalizedWords();
        this.include_strings = new ArrayList<String>();
        this.exclude_strings = new ArrayList<String>();
        this.include_hashes = include_hashes;
        this.exclude_hashes = exclude_hashes;
    }

    /**
     * Creates a goal by parsing a raw query string.
     *
     * <p>The string is stripped of tags, HTML-decoded, lower-cased and
     * trimmed; every character of {@link #SEPS} is then replaced by a space
     * (or dropped when it is the last character). The result is split into
     * include/exclude phrases, and each phrase is split again into single
     * words. The hash sets are left {@code null} and computed on demand.</p>
     *
     * @param query_words the raw query, must not be {@code null}
     */
    public QueryGoal(String query_words) {
        assert (query_words != null);
        this.query_original = query_words;
        this.include_words = new NormalizedWords();
        this.exclude_words = new NormalizedWords();
        this.include_strings = new ArrayList<String>();
        this.exclude_strings = new ArrayList<String>();

        // Locale.ENGLISH keeps lower-casing consistent with containsInclude(),
        // matches() and NormalizedWords regardless of the JVM default locale.
        String q = CharacterCoding.html2unicode(AbstractScraper.stripAllTags(query_words.toCharArray()))
                .toLowerCase(Locale.ENGLISH)
                .trim();
        for (int i = 0; i < SEPS.length(); i++) {
            final char sep = SEPS.charAt(i);
            int c;
            while ((c = q.indexOf(sep)) >= 0) {
                q = q.substring(0, c) + (c + 1 < q.length() ? " " + q.substring(c + 1) : "");
            }
        }

        // first pass: phrases (quoted or space-delimited), split by sign
        parseQuery(q, this.include_strings, this.exclude_strings);
        // second pass: break every phrase into words; the sign inside a phrase is
        // irrelevant here, so both targets are the same collection
        for (String s : this.include_strings) {
            parseQuery(s, this.include_words, this.include_words);
        }
        for (String s : this.exclude_strings) {
            parseQuery(s, this.exclude_words, this.exclude_words);
        }
        WordCache.learn(this.include_words);
        WordCache.learn(this.exclude_words);
        this.include_hashes = null;
        this.exclude_hashes = null;
    }

    /**
     * Tokenizes {@code s} and adds every new, non-empty token to one of the
     * two collections.
     *
     * <p>Tokens are separated by spaces. A leading {@code -} sends the token
     * to {@code exclude_string}; a leading {@code +} (or no sign) sends it to
     * {@code include_string}. A token that starts with a single or double
     * quote extends to the matching closing quote, or to the end of the input
     * when the quote is never closed.</p>
     *
     * <p>After tokenizing, if {@code include_string} holds both
     * one-character tokens and longer tokens, the one-character tokens are
     * removed.</p>
     *
     * @param s              the text to tokenize
     * @param include_string receives tokens without sign or with {@code +}
     * @param exclude_string receives tokens prefixed with {@code -}
     */
    private static void parseQuery(String s, Collection<String> include_string, Collection<String> exclude_string) {
        while (s.length() > 0) {
            // skip leading spaces
            int start = 0;
            while (start < s.length() && s.charAt(start) == SPACE) {
                start++;
            }
            s = s.substring(start);
            if (s.isEmpty()) {
                break; // not return: the pruning below must still run
            }

            // optional sign
            boolean inc = true;
            char first = s.charAt(0);
            if (first == '-' || first == '+') {
                inc = first == '+';
                s = s.substring(1);
            }
            if (s.isEmpty()) {
                break;
            }

            // optional opening quote selects the stop character
            char stop = SPACE;
            first = s.charAt(0);
            if (first == DQ || first == SQ) {
                stop = first;
                s = s.substring(1);
            }

            // The token runs from index 0 up to (excluding) the stop character.
            // The scan always starts at 0 of the already shortened string, and a
            // missing stop character means "until end of input".
            int end = s.indexOf(stop);
            if (end < 0) {
                end = s.length();
            }
            final String token = s.substring(0, end);
            // continue behind the stop character
            s = end + 1 < s.length() ? s.substring(end + 1) : "";

            if (token.isEmpty()) {
                continue;
            }
            final Collection<String> target = inc ? include_string : exclude_string;
            if (!target.contains(token)) {
                target.add(token);
            }
        }

        // drop one-character include tokens when longer ones are present as well
        boolean contains_single = false;
        boolean contains_multiple = false;
        for (String token : include_string) {
            if (token.length() == 1) {
                contains_single = true;
            } else {
                contains_multiple = true;
            }
        }
        if (contains_single && contains_multiple) {
            Iterator<String> i = include_string.iterator();
            while (i.hasNext()) {
                if (i.next().length() == 1) {
                    i.remove();
                }
            }
        }
    }

    /**
     * Returns the original query string.
     *
     * @param encodeHTML when {@code true} the query is URL-encoded as UTF-8
     *                   (despite the parameter name this is
     *                   {@link URLEncoder} encoding, not HTML escaping)
     * @return the original query, optionally encoded, or {@code null} when
     *         this goal was not created from a query string
     */
    public String getQueryString(boolean encodeHTML) {
        if (this.query_original == null) {
            return null;
        }
        if (!encodeHTML) {
            return this.query_original;
        }
        try {
            return URLEncoder.encode(this.query_original, StandardCharsets.UTF_8.name());
        } catch (UnsupportedEncodingException e) {
            // fall back to the unencoded query
            return this.query_original;
        }
    }

    /**
     * @return the hashes of the include words; computed from the include
     *         words on first access when not supplied at construction
     */
    public HandleSet getIncludeHashes() {
        if (this.include_hashes == null) {
            this.include_hashes = Word.words2hashesHandles(this.include_words);
        }
        return this.include_hashes;
    }

    /**
     * @return the hashes of the exclude words; computed from the exclude
     *         words on first access when not supplied at construction
     */
    public HandleSet getExcludeHashes() {
        if (this.exclude_hashes == null) {
            this.exclude_hashes = Word.words2hashesHandles(this.exclude_words);
        }
        return this.exclude_hashes;
    }

    /**
     * @return the number of include hashes when they exist, otherwise the
     *         number of include words
     */
    public int getIncludeSize() {
        assert (this.include_hashes == null || this.include_words.size() == 0 || this.include_hashes.size() == this.include_words.size());
        return this.include_hashes == null ? this.include_words.size() : this.include_hashes.size();
    }

    /**
     * @return the number of exclude hashes when they exist, otherwise the
     *         number of exclude words
     */
    public int getExcludeSize() {
        assert (this.exclude_hashes == null || this.exclude_words.size() == 0 || this.exclude_hashes.size() == this.exclude_words.size());
        return this.exclude_hashes == null ? this.exclude_words.size() : this.exclude_hashes.size();
    }

    /** @return an iterator over the internal include word set */
    public Iterator<String> getIncludeWords() {
        return this.include_words.iterator();
    }

    /** @return a copy of the include word set */
    public Set<String> getIncludeWordsSet() {
        return new NormalizedWords(this.include_words);
    }

    /** @return an iterator over the internal exclude word set */
    public Iterator<String> getExcludeWords() {
        return this.exclude_words.iterator();
    }

    /** @return a copy of the exclude word set */
    public Set<String> getExcludeWordsSet() {
        return new NormalizedWords(this.exclude_words);
    }

    /** @return an iterator over the internal list of include phrases */
    public Iterator<String> getIncludeStrings() {
        return this.include_strings.iterator();
    }

    /** @return an iterator over the internal list of exclude phrases */
    public Iterator<String> getExcludeStrings() {
        return this.exclude_strings.iterator();
    }

    /**
     * Removes the given words from the include side of this goal: from the
     * include words, the include phrases and, when already present, the
     * include hashes.
     *
     * @param words the words to remove; an empty set is a no-op
     */
    public void removeIncludeWords(Set<String> words) {
        if (words.isEmpty()) {
            return;
        }
        // operate on the include collections, matching the method name and the
        // include_hashes handling below
        SetTools.excludeDestructiveByTestSmallInLarge(this.include_words, words);
        SetTools.excludeDestructiveByTestSmallInLarge(this.include_strings, words);
        if (this.include_hashes != null) {
            for (String word : words) {
                this.include_hashes.remove(Word.word2hash(word));
            }
        }
    }

    /**
     * @return all include phrases joined by a single space, or the empty
     *         string when there are none
     */
    public String getIncludeString() {
        final int size = this.include_strings.size();
        if (size == 0) {
            return "";
        }
        StringBuilder sb = new StringBuilder(10 * size);
        for (int i = 0; i < size; i++) {
            if (i > 0) {
                sb.append(' ');
            }
            sb.append(this.include_strings.get(i));
        }
        return sb.toString();
    }

    /**
     * @return {@code true} when the include hashes contain the catch-all hash
     *         or the only include phrase is the catch-all string
     */
    public boolean isCatchall() {
        if (this.include_hashes != null && this.include_hashes.has(Segment.catchallHash)) {
            return true;
        }
        return this.include_strings.size() == 1 && this.include_strings.contains(CATCHALL_STRING);
    }

    /**
     * Tests whether a word is part of the include side.
     *
     * @param word the word to look up (compared in lower case)
     * @return {@code true} when the lower-cased word is an include phrase or
     *         an include word; {@code false} for {@code null} or empty input
     */
    public boolean containsInclude(String word) {
        if (word == null || word.length() == 0) {
            return false;
        }
        String t = word.toLowerCase(Locale.ENGLISH);
        return this.include_strings.contains(t) || this.include_words.contains(t);
    }

    /**
     * Tests a text against this goal by plain substring search.
     *
     * @param text the text to test
     * @return {@code false} for {@code null} or empty text; {@code true} for
     *         a catch-all goal; otherwise {@code true} only when every
     *         include phrase occurs in the lower-cased text and no exclude
     *         phrase does
     */
    public boolean matches(String text) {
        if (text == null || text.length() == 0) {
            return false;
        }
        if (this.isCatchall()) {
            return true;
        }
        // same locale on both sides of the comparison
        String t = text.toLowerCase(Locale.ENGLISH);
        for (String i : this.include_strings) {
            if (t.indexOf(i.toLowerCase(Locale.ENGLISH)) < 0) {
                return false;
            }
        }
        for (String e : this.exclude_strings) {
            if (t.indexOf(e.toLowerCase(Locale.ENGLISH)) >= 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Removes every word of {@code blueList} from the include words, the
     * include phrases and, when already present, the include hashes.
     *
     * @param blueList the words to filter out
     */
    public void filterOut(SortedSet<String> blueList) {
        for (String word : blueList) {
            this.include_words.remove(word);
            this.include_strings.remove(word);
        }
        // For goals built from a query string the hashes may not exist yet; they
        // are derived later from the already filtered include words.
        if (this.include_hashes != null) {
            HandleSet blues = Word.words2hashesHandles(blueList);
            for (byte[] b : blues) {
                this.include_hashes.remove(b);
            }
        }
    }

    /**
     * Builds the Solr filter queries for a text search.
     *
     * @param noimages when {@code true}, image content types and the file
     *                 extensions jpg, png and gif are excluded
     * @return the list of filter queries; always contains the HTTP status 200 filter
     */
    public List<String> collectionTextFilterQuery(boolean noimages) {
        ArrayList<String> fqs = new ArrayList<String>();
        fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200");
        if (noimages) {
            fqs.add("-" + CollectionSchema.content_type.getSolrFieldName() + ":(image/*)");
            fqs.add("-" + CollectionSchema.url_file_ext_s.getSolrFieldName() + ":(jpg OR png OR gif)");
        }
        return fqs;
    }

    /**
     * @return the Solr query for a text search: the match-all query for a
     *         catch-all goal, otherwise the goal query
     */
    public StringBuilder collectionTextQuery() {
        if (this.isCatchall()) {
            return new StringBuilder(MATCH_ALL_QUERY);
        }
        return this.getGoalQuery();
    }

    /**
     * Builds the Solr filter queries for an image search.
     *
     * @param strict when {@code false}, documents with any value in the
     *               image-URL-stub field are accepted as well
     * @return the HTTP status filter followed by the content filter
     */
    public List<String> collectionImageFilterQuery(boolean strict) {
        return mediaFilterQuery("image", CollectionSchema.images_urlstub_sxt, "[* TO *]", strict);
    }

    /**
     * Builds the Solr filter queries for an audio search.
     *
     * @param strict when {@code false}, documents with at least one audio
     *               link are accepted as well
     * @return the HTTP status filter followed by the content filter
     */
    public List<String> collectionAudioFilterQuery(boolean strict) {
        return mediaFilterQuery("audio", CollectionSchema.audiolinkscount_i, "[1 TO *]", strict);
    }

    /**
     * Builds the Solr filter queries for a video search.
     *
     * @param strict when {@code false}, documents with at least one video
     *               link are accepted as well
     * @return the HTTP status filter followed by the content filter
     */
    public List<String> collectionVideoFilterQuery(boolean strict) {
        return mediaFilterQuery("video", CollectionSchema.videolinkscount_i, "[1 TO *]", strict);
    }

    /**
     * Builds the Solr filter queries for an application search.
     *
     * @param strict when {@code false}, documents with at least one
     *               application link are accepted as well
     * @return the HTTP status filter followed by the content filter
     */
    public List<String> collectionApplicationFilterQuery(boolean strict) {
        return mediaFilterQuery("application", CollectionSchema.applinkscount_i, "[1 TO *]", strict);
    }

    /**
     * Shared builder for the media filter queries: an HTTP status 200 filter
     * plus a content-type filter that is optionally widened by a second field.
     *
     * @param mimeGroup     major MIME type, e.g. {@code "image"}
     * @param fallbackField field OR-ed into the filter when not strict
     * @param fallbackRange Solr range expression applied to {@code fallbackField}
     * @param strict        when {@code true} only the content type is tested
     */
    private static List<String> mediaFilterQuery(String mimeGroup, CollectionSchema fallbackField, String fallbackRange, boolean strict) {
        ArrayList<String> fqs = new ArrayList<String>();
        fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200");
        StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName())
                .append(":(").append(mimeGroup).append("/*)");
        if (!strict) {
            filter.append(" OR ").append(fallbackField.getSolrFieldName()).append(':').append(fallbackRange);
        }
        fqs.add(filter.toString());
        return fqs;
    }

    /**
     * Builds the Solr query for an image search: the goal query combined with
     * boosted matches in the image text, title, keywords and text fields.
     *
     * @param modifier optional query modifier; when it carries a site host,
     *                 the value of {@code Domains.getSmartSLD} for that host
     *                 is added to the image-text clause
     * @return the match-all query for a catch-all goal, otherwise the
     *         assembled query (empty when the goal query is empty)
     */
    public StringBuilder collectionImageQuery(QueryModifier modifier) {
        if (this.isCatchall()) {
            return new StringBuilder(MATCH_ALL_QUERY);
        }
        StringBuilder w = this.getGoalQuery();
        StringBuilder q = new StringBuilder(80);
        q.append((CharSequence) w);
        if (w.length() > 0) {
            String hostname = modifier == null || modifier.sitehost == null || modifier.sitehost.length() == 0
                    ? null
                    : Domains.getSmartSLD(modifier.sitehost);
            CharSequence imageText = hostname == null ? w : "(" + w + " " + hostname + ")";
            q.append(" AND (");
            q.append('(').append(CollectionSchema.images_text_t.getSolrFieldName()).append(':').append(imageText).append("^100.0) OR ");
            q.append('(').append(CollectionSchema.title.getSolrFieldName()).append(':').append((CharSequence) w).append("^50.0) OR ");
            q.append('(').append(CollectionSchema.keywords.getSolrFieldName()).append(':').append((CharSequence) w).append("^10.0) OR ");
            q.append('(').append(CollectionSchema.text_t.getSolrFieldName()).append(':').append((CharSequence) w).append(')');
            q.append(')');
        }
        return q;
    }

    /**
     * Assembles the boolean query from the include and exclude phrases.
     *
     * <p>Include phrases (except the catch-all string) are joined with
     * {@code AND}; every exclude phrase is appended as {@code AND -phrase}.
     * When there is no include phrase but at least one exclude phrase, the
     * match-all query is used as the positive clause so that an exclusion is
     * never emitted as an inclusion. More than one clause is wrapped in
     * parentheses.</p>
     *
     * @return the query; empty when there is neither an include nor an exclude phrase
     */
    private StringBuilder getGoalQuery() {
        int clauses = 0;
        StringBuilder w = new StringBuilder(80);
        for (String s : this.include_strings) {
            if (CATCHALL_STRING.equals(s)) {
                continue;
            }
            if (clauses > 0) {
                w.append(" AND ");
            }
            appendTerm(w, s);
            clauses++;
        }
        for (String s : this.exclude_strings) {
            if (clauses == 0) {
                // purely negative goal: anchor the exclusions on "everything"
                w.append(MATCH_ALL_QUERY);
                clauses++;
            }
            w.append(" AND -");
            appendTerm(w, s);
            clauses++;
        }
        if (clauses > 1) {
            w.insert(0, '(');
            w.append(')');
        }
        return w;
    }

    /**
     * Appends a phrase to the query: verbatim when it contains one of the
     * characters {@code ~}, {@code *} or {@code ?}, otherwise enclosed in
     * double quotes.
     */
    private static void appendTerm(StringBuilder w, String s) {
        if (s.indexOf('~') >= 0 || s.indexOf('*') >= 0 || s.indexOf('?') >= 0) {
            w.append(s);
        } else {
            w.append(DQ).append(s).append(DQ);
        }
    }

    /**
     * A sorted set of words that lower-cases (with {@link Locale#ENGLISH})
     * every word on {@link #add(String)} and {@link #contains(Object)} and in
     * its copying constructors. Ordering is defined by
     * {@code NaturalOrder.naturalComparator}.
     */
    public static class NormalizedWords
    extends TreeSet<String> {
        private static final long serialVersionUID = -3050851079671868007L;

        /** Creates an empty set. */
        public NormalizedWords() {
            super(NaturalOrder.naturalComparator);
        }

        /**
         * Creates a set holding the lower-cased form of every given word.
         *
         * @param rawWords the words to add
         */
        public NormalizedWords(String[] rawWords) {
            super(NaturalOrder.naturalComparator);
            for (String word : rawWords) {
                super.add(word.toLowerCase(Locale.ENGLISH));
            }
        }

        /**
         * Creates a set holding the lower-cased form of every given word.
         *
         * @param rawWords the words to add
         */
        public NormalizedWords(Collection<String> rawWords) {
            super(NaturalOrder.naturalComparator);
            for (String word : rawWords) {
                super.add(word.toLowerCase(Locale.ENGLISH));
            }
        }

        /** Adds the lower-cased form of {@code word}. */
        @Override
        public boolean add(String word) {
            return super.add(word.toLowerCase(Locale.ENGLISH));
        }

        /**
         * @return {@code true} when {@code word} is a {@code String} whose
         *         lower-cased form is in the set
         */
        @Override
        public boolean contains(Object word) {
            if (!(word instanceof String)) {
                return false;
            }
            return super.contains(((String) word).toLowerCase(Locale.ENGLISH));
        }
    }
}

