/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.search.schema;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.solr.ProcessType;
import net.yacy.cora.federate.solr.SchemaConfiguration;
import net.yacy.cora.federate.solr.SchemaDeclaration;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.storage.Configuration;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.HostBalancer;
import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.schema.WebgraphSchema;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;

public class WebgraphConfiguration
extends SchemaConfiguration
implements Serializable {
    private static final long serialVersionUID = -499100932212840385L;

    /**
     * Creates a configuration that is not backed by a configuration file.
     *
     * @param lazy value stored in the inherited {@code lazy} field
     */
    public WebgraphConfiguration(boolean lazy) {
        super();
        this.lazy = lazy;
    }

    /**
     * Loads the webgraph schema configuration from a file and synchronizes it
     * with the {@link WebgraphSchema} enum.
     *
     * <p>Every entry whose key names a {@link WebgraphSchema} constant pushes its
     * configured value into that constant as the solr field name. Entries with
     * unknown keys are logged and removed. Finally, a warning is logged for each
     * schema constant that has no declaration in the file. An empty
     * configuration skips all of these checks.
     *
     * @param configurationFile file the configuration is read from
     * @param lazy value stored in the inherited {@code lazy} field
     * @throws IOException if the super constructor fails to read the file
     */
    public WebgraphConfiguration(File configurationFile, boolean lazy) throws IOException {
        super(configurationFile);
        this.lazy = lazy;
        if (this.isEmpty()) {
            return;
        }
        // Fetch each entry inside the loop so that the last entry (or the only
        // entry of a one-line file) is validated as well.
        final Iterator<Configuration.Entry> it = this.entryIterator();
        while (it.hasNext()) {
            final Configuration.Entry etr = it.next();
            try {
                final WebgraphSchema f = WebgraphSchema.valueOf(etr.key());
                f.setSolrFieldName(etr.getValue());
            } catch (IllegalArgumentException e) {
                ConcurrentLog.fine("SolrWebgraphWriter", "solr schema file " + configurationFile.getAbsolutePath() + " defines unknown attribute '" + etr.toString() + "'");
                it.remove();
            }
        }
        for (WebgraphSchema field : WebgraphSchema.values()) {
            if (this.get(field.name()) == null) {
                ConcurrentLog.warn("SolrWebgraphWriter", " solr schema file " + configurationFile.getAbsolutePath() + " is missing declaration for '" + field.name() + "'");
            }
        }
    }

    /**
     * Creates one webgraph edge document per link found in a source document.
     *
     * <p>All attributes are written when this configuration is empty. A
     * {@code nofollow} inside the {@code X-Robots-Tag} response header is passed
     * on to every edge. Edges are numbered in iteration order of {@code links},
     * starting at 0.
     *
     * @param subgraph subgraph handed through to {@link #getEdge}
     * @param source url of the document that contains the links
     * @param responseHeader response header of the source document, may be {@code null}
     * @param collections collection names with the url patterns that select them
     * @param crawldepth_source crawl depth of the source document
     * @param processTypes process types handed through to {@link #getEdge}
     * @param links the outgoing links of the source document
     * @param sourceName value handed through to {@link #getEdge} as {@code sourceName}
     * @return the edge documents, in the iteration order of {@code links}
     */
    public List<SolrInputDocument> getEdges(CollectionConfiguration.Subgraph subgraph, DigestURL source, ResponseHeader responseHeader, Map<String, Pattern> collections, int crawldepth_source, Set<ProcessType> processTypes, Collection<AnchorURL> links, String sourceName) {
        final boolean writeAll = this.isEmpty();
        boolean pageNofollow = false;
        if (responseHeader != null) {
            pageNofollow = responseHeader.get("X-Robots-Tag", "").indexOf("nofollow") >= 0;
        }
        final List<SolrInputDocument> result = new ArrayList<SolrInputDocument>();
        int position = 0;
        for (final AnchorURL link : links) {
            result.add(this.getEdge(subgraph, source, responseHeader, collections, crawldepth_source, processTypes, sourceName, writeAll, pageNofollow, position++, link));
        }
        return result;
    }

    /**
     * Builds the webgraph edge document for a single link from {@code source_url}
     * to {@code target_url}.
     *
     * <p>The fields {@code id}, {@code target_order_i}, {@code source_id_s} and
     * {@code target_id_s} are always written. Most other fields are written when
     * {@code allAttr} is set or when this configuration contains the field; the
     * crawl-depth and process fields have additional conditions noted inline.
     *
     * @param subgraph subgraph that is enriched with this source/target pair
     * @param source_url url of the document that contains the link
     * @param responseHeader response header of the source document, may be {@code null};
     *        in that case the current time is used as last-modified date
     * @param collections collection names with the url patterns that select them, may be {@code null}
     * @param crawldepth_source crawl depth of the source document
     * @param processTypes process types; an edge is tagged for citation processing
     *        only if this set contains {@link ProcessType#CITATION}
     * @param sourceName value written to the harvest key field of tagged edges
     * @param allAttr if {@code true}, attributes are written regardless of the configuration
     * @param generalNofollow if {@code true}, {@code nofollow} is added to the link's rel value
     * @param target_order position of the link within the source document
     * @param target_url the link target together with its anchor properties
     * @return the edge document
     */
    public SolrInputDocument getEdge(CollectionConfiguration.Subgraph subgraph, DigestURL source_url, ResponseHeader responseHeader, Map<String, Pattern> collections, int crawldepth_source, Set<ProcessType> processTypes, String sourceName, boolean allAttr, boolean generalNofollow, int target_order, AnchorURL target_url) {
        final String name = target_url.getNameProperty();
        final String text = target_url.getTextProperty();
        String rel = target_url.getRelProperty();
        final String source_host = source_url.getHost();
        final String target_host = target_url.getHost();
        if (generalNofollow) {
            // a document-wide nofollow is merged into the rel value of the link
            if (rel.length() == 0) {
                rel = "nofollow";
            } else if (rel.indexOf("nofollow") < 0) {
                rel = rel + ",nofollow";
            }
        }

        // The edge id is source hash + target hash + an 8-digit hex hash of the
        // anchor properties, so several links between the same two urls get
        // distinct ids as long as their name/text/rel differ.
        final StringBuilder idi = new StringBuilder(8);
        idi.append(Integer.toHexString((name + text + rel).hashCode()));
        while (idi.length() < 8) {
            idi.insert(0, '0');
        }
        final String source_id = ASCII.String(source_url.hash());
        final String target_id = ASCII.String(target_url.hash());
        final String edge_id = source_id + target_id + idi;

        final SolrInputDocument edge = new SolrInputDocument(new String[0]);
        this.add(edge, WebgraphSchema.id, edge_id);
        this.add(edge, WebgraphSchema.target_order_i, target_order);
        if (allAttr || this.contains(WebgraphSchema.load_date_dt)) {
            this.add(edge, WebgraphSchema.load_date_dt, new Date());
        }
        if (allAttr || this.contains(WebgraphSchema.last_modified)) {
            this.add(edge, WebgraphSchema.last_modified, responseHeader == null ? new Date() : responseHeader.lastModified());
        }
        final String source_url_string = source_url.toNormalform(false);
        // The null check must guard the allAttr case as well, otherwise a missing
        // collections map causes a NullPointerException in the loop below.
        if (collections != null && (allAttr || (this.contains(CollectionSchema.collection_sxt) && collections.size() > 0))) {
            final List<String> cs = new ArrayList<String>();
            for (Map.Entry<String, Pattern> e : collections.entrySet()) {
                if (e.getValue().matcher(source_url_string).matches()) {
                    cs.add(e.getKey());
                }
            }
            this.add(edge, WebgraphSchema.collection_sxt, cs);
        }

        // ---- source attributes ----
        this.add(edge, WebgraphSchema.source_id_s, source_id);
        if (allAttr || this.contains(WebgraphSchema.source_protocol_s)) {
            this.add(edge, WebgraphSchema.source_protocol_s, source_url.getProtocol());
        }
        if (allAttr || this.contains(WebgraphSchema.source_urlstub_s)) {
            this.add(edge, WebgraphSchema.source_urlstub_s, source_url.urlstub(true, true));
        }
        final Map<String, String> source_searchpart = source_url.getSearchpartMap();
        if (source_searchpart == null) {
            if (allAttr || this.contains(WebgraphSchema.source_parameter_count_i)) {
                this.add(edge, WebgraphSchema.source_parameter_count_i, 0);
            }
        } else {
            if (allAttr || this.contains(WebgraphSchema.source_parameter_count_i)) {
                this.add(edge, WebgraphSchema.source_parameter_count_i, source_searchpart.size());
            }
            if (allAttr || this.contains(WebgraphSchema.source_parameter_key_sxt)) {
                this.add(edge, WebgraphSchema.source_parameter_key_sxt, source_searchpart.keySet().toArray(new String[source_searchpart.size()]));
            }
            if (allAttr || this.contains(WebgraphSchema.source_parameter_value_sxt)) {
                this.add(edge, WebgraphSchema.source_parameter_value_sxt, source_searchpart.values().toArray(new String[source_searchpart.size()]));
            }
        }
        if (allAttr || this.contains(WebgraphSchema.source_chars_i)) {
            this.add(edge, WebgraphSchema.source_chars_i, source_url_string.length());
        }
        if (source_host != null) {
            // split "subdomain.organization.dnc" into its three parts
            // NOTE(review): if Domains.getDNC can return an empty string, the "- 1"
            // below cuts the last character of the host - confirm against getDNC.
            final String dnc = Domains.getDNC(source_host);
            final String subdomOrga = source_host.length() - dnc.length() <= 0 ? "" : source_host.substring(0, source_host.length() - dnc.length() - 1);
            final int pp = subdomOrga.lastIndexOf('.');
            final String subdom = pp < 0 ? "" : subdomOrga.substring(0, pp);
            final String orga = pp < 0 ? subdomOrga : subdomOrga.substring(pp + 1);
            if (allAttr || this.contains(WebgraphSchema.source_host_s)) {
                this.add(edge, WebgraphSchema.source_host_s, source_host);
            }
            if (allAttr || this.contains(WebgraphSchema.source_host_id_s)) {
                this.add(edge, WebgraphSchema.source_host_id_s, source_url.hosthash());
            }
            if (allAttr || this.contains(WebgraphSchema.source_host_dnc_s)) {
                this.add(edge, WebgraphSchema.source_host_dnc_s, dnc);
            }
            if (allAttr || this.contains(WebgraphSchema.source_host_organization_s)) {
                this.add(edge, WebgraphSchema.source_host_organization_s, orga);
            }
            if (allAttr || this.contains(WebgraphSchema.source_host_organizationdnc_s)) {
                this.add(edge, WebgraphSchema.source_host_organizationdnc_s, orga + "." + dnc);
            }
            if (allAttr || this.contains(WebgraphSchema.source_host_subdomain_s)) {
                this.add(edge, WebgraphSchema.source_host_subdomain_s, subdom);
            }
        }
        if (allAttr || this.contains(WebgraphSchema.source_file_ext_s) || this.contains(WebgraphSchema.source_file_name_s)) {
            final String source_file_name = source_url.getFileName();
            final String source_file_ext = MultiProtocolURL.getFileExtension(source_file_name);
            // the file name is stored without its extension
            this.add(edge, WebgraphSchema.source_file_name_s, source_file_name.toLowerCase().endsWith("." + source_file_ext) ? source_file_name.substring(0, source_file_name.length() - source_file_ext.length() - 1) : source_file_name);
            this.add(edge, WebgraphSchema.source_file_ext_s, source_file_ext);
        }
        if (allAttr || this.contains(WebgraphSchema.source_path_s)) {
            this.add(edge, WebgraphSchema.source_path_s, source_url.getPath());
        }
        if (allAttr || this.contains(WebgraphSchema.source_path_folders_count_i) || this.contains(WebgraphSchema.source_path_folders_sxt)) {
            final String[] paths = source_url.getPaths();
            this.add(edge, WebgraphSchema.source_path_folders_count_i, paths.length);
            this.add(edge, WebgraphSchema.source_path_folders_sxt, paths);
        }
        // NOTE(review): the crawl depth is only written if protocol, urlstub and id
        // are configured explicitly; allAttr alone does not satisfy this guard - confirm intent.
        if ((allAttr || this.contains(WebgraphSchema.source_crawldepth_i)) && this.contains(WebgraphSchema.source_protocol_s) && this.contains(WebgraphSchema.source_urlstub_s) && this.contains(WebgraphSchema.source_id_s)) {
            this.add(edge, WebgraphSchema.source_crawldepth_i, crawldepth_source);
        }

        // ---- link (anchor) attributes ----
        final boolean inbound = CollectionConfiguration.enrichSubgraph(subgraph, source_url, target_url);
        if (allAttr || this.contains(WebgraphSchema.target_inbound_b)) {
            this.add(edge, WebgraphSchema.target_inbound_b, inbound);
        }
        if (allAttr || this.contains(WebgraphSchema.target_name_t)) {
            this.add(edge, WebgraphSchema.target_name_t, name);
        }
        if (allAttr || this.contains(WebgraphSchema.target_rel_s)) {
            this.add(edge, WebgraphSchema.target_rel_s, rel);
        }
        if (allAttr || this.contains(WebgraphSchema.target_relflags_i)) {
            this.add(edge, WebgraphSchema.target_relflags_i, WebgraphConfiguration.relEval(rel));
        }
        if (allAttr || this.contains(WebgraphSchema.target_linktext_s)) {
            this.add(edge, WebgraphSchema.target_linktext_s, text);
        }
        if (allAttr || this.contains(WebgraphSchema.target_linktext_charcount_i)) {
            this.add(edge, WebgraphSchema.target_linktext_charcount_i, text.length());
        }
        if (allAttr || this.contains(WebgraphSchema.target_linktext_wordcount_i)) {
            this.add(edge, WebgraphSchema.target_linktext_wordcount_i, text.length() > 0 ? CommonPattern.SPACE.split(text).length : 0);
        }
        final String alt = target_url.getImageAlt();
        if (alt != null) {
            if (allAttr || this.contains(WebgraphSchema.target_alt_s)) {
                this.add(edge, WebgraphSchema.target_alt_s, alt);
            }
            if (allAttr || this.contains(WebgraphSchema.target_alt_charcount_i)) {
                this.add(edge, WebgraphSchema.target_alt_charcount_i, alt.length());
            }
            if (allAttr || this.contains(WebgraphSchema.target_alt_wordcount_i)) {
                this.add(edge, WebgraphSchema.target_alt_wordcount_i, alt.length() > 0 ? CommonPattern.SPACE.split(alt).length : 0);
            }
        }

        // ---- target attributes ----
        this.add(edge, WebgraphSchema.target_id_s, target_id);
        final String target_url_string = target_url.toNormalform(false);
        if (allAttr || this.contains(WebgraphSchema.target_protocol_s)) {
            this.add(edge, WebgraphSchema.target_protocol_s, target_url.getProtocol());
        }
        if (allAttr || this.contains(WebgraphSchema.target_urlstub_s)) {
            this.add(edge, WebgraphSchema.target_urlstub_s, target_url.urlstub(true, true));
        }
        final Map<String, String> target_searchpart = target_url.getSearchpartMap();
        if (target_searchpart == null) {
            if (allAttr || this.contains(WebgraphSchema.target_parameter_count_i)) {
                this.add(edge, WebgraphSchema.target_parameter_count_i, 0);
            }
        } else {
            if (allAttr || this.contains(WebgraphSchema.target_parameter_count_i)) {
                this.add(edge, WebgraphSchema.target_parameter_count_i, target_searchpart.size());
            }
            if (allAttr || this.contains(WebgraphSchema.target_parameter_key_sxt)) {
                this.add(edge, WebgraphSchema.target_parameter_key_sxt, target_searchpart.keySet().toArray(new String[target_searchpart.size()]));
            }
            if (allAttr || this.contains(WebgraphSchema.target_parameter_value_sxt)) {
                this.add(edge, WebgraphSchema.target_parameter_value_sxt, target_searchpart.values().toArray(new String[target_searchpart.size()]));
            }
        }
        if (allAttr || this.contains(WebgraphSchema.target_chars_i)) {
            this.add(edge, WebgraphSchema.target_chars_i, target_url_string.length());
        }
        if (target_host != null) {
            // same host decomposition as for the source host above
            final String dnc = Domains.getDNC(target_host);
            final String subdomOrga = target_host.length() - dnc.length() <= 0 ? "" : target_host.substring(0, target_host.length() - dnc.length() - 1);
            final int pp = subdomOrga.lastIndexOf('.');
            final String subdom = pp < 0 ? "" : subdomOrga.substring(0, pp);
            final String orga = pp < 0 ? subdomOrga : subdomOrga.substring(pp + 1);
            if (allAttr || this.contains(WebgraphSchema.target_host_s)) {
                this.add(edge, WebgraphSchema.target_host_s, target_host);
            }
            if (allAttr || this.contains(WebgraphSchema.target_host_id_s)) {
                this.add(edge, WebgraphSchema.target_host_id_s, target_url.hosthash());
            }
            if (allAttr || this.contains(WebgraphSchema.target_host_dnc_s)) {
                this.add(edge, WebgraphSchema.target_host_dnc_s, dnc);
            }
            if (allAttr || this.contains(WebgraphSchema.target_host_organization_s)) {
                this.add(edge, WebgraphSchema.target_host_organization_s, orga);
            }
            if (allAttr || this.contains(WebgraphSchema.target_host_organizationdnc_s)) {
                this.add(edge, WebgraphSchema.target_host_organizationdnc_s, orga + "." + dnc);
            }
            if (allAttr || this.contains(WebgraphSchema.target_host_subdomain_s)) {
                this.add(edge, WebgraphSchema.target_host_subdomain_s, subdom);
            }
        }
        if (allAttr || this.contains(WebgraphSchema.target_file_ext_s) || this.contains(WebgraphSchema.target_file_name_s)) {
            final String target_file_name = target_url.getFileName();
            final String target_file_ext = MultiProtocolURL.getFileExtension(target_file_name);
            this.add(edge, WebgraphSchema.target_file_name_s, target_file_name.toLowerCase().endsWith("." + target_file_ext) ? target_file_name.substring(0, target_file_name.length() - target_file_ext.length() - 1) : target_file_name);
            this.add(edge, WebgraphSchema.target_file_ext_s, target_file_ext);
        }
        if (allAttr || this.contains(WebgraphSchema.target_path_s)) {
            this.add(edge, WebgraphSchema.target_path_s, target_url.getPath());
        }
        if (allAttr || this.contains(WebgraphSchema.target_path_folders_count_i) || this.contains(WebgraphSchema.target_path_folders_sxt)) {
            final String[] paths = target_url.getPaths();
            this.add(edge, WebgraphSchema.target_path_folders_count_i, paths.length);
            this.add(edge, WebgraphSchema.target_path_folders_sxt, paths);
        }
        if ((allAttr || this.contains(WebgraphSchema.target_crawldepth_i)) && this.contains(WebgraphSchema.target_protocol_s) && this.contains(WebgraphSchema.target_urlstub_s) && this.contains(WebgraphSchema.target_id_s)) {
            // Hosts are compared null-safely because urls may have no host at all;
            // two host-less urls are treated as belonging to the same host.
            final boolean sameHost = target_host == null ? source_host == null : target_host.equals(source_host);
            if (sameHost) {
                // use the cached depth of the target if known, else one step below the source
                final Long targetdepth = HostBalancer.depthCache.get(target_url.hash());
                this.add(edge, WebgraphSchema.target_crawldepth_i, targetdepth == null ? crawldepth_source + 1 : targetdepth.intValue());
            } else {
                // links to other hosts get the fixed placeholder depth 1111
                this.add(edge, WebgraphSchema.target_crawldepth_i, 1111);
            }
        }

        // tag the edge for citation processing if requested and the required fields are configured
        if ((allAttr || this.contains(WebgraphSchema.process_sxt)) && (this.contains(WebgraphSchema.source_id_s) && this.contains(WebgraphSchema.source_cr_host_norm_i) || this.contains(WebgraphSchema.target_id_s) && this.contains(WebgraphSchema.target_cr_host_norm_i)) && processTypes.contains(ProcessType.CITATION)) {
            final List<String> pr = new ArrayList<String>();
            pr.add(ProcessType.CITATION.name());
            this.add(edge, WebgraphSchema.process_sxt, pr);
            if (allAttr || this.contains(CollectionSchema.harvestkey_s)) {
                this.add(edge, CollectionSchema.harvestkey_s, sourceName);
            }
        }
        return edge;
    }

    /** Separators between the individual tokens of a rel value: commas and/or whitespace. */
    private static final Pattern REL_SEPARATOR = Pattern.compile("[\\s,]+");

    /**
     * Encodes the rel tokens of a link as a bit field.
     *
     * <p>Bit 0 (value 1) is set if the token {@code me} is present, bit 1
     * (value 2) if the token {@code nofollow} is present. The value is split at
     * commas and whitespace so that combined values such as
     * {@code "me,nofollow"} - the form {@code getEdge} produces when it appends
     * a document-wide nofollow - are recognized as well. Tokens are compared
     * case-insensitively; all other tokens are ignored.
     *
     * @param rels the rel value of a link, may be empty but not {@code null}
     * @return the flags, a value between 0 and 3
     */
    private static int relEval(String rels) {
        int flags = 0;
        for (final String token : REL_SEPARATOR.split(rels.trim())) {
            if ("me".equalsIgnoreCase(token)) {
                flags |= 1;
            } else if ("nofollow".equalsIgnoreCase(token)) {
                flags |= 2;
            }
        }
        return flags;
    }

    /**
     * Commits the configuration through the super class and afterwards pushes
     * the configured solr field names into the {@link WebgraphSchema} constants.
     *
     * <p>The commit is best effort: an {@link IOException} raised while
     * committing is logged and not passed on to the caller; in that case the
     * field names are not synchronized. Entries whose key is not a
     * {@link WebgraphSchema} constant are skipped.
     *
     * @throws IOException declared by the overridden method; not thrown by this implementation
     */
    @Override
    public void commit() throws IOException {
        try {
            super.commit();
            // Fetch each entry inside the loop: this handles an empty configuration
            // without a NoSuchElementException and does not skip the last entry.
            final Iterator<Configuration.Entry> it = this.entryIterator();
            while (it.hasNext()) {
                final Configuration.Entry etr = it.next();
                try {
                    final WebgraphSchema f = WebgraphSchema.valueOf(etr.key());
                    f.setSolrFieldName(etr.getValue());
                } catch (IllegalArgumentException ignored) {
                    // the entry does not belong to the webgraph schema; nothing to synchronize
                }
            }
        } catch (IOException e) {
            // keep the best-effort contract, but do not hide the failure completely
            ConcurrentLog.warn("SolrWebgraphWriter", "failed to commit the webgraph schema configuration: " + e.getMessage());
        }
    }

    /**
     * Copies a stored solr document into a new input document, keeping only the
     * fields that this configuration contains.
     *
     * @param doc the document to copy from
     * @return a new input document holding the retained fields and their values
     */
    public SolrInputDocument toSolrInputDocument(SolrDocument doc) {
        final SolrInputDocument copy = new SolrInputDocument(new String[0]);
        for (final String field : doc.getFieldNames()) {
            if (this.contains(field)) {
                copy.addField(field, doc.getFieldValue(field));
            }
        }
        return copy;
    }
}

