/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.search.schema;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.lang.reflect.Array;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.EnhancedTextProfileSignature;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.solr.FailType;
import net.yacy.cora.federate.solr.ProcessType;
import net.yacy.cora.federate.solr.Ranking;
import net.yacy.cora.federate.solr.SchemaConfiguration;
import net.yacy.cora.federate.solr.SchemaDeclaration;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.federate.solr.logic.BooleanLiteral;
import net.yacy.cora.federate.solr.logic.CatchallLiteral;
import net.yacy.cora.federate.solr.logic.Conjunction;
import net.yacy.cora.federate.solr.logic.Disjunction;
import net.yacy.cora.federate.solr.logic.LongLiteral;
import net.yacy.cora.federate.solr.logic.Negation;
import net.yacy.cora.federate.solr.logic.StringLiteral;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.cora.storage.Configuration;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.document.Condenser;
import net.yacy.document.Document;
import net.yacy.document.ProbabilisticClassifier;
import net.yacy.document.SentenceReader;
import net.yacy.document.content.DCEntry;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.IconEntry;
import net.yacy.document.parser.html.ImageEntry;
import net.yacy.kelondro.data.citation.CitationReference;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.index.RowHandleMap;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.util.Bitfield;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.search.index.Segment;
import net.yacy.search.query.QueryParams;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.schema.WebgraphConfiguration;
import net.yacy.search.schema.WebgraphSchema;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;

public class CollectionConfiguration
extends SchemaConfiguration
implements Serializable {
    /** Serialization version identifier for this {@link Serializable} class. */
    private static final long serialVersionUID = -499100932212840385L;
    // Consulted by yacy2solr when it computes http_unique_b: selects whether the HTTPS or the HTTP variant is flagged.
    public static boolean UNIQUE_HEURISTIC_PREFER_HTTPS = false;
    // Consulted by yacy2solr when it computes www_unique_b: selects whether hosts with or without a "www." prefix are flagged.
    public static boolean UNIQUE_HEURISTIC_PREFER_WWWPREFIX = true;
    // Ranking slots; the constructor fills this list with four default Ranking instances.
    private final ArrayList<Ranking> rankings;
    // Passed as the second argument to the inherited toSolrInputDocument/toSolrDocument conversions.
    // Created empty here; NOTE(review): any population happens outside the visible part of the file - confirm.
    private static final Set<String> omitFields = new HashSet<String>(3);
    // Publicly writable post-processing status values. They are neither read nor written in the visible
    // part of this file - presumably progress indicators maintained by the post-processing code; verify against callers.
    public static boolean postprocessingRunning;
    public static String postprocessingActivity;
    public static long postprocessingStartTime;
    public static int postprocessingCollection1Count;
    public static int postprocessingWebgraphCount;

    /**
     * Creates the collection schema configuration from a configuration file and
     * synchronises the {@link CollectionSchema} constants with it.
     * <p>
     * Every entry of the file whose key names a {@link CollectionSchema} constant
     * sets that constant's Solr field name; entries with unknown keys are logged
     * and removed. Afterwards missing declarations are reported, mandatory fields
     * are enforced and dependent protocol fields are added.
     *
     * @param configurationFile the schema configuration file handed to the super class
     * @param lazy value stored in the inherited {@code lazy} field
     * @throws IOException propagated from the super class constructor
     */
    public CollectionConfiguration(File configurationFile, boolean lazy) throws IOException {
        super(configurationFile);
        this.lazy = lazy;
        this.rankings = new ArrayList<Ranking>(4);
        for (int i = 0; i <= 3; ++i) {
            this.rankings.add(new Ranking());
        }
        if (this.isEmpty()) {
            return;
        }
        // Visit every entry. Fetching the element inside the loop (instead of before the
        // hasNext() test) guarantees that the last entry is validated as well.
        Iterator<Configuration.Entry> it = this.entryIterator();
        while (it.hasNext()) {
            Configuration.Entry etr = it.next();
            try {
                CollectionSchema f = CollectionSchema.valueOf(etr.key());
                f.setSolrFieldName(etr.getValue());
            } catch (IllegalArgumentException e) {
                // valueOf rejects keys that are not CollectionSchema constants: drop such entries
                ConcurrentLog.fine("SolrCollectionWriter", "solr schema file " + configurationFile.getAbsolutePath() + " defines unknown attribute '" + etr.toString() + "'");
                it.remove();
            }
        }
        // Reverse check: report schema constants that the file does not declare.
        for (CollectionSchema field : CollectionSchema.values()) {
            if (this.get(field.name()) != null) {
                continue;
            }
            // Fields whose name is the suffix of one of these Solr field names are exempt from the warning.
            if (CollectionSchema.author_sxt.getSolrFieldName().endsWith(field.name())
                    || CollectionSchema.coordinate_p_0_coordinate.getSolrFieldName().endsWith(field.name())
                    || CollectionSchema.coordinate_p_1_coordinate.getSolrFieldName().endsWith(field.name())) {
                continue;
            }
            ConcurrentLog.warn("SolrCollectionWriter", " solr schema file " + configurationFile.getAbsolutePath() + " is missing declaration for '" + field.name() + "'");
        }
        this.checkMandatoryFields();
        this.checkFieldRelationConsistency();
    }

    /**
     * Ensures that every configured url-stub field has its companion protocol
     * field: when a {@code *_urlstub_sxt} field is contained in this configuration
     * but the matching {@code *_protocol_sxt} field is not, an enabled entry for
     * the protocol field is added.
     */
    private void checkFieldRelationConsistency() {
        // { url-stub field, protocol field that must accompany it }
        final CollectionSchema[][] stubToProtocol = {
            {CollectionSchema.outboundlinks_urlstub_sxt, CollectionSchema.outboundlinks_protocol_sxt},
            {CollectionSchema.inboundlinks_urlstub_sxt, CollectionSchema.inboundlinks_protocol_sxt},
            {CollectionSchema.icons_urlstub_sxt, CollectionSchema.icons_protocol_sxt},
            {CollectionSchema.images_urlstub_sxt, CollectionSchema.images_protocol_sxt}
        };
        for (final CollectionSchema[] pair : stubToProtocol) {
            final CollectionSchema stubField = pair[0];
            final CollectionSchema protocolField = pair[1];
            if (!this.contains(stubField) || this.contains(protocolField)) {
                continue;
            }
            final String key = protocolField.name();
            this.put(key, new Configuration.Entry(key, protocolField.getSolrFieldName(), true));
        }
    }

    /**
     * Makes sure that every {@link CollectionSchema} field reporting itself as
     * mandatory is present and enabled: a disabled entry is switched on, a
     * missing entry is created as enabled. Both corrections are logged.
     */
    private void checkMandatoryFields() {
        for (final CollectionSchema field : CollectionSchema.values()) {
            if (field.isMandatory()) {
                final String key = field.name();
                final Configuration.Entry existing = (Configuration.Entry) this.get(key);
                if (existing == null) {
                    this.put(key, new Configuration.Entry(key, field.getSolrFieldName(), true));
                    ConcurrentLog.info("SolrCollectionWriter", "Added missing mandatory field " + key);
                } else if (!existing.enabled()) {
                    existing.setEnable(true);
                    ConcurrentLog.info("SolrCollectionWriter", "Forced activation of mandatory field " + key);
                }
            }
        }
    }

    /**
     * Collects the Solr field names of all {@link CollectionSchema} fields that
     * are contained in this configuration, in enum declaration order.
     *
     * @return the Solr field names; an empty array when no field is contained
     */
    public String[] allFields() {
        final ArrayList<String> names = new ArrayList<String>(this.size());
        for (final CollectionSchema field : CollectionSchema.values()) {
            if (this.contains(field)) {
                names.add(field.getSolrFieldName());
            }
        }
        final String[] result = new String[names.size()];
        return names.toArray(result);
    }

    /**
     * Returns the ranking stored at the given position; the index is reduced
     * with the remainder operator by the number of available rankings first.
     *
     * @param idx2 position of the requested ranking
     * @return the ranking at {@code idx2 % rankings.size()}
     */
    public Ranking getRanking(int idx2) {
        final int slot = idx2 % this.rankings.size();
        return this.rankings.get(slot);
    }

    /**
     * Looks up a ranking by its name.
     *
     * @param name the ranking name to search for; may be {@code null}
     * @return the first ranking whose name equals {@code name}, or {@code null}
     *         when {@code name} is {@code null} or no ranking matches
     */
    public Ranking getRanking(String name) {
        if (name != null) {
            for (final Ranking candidate : this.rankings) {
                if (name.equals(candidate.getName())) {
                    return candidate;
                }
            }
        }
        return null;
    }

    /**
     * Enforces the mandatory and dependent fields, commits the configuration via
     * the super class and then re-synchronises the Solr field names of the
     * {@link CollectionSchema} constants with the configuration entries.
     * <p>
     * The commit is best effort: an {@link IOException} raised while committing is
     * logged and not propagated, and the synchronisation step is skipped in that case.
     *
     * @throws IOException declared for compatibility with the overridden method
     */
    @Override
    public void commit() throws IOException {
        this.checkMandatoryFields();
        this.checkFieldRelationConsistency();
        try {
            super.commit();
            // Visit every entry; fetching inside the loop also covers the last entry
            // and cannot fail on an empty iterator.
            Iterator<Configuration.Entry> it = this.entryIterator();
            while (it.hasNext()) {
                Configuration.Entry etr = it.next();
                try {
                    CollectionSchema f = CollectionSchema.valueOf(etr.key());
                    f.setSolrFieldName(etr.getValue());
                } catch (IllegalArgumentException ignored) {
                    // the key does not name a CollectionSchema constant: nothing to synchronise
                }
            }
        } catch (IOException e) {
            // keep the best-effort behaviour, but do not hide the failure completely
            ConcurrentLog.warn("SolrCollectionWriter", "failed to commit solr schema configuration: " + e.getMessage());
        }
    }

    /**
     * Converts a Solr document into a Solr input document by delegating to the
     * two-argument conversion with this class' {@code omitFields} set.
     *
     * @param doc the document to convert
     * @return the result of the delegated conversion
     */
    public SolrInputDocument toSolrInputDocument(SolrDocument doc) {
        final SolrInputDocument converted = toSolrInputDocument(doc, omitFields);
        return converted;
    }

    /**
     * Converts a Solr input document into a Solr document by delegating to the
     * two-argument conversion with this class' {@code omitFields} set.
     *
     * @param doc the input document to convert
     * @return the result of the delegated conversion
     */
    public SolrDocument toSolrDocument(SolrInputDocument doc) {
        final SolrDocument converted = toSolrDocument(doc, omitFields);
        return converted;
    }

    /**
     * Adds all attributes that can be derived from the URL alone to a Solr input
     * document: id, host hash, normal form, IP address, host decomposition, file
     * name parts, path elements and query parameters.
     * <p>
     * {@code id} and {@code sku} are always written; every other field is written
     * only when {@code allAttr} is set or the field is contained in this configuration.
     *
     * @param doc the document receiving the fields
     * @param allAttr when {@code true}, optional fields are written regardless of the configuration
     * @param digestURL the URL to take the attributes from
     * @return the normal form of the URL as stored in the {@code sku} field
     */
    public String addURIAttributes(SolrInputDocument doc, boolean allAttr, DigestURL digestURL) {
        this.add(doc, (SchemaDeclaration)CollectionSchema.id, ASCII.String(digestURL.hash()));
        if (allAttr || this.contains(CollectionSchema.host_id_s)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.host_id_s, digestURL.hosthash());
        }
        final String us = digestURL.toNormalform(true);
        this.add(doc, (SchemaDeclaration)CollectionSchema.sku, us);
        if (allAttr || this.contains(CollectionSchema.ip_s)) {
            final InetAddress address = digestURL.getInetAddress();
            if (address != null) {
                this.add(doc, (SchemaDeclaration)CollectionSchema.ip_s, address.getHostAddress());
            }
        }

        // host decomposition: <subdomain>.<organization>.<dnc>
        final String host = digestURL.getHost();
        if (host != null) {
            final String dnc = Domains.getDNC(host);
            final int prefixLength = host.length() - dnc.length();
            // strip the dnc and the dot in front of it
            final String subdomOrga = prefixLength <= 0 ? "" : host.substring(0, prefixLength - 1);
            final int lastDot = subdomOrga.lastIndexOf('.');
            final String subdom;
            final String orga;
            if (lastDot < 0) {
                subdom = "";
                orga = subdomOrga;
            } else {
                subdom = subdomOrga.substring(0, lastDot);
                orga = subdomOrga.substring(lastDot + 1);
            }
            if (allAttr || this.contains(CollectionSchema.host_s)) {
                this.add(doc, (SchemaDeclaration)CollectionSchema.host_s, host);
            }
            if (allAttr || this.contains(CollectionSchema.host_dnc_s)) {
                this.add(doc, (SchemaDeclaration)CollectionSchema.host_dnc_s, dnc);
            }
            if (allAttr || this.contains(CollectionSchema.host_organization_s)) {
                this.add(doc, (SchemaDeclaration)CollectionSchema.host_organization_s, orga);
            }
            if (allAttr || this.contains(CollectionSchema.host_organizationdnc_s)) {
                this.add(doc, (SchemaDeclaration)CollectionSchema.host_organizationdnc_s, orga + "." + dnc);
            }
            if (allAttr || this.contains(CollectionSchema.host_subdomain_s)) {
                this.add(doc, (SchemaDeclaration)CollectionSchema.host_subdomain_s, subdom);
            }
        }

        // file name without extension, and the extension cut at the first ';'
        final String filename = digestURL.getFileName();
        String extension = MultiProtocolURL.getFileExtension(filename);
        final String filenameStub;
        if (filename.toLowerCase(Locale.ROOT).endsWith("." + extension)) {
            filenameStub = filename.substring(0, filename.length() - extension.length() - 1);
        } else {
            filenameStub = filename;
        }
        final int semicolon = extension.indexOf(';');
        if (semicolon >= 0) {
            extension = extension.substring(0, semicolon);
        }

        if (allAttr || this.contains(CollectionSchema.url_chars_i)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.url_chars_i, us.length());
        }
        if (allAttr || this.contains(CollectionSchema.url_protocol_s)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.url_protocol_s, digestURL.getProtocol());
        }
        if (allAttr || this.contains(CollectionSchema.url_paths_sxt) || this.contains(CollectionSchema.url_paths_count_i)) {
            final String[] paths = digestURL.getPaths();
            if (allAttr || this.contains(CollectionSchema.url_paths_count_i)) {
                this.add(doc, (SchemaDeclaration)CollectionSchema.url_paths_count_i, paths.length);
            }
            if (allAttr || this.contains(CollectionSchema.url_paths_sxt)) {
                this.add(doc, (SchemaDeclaration)CollectionSchema.url_paths_sxt, paths);
            }
        }
        if (allAttr || this.contains(CollectionSchema.url_file_name_s)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.url_file_name_s, filenameStub);
        }
        if (allAttr || this.contains(CollectionSchema.url_file_name_tokens_t)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.url_file_name_tokens_t, MultiProtocolURL.toTokens(filenameStub));
        }
        if (allAttr || this.contains(CollectionSchema.url_file_ext_s)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.url_file_ext_s, extension);
        }

        // query part: the parameter count is 0 when the URL has no search part
        final Map<String, String> searchpart = digestURL.getSearchpartMap();
        if (allAttr || this.contains(CollectionSchema.url_parameter_i)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.url_parameter_i, searchpart == null ? 0 : searchpart.size());
        }
        if (searchpart != null) {
            if (allAttr || this.contains(CollectionSchema.url_parameter_key_sxt)) {
                this.add(doc, (SchemaDeclaration)CollectionSchema.url_parameter_key_sxt, searchpart.keySet().toArray(new String[searchpart.size()]));
            }
            if (allAttr || this.contains(CollectionSchema.url_parameter_value_sxt)) {
                this.add(doc, (SchemaDeclaration)CollectionSchema.url_parameter_value_sxt, searchpart.values().toArray(new String[searchpart.size()]));
            }
        }
        return us;
    }

    /**
     * Builds a Solr input document from a metadata node. The node is first
     * converted with {@code toSolrInputDocument}; URL attributes and the fields
     * derived from title, snippet, keywords, icons and link counters are then added.
     * <p>
     * Optional fields are written only when this configuration is empty (in which
     * case all attributes are written) or contains the respective field.
     *
     * @param md the metadata node to convert
     * @return the populated Solr input document
     */
    public SolrInputDocument metadata2solr(URIMetadataNode md) {
        final SolrInputDocument doc = this.toSolrInputDocument(md);
        final boolean allAttr = this.isEmpty();
        this.addURIAttributes(doc, allAttr, md.url());

        // title statistics; exactly one title is counted
        final String title = md.dc_title();
        if (allAttr || this.contains(CollectionSchema.title_count_i)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.title_count_i, 1);
        }
        if (allAttr || this.contains(CollectionSchema.title_chars_val)) {
            final Integer[] titleChars = new Integer[]{title.length()};
            this.add(doc, (SchemaDeclaration)CollectionSchema.title_chars_val, titleChars);
        }
        if (allAttr || this.contains(CollectionSchema.title_words_val)) {
            final Integer[] titleWords = new Integer[]{CommonPattern.SPACES.split(title).length};
            this.add(doc, (SchemaDeclaration)CollectionSchema.title_words_val, titleWords);
        }

        // description statistics; the snippet serves as the single description when present
        final String snippet = md.snippet();
        final boolean description_exist = snippet != null;
        final String description = description_exist ? snippet : "";
        if (allAttr || this.contains(CollectionSchema.description_txt)) {
            final String[] texts = description_exist ? new String[]{description} : new String[0];
            this.add(doc, (SchemaDeclaration)CollectionSchema.description_txt, texts);
        }
        if (allAttr || this.contains(CollectionSchema.description_count_i)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.description_count_i, description_exist ? 1 : 0);
        }
        if (allAttr || this.contains(CollectionSchema.description_chars_val)) {
            final Integer[] chars = description_exist ? new Integer[]{description.length()} : new Integer[0];
            this.add(doc, (SchemaDeclaration)CollectionSchema.description_chars_val, chars);
        }
        if (allAttr || this.contains(CollectionSchema.description_words_val)) {
            final Integer[] words;
            if (description_exist) {
                final int wordCount = description.isEmpty() ? 0 : CommonPattern.SPACES.split(description).length;
                words = new Integer[]{wordCount};
            } else {
                words = new Integer[0];
            }
            this.add(doc, (SchemaDeclaration)CollectionSchema.description_words_val, words);
        }

        // keywords; when flag bit 0 is set the marker "indexof" is appended,
        // using the separator style already present in the keyword string
        String keywords = md.dc_subject();
        final Bitfield flags = md.flags();
        if (flags.get(0)) {
            if (keywords == null || keywords.isEmpty()) {
                keywords = "indexof";
            } else if (keywords.indexOf(',') > 0) {
                keywords = keywords + ", indexof";
            } else {
                keywords = keywords + " indexof";
            }
        }
        if (allAttr || this.contains(CollectionSchema.keywords)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.keywords, keywords);
        }

        this.processIcons(doc, allAttr, md.getIcons());

        // link counters
        if (allAttr || this.contains(CollectionSchema.imagescount_i)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.imagescount_i, md.limage());
        }
        if (allAttr || this.contains(CollectionSchema.linkscount_i)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.linkscount_i, md.llocal() + md.lother());
        }
        if (allAttr || this.contains(CollectionSchema.inboundlinkscount_i)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.inboundlinkscount_i, md.llocal());
        }
        if (allAttr || this.contains(CollectionSchema.outboundlinkscount_i)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.outboundlinkscount_i, md.lother());
        }
        if (allAttr || this.contains(CollectionSchema.charset_s)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.charset_s, StandardCharsets.UTF_8.name());
        }

        // a coordinate is written only when both latitude and longitude differ from 0.0
        if (md.lat() != 0.0 && md.lon() != 0.0 && (allAttr || this.contains(CollectionSchema.coordinate_p))) {
            final String coordinate = Double.toString(md.lat()) + "," + Double.toString(md.lon());
            this.add(doc, (SchemaDeclaration)CollectionSchema.coordinate_p, coordinate);
        }
        if (allAttr || this.contains(CollectionSchema.httpstatus_i)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.httpstatus_i, 200);
        }
        if (allAttr || this.contains(CollectionSchema.publisher_t)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.publisher_t, md.dc_publisher());
        }
        if (allAttr || this.contains(CollectionSchema.audiolinkscount_i)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.audiolinkscount_i, md.laudio());
        }
        if (allAttr || this.contains(CollectionSchema.videolinkscount_i)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.videolinkscount_i, md.lvideo());
        }
        if (allAttr || this.contains(CollectionSchema.applinkscount_i)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.applinkscount_i, md.lapp());
        }
        return doc;
    }

    /**
     * Records one link in the given subgraph and classifies it as inbound or
     * outbound. A link is inbound when both hosts are {@code null}, or when both
     * are present and equal, optionally differing only by a leading {@code "www."}
     * on either side. Protocol, URL stub and anchor text of the target are
     * appended to slot 0 (inbound) or slot 1 (outbound) of the subgraph arrays.
     *
     * @param subgraph the subgraph collecting the link data
     * @param source_url the URL of the document containing the link
     * @param target_url the link target, also supplying the anchor text
     * @return {@code true} when the link was classified as inbound
     */
    public static boolean enrichSubgraph(Subgraph subgraph, DigestURL source_url, AnchorURL target_url) {
        final String anchorText = target_url.getTextProperty();
        final String sourceHost = source_url.getHost();
        final String targetHost = target_url.getHost();

        final boolean inbound;
        if (sourceHost == null || targetHost == null) {
            // host-less URLs are only inbound to each other
            inbound = sourceHost == null && targetHost == null;
        } else {
            inbound = targetHost.equals(sourceHost)
                    || targetHost.equals("www." + sourceHost)
                    || sourceHost.equals("www." + targetHost);
        }

        final int slot = inbound ? 0 : 1;
        subgraph.urlProtocols[slot].add(target_url.getProtocol());
        subgraph.urlStubs[slot].add(target_url.urlstub(true, true));
        subgraph.urlAnchorTexts[slot].add(anchorText);
        return inbound;
    }

    /*
     * WARNING - void declaration
     */
    public SolrVector yacy2solr(Segment segment, Map<String, Pattern> collections, ResponseHeader responseHeader, Document document, Condenser condenser, DigestURL referrerURL, String language, boolean setUnique, WebgraphConfiguration webgraph, String sourceName) {
        Date modDate;
        Boolean canonical_equal_sku;
        SolrVector doc = new SolrVector();
        DigestURL digestURL = document.dc_source();
        boolean allAttr = this.isEmpty();
        String url = this.addURIAttributes(doc, allAttr, digestURL);
        this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.content_type, new String[]{document.dc_format()});
        LinkedHashSet<ProcessType> processTypes = new LinkedHashSet<ProcessType>();
        String host = digestURL.getHost();
        int crawldepth = document.getDepth();
        if (allAttr || this.contains(CollectionSchema.crawldepth_i)) {
            CollectionSchema.crawldepth_i.add((SolrInputDocument)doc, crawldepth);
        }
        if (allAttr || this.contains(CollectionSchema.cr_host_chance_d) && this.contains(CollectionSchema.cr_host_count_i) && this.contains(CollectionSchema.cr_host_norm_i)) {
            processTypes.add(ProcessType.CITATION);
        }
        if (allAttr || this.contains(CollectionSchema.collection_sxt) && collections != null && collections.size() > 0) {
            ArrayList<String> cs = new ArrayList<String>();
            for (Map.Entry<String, Pattern> entry2 : collections.entrySet()) {
                if (!entry2.getValue().matcher(url).matches()) continue;
                cs.add(entry2.getKey());
            }
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.collection_sxt, cs);
        }
        List<String> titles = document.titles();
        if (allAttr || this.contains(CollectionSchema.title)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.title, titles);
            if ((allAttr || this.contains(CollectionSchema.title_exact_signature_l)) && titles.size() > 0) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.title_exact_signature_l, EnhancedTextProfileSignature.getSignatureLong(titles.get(0)));
            }
        }
        if (allAttr || this.contains(CollectionSchema.title_count_i)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.title_count_i, titles.size());
        }
        if (allAttr || this.contains(CollectionSchema.title_chars_val)) {
            ArrayList<Integer> cv2 = new ArrayList<Integer>(titles.size());
            for (String string : titles) {
                cv2.add(string.length());
            }
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.title_chars_val, cv2);
        }
        if (allAttr || this.contains(CollectionSchema.title_words_val)) {
            ArrayList<Integer> cv = new ArrayList<Integer>(titles.size());
            for (String string : titles) {
                cv.add(CommonPattern.SPACES.split(string).length);
            }
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.title_words_val, cv);
        }
        String[] descriptions = document.dc_description();
        if (allAttr || this.contains(CollectionSchema.description_txt)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.description_txt, descriptions);
            if ((allAttr || this.contains(CollectionSchema.description_exact_signature_l)) && descriptions != null && descriptions.length > 0) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.description_exact_signature_l, EnhancedTextProfileSignature.getSignatureLong(descriptions));
            }
        }
        if (allAttr || this.contains(CollectionSchema.description_count_i)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.description_count_i, descriptions.length);
        }
        if (allAttr || this.contains(CollectionSchema.description_chars_val)) {
            ArrayList<Integer> arrayList = new ArrayList<Integer>(descriptions.length);
            for (String s : descriptions) {
                arrayList.add(s.length());
            }
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.description_chars_val, arrayList);
        }
        if (allAttr || this.contains(CollectionSchema.description_words_val)) {
            ArrayList<Integer> arrayList = new ArrayList<Integer>(descriptions.length);
            for (String s : descriptions) {
                arrayList.add(CommonPattern.SPACES.split(s).length);
            }
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.description_words_val, arrayList);
        }
        if (allAttr || this.contains(CollectionSchema.author)) {
            void var20_31;
            String string = document.dc_creator();
            if (string == null || string.length() == 0) {
                String string2 = document.dc_publisher();
            }
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.author, (String)var20_31);
        }
        if (allAttr || this.contains(CollectionSchema.last_modified)) {
            void var20_38;
            void var20_36;
            Date date;
            Date date2 = date = responseHeader == null ? document.getLastModified() : responseHeader.lastModified();
            if (date == null) {
                long l = segment.getFirstSeenTime(digestURL.hash());
                if (l > 0L) {
                    Date date3 = new Date(l);
                } else {
                    Date date4 = new Date();
                }
            }
            if (document.getLastModified().before((Date)var20_36)) {
                Date date5 = document.getLastModified();
            }
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.last_modified, (Date)var20_38);
        }
        if (allAttr || this.contains(CollectionSchema.dates_in_content_dts) || this.contains(CollectionSchema.dates_in_content_count_i)) {
            LinkedHashSet<Date> linkedHashSet = condenser.dates_in_content;
            if (allAttr || this.contains(CollectionSchema.dates_in_content_count_i)) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.dates_in_content_count_i, linkedHashSet.size());
            }
            if (linkedHashSet.size() > 0 && (allAttr || this.contains(CollectionSchema.dates_in_content_dts))) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.dates_in_content_dts, linkedHashSet.toArray(new Date[linkedHashSet.size()]));
            }
        }
        if (allAttr || this.contains(CollectionSchema.keywords)) {
            String string = document.dc_subject(' ');
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.keywords, string);
        }
        this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.http_unique_b, setUnique || UNIQUE_HEURISTIC_PREFER_HTTPS ? digestURL.isHTTPS() : digestURL.isHTTP());
        this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.www_unique_b, setUnique || host != null && (UNIQUE_HEURISTIC_PREFER_WWWPREFIX ? host.startsWith("www.") : !host.startsWith("www.")));
        this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.exact_signature_l, condenser.exactSignature());
        this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.exact_signature_unique_b, true);
        this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.exact_signature_copycount_i, 0);
        this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.fuzzy_signature_l, condenser.fuzzySignature());
        this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.fuzzy_signature_text_t, condenser.fuzzySignatureText());
        this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.fuzzy_signature_unique_b, true);
        this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.fuzzy_signature_copycount_i, 0);
        if (this.contains(CollectionSchema.exact_signature_unique_b) || this.contains(CollectionSchema.exact_signature_copycount_i) || this.contains(CollectionSchema.fuzzy_signature_l) || this.contains(CollectionSchema.fuzzy_signature_copycount_i) || this.contains(CollectionSchema.http_unique_b) || this.contains(CollectionSchema.www_unique_b)) {
            processTypes.add(ProcessType.UNIQUE);
        }
        LinkedHashMap<DigestURL, String> linkedHashMap = document.inboundLinks();
        LinkedHashMap<DigestURL, String> linkedHashMap2 = document.outboundLinks();
        Subgraph subgraph = new Subgraph(linkedHashMap.size(), linkedHashMap2.size());
        int c = 0;
        Object scraper = document.getScraperObject();
        boolean containsCanonical = false;
        MultiProtocolURL canonical = null;
        this.processIcons(doc, allAttr, linkedHashMap, linkedHashMap2, document.getIcons().values());
        if (scraper instanceof ContentScraper) {
            String refresh;
            List<Date> endDates;
            List<Date> startDates;
            String x_robots_tag;
            ContentScraper html = (ContentScraper)scraper;
            List<ImageEntry> images = html.getImages();
            int n = 0;
            int f = 1;
            String[] hs = html.getHeadlines(1);
            int n2 = n | (hs.length > 0 ? f : 0);
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.h1_txt, hs);
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.h1_i, hs.length);
            hs = html.getHeadlines(2);
            int n3 = n2 | (hs.length > 0 ? (f *= 2) : 0);
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.h2_txt, hs);
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.h2_i, hs.length);
            hs = html.getHeadlines(3);
            int n4 = n3 | (hs.length > 0 ? (f *= 2) : 0);
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.h3_txt, hs);
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.h3_i, hs.length);
            hs = html.getHeadlines(4);
            int n5 = n4 | (hs.length > 0 ? (f *= 2) : 0);
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.h4_txt, hs);
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.h4_i, hs.length);
            hs = html.getHeadlines(5);
            int n6 = n5 | (hs.length > 0 ? (f *= 2) : 0);
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.h5_txt, hs);
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.h5_i, hs.length);
            hs = html.getHeadlines(6);
            int n7 = n6 | (hs.length > 0 ? (f *= 2) : 0);
            f *= 2;
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.h6_txt, hs);
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.h6_i, hs.length);
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.htags_i, n7);
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.schema_org_breadcrumb_i, html.breadcrumbCount());
            String og = html.getMetas().get("og:title");
            if (og != null) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.opengraph_title_t, og);
            }
            if ((og = html.getMetas().get("og:type")) != null) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.opengraph_type_s, og);
            }
            if ((og = html.getMetas().get("og:url")) != null) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.opengraph_url_s, og);
            }
            if ((og = html.getMetas().get("og:image")) != null) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.opengraph_image_s, og);
            }
            int b = 0;
            String robots_meta = html.getMetas().get("robots");
            if (robots_meta != null) {
                if ((robots_meta = robots_meta.toLowerCase(Locale.ROOT)).indexOf("all", 0) >= 0) {
                    ++b;
                }
                if (robots_meta.indexOf("index", 0) == 0 || robots_meta.indexOf(" index", 0) >= 0 || robots_meta.indexOf(",index", 0) >= 0) {
                    b += 2;
                }
                if (robots_meta.indexOf("follow", 0) == 0 || robots_meta.indexOf(" follow", 0) >= 0 || robots_meta.indexOf(",follow", 0) >= 0) {
                    b += 4;
                }
                if (robots_meta.indexOf("noindex", 0) >= 0) {
                    b += 8;
                }
                if (robots_meta.indexOf("nofollow", 0) >= 0) {
                    b += 16;
                }
                if (robots_meta.indexOf("noarchive", 0) >= 0) {
                    b += 32;
                }
            }
            String string = x_robots_tag = responseHeader == null ? "" : responseHeader.getXRobotsTag();
            if (!x_robots_tag.isEmpty()) {
                if (x_robots_tag.indexOf("all", 0) >= 0) {
                    b += 256;
                }
                if (x_robots_tag.indexOf("noindex", 0) >= 0 || x_robots_tag.indexOf("none", 0) >= 0) {
                    b += 512;
                }
                if (x_robots_tag.indexOf("nofollow", 0) >= 0 || x_robots_tag.indexOf("none", 0) >= 0) {
                    b += 1024;
                }
                if (x_robots_tag.indexOf("noarchive", 0) >= 0) {
                    b += 2048;
                }
                if (x_robots_tag.indexOf("nosnippet", 0) >= 0) {
                    b += 4096;
                }
                if (x_robots_tag.indexOf("noodp", 0) >= 0) {
                    b += 8192;
                }
                if (x_robots_tag.indexOf("notranslate", 0) >= 0) {
                    b += 16384;
                }
                if (x_robots_tag.indexOf("noimageindex", 0) >= 0) {
                    b += 32768;
                }
                if (x_robots_tag.indexOf("unavailable_after", 0) >= 0) {
                    b += 65536;
                }
            }
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.robots_i, b);
            String generator = html.getMetas().get("generator");
            if (generator != null) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.metagenerator_t, generator);
            }
            String[] bold = html.getBold();
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.boldcount_i, bold.length);
            if (bold.length > 0) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.bold_txt, bold);
                if (allAttr || this.contains(CollectionSchema.bold_val)) {
                    this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.bold_val, html.getBoldCount(bold));
                }
            }
            String[] italic = html.getItalic();
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.italiccount_i, italic.length);
            if (italic.length > 0) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.italic_txt, italic);
                if (allAttr || this.contains(CollectionSchema.italic_val)) {
                    this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.italic_val, html.getItalicCount(italic));
                }
            }
            String[] underline = html.getUnderline();
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.underlinecount_i, underline.length);
            if (underline.length > 0) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.underline_txt, underline);
                if (allAttr || this.contains(CollectionSchema.underline_val)) {
                    this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.underline_val, html.getUnderlineCount(underline));
                }
            }
            String[] li = html.getLi();
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.licount_i, li.length);
            if (li.length > 0) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.li_txt, li);
            }
            String[] dt = html.getDt();
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.dtcount_i, dt.length);
            if (dt.length > 0) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.dt_txt, dt);
            }
            String[] dd = html.getDd();
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.ddcount_i, dd.length);
            if (dd.length > 0) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.dd_txt, dd);
            }
            if ((startDates = html.getStartDates()).size() > 0) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.startDates_dts, startDates.toArray(new Date[startDates.size()]));
            }
            if ((endDates = html.getEndDates()).size() > 0) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.endDates_dts, endDates.toArray(new Date[endDates.size()]));
            }
            List<String> articles = html.getArticles();
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.articlecount_i, articles.size());
            if (articles.size() > 0) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.article_txt, articles);
            }
            this.processImages(doc, allAttr, linkedHashMap, linkedHashMap2, images);
            if (allAttr || this.contains(CollectionSchema.css_tag_sxt)) {
                Map<DigestURL, String> csss = html.getCSS();
                String[] css_tag = new String[csss.size()];
                String[] css_url = new String[csss.size()];
                c = 0;
                for (Map.Entry<DigestURL, String> entry2 : csss.entrySet()) {
                    String cssurl = entry2.getKey().toNormalform(false);
                    linkedHashMap.remove(entry2.getKey());
                    linkedHashMap2.remove(entry2.getKey());
                    css_tag[c] = "<link rel=\"stylesheet\" type=\"text/css\" media=\"" + entry2.getValue() + "\" href=\"" + cssurl + "\" />";
                    css_url[c] = cssurl;
                    ++c;
                }
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.csscount_i, css_tag.length);
                if (css_tag.length > 0) {
                    this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.css_tag_sxt, css_tag);
                }
                if (css_url.length > 0) {
                    this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.css_url_sxt, css_url);
                }
            }
            if (allAttr || this.contains(CollectionSchema.scripts_sxt)) {
                Set<AnchorURL> scriptss = html.getScript();
                String[] scripts = new String[scriptss.size()];
                c = 0;
                for (AnchorURL u : scriptss) {
                    linkedHashMap.remove(u);
                    linkedHashMap2.remove(u);
                    scripts[c++] = u.toNormalform(false);
                }
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.scriptscount_i, scripts.length);
                if (scripts.length > 0) {
                    this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.scripts_sxt, scripts);
                }
            }
            if (allAttr || this.contains(CollectionSchema.frames_sxt)) {
                Set<AnchorURL> framess = html.getFrames();
                String[] frames = new String[framess.size()];
                c = 0;
                for (AnchorURL u : framess) {
                    linkedHashMap.remove(u);
                    linkedHashMap2.remove(u);
                    frames[c++] = u.toNormalform(false);
                }
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.framesscount_i, frames.length);
                if (frames.length > 0) {
                    this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.frames_sxt, frames);
                }
            }
            if (allAttr || this.contains(CollectionSchema.iframes_sxt)) {
                Set<AnchorURL> iframess = html.getIFrames();
                String[] iframes = new String[iframess.size()];
                c = 0;
                for (AnchorURL u : iframess) {
                    linkedHashMap.remove(u);
                    linkedHashMap2.remove(u);
                    iframes[c++] = u.toNormalform(false);
                }
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.iframesscount_i, iframes.length);
                if (iframes.length > 0) {
                    this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.iframes_sxt, iframes);
                }
            }
            if (allAttr || this.contains(CollectionSchema.canonical_s)) {
                int p;
                String link;
                canonical = html.getCanonical();
                if (canonical == null && responseHeader != null && (link = responseHeader.get("Link", null)) != null && (p = link.indexOf("rel=\"canonical\"")) > 0) {
                    link = link.substring(0, p).trim();
                    p = link.indexOf(60);
                    int q = link.lastIndexOf(62);
                    if (p >= 0 && q > 0) {
                        link = link.substring(p + 1, q);
                        try {
                            canonical = new DigestURL(link);
                        }
                        catch (MalformedURLException u) {
                            // empty catch block
                        }
                    }
                }
                if (canonical != null) {
                    containsCanonical = true;
                    linkedHashMap.remove(canonical);
                    linkedHashMap2.remove(canonical);
                    this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.canonical_s, canonical.toNormalform(false));
                    if (this.contains(CollectionSchema.canonical_equal_sku_b)) {
                        this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.canonical_equal_sku_b, canonical.equals(digestURL));
                    }
                }
            }
            if ((allAttr || this.contains(CollectionSchema.refresh_s)) && (refresh = html.getRefreshPath()) != null && refresh.length() > 0) {
                try {
                    MultiProtocolURL refreshURL;
                    MultiProtocolURL multiProtocolURL = refreshURL = refresh.startsWith("http") ? new MultiProtocolURL(html.getRefreshPath()) : new MultiProtocolURL(digestURL, html.getRefreshPath());
                    if (refreshURL != null) {
                        linkedHashMap.remove(refreshURL);
                        linkedHashMap2.remove(refreshURL);
                        this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.refresh_s, refreshURL.toNormalform(false));
                    }
                }
                catch (MalformedURLException e) {
                    this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.refresh_s, refresh);
                }
            }
            if (allAttr || this.contains(CollectionSchema.flash_b)) {
                DigestURL[] flashURLs = html.getFlash();
                for (DigestURL u : flashURLs) {
                    linkedHashMap.remove(u);
                    linkedHashMap2.remove(u);
                }
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.flash_b, flashURLs.length > 0);
            }
            for (String model : html.getEvaluationModelNames()) {
                String[] scorenames;
                if (!allAttr && !this.contains("ext_" + model + "_txt") || (scorenames = html.getEvaluationModelScoreNames(model)).length <= 0) continue;
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.valueOf("ext_" + model + "_txt"), scorenames);
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.valueOf("ext_" + model + "_val"), html.getEvaluationModelScoreCounts(model, scorenames));
            }
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.responsetime_i, responseHeader == null ? 0 : Integer.parseInt(responseHeader.get("ResponseTimeMillis", "0")));
            if (allAttr || this.contains(CollectionSchema.hreflang_url_sxt) && this.contains(CollectionSchema.hreflang_cc_sxt)) {
                String[] ccs = new String[html.getHreflang().size()];
                String[] urls2 = new String[html.getHreflang().size()];
                c = 0;
                for (Map.Entry<String, DigestURL> e : html.getHreflang().entrySet()) {
                    ccs[c] = e.getKey();
                    urls2[c] = e.getValue().toNormalform(true);
                    ++c;
                }
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.hreflang_cc_sxt, ccs);
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.hreflang_url_sxt, urls2);
            }
            if (allAttr || this.contains(CollectionSchema.navigation_url_sxt) && this.contains(CollectionSchema.navigation_type_sxt)) {
                String[] navs = new String[html.getNavigation().size()];
                String[] urls3 = new String[html.getNavigation().size()];
                c = 0;
                for (Map.Entry<String, DigestURL> e : html.getNavigation().entrySet()) {
                    navs[c] = e.getKey();
                    urls3[c] = e.getValue().toNormalform(true);
                    ++c;
                }
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.navigation_type_sxt, navs);
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.navigation_url_sxt, urls3);
            }
            if (allAttr || this.contains(CollectionSchema.publisher_url_s) && html.getPublisherLink() != null) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.publisher_url_s, html.getPublisherLink().toNormalform(true));
            }
        }
        if (scraper instanceof DCEntry) {
            DCEntry dcentry = (DCEntry)((Object)scraper);
            for (Map.Entry entry3 : dcentry.getMap().entrySet()) {
                String[] values;
                CollectionSchema solr_field;
                String tag = (String)entry3.getKey();
                if (!tag.startsWith("md:") || tag.length() < 4 || (solr_field = CollectionSchema.valueOf(tag.substring(3))) == null || (values = (String[])entry3.getValue()) == null || values.length == 0 || !allAttr && !this.contains(solr_field)) continue;
                this.add((SolrInputDocument)doc, (SchemaDeclaration)solr_field, values);
            }
        }
        String content = document.getTextString();
        if (document.getContentDomain() == Classification.ContentDomain.IMAGE) {
            if (allAttr || this.contains(CollectionSchema.images_height_val) || this.contains(CollectionSchema.images_width_val) || this.contains(CollectionSchema.images_pixel_val)) {
                Iterator<ImageEntry> imgit = document.getImages().values().iterator();
                ArrayList<Integer> arrayList = new ArrayList<Integer>();
                ArrayList<Integer> widths = new ArrayList<Integer>();
                ArrayList<Integer> pixels = new ArrayList<Integer>();
                while (imgit.hasNext()) {
                    ImageEntry img = imgit.next();
                    int imgpixels = img.height() < 0 || img.width() < 0 ? -1 : img.height() * img.width();
                    if (imgpixels <= 0) continue;
                    arrayList.add(img.height());
                    widths.add(img.width());
                    pixels.add(imgpixels);
                }
                if (arrayList.size() > 0) {
                    this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.images_height_val, arrayList);
                    this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.images_width_val, widths);
                    this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.images_pixel_val, pixels);
                }
            }
            if (allAttr || this.contains(CollectionSchema.images_text_t)) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.images_text_t, content);
                content = digestURL.toTokens();
            }
        }
        if (allAttr || this.contains(CollectionSchema.text_t)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.text_t, content);
        }
        if (allAttr || this.contains(CollectionSchema.wordcount_i)) {
            if (content.length() == 0) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.wordcount_i, 0);
            } else {
                void var29_74;
                int contentwc = 1;
                int n = content.length() - 1;
                while (var29_74 >= 0) {
                    if (content.charAt((int)var29_74) == ' ') {
                        ++contentwc;
                    }
                    --var29_74;
                }
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.wordcount_i, contentwc);
            }
        }
        if (allAttr || this.contains(CollectionSchema.linkscount_i)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.linkscount_i, linkedHashMap.size() + linkedHashMap2.size());
        }
        if (allAttr || this.contains(CollectionSchema.linksnofollowcount_i)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.linksnofollowcount_i, document.inboundLinkNofollowCount() + document.outboundLinkNofollowCount());
        }
        if (allAttr || this.contains(CollectionSchema.inboundlinkscount_i)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.inboundlinkscount_i, linkedHashMap.size());
        }
        if (allAttr || this.contains(CollectionSchema.inboundlinksnofollowcount_i)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.inboundlinksnofollowcount_i, document.inboundLinkNofollowCount());
        }
        if (allAttr || this.contains(CollectionSchema.outboundlinkscount_i)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.outboundlinkscount_i, linkedHashMap2.size());
        }
        if (allAttr || this.contains(CollectionSchema.outboundlinksnofollowcount_i)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.outboundlinksnofollowcount_i, document.outboundLinkNofollowCount());
        }
        Boolean bl = canonical_equal_sku = canonical == null ? null : Boolean.valueOf(canonical.toNormalform(true).equals(url));
        if (webgraph != null && (!containsCanonical || canonical_equal_sku != null && canonical_equal_sku.booleanValue())) {
            List<SolrInputDocument> list2 = webgraph.getEdges(subgraph, digestURL, responseHeader, collections, crawldepth, processTypes, document.getHyperlinks().keySet(), sourceName);
            doc.webgraphDocuments.addAll(list2);
        } else if (allAttr || this.contains(CollectionSchema.inboundlinks_protocol_sxt) || this.contains(CollectionSchema.inboundlinks_urlstub_sxt) || this.contains(CollectionSchema.inboundlinks_anchortext_txt) || this.contains(CollectionSchema.outboundlinks_protocol_sxt) || this.contains(CollectionSchema.outboundlinks_urlstub_sxt) || this.contains(CollectionSchema.outboundlinks_anchortext_txt)) {
            for (AnchorURL target_url : document.getHyperlinks().keySet()) {
                CollectionConfiguration.enrichSubgraph(subgraph, digestURL, target_url);
            }
        }
        if (allAttr || this.contains(CollectionSchema.inboundlinks_protocol_sxt)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.inboundlinks_protocol_sxt, CollectionConfiguration.protocolList2indexedList(subgraph.urlProtocols[0]));
        }
        if (allAttr || this.contains(CollectionSchema.inboundlinks_urlstub_sxt)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.inboundlinks_urlstub_sxt, subgraph.urlStubs[0]);
        }
        if (allAttr || this.contains(CollectionSchema.inboundlinks_anchortext_txt)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.inboundlinks_anchortext_txt, subgraph.urlAnchorTexts[0]);
        }
        if (allAttr || this.contains(CollectionSchema.outboundlinks_protocol_sxt)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.outboundlinks_protocol_sxt, CollectionConfiguration.protocolList2indexedList(subgraph.urlProtocols[1]));
        }
        if (allAttr || this.contains(CollectionSchema.outboundlinks_urlstub_sxt)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.outboundlinks_urlstub_sxt, subgraph.urlStubs[1]);
        }
        if (allAttr || this.contains(CollectionSchema.outboundlinks_anchortext_txt)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.outboundlinks_anchortext_txt, subgraph.urlAnchorTexts[1]);
        }
        if (allAttr || this.contains(CollectionSchema.charset_s)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.charset_s, document.getCharset());
        }
        if (document.lat() != 0.0 && document.lon() != 0.0 && (allAttr || this.contains(CollectionSchema.coordinate_p))) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.coordinate_p, Double.toString(document.lat()) + "," + Double.toString(document.lon()));
        }
        if (allAttr || this.contains(CollectionSchema.httpstatus_i)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.httpstatus_i, responseHeader == null ? 200 : responseHeader.getStatusCode());
        }
        Date date = new Date();
        Date date6 = modDate = responseHeader == null ? new Date() : responseHeader.lastModified();
        if (modDate.getTime() > date.getTime()) {
            modDate = date;
        }
        int size = (int)Math.max(document.dc_source().length(), responseHeader == null ? 0L : (long)responseHeader.getContentLength());
        if (allAttr || this.contains(CollectionSchema.load_date_dt)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.load_date_dt, date);
        }
        if (allAttr || this.contains(CollectionSchema.fresh_date_dt)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.fresh_date_dt, new Date(date.getTime() + Math.max(0L, date.getTime() - modDate.getTime()) / 2L));
        }
        if ((allAttr || this.contains(CollectionSchema.referrer_id_s)) && referrerURL != null) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.referrer_id_s, ASCII.String(referrerURL.hash()));
        }
        if (allAttr || this.contains(CollectionSchema.publisher_t)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.publisher_t, document.dc_publisher());
        }
        if ((allAttr || this.contains(CollectionSchema.language_s)) && language != null) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.language_s, language);
        }
        if (allAttr || this.contains(CollectionSchema.size_i)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.size_i, size);
        }
        if (allAttr || this.contains(CollectionSchema.audiolinkscount_i)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.audiolinkscount_i, document.getAudiolinks().size());
        }
        if (allAttr || this.contains(CollectionSchema.videolinkscount_i)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.videolinkscount_i, document.getVideolinks().size());
        }
        if (allAttr || this.contains(CollectionSchema.applinkscount_i)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.applinkscount_i, document.getApplinks().size());
        }
        if ((allAttr || this.contains(CollectionSchema.process_sxt)) && processTypes.size() > 0) {
            ArrayList<String> p = new ArrayList<String>();
            for (ProcessType t : processTypes) {
                p.add(t.name());
            }
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.process_sxt, p);
            if (allAttr || this.contains(CollectionSchema.harvestkey_s)) {
                this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.harvestkey_s, sourceName);
            }
        }
        this.enrich(doc, condenser.synonyms(), document.getGenericFacets());
        return doc;
    }

    /**
     * Convenience overload that indexes icons without any link maps.
     * Delegates to the five-argument {@code processIcons}, handing over {@code null}
     * for both the inbound and the outbound link map.
     *
     * @param doc the Solr document that receives the icon fields
     * @param allAttr forwarded unchanged to the five-argument overload
     * @param icons the icon entries to process, forwarded unchanged
     */
    private void processIcons(SolrInputDocument doc, boolean allAttr, Collection<IconEntry> icons) {
        final LinkedHashMap<DigestURL, String> noInboundLinks = null;
        final LinkedHashMap<DigestURL, String> noOutboundLinks = null;
        this.processIcons(doc, allAttr, noInboundLinks, noOutboundLinks, icons);
    }

    /**
     * Indexes the icons of a document into the icon fields of the Solr document
     * (protocol, URL stub, rel and sizes).
     * <p>
     * Each icon URL is additionally removed from the supplied link maps, when those
     * maps are given. If {@code icons} is {@code null} the method does nothing.
     *
     * @param doc the Solr document that receives the icon fields
     * @param allAttr if true every icon field is written; otherwise a field is only
     *            written when {@code this.contains(...)} reports it
     * @param inboundLinks link map from which icon URLs are removed; may be null
     * @param outboundLinks link map from which icon URLs are removed; may be null
     * @param icons the icon entries to process; may be null
     */
    private void processIcons(SolrInputDocument doc, boolean allAttr, LinkedHashMap<DigestURL, String> inboundLinks, LinkedHashMap<DigestURL, String> outboundLinks, Collection<IconEntry> icons) {
        if (icons == null) {
            return;
        }
        final int iconCount = icons.size();
        final ArrayList<String> schemes = new ArrayList<String>(iconCount);
        final String[] sizeValues = new String[iconCount];
        final String[] urlStubs = new String[iconCount];
        final String[] relValues = new String[iconCount];

        int slot = 0;
        final Iterator<IconEntry> cursor = icons.iterator();
        while (cursor.hasNext()) {
            final IconEntry icon = cursor.next();
            final DigestURL iconUrl = icon.getUrl();
            // an icon is not counted as a regular link, so take it out of both maps
            if (inboundLinks != null) {
                inboundLinks.remove(iconUrl);
            }
            if (outboundLinks != null) {
                outboundLinks.remove(iconUrl);
            }
            final String scheme = iconUrl.getProtocol();
            schemes.add(scheme);
            sizeValues[slot] = icon.sizesToString();
            // skip the protocol and the three characters after it (presumably "://")
            urlStubs[slot] = iconUrl.toString().substring(scheme.length() + 3);
            relValues[slot] = icon.relToString();
            slot++;
        }

        if (allAttr || this.contains(CollectionSchema.icons_protocol_sxt)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.icons_protocol_sxt, CollectionConfiguration.protocolList2indexedList(schemes));
        }
        if (allAttr || this.contains(CollectionSchema.icons_urlstub_sxt)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.icons_urlstub_sxt, urlStubs);
        }
        if (allAttr || this.contains(CollectionSchema.icons_rel_sxt)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.icons_rel_sxt, relValues);
        }
        if (allAttr || this.contains(CollectionSchema.icons_sizes_sxt)) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.icons_sizes_sxt, sizeValues);
        }
    }

    /**
     * Indexes the images of a document into the image fields of the Solr document:
     * count, protocol, URL stub, alt text, height, width, pixel count, the number of
     * images carrying a non-empty alt text, and a combined image text.
     * <p>
     * Each image URL is removed from both link maps. The combined image text is built
     * from the distinct URL tokens and alt-text elements in first-seen order.
     *
     * @param doc the Solr document that receives the image fields
     * @param allAttr if true every image field is written; otherwise a field is only
     *            written when {@code this.contains(...)} reports it
     * @param inboundLinks link map from which image URLs are removed; must not be null
     * @param outboundLinks link map from which image URLs are removed; must not be null
     * @param images the image entries to process; must not be null
     */
    private void processImages(SolrVector doc, boolean allAttr, LinkedHashMap<DigestURL, String> inboundLinks, LinkedHashMap<DigestURL, String> outboundLinks, List<ImageEntry> images) {
        final int imageCount = images.size();
        final ArrayList<String> schemes = new ArrayList<String>(imageCount);
        final Integer[] heights = new Integer[imageCount];
        final Integer[] widths = new Integer[imageCount];
        final Integer[] pixelCounts = new Integer[imageCount];
        final String[] urlStubs = new String[imageCount];
        final String[] altTexts = new String[imageCount];
        // insertion-ordered set: keeps each text fragment once, in first-seen order
        final LinkedHashSet<String> textFragments = new LinkedHashSet<String>();
        int altCount = 0;
        int slot = 0;

        for (ImageEntry image : images) {
            final DigestURL imageUrl = image.url();
            // an image is not counted as a regular link, so take it out of both maps
            inboundLinks.remove(imageUrl);
            outboundLinks.remove(imageUrl);

            final int height = image.height();
            final int width = image.width();
            heights[slot] = height;
            widths[slot] = width;
            // -1 marks an image where height or width is negative
            if (height < 0 || width < 0) {
                pixelCounts[slot] = -1;
            } else {
                pixelCounts[slot] = height * width;
            }

            final String scheme = imageUrl.getProtocol();
            schemes.add(scheme);
            // skip the protocol and the three characters after it (presumably "://")
            urlStubs[slot] = imageUrl.toString().substring(scheme.length() + 3);

            final String alt = image.alt();
            altTexts[slot] = alt;

            final String[] urlTokens = CommonPattern.SPACE.split(imageUrl.toTokens());
            for (int t = 0; t < urlTokens.length; t++) {
                textFragments.add(urlTokens[t]);
            }

            if (alt != null && alt.length() > 0) {
                for (SentenceReader reader = new SentenceReader(alt); reader.hasNext(); ) {
                    textFragments.add(reader.next().toString());
                }
                altCount++;
            }
            slot++;
        }

        // join all fragments with single blanks; the trailing blank is trimmed on write
        final StringBuilder combinedText = new StringBuilder(textFragments.size() * 6 + 1);
        for (String fragment : textFragments) {
            combinedText.append(fragment.trim());
            combinedText.append(' ');
        }

        if (allAttr || this.contains(CollectionSchema.imagescount_i)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.imagescount_i, images.size());
        }
        if (allAttr || this.contains(CollectionSchema.images_protocol_sxt)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.images_protocol_sxt, CollectionConfiguration.protocolList2indexedList(schemes));
        }
        if (allAttr || this.contains(CollectionSchema.images_urlstub_sxt)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.images_urlstub_sxt, urlStubs);
        }
        if (allAttr || this.contains(CollectionSchema.images_alt_sxt)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.images_alt_sxt, altTexts);
        }
        if (allAttr || this.contains(CollectionSchema.images_height_val)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.images_height_val, heights);
        }
        if (allAttr || this.contains(CollectionSchema.images_width_val)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.images_width_val, widths);
        }
        if (allAttr || this.contains(CollectionSchema.images_pixel_val)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.images_pixel_val, pixelCounts);
        }
        if (allAttr || this.contains(CollectionSchema.images_withalt_i)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.images_withalt_i, altCount);
        }
        if (allAttr || this.contains(CollectionSchema.images_text_t)) {
            this.add((SolrInputDocument)doc, (SchemaDeclaration)CollectionSchema.images_text_t, combinedText.toString().trim());
        }
    }

    /**
     * Replaces the vocabulary and synonym fields of a document with freshly computed values.
     *
     * <p>All existing {@code vocabulary_*} fields and the {@code vocabularies_sxt} and
     * {@code synonyms_sxt} fields are removed first. If the schema is empty or contains
     * {@code vocabularies_sxt}, the document text ({@code text_t}) is run through
     * {@link ProbabilisticClassifier#getClassification} and every classification result is
     * stored into {@code genericFacets} (the map passed by the caller is modified). For each
     * facet with at least one value, four fields are written:
     * {@code vocabulary_<name>_sxt} (the values), {@code vocabulary_<name>_i} (the number of
     * values), {@code vocabulary_<name>_log_i} (floor of log2 of that number) and
     * {@code vocabulary_<name>_log_val} (the integers 0..log2).
     *
     * @param doc the document to modify in place
     * @param synonyms synonyms to store in {@code synonyms_sxt}; nothing is stored if empty
     * @param genericFacets facet name to facet values; classifier results are added to this map
     */
    public void enrich(SolrInputDocument doc, List<String> synonyms, Map<String, Set<String>> genericFacets) {
        this.remove(doc, CollectionSchema.vocabularies_sxt);

        // Collect the names of stale vocabulary fields before removing any of them. Removing
        // fields while the for-each iterator over the document is still active risks a
        // ConcurrentModificationException when remove() deletes from the document's field map.
        List<String> staleVocabularyFields = new ArrayList<String>();
        for (SolrInputField sif : doc) {
            String fieldName = sif.getName();
            if (fieldName.startsWith("vocabulary_")) {
                staleVocabularyFields.add(fieldName);
            }
        }
        for (String fieldName : staleVocabularyFields) {
            this.remove(doc, fieldName);
        }

        if (this.isEmpty() || this.contains(CollectionSchema.vocabularies_sxt)) {
            String text = (String)doc.getFieldValue(CollectionSchema.text_t.getSolrFieldName());
            Map<String, String> classification = ProbabilisticClassifier.getClassification(text);
            for (Map.Entry<String, String> entry : classification.entrySet()) {
                // each classifier context becomes a facet with exactly one value
                Set<String> facetAttributes = new HashSet<String>();
                facetAttributes.add(entry.getValue());
                genericFacets.put(entry.getKey(), facetAttributes);
            }

            List<String> vocabularies = new ArrayList<String>();
            for (Map.Entry<String, Set<String>> facet : genericFacets.entrySet()) {
                String facetName = facet.getKey();
                Set<String> facetValues = facet.getValue();
                int count = facetValues.size();
                if (count == 0) {
                    continue;
                }
                // exact floor(log2(count)) for count >= 1; avoids floating-point rounding
                int logcount = 31 - Integer.numberOfLeadingZeros(count);
                Integer[] counts = new Integer[logcount + 1];
                for (int i = 0; i <= logcount; ++i) {
                    counts[i] = i;
                }
                String fieldPrefix = "vocabulary_" + facetName;
                doc.setField(fieldPrefix + "_sxt", (Object)facetValues.toArray(new String[count]));
                doc.setField(fieldPrefix + "_i", (Object)Integer.valueOf(count));
                doc.setField(fieldPrefix + "_log_i", (Object)Integer.valueOf(logcount));
                doc.setField(fieldPrefix + "_log_val", (Object)counts);
                vocabularies.add(facetName);
            }
            if (!vocabularies.isEmpty()) {
                this.add(doc, (SchemaDeclaration)CollectionSchema.vocabularies_sxt, vocabularies);
            }
        }

        this.remove(doc, CollectionSchema.synonyms_sxt);
        if ((this.isEmpty() || this.contains(CollectionSchema.synonyms_sxt)) && !synonyms.isEmpty()) {
            this.add(doc, (SchemaDeclaration)CollectionSchema.synonyms_sxt, synonyms);
        }
    }

    /**
     * Builds the Solr query that selects all collection documents which still carry a
     * {@code process_sxt} entry.
     *
     * @param segment the segment whose default (collection) configuration is consulted
     * @param harvestkey if non-null and the configuration contains {@code harvestkey_s},
     *        the query is additionally restricted to this harvest key
     * @return the query string
     */
    public static final String collection1query(Segment segment, String harvestkey) {
        final StringBuilder query = new StringBuilder();
        if (harvestkey != null && segment.fulltext().getDefaultConfiguration().contains(CollectionSchema.harvestkey_s)) {
            query.append(CollectionSchema.harvestkey_s.getSolrFieldName());
            query.append(":\"").append(harvestkey).append("\" AND ");
        }
        query.append(CollectionSchema.process_sxt.getSolrFieldName()).append(":[* TO *]");
        return query.toString();
    }

    /**
     * Builds the Solr query that selects all webgraph documents which still carry a
     * {@code process_sxt} entry.
     *
     * @param segment the segment whose webgraph configuration is consulted
     * @param harvestkey if non-null and the webgraph configuration contains
     *        {@code harvestkey_s}, the query is additionally restricted to this harvest key
     * @return the query string
     */
    public static final String webgraphquery(Segment segment, String harvestkey) {
        final StringBuilder query = new StringBuilder();
        if (harvestkey != null && segment.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.harvestkey_s)) {
            query.append(WebgraphSchema.harvestkey_s.getSolrFieldName());
            query.append(":\"").append(harvestkey).append("\" AND ");
        }
        query.append(WebgraphSchema.process_sxt.getSolrFieldName()).append(":[* TO *]");
        return query.toString();
    }

    /**
     * Runs the postprocessing pass over all documents that carry a {@code process_sxt} entry.
     *
     * <p>Steps: commit pending changes, collect document counts for status reporting,
     * optionally build the citation-rank map (only if the webgraph or collection schema
     * contains the fields needed to store it), write the ranks to the webgraph, then
     * postprocess the collection documents. Progress is published through the
     * {@code postprocessing*} status fields; they are reset when the method leaves, also
     * when an unchecked exception escapes.
     *
     * @param segment the index segment to work on
     * @param rrCache reference report cache handed on to the ranking and document steps
     * @param harvestkey restricts processing to one harvest key; may be {@code null}
     * @param byPartialUpdate if true, documents are rewritten with partial updates
     * @return the number of documents that were processed (webgraph plus collection);
     *         0 if the schema has no {@code process_sxt} field or neither a citation
     *         index nor a webgraph is available
     */
    public int postprocessing(Segment segment, Segment.ReferenceReportCache rrCache, String harvestkey, boolean byPartialUpdate) {
        if (!this.contains(CollectionSchema.process_sxt)) {
            return 0;
        }
        if (!segment.connectedCitation() && !segment.fulltext().useWebgraph()) {
            return 0;
        }

        // make everything written so far visible to the queries below
        SolrConnector collectionConnector = segment.fulltext().getDefaultConnector();
        collectionConnector.commit(false);
        if (segment.fulltext().useWebgraph()) {
            segment.fulltext().getWebgraphConnector().commit(false);
        }
        CollectionConfiguration collection = segment.fulltext().getDefaultConfiguration();
        WebgraphConfiguration webgraph = segment.fulltext().getWebgraphConfiguration();
        String collection1query = CollectionConfiguration.collection1query(segment, harvestkey);
        String webgraphquery = CollectionConfiguration.webgraphquery(segment, harvestkey);

        AtomicInteger allcount = new AtomicInteger(0);
        postprocessingRunning = true;
        postprocessingStartTime = System.currentTimeMillis();
        try {
            postprocessingActivity = "collecting counts";
            ConcurrentLog.info("CollectionConfiguration", postprocessingActivity);
            try {
                postprocessingCollection1Count = (int)collectionConnector.getCountByQuery("{!cache=false}" + collection1query);
                postprocessingWebgraphCount = segment.fulltext().useWebgraph() ? (int)segment.fulltext().getWebgraphConnector().getCountByQuery("{!cache=false}" + webgraphquery) : 0;
            }
            catch (IOException e) {
                // counts are for status display only; -1 marks them as unknown
                ConcurrentLog.warn("CollectionConfiguration", e.getMessage(), e);
                postprocessingCollection1Count = -1;
                postprocessingWebgraphCount = -1;
            }

            postprocessingActivity = "create ranking map";
            ConcurrentLog.info("CollectionConfiguration", postprocessingActivity);
            // citation ranks are only computed if there is a field to store them in
            boolean webgraphStoresCR = segment.fulltext().useWebgraph()
                    && (webgraph.contains(WebgraphSchema.source_id_s) && webgraph.contains(WebgraphSchema.source_cr_host_norm_i)
                        || webgraph.contains(WebgraphSchema.target_id_s) && webgraph.contains(WebgraphSchema.target_cr_host_norm_i));
            boolean shallComputeCR = webgraphStoresCR
                    || collection.contains(CollectionSchema.cr_host_count_i) && collection.contains(CollectionSchema.cr_host_chance_d) && collection.contains(CollectionSchema.cr_host_norm_i);

            Map<String, CRV> rankings;
            if (shallComputeCR) {
                postprocessingActivity = "collecting host facets for collection";
                ConcurrentLog.info("CollectionConfiguration", postprocessingActivity);
                ReversibleScoreMap<String> collection1hosts = null;
                try {
                    LinkedHashMap<String, ReversibleScoreMap<String>> hostfacet = collectionConnector.getFacets("{!cache=false}" + collection1query, 10000000, CollectionSchema.host_s.getSolrFieldName());
                    if (hostfacet != null) {
                        collection1hosts = hostfacet.get(CollectionSchema.host_s.getSolrFieldName());
                    }
                }
                catch (IOException e2) {
                    ConcurrentLog.logException(e2);
                }
                if (collection1hosts == null) {
                    // query failed or returned no entry for the host field: rank nothing
                    collection1hosts = new ClusteredScoreMap<String>(true);
                }
                rankings = this.createRankingMap(segment, rrCache, collectionConnector, collection1hosts);
            } else {
                rankings = new ConcurrentHashMap<String, CRV>();
            }

            if (segment.fulltext().useWebgraph() && shallComputeCR) {
                this.postprocessWebgraph(segment, webgraph, webgraphquery, rankings, allcount);
            }
            this.postprocessDocuments(segment, rrCache, harvestkey, byPartialUpdate, collectionConnector, collection, collection1query, rankings, allcount);
        }
        finally {
            // always reset the status fields so a failure does not leave the process marked as running
            postprocessingCollection1Count = 0;
            postprocessingWebgraphCount = 0;
            postprocessingActivity = "postprocessing terminated";
            ConcurrentLog.info("CollectionConfiguration", postprocessingActivity);
            postprocessingRunning = false;
        }
        return allcount.get();
    }

    /**
     * Postprocesses all collection documents matched by {@code collection1query}.
     *
     * <p>The matching documents are partitioned by their {@code responsetime_i} value (plus
     * one partition for documents without that field) and fetched concurrently. A pool of
     * worker threads takes the documents from the queue and, depending on the document's
     * {@code process_sxt} tags, writes citation-rank values, computes the uniqueness flags
     * and, if all four reference-count fields are in the schema, updates the reference
     * counts. The {@code process_sxt} and {@code harvestkey_s} fields are dropped from the
     * rewritten document. Documents whose processing throws are collected and deleted from
     * the index afterwards. A hard commit is issued at the end.
     *
     * @param segment the index segment
     * @param rrCache reference report cache used for the reference counting
     * @param harvestkey harvest key, used in log messages only
     * @param byPartialUpdate if true only the changed fields are sent as a partial update,
     *        otherwise the whole document is re-added
     * @param collectionConnector connector of the collection core
     * @param collection the collection schema configuration
     * @param collection1query query selecting the documents to process
     * @param rankings citation-rank values by url hash; entries are removed as they are applied
     * @param allcount counter that is incremented for every successfully processed document
     */
    private void postprocessDocuments(final Segment segment, final Segment.ReferenceReportCache rrCache, String harvestkey, final boolean byPartialUpdate, final SolrConnector collectionConnector, final CollectionConfiguration collection, String collection1query, final Map<String, CRV> rankings, final AtomicInteger allcount) {
        // host id -> number of documents with that host id; shared by all worker threads,
        // therefore a concurrent map (a plain HashMap is not safe for concurrent put/get)
        final Map<String, Long> hostExtentCache = new ConcurrentHashMap<String, Long>();
        final Set<String> uniqueURLs = ConcurrentHashMap.newKeySet();
        final HashSet<String> localOmitFields = new HashSet<String>();
        localOmitFields.add(CollectionSchema.process_sxt.getSolrFieldName());
        localOmitFields.add(CollectionSchema.harvestkey_s.getSolrFieldName());
        final Set<String> failids = ConcurrentHashMap.newKeySet();
        final AtomicInteger countcheck = new AtomicInteger(0);
        final AtomicInteger proccount = new AtomicInteger();
        final AtomicInteger proccount_referencechange = new AtomicInteger();
        final AtomicInteger proccount_citationchange = new AtomicInteger();
        try {
            final long count = collectionConnector.getCountByQuery("{!cache=false}" + collection1query);
            final String partitioningKey = CollectionSchema.responsetime_i.getSolrFieldName();
            postprocessingActivity = "collecting " + count + " documents from the collection for harvestkey " + harvestkey + ", partitioned by " + partitioningKey;
            if (count > 0L) {
                // split the work into one query per distinct value of the partitioning field
                LinkedHashMap<String, ReversibleScoreMap<String>> partitioningFacet = collectionConnector.getFacets("{!cache=false}" + collection1query, 100000, partitioningKey);
                ReversibleScoreMap<String> partitioning = partitioningFacet == null ? null : partitioningFacet.get(partitioningKey);
                if (partitioning == null) {
                    partitioning = new ClusteredScoreMap<String>(true);
                }
                // documents without the partitioning field get their own partition, keyed by ""
                long emptyCount = collectionConnector.getCountByQuery("{!cache=false}-" + partitioningKey + ":[* TO *] AND (" + collection1query + ")");
                if (emptyCount > 0L) {
                    partitioning.inc("", (int)emptyCount);
                }
                final long start = System.currentTimeMillis();
                List<String> querystrings = new ArrayList<String>(partitioning.size());
                for (String partitioningValue : partitioning) {
                    String restriction = partitioningValue.length() == 0
                            ? "-" + partitioningKey + ":[* TO *] AND (" + collection1query + ")"
                            : partitioningKey + ":" + partitioningValue + " AND (" + collection1query + ")";
                    querystrings.add("{!cache=false}" + restriction);
                }

                // one worker per 100 MB of available memory, at most one per processor, at least one
                final int concurrency = Math.max(1, Math.min((int)(MemoryControl.available() / (100L * 1024L * 1024L)), Runtime.getRuntime().availableProcessors()));
                final boolean reference_computation = this.contains(CollectionSchema.references_i) && this.contains(CollectionSchema.references_internal_i) && this.contains(CollectionSchema.references_external_i) && this.contains(CollectionSchema.references_exthosts_i);
                ConcurrentLog.info("CollectionConfiguration", postprocessingActivity);

                // a sort order is only requested when one of the uniqueness fields is in the schema
                final String sort = this.contains(CollectionSchema.http_unique_b) || this.contains(CollectionSchema.www_unique_b)
                        ? CollectionSchema.host_subdomain_s.getSolrFieldName() + " asc," + CollectionSchema.url_protocol_s.getSolrFieldName() + " asc"
                        : null;

                // for partial updates only the fields read by the postprocessing steps are fetched
                final String[] fields;
                if (byPartialUpdate) {
                    fields = new String[] {
                        CollectionSchema.id.getSolrFieldName(),
                        CollectionSchema.sku.getSolrFieldName(),
                        CollectionSchema.harvestkey_s.getSolrFieldName(),
                        CollectionSchema.process_sxt.getSolrFieldName(),
                        CollectionSchema.canonical_equal_sku_b.getSolrFieldName(),
                        CollectionSchema.canonical_s.getSolrFieldName(),
                        CollectionSchema.exact_signature_l.getSolrFieldName(),
                        CollectionSchema.fuzzy_signature_l.getSolrFieldName(),
                        CollectionSchema.title_exact_signature_l.getSolrFieldName(),
                        CollectionSchema.description_exact_signature_l.getSolrFieldName(),
                        CollectionSchema.host_id_s.getSolrFieldName(),
                        CollectionSchema.host_s.getSolrFieldName(),
                        CollectionSchema.host_subdomain_s.getSolrFieldName(),
                        CollectionSchema.url_chars_i.getSolrFieldName(),
                        CollectionSchema.url_protocol_s.getSolrFieldName(),
                        CollectionSchema.httpstatus_i.getSolrFieldName(),
                        CollectionSchema.inboundlinkscount_i.getSolrFieldName(),
                        CollectionSchema.robots_i.getSolrFieldName()
                    };
                } else {
                    fields = this.allFields();
                }

                final BlockingQueue<SolrDocument> docs = collectionConnector.concurrentDocumentsByQueries(querystrings, sort, 0, 100000000, Long.MAX_VALUE, concurrency + 1, concurrency, true, fields);
                final Thread[] rewriteThread = new Thread[concurrency];
                for (int t = 0; t < concurrency; ++t) {
                    rewriteThread[t] = new Thread("CollectionConfiguration.postprocessing.rewriteThread-" + t){

                        @Override
                        public void run() {
                            try {
                                SolrDocument doc;
                                // each worker stops when it takes a poison document from the queue
                                while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
                                    Collection<Object> proctags = doc.getFieldValues(CollectionSchema.process_sxt.getSolrFieldName());
                                    String urlString = (String)doc.getFieldValue(CollectionSchema.sku.getSolrFieldName());
                                    String idString = (String)doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
                                    if (proctags == null || proctags.size() == 0) {
                                        ConcurrentLog.warn("CollectionConfiguration", "no process_sxt entry for url " + urlString + ", id=" + idString);
                                        continue;
                                    }
                                    try {
                                        DigestURL url = new DigestURL(urlString, ASCII.getBytes(idString));
                                        byte[] id = url.hash();
                                        SolrInputDocument sid = byPartialUpdate ? new SolrInputDocument() : collection.toSolrInputDocument(doc, localOmitFields);
                                        sid.setField(CollectionSchema.id.getSolrFieldName(), (Object)idString);

                                        for (Object tag : proctags) {
                                            try {
                                                ProcessType tagtype = ProcessType.valueOf((String)tag);
                                                if (tagtype == ProcessType.CITATION && collection.contains(CollectionSchema.cr_host_count_i) && collection.contains(CollectionSchema.cr_host_chance_d) && collection.contains(CollectionSchema.cr_host_norm_i)) {
                                                    CRV crv = rankings.remove(ASCII.String(id));
                                                    if (crv != null) {
                                                        sid.setField(CollectionSchema.cr_host_count_i.getSolrFieldName(), (Object)crv.count);
                                                        sid.setField(CollectionSchema.cr_host_chance_d.getSolrFieldName(), (Object)crv.cr);
                                                        sid.setField(CollectionSchema.cr_host_norm_i.getSolrFieldName(), (Object)crv.crn);
                                                        proccount_citationchange.incrementAndGet();
                                                    }
                                                }
                                                if (tagtype == ProcessType.UNIQUE) {
                                                    CollectionConfiguration.this.postprocessing_http_unique(segment, doc, sid, url);
                                                    CollectionConfiguration.this.postprocessing_www_unique(segment, doc, sid, url);
                                                    CollectionConfiguration.this.postprocessing_doublecontent(segment, uniqueURLs, doc, sid, url);
                                                }
                                            }
                                            catch (IllegalArgumentException ignored) {
                                                // not a known ProcessType name (or rejected by a step): skip this tag
                                            }
                                        }

                                        if (reference_computation) {
                                            String hosthash = url.hosthash();
                                            if (!hostExtentCache.containsKey(hosthash)) {
                                                // two workers may both compute this; the result is the same, so the race is harmless
                                                StringBuilder q = new StringBuilder();
                                                q.append(CollectionSchema.host_id_s.getSolrFieldName()).append(":\"").append(hosthash).append("\" AND ").append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200");
                                                long hostExtentCount = segment.fulltext().getDefaultConnector().getCountByQuery(q.toString());
                                                hostExtentCache.put(hosthash, hostExtentCount);
                                            }
                                            if (CollectionConfiguration.this.postprocessing_references(rrCache, sid, url, hostExtentCache)) {
                                                proccount_referencechange.incrementAndGet();
                                            }
                                        }

                                        if (byPartialUpdate) {
                                            // setting the fields to null in a partial update drops them from the stored document
                                            sid.setField(CollectionSchema.process_sxt.getSolrFieldName(), null);
                                            sid.setField(CollectionSchema.harvestkey_s.getSolrFieldName(), null);
                                            collectionConnector.update(sid);
                                        } else {
                                            collectionConnector.add(sid);
                                        }

                                        long thiscount = proccount.incrementAndGet();
                                        allcount.incrementAndGet();
                                        if (thiscount % 100L == 0L) {
                                            // clamp to 1 ms: a zero divisor would throw here and the document would
                                            // wrongly end up in failids (and be deleted) although it was written
                                            long elapsed = Math.max(1L, System.currentTimeMillis() - start);
                                            postprocessingActivity = "postprocessed " + thiscount + " from " + count + " collection documents; " + thiscount * 60000L / elapsed + " ppm; " + elapsed * (count - thiscount) / thiscount / 60000L + " minutes remaining";
                                            ConcurrentLog.info("CollectionConfiguration", postprocessingActivity);
                                        }
                                    }
                                    catch (Throwable e1) {
                                        // any failure marks the document for deletion after the run
                                        ConcurrentLog.logException(e1);
                                        failids.add(idString);
                                    }
                                    countcheck.incrementAndGet();
                                }
                            }
                            catch (InterruptedException e) {
                                ConcurrentLog.logException(e);
                            }
                        }
                    };
                    rewriteThread[t].start();
                }
                for (int t = 0; t < concurrency; ++t) {
                    rewriteThread[t].join();
                }

                if (failids.size() > 0) {
                    ConcurrentLog.info("CollectionConfiguration", "cleanup_processing: deleting " + failids.size() + " documents which have permanent execution fails");
                    collectionConnector.deleteByIds(failids);
                }
                if (count != (long)countcheck.get()) {
                    ConcurrentLog.warn("CollectionConfiguration", "ambiguous collection document count for harvestkey " + harvestkey + ": expected=" + count + ", counted=" + countcheck.get() + "; countquery=" + collection1query);
                }
                ConcurrentLog.info("CollectionConfiguration", "cleanup_processing: re-calculated " + proccount.get() + " new documents, " + proccount_referencechange.get() + " reference-count changes, " + proccount_citationchange.get() + " citation ranking changes.");
            }
        }
        catch (InterruptedException e2) {
            ConcurrentLog.warn("CollectionConfiguration", e2.getMessage(), e2);
        }
        catch (IOException e3) {
            ConcurrentLog.warn("CollectionConfiguration", e3.getMessage(), e3);
        }
        collectionConnector.commit(true);
    }

    /**
     * Writes citation-rank values into the webgraph documents that carry a
     * {@code process_sxt} entry.
     *
     * <p>The source hosts are taken from a facet over {@code webgraphquery}. For every host
     * with a positive facet count the matching documents are fetched and rewritten by a
     * small pool of worker threads: for documents tagged {@code CITATION} the normalized
     * rank of the source and/or target url is set (if the schema has the fields and a rank
     * is known), the {@code process_sxt} and {@code harvestkey_s} fields are dropped, and
     * the document is re-added. Each worker is waited for at most 10 seconds and is
     * interrupted if it is still running after that.
     *
     * @param segment the index segment
     * @param webgraph the webgraph schema configuration
     * @param webgraphquery query selecting the webgraph documents to process
     * @param rankings citation-rank values by url hash (read only here)
     * @param allcount counter that is incremented for every processed document
     */
    private void postprocessWebgraph(final Segment segment, final WebgraphConfiguration webgraph, String webgraphquery, final Map<String, CRV> rankings, final AtomicInteger allcount) {
        postprocessingActivity = "collecting host facets for webgraph cr calculation";
        ConcurrentLog.info("CollectionConfiguration", postprocessingActivity);
        final HashSet<String> omitFields = new HashSet<String>();
        omitFields.add(WebgraphSchema.process_sxt.getSolrFieldName());
        omitFields.add(WebgraphSchema.harvestkey_s.getSolrFieldName());

        ReversibleScoreMap<String> webgraphhosts = null;
        try {
            LinkedHashMap<String, ReversibleScoreMap<String>> hostfacet = segment.fulltext().getWebgraphConnector().getFacets(webgraphquery, 10000000, WebgraphSchema.source_host_s.getSolrFieldName());
            if (hostfacet != null) {
                webgraphhosts = hostfacet.get(WebgraphSchema.source_host_s.getSolrFieldName());
            }
        }
        catch (IOException e2) {
            ConcurrentLog.logException(e2);
        }
        if (webgraphhosts == null) {
            // query failed or returned no entry for the host field: nothing to process
            webgraphhosts = new ClusteredScoreMap<String>(true);
        }

        try {
            final long start = System.currentTimeMillis();
            for (final String host : webgraphhosts.keyList(true)) {
                if (webgraphhosts.get(host) <= 0) {
                    continue;
                }
                postprocessingActivity = "writing cr values to webgraph for host " + host;
                ConcurrentLog.info("CollectionConfiguration", postprocessingActivity);
                String patchquery = WebgraphSchema.source_host_s.getSolrFieldName() + ":\"" + host + "\" AND " + WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]";
                final long count = segment.fulltext().getWebgraphConnector().getCountByQuery("{!cache=false}" + patchquery);
                // never more workers than documents; compare as long so a huge count cannot wrap to a negative int
                int concurrency = (int)Math.min(count, (long)Math.max(1, Runtime.getRuntime().availableProcessors() / 4));
                ConcurrentLog.info("CollectionConfiguration", "collecting " + count + " documents from the webgraph, concurrency = " + concurrency);
                final BlockingQueue<SolrDocument> docs = segment.fulltext().getWebgraphConnector().concurrentDocumentsByQuery(patchquery, WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 100000000, Long.MAX_VALUE, concurrency + 1, concurrency, true, new String[0]);
                final AtomicInteger proccount = new AtomicInteger(0);
                Thread[] workers = new Thread[concurrency];
                for (int w = 0; w < workers.length; ++w) {
                    workers[w] = new Thread("CollectionConfiguration.postprocessing.webgraph-" + w){

                        @Override
                        public void run() {
                            try {
                                SolrDocument doc;
                                // each worker stops when it takes a poison document from the queue
                                while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
                                    try {
                                        SolrInputDocument sid = webgraph.toSolrInputDocument(doc, omitFields);
                                        Collection<Object> proctags = doc.getFieldValues(WebgraphSchema.process_sxt.getSolrFieldName());
                                        for (Object tag : proctags) {
                                            try {
                                                ProcessType tagtype = ProcessType.valueOf((String)tag);
                                                if (tagtype != ProcessType.CITATION) {
                                                    continue;
                                                }
                                                if (segment.fulltext().useWebgraph() && webgraph.contains(WebgraphSchema.source_id_s) && webgraph.contains(WebgraphSchema.source_cr_host_norm_i)) {
                                                    CRV sourceRank = rankings.get((String)doc.getFieldValue(WebgraphSchema.source_id_s.getSolrFieldName()));
                                                    if (sourceRank != null) {
                                                        sid.setField(WebgraphSchema.source_cr_host_norm_i.getSolrFieldName(), (Object)sourceRank.crn);
                                                    }
                                                }
                                                if (webgraph.contains(WebgraphSchema.target_id_s) && webgraph.contains(WebgraphSchema.target_cr_host_norm_i)) {
                                                    CRV targetRank = rankings.get((String)doc.getFieldValue(WebgraphSchema.target_id_s.getSolrFieldName()));
                                                    if (targetRank != null) {
                                                        sid.setField(WebgraphSchema.target_cr_host_norm_i.getSolrFieldName(), (Object)targetRank.crn);
                                                    }
                                                }
                                            }
                                            catch (IllegalArgumentException e) {
                                                ConcurrentLog.logException(e);
                                            }
                                        }
                                        try {
                                            sid.removeField(WebgraphSchema.process_sxt.getSolrFieldName());
                                            sid.removeField(WebgraphSchema.harvestkey_s.getSolrFieldName());
                                            segment.fulltext().getWebgraphConnector().add(sid);
                                        }
                                        catch (SolrException | IOException e) {
                                            ConcurrentLog.logException(e);
                                        }
                                        // use the value returned by the increment so every 1000th document reports exactly once
                                        final long processed = proccount.incrementAndGet();
                                        allcount.incrementAndGet();
                                        if (processed % 1000L != 0L) {
                                            continue;
                                        }
                                        // all arithmetic in long: the former int product processed * 1000 overflowed
                                        // above ~2.1 million documents; elapsed is clamped to avoid division by zero
                                        long elapsed = Math.max(1L, System.currentTimeMillis() - start);
                                        postprocessingActivity = "writing CitationRank values to webgraph for host " + host + ", postprocessed " + processed + " from " + count + " documents; " + processed * 1000L / elapsed + " docs/second; " + elapsed * (count - processed) / processed / 60000L + " minutes remaining";
                                        ConcurrentLog.info("CollectionConfiguration", postprocessingActivity);
                                    }
                                    catch (Throwable e) {
                                        ConcurrentLog.logException(e);
                                    }
                                }
                            }
                            catch (InterruptedException e) {
                                ConcurrentLog.warn("CollectionConfiguration", e.getMessage(), e);
                            }
                        }
                    };
                    workers[w].start();
                }
                for (int w = 0; w < workers.length; ++w) {
                    try {
                        // wait at most 10 seconds per worker, then ask it to stop
                        workers[w].join(10000L);
                        if (workers[w].isAlive()) {
                            workers[w].interrupt();
                        }
                    }
                    catch (InterruptedException ignored) {
                        // interrupted while waiting: go on with the remaining workers
                    }
                }
                if (count != (long)proccount.get()) {
                    ConcurrentLog.warn("CollectionConfiguration", "ambiguous webgraph document count for host " + host + ": expected=" + count + ", counted=" + proccount.get());
                }
            }
        }
        catch (IOException e2) {
            ConcurrentLog.warn("CollectionConfiguration", e2.getMessage(), e2);
        }
    }

    private Map<String, CRV> createRankingMap(Segment segment, Segment.ReferenceReportCache rrCache, SolrConnector collectionConnector, ReversibleScoreMap<String> collection1hosts) {
        ConcurrentHashMap<String, CRV> rankings = new ConcurrentHashMap<String, CRV>();
        try {
            int concurrency = Math.min(collection1hosts.size(), Runtime.getRuntime().availableProcessors());
            postprocessingActivity = "collecting CitationRank for " + collection1hosts.size() + " hosts, concurrency = " + concurrency;
            ConcurrentLog.info("CollectionConfiguration", postprocessingActivity);
            int countcheck = 0;
            for (String host : collection1hosts.keyList(true)) {
                String patchquery = CollectionSchema.host_s.getSolrFieldName() + ":" + host + " AND " + CollectionSchema.canonical_s.getSolrFieldName() + ":[* TO *]";
                long patchquerycount = collectionConnector.getCountByQuery("{!cache=false}" + patchquery);
                BlockingQueue<SolrDocument> documents_with_canonical_tag = collectionConnector.concurrentDocumentsByQuery(patchquery, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000000, Long.MAX_VALUE, 20, 1, true, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.canonical_s.getSolrFieldName());
                int patchquerycountcheck = 0;
                try {
                    SolrDocument doc_B;
                    while ((doc_B = documents_with_canonical_tag.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
                        DigestURL doc_C_url = new DigestURL((String)doc_B.getFieldValue(CollectionSchema.canonical_s.getSolrFieldName()));
                        byte[] doc_B_id = ASCII.getBytes((String)doc_B.getFieldValue(CollectionSchema.id.getSolrFieldName()));
                        if (segment.connectedCitation()) {
                            ReferenceContainer<CitationReference> doc_A_ids = segment.urlCitation().remove(doc_B_id);
                            if (doc_A_ids == null) continue;
                            Iterator<CitationReference> doc_A_ids_iterator = doc_A_ids.entries();
                            while (doc_A_ids_iterator.hasNext()) {
                                CitationReference doc_A_citation = doc_A_ids_iterator.next();
                                segment.urlCitation().add(doc_C_url.hash(), doc_A_citation);
                            }
                        }
                        ++patchquerycountcheck;
                        if (!MemoryControl.shortStatus()) continue;
                        ConcurrentLog.warn("CollectionConfiguration", "terminated canonical collection during postprocessing because of short memory");
                        break;
                    }
                }
                catch (InterruptedException e) {
                    ConcurrentLog.logException(e);
                }
                catch (SpaceExceededException e) {
                    ConcurrentLog.logException(e);
                }
                if (patchquerycount != (long)patchquerycountcheck) {
                    ConcurrentLog.warn("CollectionConfiguration", "ambiguous patchquery count for host " + host + ": expected=" + patchquerycount + ", counted=" + patchquerycountcheck);
                }
                if (collection1hosts.get(host) <= 0) continue;
                CRHost crh = new CRHost(segment, rrCache, host, 0.85, 6);
                int convergence_attempts = 0;
                while (convergence_attempts++ < 30) {
                    ConcurrentLog.info("CollectionConfiguration", "convergence step " + convergence_attempts + " for host " + host + " ...");
                    if (crh.convergenceStep()) break;
                    if (!MemoryControl.shortStatus()) continue;
                    ConcurrentLog.warn("CollectionConfiguration", "terminated convergenceStep during postprocessing because of short memory");
                    break;
                }
                ConcurrentLog.info("CollectionConfiguration", "convergence for host " + host + " after " + convergence_attempts + " steps");
                Map<String, CRV> crn = crh.normalize();
                rankings.putAll(crn);
                if (MemoryControl.shortStatus()) {
                    ConcurrentLog.warn("CollectionConfiguration", "terminated crn akkumulation during postprocessing because of short memory");
                    break;
                }
                ++countcheck;
            }
            if (collection1hosts.size() != countcheck) {
                ConcurrentLog.warn("CollectionConfiguration", "ambiguous host count: expected=" + collection1hosts.size() + ", counted=" + countcheck);
            }
        }
        catch (IOException e2) {
            ConcurrentLog.logException(e2);
            collection1hosts = new ClusteredScoreMap<String>(true);
        }
        return rankings;
    }

    /**
     * Postprocessing step for the {@code http_unique_b} flag of a document.
     * <p>
     * Does nothing when the schema does not contain {@code http_unique_b} or when the URL is
     * neither http nor https. Otherwise the same URL stub is looked up under the opposite
     * protocol (http &lt;-&gt; https) and the flag of both documents is reconciled by
     * {@code set_unique_flag}.
     *
     * @param segment the segment whose default Solr connector is queried
     * @param doc     the stored document currently being post-processed
     * @param sid     the input document that receives the (possibly changed) flag
     * @param url     the URL of {@code doc}
     */
    public void postprocessing_http_unique(Segment segment, SolrDocument doc, SolrInputDocument sid, DigestURL url) {
        if (!this.contains(CollectionSchema.http_unique_b)) {
            return;
        }
        if (!url.isHTTPS() && !url.isHTTP()) {
            return;
        }
        try {
            // the counterpart is the same stub with the protocol swapped
            String counterpartProtocol = url.isHTTP() ? "https://" : "http://";
            DigestURL u = new DigestURL(counterpartProtocol + url.urlstub(true, true));
            SolrDocument d = segment.fulltext().getDefaultConnector().getDocumentById(ASCII.String(u.hash()), CollectionSchema.http_unique_b.getSolrFieldName());
            this.set_unique_flag(CollectionSchema.http_unique_b, doc, sid, d);
        }
        catch (IOException e) {
            // The conditional must be parenthesized: without the parentheses the string
            // concatenation binds tighter than '!=', the test is always true and the
            // "Failed to postProcess ..." prefix is never logged.
            ConcurrentLog.warn("CollectionConfiguration", "Failed to postProcess http_unique_b field" + (e.getMessage() != null ? " : " + e.getMessage() : "."));
        }
    }

    /**
     * Postprocessing step for the {@code www_unique_b} flag of a document.
     * <p>
     * Does nothing when the schema does not contain {@code www_unique_b}. Otherwise the URL is
     * rebuilt with the leading {@code "www."} of its stub toggled (removed when present, added
     * when absent), that counterpart is looked up in the index and the flag of both documents
     * is reconciled by {@code set_unique_flag}.
     *
     * @param segment the segment whose default Solr connector is queried
     * @param doc     the stored document currently being post-processed
     * @param sid     the input document that receives the (possibly changed) flag
     * @param url     the URL of {@code doc}
     */
    public void postprocessing_www_unique(Segment segment, SolrDocument doc, SolrInputDocument sid, DigestURL url) {
        if (!this.contains(CollectionSchema.www_unique_b)) {
            return;
        }
        String us = url.urlstub(true, true);
        try {
            // toggle the "www." prefix of the stub, keep the protocol
            String counterpartStub = us.startsWith("www.") ? us.substring(4) : "www." + us;
            DigestURL u = new DigestURL(url.getProtocol() + "://" + counterpartStub);
            SolrDocument d = segment.fulltext().getDefaultConnector().getDocumentById(ASCII.String(u.hash()), CollectionSchema.www_unique_b.getSolrFieldName());
            this.set_unique_flag(CollectionSchema.www_unique_b, doc, sid, d);
        }
        catch (IOException e) {
            // The conditional must be parenthesized: without the parentheses the string
            // concatenation binds tighter than '!=', the test is always true and the
            // "Failed to postProcess ..." prefix is never logged.
            ConcurrentLog.warn("CollectionConfiguration", "Failed to postProcess www_unique_b field" + (e.getMessage() != null ? " : " + e.getMessage() : "."));
        }
    }

    /**
     * Reconciles a boolean "unique" flag between a document and its counterpart.
     * <p>
     * The flag of {@code doc} and of the counterpart {@code d} are read (a missing value or a
     * missing counterpart counts as {@code false}). Only when both have the same value is the
     * flag written to {@code sid}, with the negated value; when they already differ nothing
     * is written.
     *
     * @param field the boolean schema field to reconcile
     * @param doc   the stored document currently being post-processed
     * @param sid   the input document that receives the new flag value
     * @param d     the counterpart document, may be {@code null}
     */
    private void set_unique_flag(CollectionSchema field, SolrDocument doc, SolrInputDocument sid, SolrDocument d) {
        String fieldName = field.getSolrFieldName();
        Object sb = doc.getFieldValue(fieldName);
        boolean sbb = sb != null && ((Boolean) sb).booleanValue();
        Object ob = d == null ? null : d.getFieldValue(fieldName);
        boolean obb = ob != null && ((Boolean) ob).booleanValue();
        if (sbb == obb) {
            // store a real Boolean: the value is read back above with a (Boolean) cast,
            // so writing an Integer 1/0 would be inconsistent with the field type
            sid.setField(fieldName, Boolean.valueOf(!sbb));
        }
    }

    /**
     * Postprocessing step that marks double content within one host.
     * <p>
     * Two groups of flags are maintained on {@code sid}:
     * <ul>
     * <li>for the exact and the fuzzy content signature: the {@code *_unique_b} flag and the
     * {@code *_copycount_i} counter, computed from other documents of the same host that carry
     * the same signature;</li>
     * <li>for the title and the description signature: the {@code *_unique_b} flag, computed
     * from a count query for other documents of the same host with the same signature.</li>
     * </ul>
     * Finally the url hash of the document is added to {@code uniqueURLs}.
     *
     * @param segment    the segment whose default Solr connector is queried
     * @param uniqueURLs url hashes of documents already handled by this method; used to decide
     *                   whether this document is the first appearance of a signature, and
     *                   extended with the hash of {@code url}
     * @param doc        the stored document currently being post-processed
     * @param sid        the input document that receives the computed fields
     * @param url        the URL of {@code doc}
     */
    public void postprocessing_doublecontent(Segment segment, Set<String> uniqueURLs, SolrDocument doc, SolrInputDocument sid, DigestURL url) {
        // Template restricting queries to comparable documents: http status 200, canonical
        // either not set or equal to the sku, and robots_i none of 8, 24, 512, 1536.
        // NOTE(review): the meaning of these robots_i values is not visible here - confirm.
        Conjunction ValidDocTermTemplate = new Conjunction();
        ValidDocTermTemplate.addOperand(new LongLiteral(CollectionSchema.httpstatus_i, 200L));
        ValidDocTermTemplate.addOperand(new Disjunction(new Negation(new CatchallLiteral(CollectionSchema.canonical_equal_sku_b)), new BooleanLiteral(CollectionSchema.canonical_equal_sku_b, true)));
        ValidDocTermTemplate.addOperand(new Negation(new LongLiteral(CollectionSchema.robots_i, 8L)));
        ValidDocTermTemplate.addOperand(new Negation(new LongLiteral(CollectionSchema.robots_i, 24L)));
        ValidDocTermTemplate.addOperand(new Negation(new LongLiteral(CollectionSchema.robots_i, 512L)));
        ValidDocTermTemplate.addOperand(new Negation(new LongLiteral(CollectionSchema.robots_i, 1536L)));
        String urlhash = ASCII.String(url.hash());
        String hostid = url.hosthash();

        // each row: {signature field, unique flag field, copy count field}
        CollectionSchema[][] doccheckschema = new CollectionSchema[][]{
            {CollectionSchema.exact_signature_l, CollectionSchema.exact_signature_unique_b, CollectionSchema.exact_signature_copycount_i},
            {CollectionSchema.fuzzy_signature_l, CollectionSchema.fuzzy_signature_unique_b, CollectionSchema.fuzzy_signature_copycount_i}};

        // collect all signatures of this document into one disjunction, so a single query
        // fetches the candidates for both signature kinds
        Disjunction dnf = new Disjunction();
        for (CollectionSchema[] checkfields : doccheckschema) {
            CollectionSchema signaturefield = checkfields[0];
            CollectionSchema uniquefield = checkfields[1];
            CollectionSchema countfield = checkfields[2];
            if (!this.contains(signaturefield) || !this.contains(uniquefield) || !this.contains(countfield)) continue;
            Long signature = (Long)doc.getFieldValue(signaturefield.getSolrFieldName());
            if (signature == null) continue;
            dnf.addOperand(new LongLiteral(signaturefield, signature));
        }
        Conjunction con = (Conjunction)ValidDocTermTemplate.clone();
        con.addOperand(dnf);
        con.addOperand(new Negation(new StringLiteral(CollectionSchema.id, urlhash)));
        con.addOperand(new StringLiteral(CollectionSchema.host_id_s, hostid));
        String query2 = con.toString();
        SolrDocumentList docsAkk;
        try {
            docsAkk = segment.fulltext().getDefaultConnector().getDocumentListByQuery(query2, null, 0, 1000, CollectionSchema.id.getSolrFieldName(), CollectionSchema.exact_signature_l.getSolrFieldName(), CollectionSchema.fuzzy_signature_l.getSolrFieldName());
        }
        catch (IOException e) {
            ConcurrentLog.logException(e);
            docsAkk = new SolrDocumentList();
        }
        if (docsAkk.getNumFound() > 0L) {
            for (CollectionSchema[] checkfields : doccheckschema) {
                CollectionSchema signaturefield = checkfields[0];
                CollectionSchema uniquefield = checkfields[1];
                CollectionSchema countfield = checkfields[2];
                if (!this.contains(signaturefield) || !this.contains(uniquefield) || !this.contains(countfield)) continue;
                Long signature = (Long)doc.getFieldValue(signaturefield.getSolrFieldName());
                if (signature == null) continue;
                // narrow the fetched candidates down to those with this very signature
                SolrDocumentList docs = new StringLiteral(signaturefield, signature.toString()).apply(docsAkk);
                if (docs.getNumFound() == 0L) {
                    sid.setField(uniquefield.getSolrFieldName(), (Object)true);
                    sid.setField(countfield.getSolrFieldName(), (Object)1);
                    continue;
                }
                // only the first candidate is inspected: if it was already handled
                // (its id is in uniqueURLs), this document is not the first appearance
                boolean firstappearance = true;
                Iterator<SolrDocument> iterator = docs.iterator();
                if (iterator.hasNext() && uniqueURLs.contains(iterator.next().getFieldValue(CollectionSchema.id.getSolrFieldName()))) {
                    firstappearance = false;
                }
                sid.setField(uniquefield.getSolrFieldName(), (Object)firstappearance);
                sid.setField(countfield.getSolrFieldName(), (Object)(docs.getNumFound() + 1L));
            }
        }
        Integer robots_i = this.contains(CollectionSchema.robots_i) ? (Integer)doc.getFieldValue(CollectionSchema.robots_i.getSolrFieldName()) : null;
        Integer httpstatus_i = this.contains(CollectionSchema.httpstatus_i) ? (Integer)doc.getFieldValue(CollectionSchema.httpstatus_i.getSolrFieldName()) : null;
        String canonical_s = this.contains(CollectionSchema.canonical_s) ? (String)doc.getFieldValue(CollectionSchema.canonical_s.getSolrFieldName()) : null;
        Boolean canonical_equal_sku_b = this.contains(CollectionSchema.canonical_equal_sku_b) ? (Boolean)doc.getFieldValue(CollectionSchema.canonical_equal_sku_b.getSolrFieldName()) : null;
        // each row: {text field, signature field of that text, unique flag field}
        CollectionSchema[][] metadatacheckschema = new CollectionSchema[][]{
            {CollectionSchema.title, CollectionSchema.title_exact_signature_l, CollectionSchema.title_unique_b},
            {CollectionSchema.description_txt, CollectionSchema.description_exact_signature_l, CollectionSchema.description_unique_b}};
        // metadata uniqueness is only computed for documents that themselves pass the same
        // kind of filter as the template above (robots bits, canonical, http status)
        if (segment.fulltext().getDefaultConfiguration().contains(CollectionSchema.host_id_s)
                && (robots_i == null || (robots_i & 0x200) == 0 && (robots_i & 8) == 0)
                && (canonical_s == null || canonical_s.length() == 0 || canonical_equal_sku_b != null && canonical_equal_sku_b.booleanValue() || url.toNormalform(true).equals(canonical_s))
                && (httpstatus_i == null || httpstatus_i == 200)) {
            for (CollectionSchema[] checkfields : metadatacheckschema) {
                CollectionSchema checkfield = checkfields[0];
                CollectionSchema signaturefield = checkfields[1];
                CollectionSchema uniquefield = checkfields[2];
                if (!this.contains(checkfield) || !this.contains(signaturefield) || !this.contains(uniquefield)) continue;
                Long signature = (Long)doc.getFieldValue(signaturefield.getSolrFieldName());
                if (signature == null) continue;
                try {
                    Conjunction doccountterm = (Conjunction)ValidDocTermTemplate.clone();
                    doccountterm.addOperand(new Negation(new StringLiteral(CollectionSchema.id, urlhash)));
                    doccountterm.addOperand(new StringLiteral(CollectionSchema.host_id_s, hostid));
                    doccountterm.addOperand(new LongLiteral(signaturefield, signature));
                    long doccount = segment.fulltext().getDefaultConnector().getCountByQuery("{!cache=false}" + doccountterm.toString());
                    // write a Boolean (as done for the signature unique flags above),
                    // not an Integer 1/0
                    sid.setField(uniquefield.getSolrFieldName(), (Object)Boolean.valueOf(doccount == 0L));
                }
                catch (IOException e) {
                    // best effort: the flag stays untouched, but the failure is reported
                    ConcurrentLog.logException(e);
                }
            }
        }
        uniqueURLs.add(urlhash);
    }

    /**
     * Postprocessing step that refreshes the reference counters of a document.
     * <p>
     * For every counter field contained in the schema ({@code references_i},
     * {@code references_internal_i}, {@code references_external_i},
     * {@code references_exthosts_i}, {@code host_extent_i}) the value currently stored in
     * {@code sid} is compared with the value derived from the reference report of
     * {@code url} (or from {@code hostExtentCount}); the field is rewritten when it is
     * missing or different.
     *
     * @param rrCache         cache delivering the reference report for the url hash
     * @param sid             the input document to update; when {@code null} nothing can be
     *                        updated and {@code false} is returned
     * @param url             the URL of the document
     * @param hostExtentCount number of documents per host hash; when {@code null} the extent
     *                        is taken as {@code Long.MAX_VALUE}; hosts without an entry leave
     *                        {@code host_extent_i} untouched
     * @return {@code true} when at least one field of {@code sid} was changed; {@code false}
     *         when nothing changed, when none of the four reference fields is in the schema,
     *         or when the reference report could not be loaded
     */
    public boolean postprocessing_references(Segment.ReferenceReportCache rrCache, SolrInputDocument sid, DigestURL url, Map<String, Long> hostExtentCount) {
        if (!(this.contains(CollectionSchema.references_i) || this.contains(CollectionSchema.references_internal_i) || this.contains(CollectionSchema.references_external_i) || this.contains(CollectionSchema.references_exthosts_i))) {
            return false;
        }
        if (sid == null) {
            // without a target document every update below would fail with a NullPointerException
            return false;
        }
        Integer all_old = (Integer)sid.getFieldValue(CollectionSchema.references_i.getSolrFieldName());
        Integer internal_old = (Integer)sid.getFieldValue(CollectionSchema.references_internal_i.getSolrFieldName());
        Integer external_old = (Integer)sid.getFieldValue(CollectionSchema.references_external_i.getSolrFieldName());
        Integer exthosts_old = (Integer)sid.getFieldValue(CollectionSchema.references_exthosts_i.getSolrFieldName());
        Integer hostextc_old = (Integer)sid.getFieldValue(CollectionSchema.host_extent_i.getSolrFieldName());
        try {
            Segment.ReferenceReport rr = rrCache.getReferenceReport(ASCII.String(url.hash()), false);
            boolean change = false;
            int all2 = rr.getExternalCount() + rr.getInternalCount();
            if (this.contains(CollectionSchema.references_i) && (all_old == null || all_old.intValue() != all2)) {
                sid.setField(CollectionSchema.references_i.getSolrFieldName(), (Object)Integer.valueOf(all2));
                change = true;
            }
            if (this.contains(CollectionSchema.references_internal_i) && (internal_old == null || internal_old.intValue() != rr.getInternalCount())) {
                sid.setField(CollectionSchema.references_internal_i.getSolrFieldName(), (Object)rr.getInternalCount());
                change = true;
            }
            if (this.contains(CollectionSchema.references_external_i) && (external_old == null || external_old.intValue() != rr.getExternalCount())) {
                sid.setField(CollectionSchema.references_external_i.getSolrFieldName(), (Object)rr.getExternalCount());
                change = true;
            }
            if (this.contains(CollectionSchema.references_exthosts_i) && (exthosts_old == null || exthosts_old.intValue() != rr.getExternalHostIDs().size())) {
                sid.setField(CollectionSchema.references_exthosts_i.getSolrFieldName(), (Object)rr.getExternalHostIDs().size());
                change = true;
            }
            if (this.contains(CollectionSchema.host_extent_i)) {
                // Both operands are kept as boxed Long: mixing a primitive long with a Long in
                // the conditional would unbox the map result and throw a NullPointerException
                // for hosts that have no entry in the map.
                Long hostExtent = hostExtentCount == null ? Long.valueOf(Long.MAX_VALUE) : hostExtentCount.get(url.hosthash());
                if (hostExtent != null && (hostextc_old == null || (long)hostextc_old.intValue() != hostExtent.longValue())) {
                    // NOTE: the value is narrowed to int, as the field is an integer field
                    sid.setField(CollectionSchema.host_extent_i.getSolrFieldName(), (Object)Integer.valueOf(hostExtent.intValue()));
                    change = true;
                }
            }
            return change;
        }
        catch (IOException e) {
            // the reference report could not be loaded: report "no change"
            return false;
        }
    }

    /**
     * Compresses a list of protocol names into a sparse, index-prefixed list.
     * <p>
     * Entries equal to {@code "http"} are dropped; every other entry is emitted as
     * {@code "<index>-<protocol>"}, where the index is the position in the input list,
     * left-padded with zeros to at least three digits (e.g. {@code "001-https"}).
     *
     * @param protocol the protocol names, one per position
     * @return the indexed entries for all non-http positions, in input order
     */
    public static List<String> protocolList2indexedList(List<String> protocol) {
        List<String> indexed = new ArrayList<String>();
        int position = 0;
        for (String scheme : protocol) {
            int index = position++;
            if (scheme.equals("http")) {
                continue;
            }
            // zero-pad the index to a width of three; longer indexes stay as they are
            StringBuilder entry = new StringBuilder(Integer.toString(index));
            while (entry.length() < 3) {
                entry.insert(0, '0');
            }
            indexed.add(entry.append('-').append(scheme).toString());
        }
        return indexed;
    }

    /**
     * Expands a sparse, index-prefixed protocol list into a full list of protocol names.
     * <p>
     * The result has {@code dimension} entries, all preset to {@code "http"}. Every well-formed
     * entry {@code "<index>-<protocol>"} of {@code iplist} replaces the entry at its index.
     * Badly formed entries are silently ignored: non-String or {@code null} elements, entries
     * without a non-empty index and protocol part, entries whose index is not a number, and
     * entries whose index does not fit into {@code dimension}.
     *
     * @param iplist    the indexed entries, may be {@code null}
     * @param dimension the size of the resulting list
     * @return a list of {@code dimension} protocol names
     */
    public static List<String> indexedList2protocolList(Collection<Object> iplist, int dimension) {
        ArrayList<String> a = new ArrayList<String>(dimension);
        for (int i = 0; i < dimension; ++i) {
            a.add("http");
        }
        if (iplist == null) {
            return a;
        }
        for (Object ip : iplist) {
            if (!(ip instanceof String)) continue;
            String indexedProtocol = (String)ip;
            // the index may have more than three digits, so split at the first dash
            int dash = indexedProtocol.indexOf('-');
            if (dash <= 0 || indexedProtocol.length() <= dash + 1) continue;
            int index;
            try {
                index = Integer.parseInt(indexedProtocol.substring(0, dash));
            }
            catch (NumberFormatException e) {
                // not a numeric index: treat like any other badly formed entry
                continue;
            }
            if (index < 0 || index >= dimension) continue;
            a.set(index, indexedProtocol.substring(dash + 1));
        }
        return a;
    }

    // Class initialization.
    // Registers the Solr field names of author_sxt and of the two coordinate_p_* sub-fields
    // in omitFields (how omitFields is consumed is not visible in this part of the file),
    // then sets the static postprocessing status fields to their initial values:
    // not running, empty activity text, start time 0 and both document counters 0.
    static {
        omitFields.add(CollectionSchema.author_sxt.getSolrFieldName());
        omitFields.add(CollectionSchema.coordinate_p_0_coordinate.getSolrFieldName());
        omitFields.add(CollectionSchema.coordinate_p_1_coordinate.getSolrFieldName());
        postprocessingRunning = false;
        postprocessingActivity = "";
        postprocessingStartTime = 0L;
        postprocessingCollection1Count = 0;
        postprocessingWebgraphCount = 0;
    }

    /**
     * Holder for three parallel pairs of string lists: url protocols, url stubs and url
     * anchor texts.
     * <p>
     * Every array has exactly two elements; element 0 is presized with {@code inboundSize}
     * and element 1 with {@code outboundSize}.
     * NOTE(review): presumably index 0 holds inbound and index 1 outbound links - confirm
     * against the code that fills these lists.
     */
    public static class Subgraph {
        public final ArrayList<String>[] urlProtocols;
        public final ArrayList<String>[] urlStubs;
        public final ArrayList<String>[] urlAnchorTexts;

        /**
         * Creates the three list pairs, all lists initially empty.
         *
         * @param inboundSize  initial capacity of the lists at index 0
         * @param outboundSize initial capacity of the lists at index 1
         */
        public Subgraph(int inboundSize, int outboundSize) {
            this.urlProtocols = newListPair(inboundSize, outboundSize);
            this.urlStubs = newListPair(inboundSize, outboundSize);
            this.urlAnchorTexts = newListPair(inboundSize, outboundSize);
        }

        /** Builds one two-element array of empty, presized string lists. */
        @SuppressWarnings("unchecked")
        private static ArrayList<String>[] newListPair(int firstCapacity, int secondCapacity) {
            // generic arrays cannot be created directly; the cast is safe because the array
            // is only ever filled with ArrayList<String> instances right here
            ArrayList<String>[] pair = (ArrayList<String>[])new ArrayList[2];
            pair[0] = new ArrayList<String>(firstCapacity);
            pair[1] = new ArrayList<String>(secondCapacity);
            return pair;
        }
    }

    /**
     * A Solr input document that additionally carries a list of webgraph documents.
     * <p>
     * The webgraph documents are kept in insertion order in a list owned by this object.
     */
    public static class SolrVector extends SolrInputDocument {

        private static final long serialVersionUID = -210901881471714939L;

        /** The attached webgraph documents, in the order they were added. */
        private final List<SolrInputDocument> webgraphDocuments = new ArrayList<SolrInputDocument>();

        /**
         * Appends a webgraph document.
         *
         * @param webgraphDocument the document to attach
         */
        public void addWebgraphDocument(SolrInputDocument webgraphDocument) {
            webgraphDocuments.add(webgraphDocument);
        }

        /**
         * @return the internal list of attached webgraph documents (not a copy)
         */
        public List<SolrInputDocument> getWebgraphDocuments() {
            return webgraphDocuments;
        }
    }

    /**
     * Iterative citation-rank computation for the documents of one host.
     * <p>
     * Every document id of the host gets a two-element vector in {@link #crt}: element 0 is
     * the current rank, element 1 the rank computed in the running step. One
     * {@link #convergenceStep()} computes for every document
     * {@code (1 - damping) / count + damping * sum(rank(j) / internalLinks(j))} over the
     * internal referrers {@code j} of the document; {@link #normalize()} finally maps the
     * ranks to small integer classes.
     */
    private static final class CRHost {
        private final Segment segment;
        /** document id -> {current rank, rank of the running step} */
        private final Map<String, double[]> crt;
        private final int cr_host_count;
        /** cache for the inboundlinkscount_i value per document id */
        private final RowHandleMap internal_links_counter;
        private final double damping;
        private final int converge_eq_factor;
        private final Segment.ReferenceReportCache rrCache;

        /**
         * Collects all document ids of the host and initializes every rank with
         * {@code 1 / number of documents}.
         *
         * @param segment         the segment whose default Solr connector is used
         * @param rrCache         cache delivering reference reports per document id
         * @param host            the host name to collect documents for
         * @param damping         the damping factor of the rank formula
         * @param converge_digits number of decimal digits that must be equal for two ranks
         *                        to count as converged
         */
        public CRHost(Segment segment, Segment.ReferenceReportCache rrCache, String host, double damping, int converge_digits) {
            this.segment = segment;
            this.damping = damping;
            this.rrCache = rrCache;
            this.converge_eq_factor = (int)Math.pow(10.0, converge_digits);
            SolrConnector connector = segment.fulltext().getDefaultConnector();
            this.crt = new ConcurrentHashMap<String, double[]>();
            try {
                BlockingQueue<String> ids = connector.concurrentIDsByQuery("{!cache=false raw f=" + CollectionSchema.host_s.getSolrFieldName() + "}" + host, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000000, 86400000L, 200, 1);
                while (true) {
                    String id = ids.take();
                    // end-of-queue marker; compared by value, not by reference, so the loop
                    // does not depend on string interning
                    if ("POISON_ID".equals(id)) break;
                    this.crt.put(id, new double[]{0.0, 0.0});
                    if (!MemoryControl.shortStatus()) continue;
                    ConcurrentLog.warn("CollectionConfiguration", "terminated CRHost collection during postprocessing because of short memory");
                    break;
                }
            }
            catch (InterruptedException e) {
                // stop collecting, but keep the interruption visible to the caller
                Thread.currentThread().interrupt();
            }
            this.cr_host_count = this.crt.size();
            double initval = 1.0 / (double)this.cr_host_count;
            for (double[] ranks : this.crt.values()) {
                ranks[0] = initval;
            }
            this.internal_links_counter = new RowHandleMap(12, Base64Order.enhancedCoder, 8, 100, "internal_links_counter");
        }

        /**
         * Maps the current ranks to integer classes.
         * <p>
         * Documents are taken in ascending rank order and put into consecutive classes; the
         * first class takes about half of all documents, every following class about half of
         * the preceding class size (at least one rank group). Afterwards all class numbers
         * are shifted by a common offset so that the highest class number is 10.
         *
         * @return document id -> rank value object (host document count, rank, class)
         */
        public Map<String, CRV> normalize() {
            // group the document ids by rank, ordered ascending by rank
            TreeMap<Double, List<byte[]>> reorder = new TreeMap<Double, List<byte[]>>();
            for (Map.Entry<String, double[]> entry : this.crt.entrySet()) {
                Double d = entry.getValue()[0];
                List<byte[]> ds = reorder.get(d);
                if (ds == null) {
                    ds = new ArrayList<byte[]>();
                    reorder.put(d, ds);
                }
                ds.add(ASCII.getBytes(entry.getKey()));
            }
            int nextcount = (this.cr_host_count + 1) / 2;
            int nextcrn = 0;
            HashMap<String, CRV> r = new HashMap<String, CRV>();
            while (reorder.size() > 0) {
                int count = nextcount;
                while (reorder.size() > 0 && count > 0) {
                    Map.Entry<Double, List<byte[]>> next = reorder.pollFirstEntry();
                    List<byte[]> ids = next.getValue();
                    count -= ids.size();
                    double cr = next.getKey().doubleValue();
                    for (byte[] id : ids) {
                        r.put(ASCII.String(id), new CRV(this.cr_host_count, cr, nextcrn));
                    }
                }
                ++nextcrn;
                // count is <= 0 here; an overshoot of this class shrinks the next one
                nextcount = Math.max(1, (nextcount + count + 1) / 2);
            }
            // shift all classes so that the last (highest) class becomes 10
            int inc = 11 - nextcrn;
            for (CRV value : r.values()) {
                value.crn += inc;
            }
            return r;
        }

        /**
         * Writes the url and the rank values of every entry to the info log.
         *
         * @param rm document id -> rank value object; entries with a {@code null} value are
         *           skipped
         */
        public void log(Map<byte[], CRV> rm) {
            SolrConnector connector = this.segment.fulltext().getDefaultConnector();
            for (Map.Entry<byte[], CRV> entry2 : rm.entrySet()) {
                if (entry2 == null || entry2.getValue() == null) continue;
                try {
                    String url = connector.getURL(ASCII.String(entry2.getKey()));
                    ConcurrentLog.info("CollectionConfiguration", "CR for " + url);
                    ConcurrentLog.info("CollectionConfiguration", ">> " + entry2.getValue().toString());
                }
                catch (IOException e) {
                    ConcurrentLog.logException(e);
                }
            }
        }

        /**
         * Returns the {@code inboundlinkscount_i} value of a document, using
         * {@link #internal_links_counter} as cache.
         *
         * @param id the document id
         * @return the stored link count; 0 when there is no connector, no such document, no
         *         usable value, or when loading failed
         */
        public int getInternalLinks(byte[] id) {
            int il = (int)this.internal_links_counter.get(id);
            if (il >= 0) {
                // cache hit
                return il;
            }
            SolrConnector connector = this.segment.fulltext().getDefaultConnector();
            if (connector == null) {
                return 0;
            }
            try {
                SolrDocument doc = connector.getDocumentById(ASCII.String(id), CollectionSchema.inboundlinkscount_i.getSolrFieldName());
                il = 0;
                if (doc != null) {
                    Object x = doc.getFieldValue(CollectionSchema.inboundlinkscount_i.getSolrFieldName());
                    if (x instanceof Integer) {
                        il = ((Integer)x).intValue();
                    } else if (x instanceof Long) {
                        il = ((Long)x).intValue();
                    }
                }
                this.internal_links_counter.put(id, il);
                return il;
            }
            catch (IOException e) {
                ConcurrentLog.logException(e);
            }
            catch (SpaceExceededException e) {
                ConcurrentLog.logException(e);
            }
            // loading or caching failed: try to remember 0 so the lookup is not repeated
            try {
                this.internal_links_counter.put(id, 0L);
            }
            catch (SpaceExceededException ignored) {
                // the cache is full; the value is simply not cached
            }
            return 0;
        }

        /**
         * Performs one iteration of the rank computation over all documents of the host.
         * <p>
         * The new ranks are first written to element 1 of every vector and copied to
         * element 0 only after all documents have been computed.
         *
         * @return {@code true} when no computed rank differed from its previous value within
         *         the configured number of digits. NOTE: when loading a reference report
         *         fails, the step is aborted without copying the new ranks and the value
         *         accumulated so far is returned.
         */
        public boolean convergenceStep() {
            boolean convergence = true;
            double df = (1.0 - this.damping) / (double)this.cr_host_count;
            try {
                for (Map.Entry<String, double[]> entry2 : this.crt.entrySet()) {
                    String id = entry2.getKey();
                    Segment.ReferenceReport rr = this.rrCache.getReferenceReport(id, false);
                    HandleSet iids = rr.getInternallIDs();
                    double ncr = 0.0;
                    for (byte[] iid : iids) {
                        int ilc = this.getInternalLinks(iid);
                        if (ilc <= 0) continue;
                        double[] d = this.crt.get(ASCII.String(iid));
                        if (d != null && d.length > 0) {
                            ncr += d[0] / (double)ilc;
                            continue;
                        }
                        // referrer is not part of the collected host documents
                        ConcurrentLog.warn("COLLECTION", "d[] is empty, iid=" + ASCII.String(iid));
                        break;
                    }
                    ncr = df + this.damping * ncr;
                    if (convergence && !this.eqd(ncr, entry2.getValue()[0])) {
                        convergence = false;
                    }
                    entry2.getValue()[1] = ncr;
                }
                for (double[] ranks : this.crt.values()) {
                    ranks[0] = ranks[1];
                }
            }
            catch (IOException e) {
                // the step is incomplete; report the cause instead of hiding it
                ConcurrentLog.logException(e);
            }
            return convergence;
        }

        /** Compares two ranks after scaling with {@link #converge_eq_factor} and truncating to int. */
        private boolean eqd(double a, double b) {
            return (int)(a * (double)this.converge_eq_factor) == (int)(b * (double)this.converge_eq_factor);
        }
    }

    /**
     * Description of a document that failed to load or index, convertible to a Solr input
     * document via {@link #toSolr(CollectionConfiguration)}.
     */
    public static class FailDoc {
        /** the failed URL; {@code null} when it was restored from a malformed sku value */
        DigestURL digestURL;
        final Map<String, Pattern> collections;
        final String failReason;
        final FailType failType;
        final int httpstatus;
        final Date failtime;
        final int crawldepth;

        /**
         * Creates a fail document for a failure that happened now.
         *
         * @param digestURL   the failed URL
         * @param collections collection name -> url pattern deciding membership
         * @param failReason  a textual reason
         * @param failType    the kind of failure
         * @param httpstatus  the http status of the failed request
         * @param crawldepth  the crawl depth of the URL
         */
        public FailDoc(DigestURL digestURL, Map<String, Pattern> collections, String failReason, FailType failType, int httpstatus, int crawldepth) {
            this.digestURL = digestURL;
            this.collections = collections;
            this.failReason = failReason;
            this.failType = failType;
            this.httpstatus = httpstatus;
            this.failtime = new Date();
            this.crawldepth = crawldepth;
        }

        /**
         * Restores a fail document from a stored Solr document.
         * <p>
         * A malformed sku leaves the URL {@code null}; a missing fail type is logged and
         * replaced by {@link FailType#fail}; a missing crawl depth becomes 0.
         * NOTE(review): a missing {@code httpstatus_i} value still fails with a
         * NullPointerException on unboxing - a suitable default is not visible here.
         *
         * @param doc the stored document
         */
        public FailDoc(SolrDocument doc) {
            String sku = (String)doc.getFieldValue(CollectionSchema.sku.getSolrFieldName());
            try {
                this.digestURL = new DigestURL(sku);
            }
            catch (MalformedURLException e) {
                this.digestURL = null;
            }
            this.collections = new HashMap<String, Pattern>();
            Collection<Object> c = doc.getFieldValues(CollectionSchema.collection_sxt.getSolrFieldName());
            if (c != null) {
                for (Object cn : c) {
                    if (cn == null) continue;
                    this.collections.put((String)cn, QueryParams.catchall_pattern);
                }
            }
            this.failReason = (String)doc.getFieldValue(CollectionSchema.failreason_s.getSolrFieldName());
            String fts = (String)doc.getFieldValue(CollectionSchema.failtype_s.getSolrFieldName());
            if (fts == null) {
                // digestURL is null after a MalformedURLException; fall back to the raw sku
                // so that writing the warning cannot itself fail
                String shownUrl = this.digestURL == null ? String.valueOf(sku) : this.digestURL.toNormalform(true);
                ConcurrentLog.warn("CollectionConfiguration", "no fail type given for URL " + shownUrl);
            }
            this.failType = fts == null ? FailType.fail : FailType.valueOf(fts);
            this.httpstatus = (Integer)doc.getFieldValue(CollectionSchema.httpstatus_i.getSolrFieldName());
            this.failtime = (Date)doc.getFieldValue(CollectionSchema.load_date_dt.getSolrFieldName());
            Integer cd = (Integer)doc.getFieldValue(CollectionSchema.crawldepth_i.getSolrFieldName());
            this.crawldepth = cd == null ? 0 : cd;
        }

        /** @return the failed URL, may be {@code null} */
        public DigestURL getDigestURL() {
            return this.digestURL;
        }

        /** @return collection name -> url pattern, as given at construction */
        public Map<String, Pattern> getCollections() {
            return this.collections;
        }

        /** @return the textual fail reason */
        public String getFailReason() {
            return this.failReason;
        }

        /** @return the kind of failure */
        public FailType getFailType() {
            return this.failType;
        }

        /** @return the time of the failure */
        public Date getFailDate() {
            return this.failtime;
        }

        /** @return the http status of the failed request */
        public int getHttpstatus() {
            return this.httpstatus;
        }

        /**
         * Converts this fail document into a Solr input document.
         * <p>
         * A field is written when the configuration is empty (then all attributes are
         * written) or when the configuration contains that field.
         *
         * @param configuration the schema configuration deciding which fields are written
         * @return the Solr input document
         */
        public SolrInputDocument toSolr(CollectionConfiguration configuration) {
            boolean allAttr = configuration.isEmpty();
            assert (allAttr || configuration.contains(CollectionSchema.failreason_s));
            SolrInputDocument doc = new SolrInputDocument();
            String url = configuration.addURIAttributes(doc, allAttr, this.getDigestURL());
            CollectionSchema.content_type.add(doc, new String[]{Classification.url2mime(this.digestURL)});
            if (allAttr || configuration.contains(CollectionSchema.load_date_dt)) {
                configuration.add(doc, (SchemaDeclaration)CollectionSchema.load_date_dt, this.getFailDate());
            }
            if (allAttr || configuration.contains(CollectionSchema.crawldepth_i)) {
                configuration.add(doc, (SchemaDeclaration)CollectionSchema.crawldepth_i, this.crawldepth);
            }
            if (allAttr || configuration.contains(CollectionSchema.failreason_s)) {
                configuration.add(doc, (SchemaDeclaration)CollectionSchema.failreason_s, this.getFailReason());
            }
            if (allAttr || configuration.contains(CollectionSchema.failtype_s)) {
                configuration.add(doc, (SchemaDeclaration)CollectionSchema.failtype_s, this.getFailType().name());
            }
            if (allAttr || configuration.contains(CollectionSchema.httpstatus_i)) {
                configuration.add(doc, (SchemaDeclaration)CollectionSchema.httpstatus_i, this.getHttpstatus());
            }
            if (allAttr || configuration.contains(CollectionSchema.collection_sxt) && this.getCollections() != null && this.getCollections().size() > 0) {
                ArrayList<String> cs = new ArrayList<String>();
                // with allAttr the branch is entered even without collections; a null map
                // is then treated like an empty one
                if (this.getCollections() != null) {
                    for (Map.Entry<String, Pattern> e : this.getCollections().entrySet()) {
                        if (!e.getValue().matcher(url).matches()) continue;
                        cs.add(e.getKey());
                    }
                }
                configuration.add(doc, (SchemaDeclaration)CollectionSchema.collection_sxt, cs);
            }
            // the document is marked for the CITATION postprocessing when the fields that
            // step writes are available
            LinkedHashSet<ProcessType> processTypes = new LinkedHashSet<ProcessType>();
            if (allAttr || configuration.contains(CollectionSchema.cr_host_chance_d) && configuration.contains(CollectionSchema.cr_host_count_i) && configuration.contains(CollectionSchema.cr_host_norm_i)) {
                processTypes.add(ProcessType.CITATION);
            }
            if (allAttr || configuration.contains(CollectionSchema.process_sxt)) {
                ArrayList<String> p = new ArrayList<String>();
                for (ProcessType t : processTypes) {
                    p.add(t.name());
                }
                configuration.add(doc, (SchemaDeclaration)CollectionSchema.process_sxt, p);
            }
            return doc;
        }
    }

    /**
     * Mutable value object holding a rank value ({@code cr}), an integer rank class
     * ({@code crn}) and a document count ({@code count}).
     */
    private static final class CRV {
        public int count;
        public double cr;
        public int crn;

        /**
         * @param count the document count stored with the rank
         * @param cr    the rank value
         * @param crn   the integer rank class
         */
        public CRV(int count, double cr, int crn) {
            this.crn = crn;
            this.cr = cr;
            this.count = count;
        }

        /** @return the three values as {@code count=.., cr=.., crn=..} */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder();
            text.append("count=").append(count);
            text.append(", cr=").append(cr);
            text.append(", crn=").append(crn);
            return text.toString();
        }
    }
}

