/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.search.index;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.order.ByteOrder;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.ByteBuffer;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.LookAheadIterator;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.crawler.data.CrawlProfile;
import net.yacy.crawler.data.Transactions;
import net.yacy.crawler.retrieval.Response;
import net.yacy.document.Condenser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.document.parser.htmlParser;
import net.yacy.kelondro.data.citation.CitationReference;
import net.yacy.kelondro.data.citation.CitationReferenceFactory;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceFactory;
import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.rwi.IODispatcher;
import net.yacy.kelondro.rwi.IndexCell;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.rwi.ReferenceFactory;
import net.yacy.kelondro.table.IndexTable;
import net.yacy.kelondro.util.Bitfield;
import net.yacy.kelondro.util.ISO639;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.index.Fulltext;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.schema.WebgraphConfiguration;
import net.yacy.search.schema.WebgraphSchema;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;

public class Segment {
    /** Literal term whose hash ({@link #catchallHash}) is used as the catch-all key in the term index. */
    public static final String catchallString = "yacyall";
    /** Hash of {@link #catchallString}; computed in the static initializer. */
    public static final byte[] catchallHash;
    /** Word template with all four flag bits set; used by storeDocument when no word of the document can serve as template. */
    static final Word catchallWord;
    // Not referenced in this file; value 1800000 is presumably milliseconds (30 minutes) — TODO confirm against callers.
    public static final long wCacheMaxAge = 1800000L;
    // Not referenced in this file.
    public static final int wCacheMaxChunk = 800;
    // Not referenced in this file.
    public static final int lowcachedivisor = 900;
    /** 0x4000000 (67108864); the same value is handed to the IndexCell constructor by connectRWI and connectCitation. */
    public static final long targetFileSize = 0x4000000L;
    /** 0x400000 (4194304); the same value is handed to the IODispatcher and IndexCell constructors. */
    public static final int writeBufferSize = 0x400000;
    /** Name under which the term index cell is opened by connectRWI. */
    public static final String termIndexName = "text.index";
    /** Name under which the citation index cell is opened by connectCitation. */
    public static final String citationIndexName = "citation.index";
    /** File name (inside the segment path) of the first-seen table opened by the constructor. */
    public static final String firstseenIndexName = "firstseen.index";
    /** File name (inside the segment path) of the load-time table opened by the constructor. */
    public static final String loadtimeIndexName = "loadtime.index";
    /** Reference factory used for term index cells and for the containers handed to search events. */
    public static final ReferenceFactory<WordReference> wordReferenceFactory;
    /** Reference factory used for the citation index cell. */
    public static final ReferenceFactory<CitationReference> citationReferenceFactory;
    /** Byte order passed to both index cells; set to Base64Order.enhancedCoder in the static initializer. */
    public static final ByteOrder wordOrder;
    private final ConcurrentLog log;
    private final File segmentPath;
    protected final Fulltext fulltext;
    // null until connectRWI succeeds; reset to null by disconnectRWI
    protected IndexCell<WordReference> termIndex;
    // null until connectCitation succeeds; reset to null by disconnectCitation
    private IndexCell<CitationReference> urlCitationIndex;
    private IndexTable firstSeenIndex;
    private IndexTable loadTimeIndex;
    // created on demand by connectRWI/connectCitation and terminated in close()
    private IODispatcher merger = null;

    /**
     * Opens a segment stored below {@code segmentPath}.
     * <p>
     * The constructor creates the archive directory if necessary, opens the
     * {@link Fulltext} store and the first-seen / load-time tables. The term index
     * and the citation index stay disconnected until {@link #connectRWI(int, long)}
     * and {@link #connectCitation(int, long)} are called.
     *
     * @param log logger used by this segment
     * @param segmentPath directory holding the segment files
     * @param archivePath archive directory; created with {@code mkdirs()} if missing
     * @param collectionConfiguration configuration handed to the {@link Fulltext} store
     * @param webgraphConfiguration configuration handed to the {@link Fulltext} store
     * @throws IOException declared for failures while opening the underlying stores
     */
    public Segment(ConcurrentLog log, File segmentPath, File archivePath, CollectionConfiguration collectionConfiguration, WebgraphConfiguration webgraphConfiguration) throws IOException {
        // the message previously lacked the closing quote around the path
        log.info("Initializing Segment '" + segmentPath + "'.");
        this.log = log;
        this.segmentPath = segmentPath;
        archivePath.mkdirs();
        this.fulltext = new Fulltext(segmentPath, archivePath, collectionConfiguration, webgraphConfiguration);
        this.termIndex = null;
        this.urlCitationIndex = null;
        this.firstSeenIndex = new IndexTable(new File(segmentPath, firstseenIndexName), 12, 8, false, false);
        this.loadTimeIndex = new IndexTable(new File(segmentPath, loadtimeIndexName), 12, 8, false, false);
    }

    /**
     * Tells whether a term (RWI) index is attached.
     *
     * @return {@code true} while the {@code termIndex} field is non-null
     */
    public boolean connectedRWI() {
        return null != termIndex;
    }

    /**
     * Attaches the term (RWI) index if it is not attached yet; otherwise does nothing.
     * The shared {@code IODispatcher} is created and started on first use.
     *
     * @param entityCacheMaxSize cache size argument passed through to the index cell
     * @param maxFileSize maximum file size argument passed through to the index cell
     * @throws IOException if the index cell cannot be opened
     */
    public void connectRWI(int entityCacheMaxSize, long maxFileSize) throws IOException {
        if (this.termIndex == null) {
            if (this.merger == null) {
                this.merger = new IODispatcher(2, 2, writeBufferSize);
                this.merger.start();
            }
            final File cellLocation = new File(this.segmentPath, "default");
            this.termIndex = new IndexCell<WordReference>(
                    cellLocation,
                    termIndexName,
                    wordReferenceFactory,
                    wordOrder,
                    12,
                    entityCacheMaxSize,
                    targetFileSize,
                    maxFileSize,
                    writeBufferSize,
                    this.merger);
        }
    }

    /**
     * Closes and detaches the term (RWI) index; a no-op when none is attached.
     */
    public void disconnectRWI() {
        if (this.termIndex != null) {
            this.termIndex.close();
            this.termIndex = null;
        }
    }

    /**
     * Tells whether a citation index is attached.
     *
     * @return {@code true} while the {@code urlCitationIndex} field is non-null
     */
    public boolean connectedCitation() {
        return null != urlCitationIndex;
    }

    /**
     * Attaches the citation index if it is not attached yet; otherwise does nothing.
     * The shared {@code IODispatcher} is created and started on first use.
     *
     * @param entityCacheMaxSize cache size argument passed through to the index cell
     * @param maxFileSize maximum file size argument passed through to the index cell
     * @throws IOException if the index cell cannot be opened
     */
    public void connectCitation(int entityCacheMaxSize, long maxFileSize) throws IOException {
        if (this.urlCitationIndex == null) {
            if (this.merger == null) {
                this.merger = new IODispatcher(2, 2, writeBufferSize);
                this.merger.start();
            }
            final File cellLocation = new File(this.segmentPath, "default");
            this.urlCitationIndex = new IndexCell<CitationReference>(
                    cellLocation,
                    citationIndexName,
                    citationReferenceFactory,
                    wordOrder,
                    12,
                    entityCacheMaxSize,
                    targetFileSize,
                    maxFileSize,
                    writeBufferSize,
                    this.merger);
        }
    }

    /**
     * Closes and detaches the citation index; a no-op when none is attached.
     */
    public void disconnectCitation() {
        if (this.urlCitationIndex != null) {
            this.urlCitationIndex.close();
            this.urlCitationIndex = null;
        }
    }

    /**
     * Reports {@code sizesMax()} of the citation index.
     *
     * @return the value reported by the citation index, or 0 when no citation index is attached
     */
    public int citationCount() {
        if (this.urlCitationIndex == null) {
            return 0;
        }
        return this.urlCitationIndex.sizesMax();
    }

    /**
     * Reports {@code getSegmentCount()} of the citation index.
     *
     * @return the value reported by the citation index, or 0 when no citation index is attached
     */
    public long citationSegmentCount() {
        if (this.urlCitationIndex == null) {
            return 0L;
        }
        return (long) this.urlCitationIndex.getSegmentCount();
    }

    /**
     * @return the fulltext store created by the constructor
     */
    public Fulltext fulltext() {
        return fulltext;
    }

    /**
     * @return the attached term (RWI) index, or {@code null} when none is attached
     */
    public IndexCell<WordReference> termIndex() {
        return termIndex;
    }

    /**
     * @return the attached citation index, or {@code null} when none is attached
     */
    public IndexCell<CitationReference> urlCitation() {
        return urlCitationIndex;
    }

    /**
     * @return the table that stores first-seen times per URL hash
     */
    public IndexTable firstSeenIndex() {
        return firstSeenIndex;
    }

    /**
     * @return the table that stores load times per URL hash
     */
    public IndexTable loadTimeIndex() {
        return loadTimeIndex;
    }

    /**
     * Creates a new, empty reference report cache bound to this segment.
     *
     * @return a fresh cache instance on every call
     */
    public ReferenceReportCache getReferenceReportCache() {
        return this.new ReferenceReportCache();
    }

    /**
     * Reports {@code sizesMax()} of the term index.
     *
     * @return the value reported by the term index, or 0 when no term index is attached
     */
    public long RWICount() {
        return this.termIndex == null ? 0L : this.termIndex.sizesMax();
    }

    /**
     * Reports {@code getSegmentCount()} of the term index.
     *
     * @return the value reported by the term index, or 0 when no term index is attached
     */
    public long RWISegmentCount() {
        return this.termIndex == null ? 0L : this.termIndex.getSegmentCount();
    }

    /**
     * Reports {@code getBufferSize()} of the term index.
     *
     * @return the value reported by the term index, or 0 when no term index is attached
     */
    public int RWIBufferCount() {
        return this.termIndex == null ? 0 : this.termIndex.getBufferSize();
    }

    /**
     * Estimates how many documents contain {@code word}.
     * <p>
     * Words that are {@code null} or contain a colon, space, slash or double quote
     * yield 0. If a term index is attached its count is used; otherwise the default
     * Solr connector is queried on the text field. Any failure of that query is
     * logged and reported as 0.
     *
     * @param word the single word to look up
     * @return the count, or 0 if the word is rejected, no index is available, or the query fails
     */
    public int getWordCountGuess(String word) {
        if (word == null) {
            return 0;
        }
        final boolean hasForbiddenChar = word.indexOf(':') >= 0
                || word.indexOf(' ') >= 0
                || word.indexOf('/') >= 0
                || word.indexOf('"') >= 0;
        if (hasForbiddenChar) {
            return 0;
        }
        if (this.termIndex != null) {
            return this.termIndex.count(Word.word2hash(word));
        }
        if (this.fulltext.getDefaultConnector() == null) {
            return 0;
        }
        try {
            final String query = CollectionSchema.text_t.getSolrFieldName() + ":\"" + word + "\"";
            return (int) this.fulltext.getDefaultConnector().getCountByQuery(query);
        } catch (Throwable failure) {
            ConcurrentLog.warn("Segment", "problem with word guess for word: " + word);
            ConcurrentLog.logException(failure);
            return 0;
        }
    }

    /**
     * Records the first-seen time of a URL unless one is already stored.
     * Calls with a {@code null} hash or a non-positive time are ignored; I/O errors are logged.
     *
     * @param urlhash hash of the URL
     * @param time the time value to store; must be greater than zero
     */
    public void setFirstSeenTime(byte[] urlhash, long time) {
        if (urlhash == null || time <= 0L) {
            return;
        }
        try {
            // never overwrite an existing entry
            if (!this.firstSeenIndex.has(urlhash)) {
                this.firstSeenIndex.put(urlhash, time);
            }
        } catch (IOException ioe) {
            ConcurrentLog.logException(ioe);
        }
    }

    /**
     * Looks up the stored first-seen time of a URL.
     *
     * @param urlhash hash of the URL
     * @return the value returned by the first-seen table, or -1 if {@code urlhash} is
     *         {@code null} or the lookup fails with an I/O error (which is logged)
     */
    public long getFirstSeenTime(byte[] urlhash) {
        long firstSeen = -1L;
        if (urlhash != null) {
            try {
                firstSeen = this.firstSeenIndex.get(urlhash);
            } catch (IOException ioe) {
                ConcurrentLog.logException(ioe);
            }
        }
        return firstSeen;
    }

    /**
     * Stores the load time of a URL, replacing whatever the table held before.
     * Calls with a {@code null} hash or a non-positive time are ignored; I/O errors are logged.
     *
     * @param urlhash hash of the URL
     * @param time the time value to store; must be greater than zero
     */
    public void setLoadTime(byte[] urlhash, long time) {
        final boolean storable = urlhash != null && time > 0L;
        if (storable) {
            try {
                this.loadTimeIndex.put(urlhash, time);
            } catch (IOException ioe) {
                ConcurrentLog.logException(ioe);
            }
        }
    }

    /**
     * Looks up the stored load time of a URL.
     *
     * @param urlhash hash of the URL
     * @return the value returned by the load-time table, or -1 if {@code urlhash} is
     *         {@code null} or the lookup fails with an I/O error (which is logged)
     */
    public long getLoadTime(byte[] urlhash) {
        long loadTime = -1L;
        if (urlhash != null) {
            try {
                loadTime = this.loadTimeIndex.get(urlhash);
            } catch (IOException ioe) {
                ConcurrentLog.logException(ioe);
            }
        }
        return loadTime;
    }

    /**
     * Delegates to the fulltext store's {@code exists} check.
     *
     * @param id document id to look up
     * @return whatever the fulltext store reports for {@code id}
     */
    public boolean exists(String id) {
        final boolean known = fulltext.exists(id);
        return known;
    }

    /**
     * Streams the URLs stored in the default Solr index, optionally restricted to
     * those starting with a given stub.
     * <p>
     * With a {@code null} stub all documents are queried. Otherwise the query is
     * restricted to the stub's host hash and every result is additionally filtered
     * by {@code startsWith(stub.toNormalform(true))}. If the host hash cannot be
     * computed the returned iterator is empty.
     *
     * @param stub URL prefix to select, or {@code null} for all URLs
     * @param maxtime time limit passed to the Solr connector
     * @param maxcount result limit passed to the Solr connector
     * @return an iterator over the matching URLs; documents whose URL is malformed are skipped
     */
    public Iterator<DigestURL> urlSelector(MultiProtocolURL stub, long maxtime, int maxcount) {
        final String idField = CollectionSchema.id.getSolrFieldName();
        final String skuField = CollectionSchema.sku.getSolrFieldName();
        final String sortOrder = CollectionSchema.url_chars_i.getSolrFieldName() + " asc";
        final BlockingQueue<SolrDocument> docQueue;
        final String urlstub;
        if (stub == null) {
            docQueue = this.fulltext.getDefaultConnector().concurrentDocumentsByQuery("*:*", sortOrder, 0, Integer.MAX_VALUE, maxtime, maxcount, 1, false, idField, skuField);
            urlstub = null;
        } else {
            String hh = null;
            try {
                hh = DigestURL.hosthash(stub.getHost(), stub.getPort());
            } catch (MalformedURLException e) {
                ConcurrentLog.logException(e);
            }
            if (hh == null) {
                // No host hash: hand out a queue that holds only the end marker.
                // (A capacity of 0 is rejected by ArrayBlockingQueue, and an empty
                // queue would make take() block forever.)
                docQueue = new ArrayBlockingQueue<SolrDocument>(1);
                docQueue.add(AbstractSolrConnector.POISON_DOCUMENT);
            } else {
                docQueue = this.fulltext.getDefaultConnector().concurrentDocumentsByQuery(String.valueOf(CollectionSchema.host_id_s) + ":\"" + hh + "\"", sortOrder, 0, Integer.MAX_VALUE, maxtime, maxcount, 1, false, idField, skuField);
            }
            urlstub = stub.toNormalform(true);
        }
        return new LookAheadIterator<DigestURL>() {

            @Override
            protected DigestURL next0() {
                while (true) {
                    final SolrDocument doc;
                    try {
                        doc = docQueue.take();
                    } catch (InterruptedException e) {
                        ConcurrentLog.logException(e);
                        // keep the interruption visible to the caller
                        Thread.currentThread().interrupt();
                        return null;
                    }
                    if (doc == null || doc == AbstractSolrConnector.POISON_DOCUMENT) {
                        return null;
                    }
                    final String u = (String) doc.getFieldValue(skuField);
                    final String id = (String) doc.getFieldValue(idField);
                    final DigestURL url;
                    try {
                        url = new DigestURL(u, ASCII.getBytes(id));
                    } catch (MalformedURLException e) {
                        // skip documents whose stored URL cannot be parsed
                        continue;
                    }
                    if (urlstub == null || u.startsWith(urlstub)) {
                        return url;
                    }
                }
            }
        };
    }

    /**
     * Empties the attached term index, the local and remote Solr stores and the
     * attached citation index, in that order. An {@code IOException} aborts the
     * remaining steps and is logged.
     */
    public void clear() {
        try {
            if (this.termIndex != null) {
                this.termIndex.clear();
            }
            if (this.fulltext != null) {
                this.fulltext.clearLocalSolr();
                this.fulltext.clearRemoteSolr();
            }
            if (this.urlCitationIndex != null) {
                this.urlCitationIndex.clear();
            }
        } catch (IOException ioe) {
            ConcurrentLog.logException(ioe);
        }
    }

    /**
     * Clears the caches of the attached citation index, the attached term index
     * and the fulltext store.
     */
    public void clearCaches() {
        if (urlCitationIndex != null) {
            urlCitationIndex.clearCache();
        }
        if (termIndex != null) {
            termIndex.clearCache();
        }
        fulltext.clearCaches();
    }

    /**
     * @return the segment directory given to the constructor
     */
    public File getLocation() {
        return segmentPath;
    }

    /**
     * Closes every store that is currently open (term index, fulltext, citation
     * index, first-seen and load-time tables) and terminates the merger.
     * Only the merger reference is reset to {@code null}; the other fields keep
     * their (now closed) instances.
     */
    public synchronized void close() {
        if (termIndex != null) {
            termIndex.close();
        }
        if (fulltext != null) {
            fulltext.close();
        }
        if (urlCitationIndex != null) {
            urlCitationIndex.close();
        }
        if (firstSeenIndex != null) {
            firstSeenIndex.close();
        }
        if (loadTimeIndex != null) {
            loadTimeIndex.close();
        }
        if (merger != null) {
            merger.terminate();
            merger = null;
        }
    }

    /**
     * Decides on a document language from three sources: the statistical guess of
     * the condenser, the document metadata, and the language implied by the URL.
     * <ul>
     * <li>No statistical guess: use the metadata, or the URL language if there is no metadata.</li>
     * <li>Statistical guess only: keep it, unless its probability is below 0.9, it
     *     disagrees with the URL language and neither the language code nor its
     *     ISO639 country code occurs as a path element of the URL; then use the URL language.</li>
     * <li>All three available: keep the statistical guess if it matches the metadata
     *     or the URL language; otherwise use the metadata.</li>
     * </ul>
     *
     * @param url the document URL
     * @param urlNormalform normal form of {@code url}, searched for language path elements
     * @param document supplies the metadata language
     * @param condenser supplies the statistical language and its probability
     * @return the chosen language; may be {@code null} if only the URL language is available and that is {@code null}
     */
    public static String votedLanguage(DigestURL url, String urlNormalform, Document document, Condenser condenser) {
        final String byStatistics = condenser.language();
        final String byMetadata = document.dc_language();
        if (byStatistics == null) {
            return byMetadata == null ? url.language() : byMetadata;
        }
        if (byMetadata == null) {
            if (condenser.languageProbability() < 0.9 && !byStatistics.equals(url.language())) {
                final String lowerUrl = urlNormalform.toLowerCase(Locale.ROOT);
                final String country = ISO639.country(byStatistics);
                final boolean namedInPath = lowerUrl.contains("/" + byStatistics + "/")
                        || (country != null && lowerUrl.contains("/" + country.toLowerCase(Locale.ROOT) + "/"));
                if (!namedInPath) {
                    return url.language();
                }
            }
            return byStatistics;
        }
        // Statistics disagree with both other sources: the metadata wins, whether or
        // not it agrees with the URL (the old ternary had two identical branches).
        if (!byStatistics.equals(byMetadata) && !byStatistics.equals(url.language())) {
            return byMetadata;
        }
        return byStatistics;
    }

    /**
     * Adds a whole reference container to the term index; silently ignored when no
     * term index is attached.
     *
     * @param wordContainer the references to add
     * @throws IOException propagated from the term index
     * @throws SpaceExceededException propagated from the term index
     */
    public void storeRWI(ReferenceContainer<WordReference> wordContainer) throws IOException, SpaceExceededException {
        if (this.termIndex == null) {
            return;
        }
        this.termIndex.add(wordContainer);
    }

    /**
     * Adds a single reference for {@code termHash} to the term index; silently
     * ignored when no term index is attached.
     *
     * @param termHash hash of the term
     * @param entry2 the reference to add
     * @throws IOException propagated from the term index
     * @throws SpaceExceededException propagated from the term index
     */
    public void storeRWI(byte[] termHash, WordReference entry2) throws IOException, SpaceExceededException {
        if (this.termIndex == null) {
            return;
        }
        this.termIndex.add(termHash, entry2);
    }

    /**
     * Hands a document to the fulltext store. An {@code IOException} is logged and
     * not propagated.
     *
     * @param queueEntry the Solr document to store
     */
    public void putDocument(SolrInputDocument queueEntry) {
        try {
            fulltext().putDocument(queueEntry);
        } catch (IOException ioe) {
            ConcurrentLog.logException(ioe);
        }
    }

    /**
     * Builds the Solr vector for a parsed document and stores it.
     * <p>
     * The language is chosen with {@link #votedLanguage}, the vector is produced by
     * the default collection configuration, and the work is then delegated to the
     * overload that takes a ready-made vector.
     *
     * @param url URL of the document
     * @param referrerURL referrer passed to the vector builder
     * @param collections collections passed to the vector builder
     * @param crawlProfile profile of the crawl; dereferenced here, so it must not be {@code null}
     * @param responseHeader response header of the load, passed through
     * @param document the parsed document
     * @param condenser condenser computed for the document
     * @param searchEvent search event to feed, passed through
     * @param sourceName name of the source, passed through
     * @param storeToRWI whether to write to the term index, passed through
     * @param proxy passed through to the vector-taking overload
     * @param acceptLanguage passed through to the vector-taking overload
     * @return the stored Solr document
     */
    public SolrInputDocument storeDocument(DigestURL url, DigestURL referrerURL, Map<String, Pattern> collections, CrawlProfile crawlProfile, ResponseHeader responseHeader, Document document, Condenser condenser, SearchEvent searchEvent, String sourceName, boolean storeToRWI, String proxy, String acceptLanguage) {
        final CollectionConfiguration configuration = this.fulltext.getDefaultConfiguration();
        final String votedLanguage = Segment.votedLanguage(url, url.toNormalform(true), document, condenser);
        final CollectionConfiguration.SolrVector solrVector = configuration.yacy2solr(
                this,
                collections,
                responseHeader,
                document,
                condenser,
                referrerURL,
                votedLanguage,
                crawlProfile.isPushCrawlProfile(),
                this.fulltext().useWebgraph() ? this.fulltext.getWebgraphConfiguration() : null,
                sourceName);
        return this.storeDocument(url, crawlProfile, responseHeader, document, solrVector, votedLanguage, condenser, searchEvent, sourceName, storeToRWI, proxy, acceptLanguage);
    }

    /**
     * Stores an already-built Solr vector and updates all dependent indexes.
     * <p>
     * In order, the method: runs reference postprocessing on the vector, stores a
     * snapshot for http(s) documents when the crawl profile asks for one, writes
     * the vector to the fulltext store, writes webgraph edges (up to 20 attempts,
     * one second apart, with a commit after the attempt at index 10), records
     * first-seen and load times, adds citation references for inbound and outbound
     * links, and finally writes word references to the term index and/or feeds
     * them to the running search event.
     *
     * @param url URL of the document
     * @param crawlProfile profile of the crawl; may be {@code null}, in which case no snapshot is stored
     * @param responseHeader response header of the load; may be {@code null}
     * @param document the parsed document
     * @param vector the Solr vector to store
     * @param language language written into the word references
     * @param condenser condenser computed for the document
     * @param searchEvent search event to feed with matching references; may be {@code null}
     * @param sourceName name of the source, handed to the search event
     * @param storeToRWI whether the individual words are written to the term index
     * @param proxy passed through to the snapshot store
     * @param acceptLanguage passed through to the snapshot store
     * @return {@code vector}
     */
    public SolrInputDocument storeDocument(DigestURL url, CrawlProfile crawlProfile, ResponseHeader responseHeader, Document document, CollectionConfiguration.SolrVector vector, String language, Condenser condenser, SearchEvent searchEvent, String sourceName, boolean storeToRWI, String proxy, String acceptLanguage) {
        final long startTime = System.currentTimeMillis();
        final CollectionConfiguration collectionConfig = this.fulltext.getDefaultConfiguration();
        final String urlNormalform = url.toNormalform(true);
        final Date loadDate = new Date();
        final String id = ASCII.String(url.hash());
        final String dc_title = document.dc_title();

        // Modification date: the earlier of header date and document date, never in the future.
        // The document date is read once and null-checked; it was dereferenced unconditionally before.
        final Date documentDate = document.getLastModified();
        Date modDate = responseHeader == null ? documentDate : responseHeader.lastModified();
        if (modDate == null) {
            modDate = new Date();
        }
        if (documentDate != null && documentDate.before(modDate)) {
            modDate = documentDate;
        }
        if (modDate.getTime() > loadDate.getTime()) {
            modDate = loadDate;
        }
        final char docType = Response.docType(document.dc_format());
        this.fulltext.getDefaultConfiguration().postprocessing_references(this.getReferenceReportCache(), vector, url, null);

        // snapshot: only for http(s), within the profile's depth limit and not excluded by pattern
        if ((url.getProtocol().equals("http") || url.getProtocol().equals("https")) && crawlProfile != null && document.getDepth() <= crawlProfile.snapshotMaxdepth() && !crawlProfile.snapshotsMustnotmatch().matcher(urlNormalform).matches()) {
            final Parser p = document.getParserObject();
            boolean mimesupported = false;
            if (p instanceof htmlParser) {
                mimesupported = ((htmlParser) p).supportedMimeTypes().contains(document.dc_format());
            }
            if (mimesupported) {
                Transactions.store(vector, true, crawlProfile.snapshotLoadImage(), crawlProfile.snapshotReplaceold(), proxy, acceptLanguage);
            }
        }

        this.putDocument(vector);

        // webgraph edges, with retries
        final List<SolrInputDocument> webgraph = vector.getWebgraphDocuments();
        String error = null;
        // An interrupt during the retry pause is remembered and re-asserted at the end of
        // the method, so the index writes below still run with a clear flag.
        boolean interrupted = false;
        if (webgraph != null && webgraph.size() > 0 && this.fulltext.useWebgraph()) {
            for (int attempt = 0; attempt < 20; ++attempt) {
                try {
                    error = null;
                    this.fulltext.putEdges(webgraph);
                    break;
                } catch (IOException e) {
                    error = "failed to send " + urlNormalform + " to solr: " + e.getMessage();
                    ConcurrentLog.warn("SOLR", error);
                    if (attempt == 10) {
                        this.fulltext.commit(true);
                    }
                    try {
                        Thread.sleep(1000L);
                    } catch (InterruptedException ie) {
                        interrupted = true;
                    }
                }
            }
        }

        final long now = System.currentTimeMillis();
        this.setFirstSeenTime(url.hash(), documentDate == null ? now : Math.min(documentDate.getTime(), now));
        this.setLoadTime(url.hash(), now);

        // citation references for inbound and outbound links
        if (this.connectedCitation()) {
            try {
                if (collectionConfig.contains(CollectionSchema.inboundlinks_protocol_sxt) || collectionConfig.contains(CollectionSchema.inboundlinks_urlstub_sxt)) {
                    this.storeCitations(vector, CollectionSchema.inboundlinks_urlstub_sxt, CollectionSchema.inboundlinks_protocol_sxt, id, loadDate.getTime());
                }
                if (collectionConfig.contains(CollectionSchema.outboundlinks_protocol_sxt) || collectionConfig.contains(CollectionSchema.outboundlinks_urlstub_sxt)) {
                    this.storeCitations(vector, CollectionSchema.outboundlinks_urlstub_sxt, CollectionSchema.outboundlinks_protocol_sxt, id, loadDate.getTime());
                }
            } catch (Throwable e) {
                ConcurrentLog.logException(e);
            }
        }
        if (error != null) {
            ConcurrentLog.severe("SOLR", error + ", PLEASE REPORT TO https://github.com/yacy/yacy_search_server/issues");
        }
        final long storageEndTime = System.currentTimeMillis();

        // word references: term index and/or running search event
        if ((this.termIndex != null && storeToRWI) || searchEvent != null) {
            final int outlinksSame = document.inboundLinks().size();
            final int outlinksOther = document.outboundLinks().size();
            final int urlLength = urlNormalform.length();
            final int urlComps = MultiProtocolURL.urlComps(url.toNormalform(false)).length;
            final int wordsintitle = CommonPattern.SPACES.split(dc_title).length;
            final WordReferenceRow ientry = new WordReferenceRow(url.hash(), urlLength, urlComps, wordsintitle, condenser.RESULT_NUMB_WORDS, condenser.RESULT_NUMB_SENTENCES, modDate.getTime(), System.currentTimeMillis(), UTF8.getBytes(language), docType, outlinksSame, outlinksOther);
            Word wprop = null;
            for (Map.Entry<String, Word> wentry : condenser.words().entrySet()) {
                final String word = wentry.getKey();
                wprop = wentry.getValue();
                assert (wprop.flags != null);
                ientry.setWord(wprop);
                final byte[] wordhash = Word.word2hash(word);
                if (this.termIndex != null && storeToRWI) {
                    try {
                        this.termIndex.add(wordhash, ientry);
                    } catch (Exception e) {
                        ConcurrentLog.logException(e);
                    }
                }
                // feed the search event only with words it asked for and did not exclude
                if (searchEvent == null || searchEvent.query.getQueryGoal().getExcludeHashes().has(wordhash) || !searchEvent.query.getQueryGoal().getIncludeHashes().has(wordhash)) {
                    continue;
                }
                try {
                    final ReferenceContainer<WordReference> container = ReferenceContainer.emptyContainer(wordReferenceFactory, wordhash, 1);
                    container.add(ientry);
                    searchEvent.addRWIs(container, true, sourceName, 1, 5000L);
                } catch (SpaceExceededException ignored) {
                    // deliberately ignored, as before: the reference is simply not offered to the search event
                }
            }
            if (searchEvent != null) {
                searchEvent.addFinalize();
            }
            // catch-all entry; the last word's properties serve as template when available
            ientry.setWord(wprop == null ? catchallWord : wprop);
            // NOTE(review): this write is not guarded by storeToRWI, unlike the per-word writes above — confirm intent
            if (this.termIndex != null) {
                try {
                    this.termIndex.add(catchallHash, ientry);
                } catch (Throwable e) {
                    ConcurrentLog.logException(e);
                }
            }
        }
        final long indexingEndTime = System.currentTimeMillis();
        if (this.log.isInfo()) {
            this.log.info("*Indexed " + condenser.words().size() + " words in URL " + url.toNormalform(true) + " [" + id + "]\n\tDescription:  " + dc_title + "\n\tMimeType: " + document.dc_format() + " | Charset: " + document.getCharset() + " | Size: " + document.getTextLength() + " bytes | \n\tLinkStorageTime: " + (storageEndTime - startTime) + " ms | indexStorageTime: " + (indexingEndTime - storageEndTime) + " ms");
        }
        if (interrupted) {
            // hand the interruption seen during the retry pause back to the caller
            Thread.currentThread().interrupt();
        }
        return vector;
    }

    /**
     * Adds one citation reference (link target -> referrer) per link listed in the
     * given pair of vector fields. Nothing is stored unless the URL-stub field holds
     * a {@code List} and the protocol list has the same length. A failure on one
     * link is logged and does not stop the remaining links.
     */
    private void storeCitations(CollectionConfiguration.SolrVector vector, CollectionSchema urlstubField, CollectionSchema protocolField, String referrerhash, long loadTime) {
        final Collection<Object> urlstubs = vector.getFieldValues(urlstubField.getSolrFieldName());
        if (urlstubs == null) {
            return;
        }
        final List<String> protocols = CollectionConfiguration.indexedList2protocolList(vector.getFieldValues(protocolField.getSolrFieldName()), urlstubs.size());
        if (protocols == null || protocols.size() != urlstubs.size() || !(urlstubs instanceof List)) {
            return;
        }
        final List<?> stubList = (List<?>) urlstubs;
        for (int i = 0; i < protocols.size(); ++i) {
            try {
                final String targetURL = protocols.get(i) + "://" + (String) stubList.get(i);
                final String anchorhash = ASCII.String(new DigestURL(targetURL).hash());
                if (referrerhash == null || anchorhash == null) {
                    continue;
                }
                this.urlCitationIndex.add(ASCII.getBytes(anchorhash), new CitationReference(ASCII.getBytes(referrerhash), loadTime));
            } catch (Throwable e) {
                ConcurrentLog.logException(e);
            }
        }
    }

    /**
     * Runs the single-hash {@code removeAllUrlReferences} for every hash in {@code urls2}.
     *
     * @param urls2 the URL hashes to remove
     * @param loader loader used to re-fetch each document
     * @param agent client identification used for loading
     * @param cacheStrategy cache strategy used for loading
     */
    public void removeAllUrlReferences(HandleSet urls2, LoaderDispatcher loader, ClientIdentification.Agent agent, CacheStrategy cacheStrategy) {
        final Iterator<byte[]> hashes = urls2.iterator();
        while (hashes.hasNext()) {
            this.removeAllUrlReferences(hashes.next(), loader, agent, cacheStrategy);
        }
    }

    /**
     * Removes a document and its word references.
     * <p>
     * The URL is looked up in the fulltext store, the document is loaded again and
     * condensed to obtain its words, those words' references to {@code urlhash} are
     * removed from the term index (if one is attached), and the document is removed
     * from the fulltext store. If loading yields no document, only the fulltext
     * entry is removed.
     *
     * @param urlhash hash of the URL to remove
     * @param loader loader used to re-fetch the document
     * @param agent client identification used for loading
     * @param cacheStrategy cache strategy used for loading
     * @return the count returned by the term index removal; 0 if the hash is {@code null},
     *         the URL is unknown, no document could be loaded, no term index is attached,
     *         or parsing / I/O failed
     */
    public int removeAllUrlReferences(byte[] urlhash, LoaderDispatcher loader, ClientIdentification.Agent agent, CacheStrategy cacheStrategy) {
        if (urlhash == null) {
            return 0;
        }
        try {
            final String storedUrl = this.fulltext().getURL(ASCII.String(urlhash));
            if (storedUrl == null) {
                return 0;
            }
            final DigestURL url = new DigestURL(storedUrl);
            final Document document = Document.mergeDocuments(url, null, loader.loadDocuments(loader.request(url, true, false), cacheStrategy, Integer.MAX_VALUE, null, agent));
            if (document == null) {
                this.fulltext().remove(urlhash);
                return 0;
            }
            final Set<String> words = new Condenser(document, null, true, true, null, false, false, 0).words().keySet();
            int removed = 0;
            if (this.termIndex() != null) {
                removed = this.termIndex().remove(Word.words2hashesHandles(words), urlhash);
            }
            this.fulltext().remove(urlhash);
            return removed;
        } catch (Parser.Failure parseFailure) {
            return 0;
        } catch (IOException ioe) {
            ConcurrentLog.logException(ioe);
            return 0;
        }
    }

    static {
        catchallWord = new Word(0, 0, 0);
        catchallHash = Word.word2hash(catchallString);
        Segment.catchallWord.flags = new Bitfield(4);
        for (int i = 0; i < Segment.catchallWord.flags.length(); ++i) {
            Segment.catchallWord.flags.set(i, true);
        }
        wordReferenceFactory = new WordReferenceFactory();
        citationReferenceFactory = new CitationReferenceFactory();
        wordOrder = Base64Order.enhancedCoder;
    }

    /**
     * Per-instance cache of {@link ReferenceReport}s keyed by document id.
     * The backing map is a {@link ConcurrentHashMap}; the whole cache is dropped
     * whenever {@code MemoryControl.shortStatus()} reports true during a lookup.
     */
    public class ReferenceReportCache {
        private final Map<String, ReferenceReport> cache = new ConcurrentHashMap<String, ReferenceReport>();

        /**
         * Returns the report for {@code id}, computing and caching it on first request.
         *
         * @param id document id (URL hash as string)
         * @param acceptSelfReference passed to the {@link ReferenceReport} constructor when a new report is computed
         * @return the cached or newly computed report
         * @throws IOException if computing the report fails; a {@link SpaceExceededException}
         *         is logged and rethrown as {@code IOException} with the original as cause
         */
        public ReferenceReport getReferenceReport(String id, boolean acceptSelfReference) throws IOException {
            ReferenceReport rr = this.cache.get(id);
            // a hit obtained above is still returned even if the cache is dropped here
            if (MemoryControl.shortStatus()) {
                this.cache.clear();
            }
            if (rr != null) {
                return rr;
            }
            try {
                rr = new ReferenceReport(ASCII.getBytes(id), acceptSelfReference);
                this.cache.put(id, rr);
                return rr;
            } catch (SpaceExceededException e) {
                ConcurrentLog.logException(e);
                // keep the original exception as cause instead of only its message
                throw new IOException(e.getMessage(), e);
            }
        }
    }

    public final class ReferenceReport {
        private int internal;
        private int external;
        private HandleSet externalHosts;
        private HandleSet externalIDs;
        private HandleSet internalIDs;

        public ReferenceReport(byte[] id, boolean acceptSelfReference) throws IOException, SpaceExceededException {
            block11: {
                this.internal = 0;
                this.external = 0;
                this.externalHosts = new RowHandleSet(6, (ByteOrder)Base64Order.enhancedCoder, 0);
                this.internalIDs = new RowHandleSet(12, (ByteOrder)Base64Order.enhancedCoder, 0);
                this.externalIDs = new RowHandleSet(12, (ByteOrder)Base64Order.enhancedCoder, 0);
                if (Segment.this.connectedCitation()) {
                    try {
                        ReferenceContainer<CitationReference> references = Segment.this.urlCitation().get(id, null);
                        if (references == null) {
                            return;
                        }
                        Iterator<CitationReference> ri = references.entries();
                        while (ri.hasNext()) {
                            CitationReference ref = ri.next();
                            byte[] hh = ref.hosthash();
                            if (ByteBuffer.equals(hh, 0, id, 6, 6)) {
                                this.internalIDs.put(ref.urlhash());
                                ++this.internal;
                                continue;
                            }
                            this.externalHosts.put(hh);
                            this.externalIDs.put(ref.urlhash());
                            ++this.external;
                        }
                    }
                    catch (SpaceExceededException e) {
                        if (!Segment.this.fulltext.useWebgraph()) break block11;
                        this.internalIDs.clear();
                    }
                }
            }
            if ((this.internalIDs.size() == 0 || !Segment.this.connectedCitation()) && Segment.this.fulltext.useWebgraph()) {
                SolrConnector webgraph = Segment.this.fulltext.getWebgraphConnector();
                BlockingQueue<SolrDocument> docs = webgraph.concurrentDocumentsByQuery("{!cache=false raw f=" + WebgraphSchema.target_id_s.getSolrFieldName() + "}" + ASCII.String(id), WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 10000000, Long.MAX_VALUE, 100, 1, false, WebgraphSchema.source_id_s.getSolrFieldName());
                try {
                    SolrDocument doc;
                    while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT && !MemoryControl.shortStatus()) {
                        String refid = (String)doc.getFieldValue(WebgraphSchema.source_id_s.getSolrFieldName());
                        if (refid == null) continue;
                        byte[] refidh = ASCII.getBytes(refid);
                        byte[] hh = new byte[6];
                        System.arraycopy(refidh, 6, hh, 0, 6);
                        if (ByteBuffer.equals(hh, 0, id, 6, 6)) {
                            if (!acceptSelfReference && Arrays.equals(refidh, id)) continue;
                            this.internalIDs.put(refidh);
                            ++this.internal;
                            continue;
                        }
                        this.externalHosts.put(hh);
                        this.externalIDs.put(refidh);
                        ++this.external;
                    }
                }
                catch (InterruptedException e) {
                    ConcurrentLog.logException(e);
                }
            }
            this.externalHosts.optimize();
            this.internalIDs.optimize();
            this.externalIDs.optimize();
        }

        public int getInternalCount() {
            return this.internal;
        }

        public int getExternalCount() {
            return this.external;
        }

        public HandleSet getExternalHostIDs() {
            return this.externalHosts;
        }

        public HandleSet getExternalIDs() {
            return this.externalIDs;
        }

        public HandleSet getInternallIDs() {
            return this.internalIDs;
        }
    }
}

