/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.search.index;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.util.HashSet;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.document.Condenser;
import net.yacy.document.Document;
import net.yacy.document.LibraryProvider;
import net.yacy.document.TextParser;
import net.yacy.document.VocabularyScraper;
import net.yacy.document.parser.html.TagValency;
import net.yacy.kelondro.workflow.WorkflowProcessor;
import net.yacy.search.index.Segment;
import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.WebgraphConfiguration;
import org.apache.solr.common.SolrInputDocument;

/**
 * A {@link Segment} that parses and stores documents concurrently.
 *
 * <p>URLs handed to {@link #addConcurrent(AnchorURL)} are placed on a bounded
 * queue. A fixed set of {@link Worker} threads (one per
 * {@code WorkflowProcessor.availableCPU}) takes URLs from that queue, parses
 * each one and stores the resulting documents through
 * {@code Segment.storeDocument}. The outcome of every URL is reported to an
 * optional {@link CallbackListener}.
 *
 * <p>{@link #close()} must be called to stop the worker threads.
 */
public class DocumentIndex extends Segment {

    /** Name used for the segment logger and for log messages of this class. */
    private static final String LOG_NAME = "DocumentIndex";

    /**
     * Sentinel placed on the queue once per worker by {@link #close()}.
     * Workers compare by identity and terminate when they take it.
     */
    private static final AnchorURL POISON = createPoison();

    /** Thread group that all worker threads of this class are created in. */
    static final ThreadGroup workerThreadGroup = new ThreadGroup("workerThreadGroup");

    /** Bounded hand-over queue between producers and the worker threads. */
    private final BlockingQueue<AnchorURL> queue;

    /** The worker threads; started in the constructor, joined in {@link #close()}. */
    private final Worker[] worker;

    /** Receiver of per-URL results; may be {@code null}, in which case nothing is reported. */
    private final CallbackListener callback;

    /** Time zone offset passed through to the text parser. */
    private final int timezoneOffset;

    /**
     * Creates the index, connects its storage back ends and starts the worker threads.
     *
     * @param segmentPath passed to the {@link Segment} constructor
     * @param archivePath passed to the {@link Segment} constructor
     * @param collectionConfigurationPath location of the collection configuration, or
     *        {@code null} to pass no collection configuration to the segment
     * @param webgraphConfigurationPath location of the webgraph configuration, or
     *        {@code null} to pass no webgraph configuration to the segment
     * @param callback listener informed about stored and failed URLs; may be {@code null}
     * @param cachesize cache size handed to {@code connectRWI} and {@code connectCitation}
     * @param timezoneOffset time zone offset handed to the parser for every document
     * @throws IOException if the segment or one of its back ends cannot be set up
     */
    public DocumentIndex(File segmentPath, File archivePath, File collectionConfigurationPath, File webgraphConfigurationPath, CallbackListener callback, int cachesize, int timezoneOffset) throws IOException {
        super(new ConcurrentLog(LOG_NAME), segmentPath, archivePath, collectionConfigurationPath == null ? null : new CollectionConfiguration(collectionConfigurationPath, true), webgraphConfigurationPath == null ? null : new WebgraphConfiguration(webgraphConfigurationPath, true));
        this.timezoneOffset = timezoneOffset;
        // NOTE(review): the meaning of the second argument (0xFFFFFFF) is defined by Segment - confirm there
        super.connectRWI(cachesize, 0xFFFFFFFL);
        super.connectCitation(cachesize, 0xFFFFFFFL);
        super.fulltext().connectLocalSolr();
        super.fulltext().setUseWebgraph(true);
        this.callback = callback;
        // the queue is bounded, so producers block in put() once workers fall behind
        this.queue = new LinkedBlockingQueue<AnchorURL>(WorkflowProcessor.availableCPU * 300);
        this.worker = new Worker[WorkflowProcessor.availableCPU];
        for (int i = 0; i < WorkflowProcessor.availableCPU; ++i) {
            this.worker[i] = new Worker(i);
            this.worker[i].start();
        }
    }

    /**
     * Builds the queue sentinel.
     *
     * @return the sentinel URL, never {@code null}
     * @throws IllegalStateException if the constant sentinel URL cannot be constructed;
     *         failing here avoids a {@code null} sentinel that could neither be queued
     *         nor ever be recognized by a worker
     */
    private static AnchorURL createPoison() {
        try {
            return new AnchorURL("file://.");
        } catch (MalformedURLException e) {
            throw new IllegalStateException("cannot create queue sentinel", e);
        }
    }

    /**
     * @return the number of URLs currently waiting in the queue
     */
    public int pending() {
        return this.queue.size();
    }

    /**
     * Removes every entry that is currently waiting in the queue.
     */
    public void clearQueue() {
        this.queue.clear();
    }

    /**
     * Parses a single document and stores everything the parser produced.
     *
     * @param url the document to index; must be readable and must not be a directory
     * @param timezoneOffset time zone offset handed to the parser
     * @return an array with one slot per parsed document. Results are written from index 0
     *         upwards; for every {@code null} document returned by the parser a trailing
     *         slot stays {@code null}.
     * @throws IOException if {@code url} is {@code null}, a directory or unreadable, or if
     *         loading/parsing fails (message then starts with {@code "cannot parse"} and
     *         the original exception is attached as cause)
     */
    private SolrInputDocument[] add(AnchorURL url, int timezoneOffset) throws IOException {
        if (url == null) {
            throw new IOException("file = null");
        }
        if (url.isDirectory()) {
            throw new IOException("file should be a document, not a path");
        }
        if (!url.canRead()) {
            throw new IOException("cannot read file");
        }

        // the length is only passed on to the parser; -1 is used when it cannot be determined
        long length;
        try {
            length = url.length();
        } catch (Exception e) {
            length = -1L;
        }

        Document[] documents;
        InputStream sourceStream = null;
        try {
            sourceStream = url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent);
            documents = TextParser.parseSource(url, null, null, TagValency.EVAL, new HashSet<String>(), new VocabularyScraper(), timezoneOffset, 0, length, sourceStream);
        } catch (Exception e) {
            // keep the "cannot parse" prefix (the workers test for it) and preserve the cause
            throw new IOException("cannot parse " + url.toNormalform(false) + ": " + e.getMessage(), e);
        } finally {
            if (sourceStream != null) {
                try {
                    sourceStream.close();
                } catch (IOException e) {
                    ConcurrentLog.warn(LOG_NAME, "Could not close source stream : " + e.getMessage());
                }
            }
        }

        SolrInputDocument[] rows = new SolrInputDocument[documents.length];
        int c = 0;
        for (Document document : documents) {
            if (document == null) {
                continue;
            }
            Condenser condenser = new Condenser(document, null, true, true, LibraryProvider.dymLib, true, true, 0);
            rows[c++] = super.storeDocument((DigestURL) url, null, null, null, null, document, condenser, null, DocumentIndex.class.getName() + ".add", false, null, null);
        }
        return rows;
    }

    /**
     * Queues a document, or all readable, non-hidden documents below a directory
     * (recursively), for indexing by the worker threads.
     *
     * <p>The call blocks while the queue is full. If the calling thread is interrupted,
     * enqueuing stops, the remaining entries are not queued and the thread's interrupt
     * status is left set.
     *
     * @param start a readable document or directory; must not be {@code null}
     * @throws IOException if the URL operations used to inspect {@code start} fail
     */
    public void addConcurrent(AnchorURL start) throws IOException {
        assert (start != null);
        assert (start.canRead()) : start.toString();
        if (!start.isDirectory()) {
            this.enqueue(start);
            return;
        }
        String[] names = start.list();
        if (names == null) {
            return;
        }
        for (String name : names) {
            // an interrupt seen here (possibly raised inside a recursive call) ends the scan
            if (Thread.currentThread().isInterrupted()) {
                return;
            }
            try {
                AnchorURL child = new AnchorURL((MultiProtocolURL) start, name);
                if (!child.canRead() || child.isHidden()) {
                    continue;
                }
                if (child.isDirectory()) {
                    this.addConcurrent(child);
                    continue;
                }
                if (!this.enqueue(child)) {
                    return;
                }
            } catch (MalformedURLException e1) {
                ConcurrentLog.logException(e1);
            }
        }
    }

    /**
     * Puts one URL on the queue, blocking while the queue is full.
     *
     * @param url the URL to queue
     * @return {@code true} if the URL was queued; {@code false} if the thread was
     *         interrupted, in which case the interrupt status has been restored
     */
    private boolean enqueue(AnchorURL url) {
        try {
            this.queue.put(url);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /**
     * Stops the worker threads and closes the underlying segment.
     *
     * <p>One sentinel per worker is queued behind any pending URLs, then every worker is
     * joined. If the calling thread is interrupted while queuing the sentinels, the
     * workers are interrupted instead so that they still terminate. The caller's
     * interrupt status is restored before this method returns.
     */
    @Override
    public synchronized void close() {
        boolean interrupted = false;
        for (int i = 0; i < this.worker.length; ++i) {
            try {
                this.queue.put(POISON);
            } catch (InterruptedException e) {
                interrupted = true;
                // not every worker will find a sentinel now; interrupt them so that
                // their blocking take() ends and the join() below cannot hang on them
                for (Worker element : this.worker) {
                    element.interrupt();
                }
                break;
            }
        }
        for (Worker element : this.worker) {
            try {
                element.join();
            } catch (InterruptedException e) {
                // do not wait for this worker any longer, continue with the next one
                interrupted = true;
            }
        }
        super.close();
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Receives the result of every URL processed by the worker threads.
     * The methods are invoked on the worker threads.
     */
    public static interface CallbackListener {
        /**
         * Called for every stored document of a processed URL.
         *
         * @param url the processed URL
         * @param document the row returned by the store operation
         */
        public void commit(DigestURL url, SolrInputDocument document);

        /**
         * Called when a URL could not be processed or a result slot was empty.
         *
         * @param url the processed URL
         * @param reason a description of the failure
         */
        public void fail(DigestURL url, String reason);
    }

    /**
     * Thread that takes URLs from the queue and indexes them until it takes the
     * sentinel or is interrupted.
     */
    class Worker extends Thread {
        /**
         * @param count index of this worker, used as suffix of the thread name
         */
        public Worker(int count) {
            super(workerThreadGroup, "query-" + count);
        }

        @Override
        public void run() {
            final CallbackListener listener = DocumentIndex.this.callback;
            try {
                AnchorURL f;
                // identity comparison is intended: only the sentinel instance ends the loop
                while ((f = DocumentIndex.this.queue.take()) != POISON) {
                    try {
                        SolrInputDocument[] resultRows = DocumentIndex.this.add(f, DocumentIndex.this.timezoneOffset);
                        if (listener == null) {
                            continue;
                        }
                        for (SolrInputDocument resultRow : resultRows) {
                            if (resultRow == null) {
                                listener.fail(f, "result is null");
                            } else {
                                listener.commit(f, resultRow);
                            }
                        }
                    } catch (IOException e) {
                        final String message = e.getMessage();
                        // parse failures are expected and only reported to the listener;
                        // everything else (including message-less exceptions) is logged
                        if (message == null || !message.contains("cannot parse")) {
                            ConcurrentLog.logException(e);
                        }
                        if (listener != null) {
                            listener.fail(f, message == null ? e.toString() : message);
                        }
                    }
                }
            } catch (InterruptedException e) {
                // interrupted while waiting for work: keep the status set and let the thread end
                Thread.currentThread().interrupt();
            }
        }
    }
}

