/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.repository;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.solr.FailCategory;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.data.Cache;
import net.yacy.crawler.data.CrawlProfile;
import net.yacy.crawler.retrieval.FTPLoader;
import net.yacy.crawler.retrieval.FileLoader;
import net.yacy.crawler.retrieval.HTTPLoader;
import net.yacy.crawler.retrieval.Request;
import net.yacy.crawler.retrieval.Response;
import net.yacy.crawler.retrieval.SMBLoader;
import net.yacy.crawler.retrieval.StreamResponse;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.document.TextParser;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
import net.yacy.search.Switchboard;

public final class LoaderDispatcher {
    /**
     * Size limit for {@link #accessTime}.
     * NOTE(review): this constant is not referenced by name in the visible code; the load
     * methods compare {@code accessTime.size()} against a literal 1000, presumably this
     * constant inlined by the compiler — confirm.
     */
    private static final int accessTimeMaxsize = 1000;
    /** Logger of this class; the same instance is handed to the protocol loaders in the constructor. */
    private static final ConcurrentLog log = new ConcurrentLog("LOADER");
    /** Host name mapped to the time (epoch milliseconds) at which a load for that host was last started. */
    private static final ConcurrentHashMap<String, Long> accessTime = new ConcurrentHashMap();
    /** Switchboard giving access to configuration, crawler profiles, peers and crawl queues. */
    private final Switchboard sb;
    /** Lower-case protocol names accepted by {@link #isSupportedProtocol(String)}; filled in the constructor. */
    private final HashSet<String> supportedProtocols;
    /** Loader used for {@code http} and {@code https} URLs. */
    private final HTTPLoader httpLoader;
    /** Loader used for {@code ftp} URLs. */
    private final FTPLoader ftpLoader;
    /** Loader used for {@code smb} URLs. */
    private final SMBLoader smbLoader;
    /** Loader used for {@code file} URLs. */
    private final FileLoader fileLoader;
    /**
     * One semaphore per URL that is currently being loaded through this dispatcher; other
     * callers asking for the same URL wait on it (see the public load/openInputStream methods).
     */
    private final ConcurrentHashMap<DigestURL, Semaphore> loaderSteering;

    public LoaderDispatcher(Switchboard sb) {
        this.sb = sb;
        this.supportedProtocols = new HashSet<String>(Arrays.asList("http", "https", "ftp", "smb", "file"));
        this.httpLoader = new HTTPLoader(sb, log);
        this.ftpLoader = new FTPLoader(sb, log);
        this.smbLoader = new SMBLoader(sb, log);
        this.fileLoader = new FileLoader(sb, log);
        this.loaderSteering = new ConcurrentHashMap();
    }

    /**
     * Tells whether this dispatcher has a loader for the given protocol.
     * The name is trimmed and lower-cased (locale independent) before the lookup.
     *
     * @param protocol protocol name, may be {@code null}
     * @return {@code true} when the normalized name is a supported protocol;
     *         {@code false} for {@code null}, the empty string or an unknown name
     */
    public boolean isSupportedProtocol(String protocol) {
        return protocol != null
                && !protocol.isEmpty()
                && this.supportedProtocols.contains(protocol.trim().toLowerCase(Locale.ROOT));
    }

    /**
     * Returns the supported protocol names.
     *
     * @return a fresh copy of the protocol set; changes to it do not affect this dispatcher
     */
    public HashSet<String> getSupportedProtocols() {
        final HashSet<String> copy = new HashSet<String>(this.supportedProtocols);
        return copy;
    }

    /**
     * Builds a load request for the given URL using one of the crawler's default snippet profiles.
     *
     * @param url     the URL to be loaded
     * @param forText {@code true} selects a text snippet profile, {@code false} a media snippet profile
     * @param global  {@code true} selects the global variant of the profile, {@code false} the local one
     * @return a new request initiated by the own peer, with no referrer, an empty name,
     *         the current date and depth 0
     */
    public Request request(DigestURL url, boolean forText, boolean global) {
        final CrawlProfile snippetProfile;
        if (forText) {
            snippetProfile = global
                    ? this.sb.crawler.defaultTextSnippetGlobalProfile
                    : this.sb.crawler.defaultTextSnippetLocalProfile;
        } else {
            snippetProfile = global
                    ? this.sb.crawler.defaultMediaSnippetGlobalProfile
                    : this.sb.crawler.defaultMediaSnippetLocalProfile;
        }
        final byte[] initiator = ASCII.getBytes(this.sb.peers.mySeed().hash);
        return new Request(
                initiator,
                url,
                null,
                "",
                new Date(),
                snippetProfile.handle(),
                0,
                snippetProfile.timezoneOffset());
    }

    /**
     * Loads the content of a URL and stores it in a file.
     * The content is first written to a sibling file with the suffix {@code .tmp} and then
     * moved onto the target, so the target never holds a partially written document.
     *
     * @param url           the URL to load
     * @param cacheStratgy  the cache strategy used for loading
     * @param maxFileSize   the size limit passed to the loader
     * @param targetFile    the file that receives the content
     * @param blacklistType blacklist to check the URL against, or {@code null} for no check
     * @param agent         the client identification used for loading
     * @throws IOException when loading fails, no content was delivered, or the content
     *                     could not be written and moved to {@code targetFile}
     */
    public void load(DigestURL url, CacheStrategy cacheStratgy, int maxFileSize, File targetFile, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException {
        byte[] b = this.load(this.request(url, false, true), cacheStratgy, maxFileSize, blacklistType, agent).getContent();
        if (b == null) {
            throw new IOException("load == null");
        }
        File tmp = new File(targetFile.getAbsolutePath() + ".tmp");
        // resolve against the absolute path: a bare relative file name has no parent component
        File parent = targetFile.getAbsoluteFile().getParentFile();
        if (parent != null && !parent.exists()) {
            // a failed mkdirs surfaces as an IOException from the copy below
            parent.mkdirs();
        }
        FileUtils.copy(b, tmp);
        if (!tmp.renameTo(targetFile)) {
            // renameTo does not replace an existing target on every platform: remove it and retry once
            boolean replaced = targetFile.exists() && targetFile.delete() && tmp.renameTo(targetFile);
            if (!replaced) {
                // best-effort cleanup of the temporary file before reporting the failure
                tmp.delete();
                throw new IOException("cannot move " + tmp + " to " + targetFile);
            }
        }
    }

    /**
     * Loads a resource using the configured maximum file size of the URL's protocol.
     *
     * @param request       the request describing the URL to load
     * @param cacheStrategy the cache strategy used for loading
     * @param blacklistType blacklist to check the URL against, or {@code null} for no check
     * @param agent         the client identification used for loading
     * @return the loaded response
     * @throws IOException when loading fails
     */
    public Response load(Request request, CacheStrategy cacheStrategy, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException {
        final int sizeLimit = this.protocolMaxFileSize(request.url());
        return this.load(request, cacheStrategy, sizeLimit, blacklistType, agent);
    }

    /**
     * Loads a resource, coordinating with other loads of the same URL.
     * <p>
     * If a load of the same URL is already registered and the cache may be used, this call
     * first waits up to five seconds for that load to finish (presumably so that its result
     * can be taken from the cache). Afterwards this call registers itself, loads the
     * resource and finally wakes up all threads waiting on the registration.
     *
     * @param request       the request describing the URL to load
     * @param cacheStrategy the cache strategy; with {@code NOCACHE} no waiting takes place
     * @param maxFileSize   the size limit passed to the loader
     * @param blacklistType blacklist to check the URL against, or {@code null} for no check
     * @param agent         the client identification used for loading
     * @return the loaded response
     * @throws IOException when loading fails; any other throwable raised while loading is
     *                     wrapped into an {@code IOException}
     */
    public Response load(Request request, CacheStrategy cacheStrategy, int maxFileSize, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException {
        Semaphore check = this.loaderSteering.get(request.url());
        if (check != null && cacheStrategy != CacheStrategy.NOCACHE) {
            long t = System.currentTimeMillis();
            try {
                check.tryAcquire(5L, TimeUnit.SECONDS);
            }
            catch (InterruptedException e) {
                // stop waiting, but keep the interruption visible to the code that follows and to callers
                Thread.currentThread().interrupt();
            }
            ConcurrentLog.info("LoaderDispatcher", "waited " + (System.currentTimeMillis() - t) + " ms for " + request.url().toNormalform(true));
        }
        // register this load; the semaphore starts without permits so that others have to wait
        this.loaderSteering.put(request.url(), new Semaphore(0));
        try {
            return this.loadInternal(request, cacheStrategy, maxFileSize, blacklistType, agent);
        }
        catch (IOException e) {
            throw e;
        }
        catch (Throwable e) {
            throw new IOException(e);
        }
        finally {
            // release far more permits than there can be waiters so that every waiter continues
            check = this.loaderSteering.remove(request.url());
            if (check != null) {
                check.release(1000);
            }
        }
    }

    /**
     * Loads a resource from the cache or, if that is not possible, from its origin.
     * <p>
     * Steps: reject blacklisted URLs, try the cache, enforce the per-host access delay,
     * dispatch to the loader of the URL's protocol and finally store the result in the
     * cache when the crawl profile asks for it.
     *
     * @param request       the request describing the URL to load
     * @param cacheStrategy the cache strategy; replaced by {@code NOCACHE} for file and SMB URLs
     * @param maxFileSize   the size limit passed to the HTTP loader
     * @param blacklistType blacklist to check the URL against, or {@code null} for no check
     * @param agent         the client identification used for loading
     * @return the response, never {@code null} and always with content
     * @throws IOException when the URL is blacklisted, the cache-only strategy finds nothing,
     *                     the protocol is unsupported or the loader delivers no content
     */
    private Response loadInternal(Request request, CacheStrategy cacheStrategy, int maxFileSize, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException {
        final DigestURL url = request.url();
        final CacheStrategy strategy = (url.isFile() || url.isSMB()) ? CacheStrategy.NOCACHE : cacheStrategy;
        final String protocol = url.getProtocol();
        final String host = url.getHost();

        CrawlProfile crawlProfile = null;
        if (request.profileHandle() != null) {
            crawlProfile = this.sb.crawler.get(UTF8.getBytes(request.profileHandle()));
        }

        // blacklist check
        final boolean blacklisted = blacklistType != null
                && host != null
                && Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(Locale.ROOT), url.getFile());
        if (blacklisted) {
            this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), crawlProfile, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
            throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.$");
        }

        // cache lookup
        final Response cached = this.loadFromCache(request, strategy, agent, url, crawlProfile);
        if (cached != null) {
            return cached;
        }
        if (strategy == CacheStrategy.CACHEONLY) {
            throw new IOException("cache only strategy");
        }

        // wait if the host was accessed too recently, then record this access
        this.checkAccessTime(agent, url);
        if (host != null) {
            if (accessTime.size() > accessTimeMaxsize) {
                accessTime.clear();
            }
            accessTime.put(host, System.currentTimeMillis());
        }

        // dispatch to the loader of the protocol
        final Response loaded;
        if (protocol.equals("http") || protocol.equals("https")) {
            loaded = this.httpLoader.load(request, crawlProfile, maxFileSize, blacklistType, agent);
        } else if (protocol.equals("ftp")) {
            loaded = this.ftpLoader.load(request, true);
        } else if (protocol.equals("smb")) {
            loaded = this.smbLoader.load(request, true);
        } else if (protocol.equals("file")) {
            loaded = this.fileLoader.load(request, true);
        } else {
            throw new IOException("Unsupported protocol '" + protocol + "' in url " + url);
        }

        if (loaded == null) {
            throw new IOException("no response (NULL) for url " + url);
        }
        if (loaded.getContent() == null) {
            throw new IOException("empty response (code " + loaded.getStatus() + ") for url " + url.toNormalform(true));
        }

        // store the fresh content in the cache when the profile requests it
        if (crawlProfile != null && crawlProfile.storeHTCache()) {
            final String storeError = loaded.shallStoreCacheForCrawler();
            if (storeError != null) {
                log.warn("cannot write " + loaded.url() + " to Cache (4): " + storeError);
            } else {
                try {
                    Cache.store(loaded.url(), loaded.getResponseHeader(), loaded.getContent());
                }
                catch (IOException e) {
                    log.warn("cannot write " + loaded.url() + " to Cache (3): " + e.getMessage(), e);
                }
            }
        }
        return loaded;
    }

    /**
     * Tries to answer a request from the cache.
     * <p>
     * A cached entry is used when the strategy is {@code IFEXIST} or {@code CACHEONLY}, or
     * when the reconstructed response reports itself as fresh for the proxy. Local URLs are
     * never looked up in the cache.
     *
     * @param request       the request describing the URL to load
     * @param cacheStrategy the cache strategy; {@code NOCACHE} disables the lookup
     * @param agent         the client identification; its user agent goes into the request header
     * @param url           the URL of the request
     * @param crawlProfile  the crawl profile of the request; without a profile no lookup takes place
     * @return the cached response including its content, or {@code null} when the cache
     *         cannot be used
     * @throws IOException declared for the callers; raised only if one of the used helpers raises it
     */
    private Response loadFromCache(Request request, CacheStrategy cacheStrategy, ClientIdentification.Agent agent, DigestURL url, CrawlProfile crawlProfile) throws IOException {
        if (cacheStrategy == CacheStrategy.NOCACHE || crawlProfile == null) {
            return null;
        }

        final ResponseHeader cachedHeader = url.isLocal() ? null : Cache.getResponseHeader(url.hash());
        if (cachedHeader == null) {
            return null;
        }
        if (!Cache.hasContent(url.hash())) {
            log.warn("HTCACHE contained response header, but not content for url " + url.toNormalform(true));
            return null;
        }

        // rebuild the request header that belongs to the cached response
        final RequestHeader requestHeader = new RequestHeader();
        requestHeader.put("User-Agent", agent.userAgent);
        final String refererURL = request.referrerhash() == null ? null : this.sb.getURL(request.referrerhash());
        if (refererURL != null) {
            requestHeader.put("Referer", refererURL);
        }
        final Response candidate = new Response(request, requestHeader, cachedHeader, crawlProfile, true, null);

        // strategies that accept any cached entry, regardless of its age
        final boolean acceptAnyAge = cacheStrategy == CacheStrategy.IFEXIST || cacheStrategy == CacheStrategy.CACHEONLY;
        if (acceptAnyAge) {
            final byte[] content = Cache.getContent(url.hash());
            if (content != null) {
                log.info("cache hit/useall for: " + url.toNormalform(true));
                candidate.setContent(content);
                return candidate;
            }
        }

        // otherwise the entry must still be fresh
        if (candidate.isFreshForProxy()) {
            final byte[] content = Cache.getContent(url.hash());
            if (content != null) {
                log.info("cache hit/fresh for: " + url.toNormalform(true));
                candidate.setContent(content);
                return candidate;
            }
        }

        log.info("cache hit/stale for: " + url.toNormalform(true));
        return null;
    }

    /**
     * Opens a content stream on a resource, served from the cache or from its origin.
     * <p>
     * Steps: reject blacklisted URLs, try the cache (a hit is wrapped into an in-memory
     * stream), enforce the per-host access delay and dispatch to the loader of the URL's
     * protocol.
     *
     * @param request       the request describing the URL to load
     * @param cacheStrategy the cache strategy; replaced by {@code NOCACHE} for file and SMB URLs
     * @param maxFileSize   the size limit passed to the HTTP and file loaders
     * @param blacklistType blacklist to check the URL against, or {@code null} for no check
     * @param agent         the client identification used for loading
     * @return the stream response as delivered by the cache or by the protocol loader
     * @throws IOException when the URL is blacklisted, the cache-only strategy finds nothing
     *                     or the protocol is unsupported
     */
    private StreamResponse openInputStreamInternal(Request request, CacheStrategy cacheStrategy, int maxFileSize, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException {
        final DigestURL url = request.url();
        final CacheStrategy strategy = (url.isFile() || url.isSMB()) ? CacheStrategy.NOCACHE : cacheStrategy;
        final String protocol = url.getProtocol();
        final String host = url.getHost();

        CrawlProfile crawlProfile = null;
        if (request.profileHandle() != null) {
            crawlProfile = this.sb.crawler.get(UTF8.getBytes(request.profileHandle()));
        }

        // blacklist check
        final boolean blacklisted = blacklistType != null
                && host != null
                && Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(Locale.ROOT), url.getFile());
        if (blacklisted) {
            this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), crawlProfile, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
            throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.$");
        }

        // cache lookup
        final Response cachedResponse = this.loadFromCache(request, strategy, agent, url, crawlProfile);
        if (cachedResponse != null) {
            return new StreamResponse(cachedResponse, new ByteArrayInputStream(cachedResponse.getContent()));
        }
        if (strategy == CacheStrategy.CACHEONLY) {
            throw new IOException("cache only strategy");
        }

        // wait if the host was accessed too recently, then record this access
        this.checkAccessTime(agent, url);
        if (host != null) {
            if (accessTime.size() > accessTimeMaxsize) {
                accessTime.clear();
            }
            accessTime.put(host, System.currentTimeMillis());
        }

        // dispatch to the loader of the protocol
        if (protocol.equals("http") || protocol.equals("https")) {
            return this.httpLoader.openInputStream(request, crawlProfile, 2, maxFileSize, blacklistType, agent);
        }
        if (protocol.equals("ftp")) {
            return this.ftpLoader.openInputStream(request, true);
        }
        if (protocol.equals("smb")) {
            return this.smbLoader.openInputStream(request, true);
        }
        if (protocol.equals("file")) {
            return this.fileLoader.openInputStream(request, true, maxFileSize);
        }
        throw new IOException("Unsupported protocol '" + protocol + "' in url " + url);
    }

    /**
     * Delays the calling thread when the host of the URL was accessed too recently.
     * <p>
     * The required distance between two accesses to the same host is
     * {@code agent.minimumDelta} milliseconds. Part of the waiting time is spent cleaning
     * up the access time table; the remainder is slept. Local URLs and URLs without a host
     * are never delayed.
     *
     * @param agent the client identification providing the minimum access distance
     * @param url   the URL that is about to be loaded
     */
    private void checkAccessTime(ClientIdentification.Agent agent, DigestURL url) {
        if (url.isLocal()) {
            return;
        }
        String host = url.getHost();
        if (host == null) {
            // nothing is recorded for host-less URLs, and ConcurrentHashMap rejects null keys
            return;
        }
        Long lastAccess = accessTime.get(host);
        if (lastAccess == null) {
            return;
        }
        long wait = Math.max(0L, (long)agent.minimumDelta + lastAccess - System.currentTimeMillis());
        if (wait <= 0L) {
            return;
        }
        long untilTime = System.currentTimeMillis() + wait;
        // use the waiting time for something useful
        LoaderDispatcher.cleanupAccessTimeTable(untilTime);
        // read the clock once: a second reading could push the remainder below zero,
        // which Thread.sleep rejects with an IllegalArgumentException
        long frcdslp = untilTime - System.currentTimeMillis();
        if (frcdslp > 0L) {
            log.info("Forcing sleep of " + frcdslp + " ms for host " + host);
            try {
                Thread.sleep(frcdslp);
            }
            catch (InterruptedException e) {
                // end the delay early, but keep the interruption visible to callers
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Returns the configured maximum file size for the protocol of the given URL.
     *
     * @param url the URL whose protocol selects the configuration key
     * @return the value of {@code crawler.<protocol>.maxFileSize} (default 10485760 for
     *         http/https, ftp and smb, 100000000 for file), or {@code Integer.MAX_VALUE}
     *         for any other protocol
     */
    public int protocolMaxFileSize(DigestURL url) {
        final int tenMebibytes = 10 * 1024 * 1024;
        final String configKey;
        final int fallback;
        if (url.isHTTP() || url.isHTTPS()) {
            configKey = "crawler.http.maxFileSize";
            fallback = tenMebibytes;
        } else if (url.isFTP()) {
            configKey = "crawler.ftp.maxFileSize";
            fallback = tenMebibytes;
        } else if (url.isSMB()) {
            configKey = "crawler.smb.maxFileSize";
            fallback = tenMebibytes;
        } else if (url.isFile()) {
            configKey = "crawler.file.maxFileSize";
            fallback = 100000000;
        } else {
            return Integer.MAX_VALUE;
        }
        return this.sb.getConfigInt(configKey, fallback);
    }

    /**
     * Loads a resource and returns only its content.
     *
     * @param request       the request describing the URL to load
     * @param cacheStrategy the cache strategy used for loading
     * @param blacklistType blacklist to check the URL against, or {@code null} for no check
     * @param agent         the client identification used for loading
     * @return the content of the response, or {@code null} when there is no response
     * @throws IOException when loading fails
     */
    public byte[] loadContent(Request request, CacheStrategy cacheStrategy, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException {
        final Response loaded = this.load(request, cacheStrategy, blacklistType, agent);
        return loaded == null ? null : loaded.getContent();
    }

    /**
     * Opens a content stream on a resource, coordinating with other loads of the same URL.
     * <p>
     * If a load of the same URL is already registered and the cache may be used, this call
     * first waits up to five seconds for that load to finish (presumably so that its result
     * can be taken from the cache). Afterwards this call registers itself, opens the stream
     * and finally wakes up all threads waiting on the registration.
     *
     * @param request       the request describing the URL to load
     * @param cacheStrategy the cache strategy; with {@code NOCACHE} no waiting takes place
     * @param blacklistType blacklist to check the URL against, or {@code null} for no check
     * @param agent         the client identification used for loading
     * @param maxFileSize   the size limit passed to the loader
     * @return the opened stream response
     * @throws IOException when opening fails; any other throwable raised while opening is
     *                     wrapped into an {@code IOException}
     */
    public StreamResponse openInputStream(Request request, CacheStrategy cacheStrategy, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent, int maxFileSize) throws IOException {
        Semaphore check = this.loaderSteering.get(request.url());
        if (check != null && cacheStrategy != CacheStrategy.NOCACHE) {
            long t = System.currentTimeMillis();
            try {
                check.tryAcquire(5L, TimeUnit.SECONDS);
            }
            catch (InterruptedException e) {
                // stop waiting, but keep the interruption visible to the code that follows and to callers
                Thread.currentThread().interrupt();
            }
            ConcurrentLog.info("LoaderDispatcher", "waited " + (System.currentTimeMillis() - t) + " ms for " + request.url().toNormalform(true));
        }
        // register this load; the semaphore starts without permits so that others have to wait
        this.loaderSteering.put(request.url(), new Semaphore(0));
        try {
            return this.openInputStreamInternal(request, cacheStrategy, maxFileSize, blacklistType, agent);
        }
        catch (IOException ioe) {
            throw ioe;
        }
        catch (Throwable e) {
            throw new IOException(e);
        }
        finally {
            // release far more permits than there can be waiters so that every waiter continues
            check = this.loaderSteering.remove(request.url());
            if (check != null) {
                check.release(1000);
            }
        }
    }

    /**
     * Opens a content stream using the configured maximum file size of the URL's protocol.
     *
     * @param request       the request describing the URL to load
     * @param cacheStrategy the cache strategy used for loading
     * @param blacklistType blacklist to check the URL against, or {@code null} for no check
     * @param agent         the client identification used for loading
     * @return the opened stream response
     * @throws IOException when opening fails
     */
    public StreamResponse openInputStream(Request request, CacheStrategy cacheStrategy, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException {
        return this.openInputStream(request, cacheStrategy, blacklistType, agent, this.protocolMaxFileSize(request.url()));
    }

    /**
     * Loads a resource and parses it into documents.
     * When the response carries an X-Robots-Tag containing {@code noindex}, every parsed
     * document is marked as denied for indexing.
     *
     * @param request       the request describing the URL to load
     * @param cacheStrategy the cache strategy used for loading
     * @param maxFileSize   the size limit passed to the loader
     * @param blacklistType blacklist to check the URL against, or {@code null} for no check
     * @param agent         the client identification used for loading
     * @return the parsed documents
     * @throws IOException    when loading fails or the response lacks content or header
     * @throws Parser.Failure when the content cannot be parsed
     */
    public Document[] loadDocuments(Request request, CacheStrategy cacheStrategy, int maxFileSize, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException, Parser.Failure {
        final Response response = this.load(request, cacheStrategy, maxFileSize, blacklistType, agent);
        final DigestURL url = request.url();
        if (response == null) {
            throw new IOException("no Response for url " + url);
        }
        final boolean incomplete = response.getContent() == null || response.getResponseHeader() == null;
        if (incomplete) {
            throw new IOException("no Content available for url " + url);
        }

        final Document[] parsed = response.parse();
        final String robotsTag = response.getResponseHeader().getXRobotsTag();
        if (robotsTag.contains("noindex")) {
            for (int i = 0; i < parsed.length; i++) {
                parsed[i].setIndexingDenied(true);
            }
        }
        return parsed;
    }

    /**
     * Loads a resource, parses it and merges the parsed parts into one document.
     * When the response carries an X-Robots-Tag containing {@code noindex}, the merged
     * document is marked as denied for indexing.
     *
     * @param location      the URL to load
     * @param cachePolicy   the cache strategy used for loading
     * @param blacklistType blacklist to check the URL against, or {@code null} for no check
     * @param agent         the client identification used for loading
     * @return the merged document
     * @throws IOException when loading fails, the response lacks content or header, or the
     *                     content cannot be parsed (the parser failure is attached as cause)
     */
    public Document loadDocument(DigestURL location, CacheStrategy cachePolicy, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException {
        Request request = this.request(location, true, false);
        Response response = this.load(request, cachePolicy, blacklistType, agent);
        DigestURL url = request.url();
        if (response == null) {
            throw new IOException("no Response for url " + url);
        }
        if (response.getContent() == null || response.getResponseHeader() == null) {
            throw new IOException("no Content available for url " + url);
        }
        try {
            Document[] documents = response.parse();
            Document merged = Document.mergeDocuments(location, response.getMimeType(), documents);
            String x_robots_tag = response.getResponseHeader().getXRobotsTag();
            if (x_robots_tag.indexOf("noindex", 0) >= 0) {
                merged.setIndexingDenied(true);
            }
            return merged;
        }
        catch (Parser.Failure e) {
            // same message as before, but keep the parser failure and its stack trace as cause
            throw new IOException(e.getMessage(), e);
        }
    }

    /**
     * Opens a stream on a resource, parses it from the stream and merges the parsed parts
     * into one document. The maximum file size is the configured one of the URL's protocol.
     * When the response carries an X-Robots-Tag containing {@code noindex}, the merged
     * document is marked as denied for indexing.
     *
     * @param location      the URL to load
     * @param cachePolicy   the cache strategy used for loading
     * @param blacklistType blacklist to check the URL against, or {@code null} for no check
     * @param agent         the client identification used for loading
     * @return the merged document
     * @throws IOException when opening fails, the response lacks stream or header, or the
     *                     content cannot be parsed (the parser failure is attached as cause)
     */
    public Document loadDocumentAsStream(DigestURL location, CacheStrategy cachePolicy, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException {
        Request request = this.request(location, true, false);
        StreamResponse streamResponse = this.openInputStream(request, cachePolicy, blacklistType, agent);
        Response response = streamResponse.getResponse();
        DigestURL url = request.url();
        if (response == null) {
            throw new IOException("no Response for url " + url);
        }
        if (streamResponse.getContentStream() == null || response.getResponseHeader() == null) {
            throw new IOException("no Content available for url " + url);
        }
        try {
            Document[] documents = streamResponse.parse();
            Document merged = Document.mergeDocuments(location, response.getMimeType(), documents);
            String x_robots_tag = response.getResponseHeader().getXRobotsTag();
            if (x_robots_tag.indexOf("noindex", 0) >= 0) {
                merged.setIndexingDenied(true);
            }
            return merged;
        }
        catch (Parser.Failure e) {
            // same message as before, but keep the parser failure and its stack trace as cause
            throw new IOException(e.getMessage(), e);
        }
    }

    /**
     * Opens a stream on a resource, parses it within the given limits and merges the parsed
     * parts into one document. The stream is opened with a maximum file size of -1; the
     * limits are applied by the parser instead.
     * When the response carries an X-Robots-Tag containing {@code noindex}, the merged
     * document is marked as denied for indexing.
     *
     * @param location      the URL to load
     * @param cachePolicy   the cache strategy used for loading
     * @param blacklistType blacklist to check the URL against, or {@code null} for no check
     * @param agent         the client identification used for loading
     * @param maxLinks      link limit handed to the parser
     * @param maxBytes      byte limit handed to the parser
     * @return the merged document
     * @throws IOException when opening fails, the response lacks stream or header, or the
     *                     content cannot be parsed (the parser failure is attached as cause)
     */
    public Document loadDocumentAsLimitedStream(DigestURL location, CacheStrategy cachePolicy, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent, int maxLinks, long maxBytes) throws IOException {
        Request request = this.request(location, true, false);
        StreamResponse streamResponse = this.openInputStream(request, cachePolicy, blacklistType, agent, -1);
        Response response = streamResponse.getResponse();
        DigestURL url = request.url();
        if (response == null) {
            throw new IOException("no Response for url " + url);
        }
        if (streamResponse.getContentStream() == null || response.getResponseHeader() == null) {
            throw new IOException("no Content available for url " + url);
        }
        try {
            Document[] documents = streamResponse.parseWithLimits(maxLinks, maxBytes);
            Document merged = Document.mergeDocuments(location, response.getMimeType(), documents);
            String x_robots_tag = response.getResponseHeader().getXRobotsTag();
            if (x_robots_tag.indexOf("noindex", 0) >= 0) {
                merged.setIndexingDenied(true);
            }
            return merged;
        }
        catch (Parser.Failure e) {
            // same message as before, but keep the parser failure and its stack trace as cause
            throw new IOException(e.getMessage(), e);
        }
    }

    /**
     * Loads a resource without size limit, parses it and returns the hyperlinks it contains.
     *
     * @param url            the URL to load
     * @param cacheStrategy  the cache strategy used for loading
     * @param blacklistType  blacklist to check the URL against, or {@code null} for no check
     * @param agent          the client identification used for loading
     * @param timezoneOffset time zone offset handed to the parser
     * @return the hyperlinks of the parsed documents as delivered by {@code Document.getHyperlinks}
     * @throws IOException when loading fails, the response lacks content or header, no
     *                     parser supports the content type, or parsing fails (the original
     *                     exception is attached as cause)
     */
    public final Map<AnchorURL, String> loadLinks(DigestURL url, CacheStrategy cacheStrategy, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent, int timezoneOffset) throws IOException {
        Response response = this.load(this.request(url, true, false), cacheStrategy, Integer.MAX_VALUE, blacklistType, agent);
        if (response == null) {
            throw new IOException("response == null");
        }
        ResponseHeader responseHeader = response.getResponseHeader();
        if (response.getContent() == null) {
            throw new IOException("resource == null");
        }
        if (responseHeader == null) {
            throw new IOException("responseHeader == null");
        }
        Document[] documents = null;
        String supportError = TextParser.supports(url, responseHeader.getContentType());
        if (supportError != null) {
            throw new IOException("no parser support: " + supportError);
        }
        try {
            documents = TextParser.parseSource(url, responseHeader.getContentType(), responseHeader.getCharacterEncoding(), response.profile().defaultValency(), response.profile().valencySwitchTagNames(), response.profile().scraper(), timezoneOffset, response.depth(), response.getContent());
            if (documents == null) {
                throw new IOException("document == null");
            }
        }
        catch (Exception e) {
            // same message as before, but keep the original exception and its stack trace as cause
            throw new IOException("parser error: " + e.getMessage(), e);
        }
        return Document.getHyperlinks(documents, true);
    }

    /**
     * Removes entries from the access time table whose recorded access is more than one
     * second old. The cleanup stops as soon as the current time has passed {@code timeout}.
     *
     * @param timeout point in time (epoch milliseconds) after which the cleanup is abandoned
     */
    public static synchronized void cleanupAccessTimeTable(long timeout) {
        for (Iterator<Map.Entry<String, Long>> entries = accessTime.entrySet().iterator(); entries.hasNext(); ) {
            final Map.Entry<String, Long> entry = entries.next();
            if (System.currentTimeMillis() > timeout) {
                return;
            }
            final long age = System.currentTimeMillis() - entry.getValue();
            if (age > 1000L) {
                entries.remove();
            }
        }
    }

    /**
     * Starts a background thread that loads the URL with the {@code IFEXIST} cache strategy
     * and writes the content to the given file, unless that file already exists.
     *
     * @param url           the URL to load
     * @param cache         the file that receives the content
     * @param maxFileSize   the size limit passed to the loader
     * @param blacklistType blacklist to check the URL against, or {@code null} for no check
     * @param agent         the client identification used for loading
     */
    public void loadIfNotExistBackground(DigestURL url, File cache, int maxFileSize, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent) {
        final Loader worker = new Loader(url, cache, maxFileSize, CacheStrategy.IFEXIST, blacklistType, agent);
        worker.start();
    }

    /**
     * Starts a background thread that loads the URL with the {@code IFEXIST} cache strategy
     * without writing the content to a file.
     *
     * @param url           the URL to load
     * @param maxFileSize   the size limit passed to the loader
     * @param blacklistType blacklist to check the URL against, or {@code null} for no check
     * @param agent         the client identification used for loading
     */
    public void loadIfNotExistBackground(DigestURL url, int maxFileSize, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent) {
        // same as the variant with a target file, just without one
        final File noTargetFile = null;
        this.loadIfNotExistBackground(url, noTargetFile, maxFileSize, blacklistType, agent);
    }

    /**
     * Background thread that loads one URL through the enclosing dispatcher and optionally
     * writes the content to a file. Failures are logged and otherwise ignored, because
     * there is no caller waiting for the result.
     */
    private class Loader
    extends Thread {
        /** The URL to load. */
        private final DigestURL url;
        /** File receiving the content, or {@code null} when the content shall not be written to a file. */
        private final File cache;
        /** Size limit passed to the loader. */
        private final int maxFileSize;
        /** Cache strategy used for loading. */
        private final CacheStrategy cacheStrategy;
        /** Blacklist to check the URL against, or {@code null} for no check. */
        private final Blacklist.BlacklistType blacklistType;
        /** Client identification used for loading. */
        private final ClientIdentification.Agent agent;

        /**
         * Creates the thread; it has to be started by the caller.
         *
         * @param url           the URL to load
         * @param cache         file receiving the content, may be {@code null}
         * @param maxFileSize   size limit passed to the loader
         * @param cacheStrategy cache strategy used for loading
         * @param blacklistType blacklist to check the URL against, or {@code null} for no check
         * @param agent         client identification used for loading
         */
        public Loader(DigestURL url, File cache, int maxFileSize, CacheStrategy cacheStrategy, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent) {
            super("LoaderDispatcher.Loader");
            this.url = url;
            this.cache = cache;
            this.maxFileSize = maxFileSize;
            this.cacheStrategy = cacheStrategy;
            this.blacklistType = blacklistType;
            this.agent = agent;
        }

        /**
         * Loads the URL unless the target file already exists, and writes the content to
         * the target file when one was given.
         */
        @Override
        public void run() {
            if (this.cache != null && this.cache.exists()) {
                return;
            }
            try {
                Response response = LoaderDispatcher.this.load(LoaderDispatcher.this.request(this.url, false, true), this.cacheStrategy, this.maxFileSize, this.blacklistType, this.agent);
                byte[] b = response == null ? null : response.getContent();
                // without content there is nothing to write; do not hand null to the copy routine
                if (this.cache != null && b != null) {
                    FileUtils.copy(b, this.cache);
                }
            }
            catch (IOException e) {
                // best-effort background load: report the failure instead of dropping it silently
                // (MalformedURLException is an IOException and is covered here as well)
                log.info("background load of " + this.url + " failed: " + e.getMessage());
            }
        }
    }
}

