/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.htroot;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Writer;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.solr.FailCategory;
import net.yacy.cora.federate.solr.instance.EmbeddedInstance;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.crawler.CrawlSwitchboard;
import net.yacy.crawler.FileCrawlStarterTask;
import net.yacy.crawler.data.CrawlProfile;
import net.yacy.crawler.data.NoticedURL;
import net.yacy.crawler.retrieval.SitemapImporter;
import net.yacy.document.Document;
import net.yacy.document.VocabularyScraper;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.TagValency;
import net.yacy.document.parser.html.TransformerWriter;
import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.workflow.BusyThread;
import net.yacy.repository.Blacklist;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Fulltext;
import net.yacy.search.index.Segment;
import net.yacy.search.index.SingleDocumentMatcher;
import net.yacy.search.query.SearchEventCache;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SyntaxError;
import org.json.JSONException;
import org.json.JSONObject;

public class Crawler_p {
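    /*
     * WARNING - void declaration
     *
     * Decompiler (CFR) notice: the local variables of this method were not
     * fully recovered, so the body below is a best-effort reconstruction and
     * is not valid Java as emitted. Artifacts visible in the body:
     *  - a local is declared as "void var64_109" and decremented as a loop
     *    counter without ever being initialised; the nearby "int n = 100" is
     *    not used by that loop and is presumably the intended counter
     *    (TODO confirm against the original source);
     *  - "c22" is declared as NoticedURL.StackType[] but is assigned the
     *    String result of post.toString(), and the same name is reused as a
     *    catch parameter;
     *  - "profile2" is only ever assigned null, while the profile that is
     *    actually constructed is "profile3"; the later uses of "profile2"
     *    presumably refer to that same profile (TODO confirm).
     * Review these spots by hand before relying on or recompiling this method.
     */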
    public static serverObjects respond(RequestHeader header, serverObjects post, serverSwitch env) {
        String queuemessage;
        String queue;
        NoticedURL.StackType[] c22;
        boolean debug;
        Switchboard sb = (Switchboard)env;
        serverObjects prop = new serverObjects();
        prop.put("rejected", 0L);
        if (post != null && post.containsKey("callback")) {
            String jsonp = post.get("callback") + "([";
            prop.put("jsonp-start", jsonp);
            prop.put("jsonp-end", "])");
        } else {
            prop.put("jsonp-start", "");
            prop.put("jsonp-end", "");
        }
        Segment segment = sb.index;
        Fulltext fulltext = segment.fulltext();
        String localSolr = "solr/select?core=collection1&q=*:*&start=0&rows=3";
        String remoteSolr = env.getConfig("federated.service.solr.indexing.url", "solr/select?core=collection1&q=*:*&start=0&rows=3");
        if (!remoteSolr.endsWith("/")) {
            remoteSolr = remoteSolr + "/";
        }
        prop.put("urlpublictextSolrURL", fulltext.connectedLocalSolr() ? "solr/select?core=collection1&q=*:*&start=0&rows=3" : remoteSolr + "collection1/select?&q=*:*&start=0&rows=3");
        prop.putNum("urlpublictextSize", fulltext.collectionSize());
        prop.putNum("urlpublictextSegmentCount", fulltext.getDefaultConnector().getSegmentCount());
        prop.put("webgraphSolrURL", fulltext.connectedLocalSolr() ? "solr/select?core=collection1&q=*:*&start=0&rows=3".replace("collection1", "webgraph") : remoteSolr + "webgraph/select?&q=*:*&start=0&rows=3");
        prop.putNum("webgraphSize", fulltext.useWebgraph() ? fulltext.webgraphSize() : 0L);
        prop.putNum("webgraphSegmentCount", fulltext.useWebgraph() ? (long)fulltext.getWebgraphConnector().getSegmentCount() : 0L);
        prop.putNum("citationSize", segment.citationCount());
        prop.putNum("citationSegmentCount", segment.citationSegmentCount());
        prop.putNum("rwipublictextSize", segment.RWICount());
        prop.putNum("rwipublictextSegmentCount", segment.RWISegmentCount());
        prop.put("list", "0");
        prop.put("loaderSize", 0L);
        prop.put("loaderMax", 0L);
        prop.put("list-loader", 0L);
        int coreCrawlJobSize = sb.crawlQueues.coreCrawlJobSize();
        int limitCrawlJobSize = sb.crawlQueues.limitCrawlJobSize();
        int remoteTriggeredCrawlJobSize = sb.crawlQueues.remoteTriggeredCrawlJobSize();
        int noloadCrawlJobSize = sb.crawlQueues.noloadCrawlJobSize();
        int allsize = coreCrawlJobSize + limitCrawlJobSize + remoteTriggeredCrawlJobSize + noloadCrawlJobSize;
        prop.put("localCrawlSize", coreCrawlJobSize);
        prop.put("localCrawlState", "");
        prop.put("limitCrawlSize", limitCrawlJobSize);
        prop.put("limitCrawlState", "");
        prop.put("remoteCrawlSize", remoteTriggeredCrawlJobSize);
        prop.put("remoteCrawlState", "");
        prop.put("noloadCrawlSize", noloadCrawlJobSize);
        prop.put("noloadCrawlState", "");
        prop.put("terminate-button", allsize == 0 ? 0L : 1L);
        prop.put("list-remote", 0L);
        prop.put("forwardToCrawlStart", "0");
        prop.put("info", "0");
        boolean bl = debug = post != null && post.containsKey("debug");
        if (post != null && (c22 = post.toString()).length() < 1000) {
            ConcurrentLog.info("Crawl Start", (String)c22);
        }
        if (post != null && post.containsKey("queues_terminate_all")) {
            sb.crawlQueues.noticeURL.clear();
            for (byte[] h : sb.crawler.getActive()) {
                CrawlProfile p = sb.crawler.getActive(h);
                if (CrawlSwitchboard.DEFAULT_PROFILES.contains(p.name())) continue;
                if (p != null) {
                    sb.crawler.putPassive(h, p);
                }
                sb.crawler.removeActive(h);
                sb.crawler.removePassive(h);
                try {
                    sb.crawlQueues.noticeURL.removeByProfileHandle(p.handle(), 10000L);
                }
                catch (SpaceExceededException spaceExceededException) {}
            }
            for (NoticedURL.StackType stackType : NoticedURL.StackType.values()) {
                sb.crawlQueues.noticeURL.clear(stackType);
            }
            try {
                sb.cleanProfiles();
            }
            catch (InterruptedException c22) {
                // empty catch block
            }
            sb.continueCrawlJob("50_localcrawl");
            sb.setConfig("50_localcrawl_isPaused_cause", "");
            sb.continueCrawlJob("62_remotetriggeredcrawl");
            sb.setConfig("62_remotetriggeredcrawl_isPaused_cause", "");
            prop.put("terminate-button", 0L);
        }
        if (post != null && post.containsKey("continue")) {
            queue = post.get("continue", "");
            if ("localcrawler".equals(queue)) {
                sb.continueCrawlJob("50_localcrawl");
                sb.setConfig("50_localcrawl_isPaused_cause", "");
            } else if ("remotecrawler".equals(queue)) {
                sb.continueCrawlJob("62_remotetriggeredcrawl");
                sb.setConfig("62_remotetriggeredcrawl_isPaused_cause", "");
            }
        }
        if (post != null && post.containsKey("pause")) {
            queue = post.get("pause", "");
            if ("localcrawler".equals(queue)) {
                sb.pauseCrawlJob("50_localcrawl", "user request in Crawler_p from " + header.refererHost());
            } else if ("remotecrawler".equals(queue)) {
                sb.pauseCrawlJob("62_remotetriggeredcrawl", "user request in Crawler_p from " + header.refererHost());
            }
        }
        if ((queuemessage = sb.getConfig("50_localcrawl_isPaused_cause", "")).length() == 0) {
            prop.put("info-queue", 0L);
        } else {
            prop.put("info-queue", 1L);
            prop.putHTML("info-queue_message", "pause reason: " + queuemessage);
        }
        if (post != null && post.containsKey("terminate")) {
            try {
                String handle = post.get("handle", "");
                CrawlProfile p = sb.crawler.getActive(handle.getBytes());
                if (p != null) {
                    sb.crawler.putPassive(handle.getBytes(), p);
                }
                sb.crawler.removeActive(handle.getBytes());
                sb.crawler.removePassive(handle.getBytes());
                sb.crawlQueues.noticeURL.removeByProfileHandle(handle, 10000L);
            }
            catch (SpaceExceededException e) {
                ConcurrentLog.logException(e);
            }
        }
        if (post != null && post.containsKey("crawlingstart")) {
            if (sb.peers == null) {
                prop.put("info", "3");
            } else {
                CrawlProfile profile2;
                byte[] handle;
                boolean hasCrawlstartDataOK;
                String crawlingMode;
                CacheStrategy cachePolicy;
                int p;
                File crawlingFile;
                String crawlingFileName;
                if (post.getBoolean("cleanSearchCache")) {
                    SearchEventCache.cleanupEvents(true);
                    sb.index.clearCaches();
                }
                if ((crawlingFileName = post.get("crawlingFile")) == null || crawlingFileName.isEmpty()) {
                    crawlingFile = null;
                } else {
                    if (crawlingFileName.startsWith("file://")) {
                        crawlingFileName = crawlingFileName.substring(7);
                    }
                    crawlingFile = new File(crawlingFileName);
                }
                if (crawlingFile != null && crawlingFile.exists()) {
                    post.remove("crawlingFile$file");
                }
                boolean storeHTCache = "on".equals(post.get("storeHTCache", "off"));
                String newcrawlingMustMatch = post.get("mustmatch", ".*");
                String newcrawlingMustNotMatch = post.get("mustnotmatch", "");
                if (newcrawlingMustMatch.length() < 2) {
                    newcrawlingMustMatch = ".*";
                }
                boolean fullDomain = "domain".equals(post.get("range", "wide"));
                boolean subPath = "subpath".equals(post.get("range", "wide"));
                boolean restrictedcrawl = fullDomain || subPath || !".*".equals(newcrawlingMustMatch);
                boolean deleteage = restrictedcrawl && "age".equals(post.get("deleteold", "off"));
                Date deleteageDate = null;
                if (deleteage) {
                    deleteageDate = Crawler_p.timeParser(true, post.getInt("deleteIfOlderNumber", -1), post.get("deleteIfOlderUnit", "year"));
                }
                boolean deleteold = deleteage && deleteageDate != null || restrictedcrawl && post.getBoolean("deleteold");
                String sitemapURLStr = post.get("sitemapURL", "");
                String crawlingStart0 = post.get("crawlingURL", "").trim();
                String[] rootURLs0 = crawlingStart0.indexOf(10) > 0 || crawlingStart0.indexOf(13) > 0 ? crawlingStart0.split("[\\r\\n]+") : crawlingStart0.split(Pattern.quote("|"));
                ArrayList<DigestURL> rootURLs = new ArrayList<DigestURL>();
                String crawlName = "";
                if (crawlingFile == null) {
                    StringBuilder crawlNameBuilder = new StringBuilder();
                    for (String crawlingStart : rootURLs0) {
                        if (crawlingStart == null || crawlingStart.length() == 0) continue;
                        int pos = crawlingStart.indexOf("://", 0);
                        if (pos == -1) {
                            crawlingStart = crawlingStart.startsWith("ftp") ? "ftp://" + crawlingStart : "https://" + crawlingStart;
                        }
                        try {
                            DigestURL crawlingStartURL = new DigestURL(crawlingStart);
                            rootURLs.add(crawlingStartURL);
                            crawlNameBuilder.append(crawlingStartURL.getHost() == null ? crawlingStartURL.toNormalform(true) : crawlingStartURL.getHost()).append(',');
                            if (crawlingStartURL == null || !crawlingStartURL.isFile() && !crawlingStartURL.isSMB()) continue;
                            storeHTCache = false;
                        }
                        catch (MalformedURLException e) {
                            ConcurrentLog.warn("Crawler_p", "crawl start url invalid: " + e.getMessage());
                        }
                    }
                    crawlName = crawlNameBuilder.toString();
                } else {
                    crawlName = crawlingFile.getName();
                }
                if (crawlName.endsWith(",")) {
                    crawlName = crawlName.substring(0, crawlName.length() - 1);
                }
                if (crawlName.length() > 64 && (p = (crawlName = "crawl_for_" + rootURLs.size() + "_start_points_" + Integer.toHexString(crawlName.hashCode())).lastIndexOf(44)) >= 8) {
                    crawlName = crawlName.substring(0, p);
                }
                if (crawlName.length() == 0 && sitemapURLStr.length() > 0) {
                    crawlName = "sitemap loader for " + sitemapURLStr;
                }
                if (fullDomain) {
                    for (DigestURL u : rootURLs) {
                        if (!u.isFile()) continue;
                        fullDomain = false;
                        subPath = true;
                        break;
                    }
                }
                String ipMustMatch = post.get("ipMustmatch", ".*");
                String ipMustNotMatch = post.get("ipMustnotmatch", "");
                if (ipMustMatch.length() < 2) {
                    ipMustMatch = ".*";
                }
                String countryMustMatch = post.getBoolean("countryMustMatchSwitch") ? post.get("countryMustMatchList", "") : "";
                sb.setConfig("crawlingIPMustMatch", ipMustMatch);
                sb.setConfig("crawlingIPMustNotMatch", ipMustNotMatch);
                if (countryMustMatch.length() > 0) {
                    sb.setConfig("crawlingCountryMustMatch", countryMustMatch);
                }
                String crawlerNoDepthLimitMatch = post.get("crawlingDepthExtension", "");
                String indexUrlMustMatch = post.get("indexmustmatch", ".*");
                String indexUrlMustNotMatch = post.get("indexmustnotmatch", "");
                String indexContentMustMatch = post.get("indexcontentmustmatch", ".*");
                String indexContentMustNotMatch = post.get("indexcontentmustnotmatch", "");
                boolean noindexWhenCanonicalUnequalURL = "on".equals(post.get("noindexWhenCanonicalUnequalURL", "off"));
                boolean crawlOrder = post.get("crawlOrder", "off").equals("on");
                env.setConfig("crawlOrder", crawlOrder);
                if (crawlOrder) {
                    crawlerNoDepthLimitMatch = "";
                }
                int newcrawlingdepth = post.getInt("crawlingDepth", 8);
                env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth));
                if (crawlOrder && newcrawlingdepth > 8) {
                    newcrawlingdepth = 8;
                }
                boolean directDocByURL = "on".equals(post.get("directDocByURL", "off"));
                env.setConfig("crawlingDirectDocByURL", directDocByURL);
                String collection = post.get("collection", "user");
                env.setConfig("collection", collection);
                String recrawl = post.get("recrawl", "nodoubles");
                Date crawlingIfOlder = null;
                if ("reload".equals(recrawl)) {
                    crawlingIfOlder = Crawler_p.timeParser(true, post.getInt("reloadIfOlderNumber", -1), post.get("reloadIfOlderUnit", "year"));
                }
                env.setConfig("crawlingIfOlder", crawlingIfOlder == null ? Long.MAX_VALUE : crawlingIfOlder.getTime());
                sb.tables.recordAPICall(post, "Crawler_p.html", "crawler", "crawl start for " + (rootURLs.size() == 0 ? post.get("crawlingFile", "") : ((DigestURL)rootURLs.iterator().next()).toNormalform(true)));
                boolean crawlingDomMaxCheck = "on".equals(post.get("crawlingDomMaxCheck", "off"));
                int crawlingDomMaxPages = crawlingDomMaxCheck ? post.getInt("crawlingDomMaxPages", -1) : -1;
                env.setConfig("crawlingDomMaxPages", Integer.toString(crawlingDomMaxPages));
                boolean followFrames = "on".equals(post.get("followFrames", "false"));
                env.setConfig("followFrames", followFrames);
                boolean obeyHtmlRobotsNoindex = "on".equals(post.get("obeyHtmlRobotsNoindex", "false"));
                env.setConfig("obeyHtmlRobotsNoindex", obeyHtmlRobotsNoindex);
                boolean obeyHtmlRobotsNofollow = "on".equals(post.get("obeyHtmlRobotsNofollow", "false"));
                env.setConfig("obeyHtmlRobotsNofollow", obeyHtmlRobotsNofollow);
                boolean indexText = "on".equals(post.get("indexText", "on"));
                env.setConfig("indexText", indexText);
                boolean indexMedia = "on".equals(post.get("indexMedia", "false"));
                env.setConfig("indexMedia", indexMedia);
                env.setConfig("storeHTCache", storeHTCache);
                String defaultAgentName = sb.isIntranetMode() ? "YaCy Intranet (greedy)" : "YaCy Internet (cautious)";
                String agentName = post.get("agentName", defaultAgentName);
                ClientIdentification.Agent agent = ClientIdentification.getAgent(agentName);
                if (agent == null) {
                    agent = ClientIdentification.getAgent(defaultAgentName);
                }
                if ((cachePolicy = CacheStrategy.parse(post.get("cachePolicy", "iffresh"))) == null) {
                    cachePolicy = CacheStrategy.IFFRESH;
                }
                if ("file".equals(crawlingMode = post.get("crawlingMode", "url")) && post.containsKey("crawlingFile")) {
                    newcrawlingMustNotMatch = "";
                    directDocByURL = false;
                }
                if ("sitemap".equals(crawlingMode)) {
                    newcrawlingMustMatch = ".*";
                    newcrawlingMustNotMatch = "";
                    newcrawlingdepth = 0;
                    directDocByURL = false;
                }
                if ("sitelist".equals(crawlingMode)) {
                    newcrawlingMustNotMatch = "";
                    ArrayList<DigestURL> newRootURLs = new ArrayList<DigestURL>();
                    for (DigestURL sitelistURL : rootURLs) {
                        try {
                            Document scraper = sb.loader.loadDocument(sitelistURL, CacheStrategy.IFFRESH, Blacklist.BlacklistType.CRAWLER, agent);
                            for (DigestURL digestURL : scraper.getHyperlinks().keySet()) {
                                newRootURLs.add(digestURL);
                            }
                        }
                        catch (IOException e) {
                            ConcurrentLog.logException(e);
                        }
                    }
                    rootURLs.clear();
                    rootURLs.addAll(newRootURLs);
                    crawlingMode = "url";
                    if ((fullDomain || subPath) && newcrawlingdepth > 0) {
                        newcrawlingMustMatch = ".*";
                    }
                }
                ArrayList<String> deleteIDs = new ArrayList<String>();
                HashSet<String> hosthashes = new HashSet<String>();
                boolean anysmbftporpdf = false;
                for (DigestURL u : rootURLs) {
                    deleteIDs.add(new String(u.hash()));
                    hosthashes.add(u.hosthash());
                    if ("smb.ftp".indexOf(u.getProtocol()) < 0 && !"pdf".equals(MultiProtocolURL.getFileExtension(u.getFileName()))) continue;
                    anysmbftporpdf = true;
                }
                sb.index.fulltext().remove(deleteIDs);
                deleteIDs.forEach(urlhash -> {
                    try {
                        sb.index.loadTimeIndex().remove(urlhash.getBytes());
                    }
                    catch (IOException iOException) {
                        // empty catch block
                    }
                });
                sb.crawlQueues.removeHosts(hosthashes);
                sb.index.fulltext().commit(true);
                boolean crawlingQ = anysmbftporpdf || "on".equals(post.get("crawlingQ", "off")) || "sitemap".equals(crawlingMode);
                env.setConfig("crawlingQ", crawlingQ);
                if ((fullDomain || subPath) && newcrawlingdepth > 0) {
                    String siteFilter = ".*";
                    if (fullDomain) {
                        siteFilter = CrawlProfile.siteFilter(rootURLs);
                        if (deleteold) {
                            sb.index.fulltext().deleteStaleDomainHashes(hosthashes, deleteageDate);
                        }
                    } else if (subPath) {
                        siteFilter = CrawlProfile.subpathFilter(rootURLs);
                        if (deleteold) {
                            for (DigestURL digestURL : rootURLs) {
                                int p2;
                                String basepath = digestURL.toNormalform(true);
                                if (!basepath.endsWith("/") && (p2 = basepath.lastIndexOf("/")) > 0) {
                                    basepath = basepath.substring(0, p2 + 1);
                                }
                                int count = sb.index.fulltext().remove(basepath, deleteageDate);
                                try {
                                    sb.index.loadTimeIndex().clear();
                                }
                                catch (IOException iOException) {
                                    // empty catch block
                                }
                                if (count <= 0) continue;
                                ConcurrentLog.info("Crawler_p", "deleted " + count + " documents for host " + digestURL.getHost());
                            }
                        }
                    }
                    if (".*".equals(newcrawlingMustMatch)) {
                        newcrawlingMustMatch = siteFilter;
                    } else if (!".*".equals(siteFilter)) {
                        newcrawlingMustMatch = "(" + newcrawlingMustMatch + ")|(" + siteFilter + ")";
                    }
                }
                try {
                    Pattern mmp = Pattern.compile(newcrawlingMustMatch);
                    int n = 100;
                    for (DigestURL u : rootURLs) {
                        void var64_109;
                        assert (mmp.matcher(u.toNormalform(true)).matches()) : "pattern " + mmp.toString() + " does not match url " + u.toNormalform(true);
                        if (--var64_109 > 0) continue;
                        break;
                    }
                }
                catch (PatternSyntaxException e) {
                    prop.put("info", "4");
                    prop.putHTML("info_newcrawlingfilter", newcrawlingMustMatch);
                    prop.putHTML("info_error", e.getMessage());
                }
                boolean bl2 = hasCrawlstartDataOK = !crawlName.isEmpty();
                if (hasCrawlstartDataOK && "url".equals(crawlingMode) && rootURLs.size() == 0) {
                    prop.put("info", "5");
                    prop.putHTML("info_crawlingURL", "(no url given)");
                    prop.putHTML("info_reasonString", "you must submit at least one crawl url");
                    hasCrawlstartDataOK = false;
                }
                String string = post.get("snapshotsMaxDepth", "-1");
                int n = Integer.parseInt(string);
                boolean snapshotsLoadImage = post.getBoolean("snapshotsLoadImage");
                boolean snapshotsReplaceOld = post.getBoolean("snapshotsReplaceOld");
                String snapshotsMustnotmatch = post.get("snapshotsMustnotmatch", "");
                String valency_switch_tag_names_s = post.get("valency_switch_tag_names");
                HashSet<String> valency_switch_tag_names = new HashSet<String>();
                if (valency_switch_tag_names_s != null) {
                    String[] valency_switch_tag_name_a = valency_switch_tag_names_s.trim().split(",");
                    for (int i = 0; i < valency_switch_tag_name_a.length; ++i) {
                        valency_switch_tag_names.add(valency_switch_tag_name_a[i].trim());
                    }
                }
                String default_valency_radio = post.get("default_valency");
                TagValency default_valency = TagValency.EVAL;
                if (default_valency_radio != null && default_valency_radio.equals("IGNORE")) {
                    default_valency = TagValency.IGNORE;
                }
                JSONObject vocabulary_scraper = new JSONObject();
                for (String key : post.keySet()) {
                    JSONObject props;
                    if (!key.startsWith("vocabulary_") || !key.endsWith("_class")) continue;
                    String vocabulary = key.substring(11, key.length() - 6);
                    String value = post.get(key);
                    if (value == null || value.length() <= 0) continue;
                    try {
                        props = vocabulary_scraper.getJSONObject(vocabulary);
                    }
                    catch (JSONException e) {
                        props = new JSONObject();
                        try {
                            vocabulary_scraper.put(vocabulary, props);
                        }
                        catch (JSONException jSONException) {
                            // empty catch block
                        }
                    }
                    try {
                        props.put("class", value);
                    }
                    catch (JSONException e) {}
                }
                int timezoneOffset = post.getInt("timezoneOffset", 0);
                List<AnchorURL> hyperlinks_from_file = null;
                if ("file".equals(crawlingMode) && post.containsKey("crawlingFile") && crawlingFile != null) {
                    String crawlingFileContent = post.get("crawlingFile$file", "");
                    try {
                        if (newcrawlingdepth > 0) {
                            if (fullDomain) {
                                hyperlinks_from_file = Crawler_p.crawlingFileStart(crawlingFile, timezoneOffset, crawlingFileContent);
                                newcrawlingMustMatch = CrawlProfile.siteFilter(hyperlinks_from_file);
                            } else if (subPath) {
                                hyperlinks_from_file = Crawler_p.crawlingFileStart(crawlingFile, timezoneOffset, crawlingFileContent);
                                newcrawlingMustMatch = CrawlProfile.subpathFilter(hyperlinks_from_file);
                            }
                        }
                    }
                    catch (Exception e) {
                        prop.put("info", "7");
                        prop.putHTML("info_crawlingStart", crawlingFileName);
                        prop.putHTML("info_error", e.getMessage());
                        ConcurrentLog.logException(e);
                    }
                    sb.continueCrawlJob("50_localcrawl");
                }
                String solrQueryMustMatch = post.get(CrawlProfile.CrawlAttribute.INDEXING_SOLR_QUERY_MUSTMATCH.key, "*:*").trim();
                String solrQueryMustNotMatch = post.get(CrawlProfile.CrawlAttribute.INDEXING_SOLR_QUERY_MUSTNOTMATCH.key, "").trim();
                if (!solrQueryMustMatch.isEmpty() && !"*:*".equals(solrQueryMustMatch) || !"".equals(solrQueryMustNotMatch)) {
                    EmbeddedInstance embeddedSolr = sb.index.fulltext().getEmbeddedInstance();
                    SolrCore embeddedCore = embeddedSolr != null ? embeddedSolr.getDefaultCore() : null;
                    boolean embeddedSolrConnected = embeddedSolr != null && embeddedCore != null;
                    prop.put("noEmbeddedSolr", !embeddedSolrConnected);
                    if (embeddedSolrConnected) {
                        if (!solrQueryMustMatch.isEmpty() && !"*:*".equals(solrQueryMustMatch)) {
                            try {
                                SingleDocumentMatcher.toLuceneQuery(solrQueryMustMatch, embeddedCore);
                            }
                            catch (SolrException | SyntaxError e) {
                                hasCrawlstartDataOK = false;
                                prop.put("info", "10");
                                prop.put("info_solrQuery", solrQueryMustMatch);
                            }
                            catch (RuntimeException e) {
                                hasCrawlstartDataOK = false;
                                prop.put("info", "11");
                                prop.put("info_solrQuery", solrQueryMustMatch);
                            }
                        }
                        if (!"".equals(solrQueryMustNotMatch)) {
                            try {
                                SingleDocumentMatcher.toLuceneQuery(solrQueryMustNotMatch, embeddedCore);
                            }
                            catch (SolrException | SyntaxError e) {
                                hasCrawlstartDataOK = false;
                                prop.put("info", "10");
                                prop.put("info_solrQuery", solrQueryMustNotMatch);
                            }
                            catch (RuntimeException e) {
                                hasCrawlstartDataOK = false;
                                prop.put("info", "11");
                                prop.put("info_solrQuery", solrQueryMustNotMatch);
                            }
                        }
                    } else {
                        hasCrawlstartDataOK = false;
                        prop.put("info", "9");
                    }
                }
                if (hasCrawlstartDataOK) {
                    CrawlProfile profile3 = new CrawlProfile(crawlName, newcrawlingMustMatch, newcrawlingMustNotMatch, ipMustMatch, ipMustNotMatch, countryMustMatch, crawlerNoDepthLimitMatch, indexUrlMustMatch, indexUrlMustNotMatch, indexContentMustMatch, indexContentMustNotMatch, noindexWhenCanonicalUnequalURL, newcrawlingdepth, directDocByURL, crawlingIfOlder, crawlingDomMaxPages, crawlingQ, followFrames, obeyHtmlRobotsNoindex, obeyHtmlRobotsNofollow, indexText, indexMedia, storeHTCache, crawlOrder, n, snapshotsLoadImage, snapshotsReplaceOld, snapshotsMustnotmatch, cachePolicy, collection, agentName, default_valency, valency_switch_tag_names, new VocabularyScraper(vocabulary_scraper), timezoneOffset);
                    profile3.put(CrawlProfile.CrawlAttribute.CRAWLER_ORIGIN_URL_MUSTMATCH.key, post.get(CrawlProfile.CrawlAttribute.CRAWLER_ORIGIN_URL_MUSTMATCH.key, ".*"));
                    profile3.put(CrawlProfile.CrawlAttribute.CRAWLER_ORIGIN_URL_MUSTNOTMATCH.key, post.get(CrawlProfile.CrawlAttribute.CRAWLER_ORIGIN_URL_MUSTNOTMATCH.key, ""));
                    profile3.put(CrawlProfile.CrawlAttribute.INDEXING_MEDIA_TYPE_MUSTMATCH.key, post.get(CrawlProfile.CrawlAttribute.INDEXING_MEDIA_TYPE_MUSTMATCH.key, ".*"));
                    profile3.put(CrawlProfile.CrawlAttribute.INDEXING_MEDIA_TYPE_MUSTNOTMATCH.key, post.get(CrawlProfile.CrawlAttribute.INDEXING_MEDIA_TYPE_MUSTNOTMATCH.key, ""));
                    profile3.put(CrawlProfile.CrawlAttribute.INDEXING_SOLR_QUERY_MUSTMATCH.key, solrQueryMustMatch);
                    profile3.put(CrawlProfile.CrawlAttribute.INDEXING_SOLR_QUERY_MUSTNOTMATCH.key, solrQueryMustNotMatch);
                    profile3.put(CrawlProfile.CrawlAttribute.CRAWLER_ALWAYS_CHECK_MEDIA_TYPE.key, post.getBoolean("crawlerAlwaysCheckMediaType"));
                    handle = ASCII.getBytes(profile3.handle());
                    sb.crawler.removeActive(handle);
                    sb.crawler.removePassive(handle);
                    try {
                        sb.crawlQueues.noticeURL.removeByProfileHandle(profile3.handle(), 10000L);
                    }
                    catch (SpaceExceededException embeddedSolrConnected) {}
                } else {
                    profile2 = null;
                    handle = null;
                }
                if (hasCrawlstartDataOK) {
                    boolean wontReceiptRemoteRsults;
                    boolean bl3 = wontReceiptRemoteRsults = crawlOrder && !sb.getConfigBool("crawlResponse", false);
                    if ("url".equals(crawlingMode)) {
                        sb.crawler.putActive(handle, profile2);
                        HashSet<DigestURL> successurls = new HashSet<DigestURL>();
                        HashMap<DigestURL, String> failurls = new HashMap<DigestURL, String>();
                        sb.stackURLs(rootURLs, profile2, successurls, failurls);
                        if (failurls.size() == 0) {
                            prop.put("info", "8");
                            prop.putHTML("info_crawlingURL", post.get("crawlingURL"));
                            if (!sb.isRobinsonMode() && crawlOrder) {
                                HashMap<String, String> m = new HashMap<String, String>(profile2);
                                m.remove("specificDepth");
                                m.remove("indexText");
                                m.remove("indexMedia");
                                m.remove("remoteIndexing");
                                m.remove("xsstopw");
                                m.remove("xpstopw");
                                m.remove("xdstopw");
                                m.remove("storeTXCache");
                                m.remove("storeHTCache");
                                m.remove("generalFilter");
                                m.remove("specificFilter");
                                m.put("intention", post.get("intention", "").replace(',', '/'));
                                if (successurls.size() > 0) {
                                    m.put("startURL", ((DigestURL)successurls.iterator().next()).toNormalform(true));
                                }
                                sb.peers.newsPool.publishMyNews(sb.peers.mySeed(), "crwlstrt", m);
                            }
                        } else {
                            StringBuilder fr = new StringBuilder();
                            for (Map.Entry failure : failurls.entrySet()) {
                                sb.crawlQueues.errorURL.push((DigestURL)failure.getKey(), 0, null, FailCategory.FINAL_LOAD_CONTEXT, (String)failure.getValue(), -1);
                                fr.append((String)failure.getValue()).append('/');
                            }
                            prop.put("info", "5");
                            prop.putHTML("info_crawlingURL", post.get("crawlingURL"));
                            prop.putHTML("info_reasonString", fr.toString());
                        }
                        if (successurls.size() > 0) {
                            sb.continueCrawlJob("50_localcrawl");
                            prop.put("wontReceiptRemoteResults", wontReceiptRemoteRsults);
                        }
                    } else if ("sitemap".equals(crawlingMode)) {
                        try {
                            DigestURL sitemapURL = sitemapURLStr.indexOf("//") > 0 ? new DigestURL(sitemapURLStr) : new DigestURL((MultiProtocolURL)rootURLs.iterator().next(), sitemapURLStr);
                            sb.crawler.putActive(handle, profile2);
                            SitemapImporter importer = new SitemapImporter(sb, sitemapURL, profile2);
                            importer.start();
                            sb.continueCrawlJob("50_localcrawl");
                            prop.put("wontReceiptRemoteResults", wontReceiptRemoteRsults);
                        }
                        catch (Exception e) {
                            prop.put("info", "6");
                            prop.putHTML("info_crawlingStart", sitemapURLStr);
                            prop.putHTML("info_error", e.getMessage());
                            ConcurrentLog.logException(e);
                        }
                    } else if ("file".equals(crawlingMode) && post.containsKey("crawlingFile") && crawlingFile != null) {
                        try {
                            if (newcrawlingdepth > 0 && (fullDomain || subPath)) {
                                if (hyperlinks_from_file != null) {
                                    sb.crawler.putActive(handle, profile2);
                                    sb.crawlStacker.enqueueEntriesAsynchronous(sb.peers.mySeed().hash.getBytes(), profile2.handle(), hyperlinks_from_file, profile2.timezoneOffset());
                                }
                            } else {
                                String crawlingFileContent = post.get("crawlingFile$file", "");
                                ContentScraper scraper = new ContentScraper(new DigestURL(crawlingFile), 10000000, new HashSet<String>(), TagValency.EVAL, new VocabularyScraper(), profile2.timezoneOffset());
                                FileCrawlStarterTask crawlStarterTask = new FileCrawlStarterTask(crawlingFile, crawlingFileContent, scraper, profile2, sb.crawlStacker, sb.peers.mySeed().hash.getBytes());
                                sb.crawler.putActive(handle, profile2);
                                crawlStarterTask.start();
                            }
                        }
                        catch (PatternSyntaxException e) {
                            prop.put("info", "4");
                            prop.putHTML("info_newcrawlingfilter", newcrawlingMustMatch);
                            prop.putHTML("info_error", e.getMessage());
                        }
                        catch (Exception e) {
                            prop.put("info", "7");
                            prop.putHTML("info_crawlingStart", crawlingFileName);
                            prop.putHTML("info_error", e.getMessage());
                            ConcurrentLog.logException(e);
                        }
                        sb.continueCrawlJob("50_localcrawl");
                        prop.put("wontReceiptRemoteResults", wontReceiptRemoteRsults);
                    }
                }
            }
        }
        if (post != null && post.containsKey("crawlingPerformance")) {
            int wPPM;
            String crawlingPerformance = post.get("crawlingPerformance", "custom");
            long LCbusySleep1 = sb.getConfigLong("50_localcrawl_busysleep", 1000L);
            int wantedPPM = LCbusySleep1 == 0L ? 30000 : (int)(60000L / LCbusySleep1);
            try {
                wantedPPM = post.getInt("customPPM", wantedPPM);
            }
            catch (NumberFormatException newcrawlingMustNotMatch) {
                // empty catch block
            }
            if ("minimum".equals(crawlingPerformance.toLowerCase(Locale.ROOT))) {
                wantedPPM = 10;
            }
            if ("maximum".equals(crawlingPerformance.toLowerCase(Locale.ROOT))) {
                wantedPPM = 30000;
            }
            if ((wPPM = wantedPPM) <= 0) {
                wPPM = 1;
            }
            if (wPPM >= 30000) {
                wPPM = 30000;
            }
            int newBusySleep = 60000 / wPPM;
            float loadprereq = wantedPPM <= 10 ? 1.0f : (wantedPPM <= 100 ? 2.0f : (wantedPPM >= 1000 ? 8.0f : 3.0f));
            BusyThread thread = sb.getThread("50_localcrawl");
            if (thread != null) {
                sb.setConfig("50_localcrawl_busysleep", thread.setBusySleep(newBusySleep));
                sb.setConfig("50_localcrawl_loadprereq", thread.setLoadPreReqisite(loadprereq));
                thread.setLoadPreReqisite(loadprereq);
                thread.setIdleSleep(2000L);
            }
            float latencyFactor = post.getFloat("latencyFactor", 0.5f);
            int MaxSameHostInQueue = post.getInt("MaxSameHostInQueue", 20);
            env.setConfig("crawler.latencyFactor", latencyFactor);
            env.setConfig("crawler.MaxSameHostInQueue", MaxSameHostInQueue);
        }
        long LCbusySleep = env.getConfigLong("50_localcrawl_busysleep", 1000L);
        int LCppm = (int)(60000L / Math.max(1L, LCbusySleep));
        prop.put("customPPMdefault", Integer.toString(LCppm));
        prop.put("latencyFactorDefault", env.getConfigFloat("crawler.latencyFactor", 0.5f));
        prop.put("MaxSameHostInQueueDefault", env.getConfigInt("crawler.MaxSameHostInQueue", 20));
        int count = 0;
        boolean dark = true;
        int domlistlength = post == null ? 160 : post.getInt("domlistlength", 160);
        String hosts = "";
        for (byte[] h : sb.crawler.getActive()) {
            CrawlProfile profile3 = sb.crawler.getActive(h);
            if (CrawlSwitchboard.DEFAULT_PROFILES.contains(profile3.name())) continue;
            profile3.putProfileEntry("crawlProfilesShow_list_", prop, true, dark, count, domlistlength);
            prop.put("crawlProfilesShow_list_" + count + "_debug", debug ? 1L : 0L);
            if (debug) {
                RowHandleSet urlhashes = sb.crawler.getURLHashes(h);
                prop.put("crawlProfilesShow_list_" + count + "_debug_count", urlhashes == null ? "unknown" : Integer.toString(urlhashes.size()));
            }
            hosts = hosts + "," + profile3.name();
            dark = !dark;
            ++count;
        }
        prop.put("crawlProfilesShow_debug", debug ? 1L : 0L);
        prop.put("crawlProfilesShow_list", count);
        prop.put("crawlProfilesShow_count", count);
        prop.put("crawlProfilesShow", count == 0 ? 0L : 1L);
        prop.put("crawlProfilesShow_linkstructure", 0L);
        if (post != null) {
            if (post.get("hidewebstructuregraph") != null) {
                sb.setConfig("decoration.grafics.linkstructure", false);
            }
            if (post.get("showwebstructuregraph") != null) {
                sb.setConfig("decoration.grafics.linkstructure", true);
            }
        }
        if (count > 0 && sb.getConfigBool("decoration.grafics.linkstructure", true)) {
            boolean showLinkstructure;
            boolean bl4 = showLinkstructure = hosts.length() > 0 && !hosts.contains("file:");
            if (showLinkstructure) {
                StringBuilder q = new StringBuilder();
                hosts = hosts.substring(1);
                q.append(CollectionSchema.host_s.getSolrFieldName()).append(':').append(hosts).append(" OR ").append(CollectionSchema.host_s.getSolrFieldName()).append(':').append("www.").append(hosts);
                try {
                    prop.put("crawlProfilesShow_linkstructure", count == 1 && sb.index.fulltext().getDefaultConnector().getCountByQuery(q.toString()) > 0L ? 1L : 2L);
                    prop.put("crawlProfilesShow_linkstructure_hosts", hosts);
                }
                catch (IOException iOException) {
                    // empty catch block
                }
            }
        }
        return prop;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Extracts the hyperlinks contained in a crawl start file.
     * <p>
     * The bytes of {@code crawlingFile} (when it is not null) and then the text of
     * {@code crawlingFileContent} (when it is not null) are written through a
     * {@link TransformerWriter} into a {@link ContentScraper}. Once the writer has been
     * closed, the anchors collected by the scraper are returned.
     * <p>
     * The writer is closed on every exit path, including failures while reading the file.
     *
     * @param crawlingFile the file to read; it is also converted to a {@link DigestURL} that is handed to the scraper
     * @param timezoneOffset time zone offset passed to the scraper
     * @param crawlingFileContent text content to feed in addition to the file; when it is null or
     *            empty the file is first checked to exist, be a regular file and be readable
     * @return the anchors reported by the scraper
     * @throws MalformedURLException declared for the URL built from {@code crawlingFile}
     * @throws FileNotFoundException when no content was given and the file does not exist or is not a regular file
     * @throws IOException when no content was given and the file is not readable, or when reading/copying fails
     */
    private static List<AnchorURL> crawlingFileStart(File crawlingFile, int timezoneOffset, String crawlingFileContent) throws MalformedURLException, IOException, FileNotFoundException {
        // NOTE(review): crawlingFile is handed to DigestURL before the null checks below;
        // presumably callers never pass null - TODO confirm.
        final ContentScraper scraper = new ContentScraper(new DigestURL(crawlingFile), 10000000, new HashSet<String>(), TagValency.EVAL, new VocabularyScraper(), timezoneOffset);
        // try-with-resources guarantees the writer is closed even when a pre-check or a copy fails,
        // and closes it before the anchors are read from the scraper.
        try (Writer writer = new TransformerWriter(null, null, scraper, false)) {
            final boolean noContentGiven = crawlingFileContent == null || crawlingFileContent.isEmpty();
            if (noContentGiven && crawlingFile != null) {
                // Report a detailed error when the file is the only possible source of links.
                if (!crawlingFile.exists()) {
                    throw new FileNotFoundException(crawlingFile.getAbsolutePath() + " does not exists");
                }
                if (!crawlingFile.isFile()) {
                    throw new FileNotFoundException(crawlingFile.getAbsolutePath() + " exists but is not a regular file");
                }
                if (!crawlingFile.canRead()) {
                    throw new IOException("Can not read : " + crawlingFile.getAbsolutePath());
                }
            }
            if (crawlingFile != null) {
                FileInputStream inStream = null;
                try {
                    inStream = new FileInputStream(crawlingFile);
                    FileUtils.copy((InputStream)inStream, writer);
                }
                finally {
                    if (inStream != null) {
                        try {
                            inStream.close();
                        }
                        catch (IOException ignoredException) {
                            // Best effort: a failed close of the input must not hide the parsing result.
                            ConcurrentLog.info("Crawler_p", "Could not close crawlingFile : " + crawlingFile.getAbsolutePath());
                        }
                    }
                }
            }
            // Null content is an accepted input (see the check above), so skip the copy instead of
            // handing null to FileUtils.copy. NOTE(review): its null handling is not visible here.
            if (crawlingFileContent != null) {
                FileUtils.copy(crawlingFileContent, writer);
            }
        }
        return scraper.getAnchors();
    }

    /**
     * Computes the point in time lying {@code number} units before now.
     * <p>
     * Supported units are {@code "year"} (counted as 365 days), {@code "month"} (counted as
     * 30 days), {@code "day"}, {@code "hour"} and {@code "minute"}.
     *
     * @param recrawlIfOlderCheck when false no date is computed at all
     * @param number how many units to go back from the current time
     * @param unit name of the time unit
     * @return the computed date, or null when {@code recrawlIfOlderCheck} is false or the unit
     *         is null or not one of the supported names
     */
    private static Date timeParser(boolean recrawlIfOlderCheck, int number, String unit) {
        // A null unit can never match a supported name; guard it because switch would throw on null.
        if (!recrawlIfOlderCheck || unit == null) {
            return null;
        }
        final long millisPerUnit;
        switch (unit) {
            case "year":
                millisPerUnit = 31536000000L; // 365 days
                break;
            case "month":
                millisPerUnit = 2592000000L; // 30 days
                break;
            case "day":
                millisPerUnit = 86400000L;
                break;
            case "hour":
                millisPerUnit = 3600000L;
                break;
            case "minute":
                millisPerUnit = 60000L;
                break;
            default:
                return null;
        }
        // Widen to long before multiplying so large counts do not overflow int arithmetic.
        final long ageMillis = (long)number * millisPerUnit;
        return new Date(System.currentTimeMillis() - ageMillis);
    }
}

