/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.htroot;

import java.net.MalformedURLException;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.regex.Pattern;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.robots.RobotsTxt;
import net.yacy.search.Switchboard;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;

/**
 * Servlet for the crawl-check page: takes a list of start URLs from the
 * request, runs them through {@code sb.robots.massCrawlCheck} and reports
 * one table row per checked URL (robots.txt state, crawl delay, sitemaps,
 * access result).
 */
public class CrawlCheck_p {

    /**
     * Builds the template properties for the crawl-check page.
     *
     * <p>Properties written:
     * <ul>
     *   <li>{@code starturls} - empty by default; after a check, the normal
     *       forms of all checked URLs, one per line</li>
     *   <li>{@code table} - {@code 0} if no usable URL was submitted,
     *       {@code 1} otherwise</li>
     *   <li>{@code table_list} - number of rows, plus
     *       {@code table_list_<row>_url|robots|crawldelay|sitemap|access}
     *       for every row</li>
     * </ul>
     *
     * NOTE(review): all values are stored with {@code put}; whether they are
     * escaped before being rendered is not visible from this class - confirm.
     *
     * @param header request header (not used here)
     * @param post   request parameters; may be {@code null}
     * @param env    server environment; cast to {@link Switchboard}
     * @return the filled property object, never {@code null}
     */
    public static serverObjects respond(RequestHeader header, serverObjects post, serverSwitch env) {
        Switchboard sb = (Switchboard)env;
        serverObjects prop = new serverObjects();
        prop.put("starturls", "");
        if (post == null || !post.containsKey("crawlcheck")) {
            return prop;
        }

        LinkedHashSet<DigestURL> rootURLs = parseRootURLs(post.get("crawlingURLs", ""));
        if (rootURLs.isEmpty()) {
            prop.put("table", 0L);
            return prop;
        }

        prop.put("table", 1L);
        ClientIdentification.Agent agent = ClientIdentification.getAgent(post.get("agentName", "YaCy Internet (cautious)"));
        Collection<RobotsTxt.CheckEntry> results = sb.robots.massCrawlCheck(rootURLs, agent);

        StringBuilder startUrls = new StringBuilder(300);
        int row = 0;
        for (RobotsTxt.CheckEntry entry : results) {
            String url = entry.digestURL.toNormalform(true);
            startUrls.append(url).append('\n');
            putRow(prop, "table_list_" + row + "_", url, entry, agent);
            ++row;
        }
        prop.put("table_list", row);
        prop.put("starturls", startUrls.toString());
        return prop;
    }

    /**
     * Splits the submitted URL list and converts each entry into a
     * {@link DigestURL}, keeping submission order and dropping duplicates.
     *
     * <p>Entries are separated by line breaks if the input contains any,
     * otherwise by {@code |}. Each entry is trimmed before it is examined, so
     * that surrounding blanks (e.g. {@code "www.a.org | www.b.org"}) do not
     * defeat the scheme completion below; blank entries are skipped.
     * Entries that cannot be parsed are logged and skipped.
     */
    private static LinkedHashSet<DigestURL> parseRootURLs(String rawInput) {
        LinkedHashSet<DigestURL> rootURLs = new LinkedHashSet<DigestURL>();
        String input = rawInput.trim();
        // input is trimmed, so a line break can never sit at index 0
        boolean multiLine = input.indexOf('\n') > 0 || input.indexOf('\r') > 0;
        String[] candidates = multiLine ? input.split("[\\r\\n]+") : input.split(Pattern.quote("|"));

        for (String candidate : candidates) {
            String start = candidate.trim();
            if (start.isEmpty()) {
                continue;
            }
            // complete a missing scheme for the two recognised host prefixes
            if (start.indexOf("://") == -1) {
                if (start.startsWith("www")) {
                    start = "http://" + start;
                } else if (start.startsWith("ftp")) {
                    start = "ftp://" + start;
                }
            }
            try {
                rootURLs.add(new DigestURL(start));
            }
            catch (MalformedURLException e) {
                ConcurrentLog.logException(e);
            }
        }
        return rootURLs;
    }

    /**
     * Writes the properties of one result row under the given key prefix:
     * {@code url}, {@code robots}, {@code crawldelay}, {@code sitemap} and
     * {@code access}, in that order.
     */
    private static void putRow(serverObjects prop, String prefix, String url, RobotsTxt.CheckEntry entry, ClientIdentification.Agent agent) {
        prop.put(prefix + "url", url);

        // without a robots.txt entry the URL counts as allowed
        boolean robotsAllowed = true;
        if (entry.robotsTxtEntry == null) {
            prop.put(prefix + "robots", "no robots");
            prop.put(prefix + "crawldelay", agent.minimumDelta + " ms");
            prop.put(prefix + "sitemap", "");
        } else {
            robotsAllowed = !entry.robotsTxtEntry.isDisallowed(entry.digestURL);
            prop.put(prefix + "robots", "robots exist: " + (robotsAllowed ? "crawl allowed" : "url disallowed"));
            // the larger of the agent's own minimum and the robots.txt delay is reported
            prop.put(prefix + "crawldelay", Math.max(agent.minimumDelta, entry.robotsTxtEntry.getCrawlDelayMillis()) + " ms");
            prop.put(prefix + "sitemap", entry.robotsTxtEntry.getSitemaps().toString());
        }

        if (!robotsAllowed) {
            prop.put(prefix + "access", "not loaded - prevented by robots.txt");
        } else if (entry.response == null) {
            prop.put(prefix + "access", entry.error == null ? "no response" : entry.error);
        } else if (entry.response.getResponseHeader().getStatusCode() == 200) {
            prop.put(prefix + "access", "200 ok, last-modified = " + entry.response.lastModified());
        } else {
            prop.put(prefix + "access", entry.response.getResponseHeader().getStatusCode() + " - load failed");
        }
    }
}

