/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.htroot.api;

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.robots.RobotsTxtEntry;
import net.yacy.document.Document;
import net.yacy.repository.Blacklist;
import net.yacy.search.Switchboard;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

public class getpageinfo_p {
    public static serverObjects respond(RequestHeader header, serverObjects post, serverSwitch env) {
        Switchboard sb = (Switchboard)env;
        serverObjects prop = new serverObjects();
        prop.put("title", "");
        prop.put("desc", "");
        prop.put("lang", "");
        prop.put("robots-allowed", "3");
        prop.put("robotsInfo", "");
        prop.put("sitemap", "");
        prop.put("icons", "0");
        prop.put("sitelist", "");
        prop.put("filter", ".*");
        prop.put("oai", 0L);
        String actions = "title,robots";
        if (post != null && post.containsKey("url")) {
            DigestURL theURL;
            String url;
            int maxLinks = post.getInt("maxLinks", Integer.MAX_VALUE);
            if (post.containsKey("actions")) {
                actions = post.get("actions");
            }
            if ((url = post.get("url")).toLowerCase(Locale.ROOT).startsWith("ftp://")) {
                prop.put("robots-allowed", "1");
                prop.put("robotsInfo", "ftp does not follow robots.txt");
                prop.putXML("title", "FTP: " + url);
                return prop;
            }
            if (!(url.startsWith("http://") || url.startsWith("https://") || url.startsWith("ftp://") || url.startsWith("smb://") || url.startsWith("file://"))) {
                url = "http://" + url;
            }
            if (actions.indexOf("title", 0) >= 0) {
                DigestURL u = null;
                try {
                    u = new DigestURL(url);
                }
                catch (MalformedURLException e) {
                    ConcurrentLog.logException(e);
                }
                Document scraper = null;
                if (u != null) {
                    try {
                        ClientIdentification.Agent agent = ClientIdentification.getAgent(post.get("agentName", "YaCy Internet (cautious)"));
                        if (post.containsKey("maxBytes")) {
                            long maxBytes = post.getLong("maxBytes", sb.loader.protocolMaxFileSize(u));
                            scraper = sb.loader.loadDocumentAsLimitedStream(u, CacheStrategy.IFEXIST, Blacklist.BlacklistType.CRAWLER, agent, maxLinks, maxBytes);
                        } else {
                            scraper = sb.loader.loadDocumentAsStream(u, CacheStrategy.IFEXIST, Blacklist.BlacklistType.CRAWLER, agent);
                        }
                    }
                    catch (IOException e) {
                        ConcurrentLog.logException(e);
                    }
                }
                if (scraper != null) {
                    prop.putXML("title", scraper.dc_title());
                    Set<DigestURL> iconURLs = scraper.getIcons().keySet();
                    long count = 0L;
                    for (DigestURL digestURL : iconURLs) {
                        if (count >= (long)maxLinks) break;
                        prop.putXML("icons_" + count + "_icon", digestURL.toNormalform(false));
                        prop.put("icons_" + count + "_eol", 1L);
                        ++count;
                    }
                    if (count > 0L) {
                        prop.put("icons_" + (count - 1L) + "_eol", 0L);
                    }
                    prop.put("icons", count);
                    Set<String> list2 = scraper.dc_subject();
                    count = 0L;
                    for (String element : list2) {
                        if (element.equals("")) continue;
                        prop.putXML("tags_" + count + "_tag", element);
                        ++count;
                    }
                    prop.put("tags", count);
                    prop.putXML("desc", scraper.dc_description().length > 0 ? scraper.dc_description()[0] : "");
                    Set<String> set = scraper.getContentLanguages();
                    prop.putXML("lang", set == null || set.size() == 0 ? "unknown" : set.iterator().next());
                    LinkedHashSet<AnchorURL> uris = new LinkedHashSet<AnchorURL>();
                    uris.addAll(scraper.getAnchors());
                    StringBuilder links = new StringBuilder(uris.size() * 80);
                    StringBuilder filter = new StringBuilder(uris.size() * 40);
                    count = 0L;
                    Iterator urisIt = uris.iterator();
                    while (urisIt.hasNext()) {
                        AnchorURL uri = (AnchorURL)urisIt.next();
                        if (uri == null) continue;
                        if (count >= (long)maxLinks) break;
                        links.append(';').append(uri.toNormalform(true));
                        filter.append('|').append(uri.getProtocol()).append("://").append(uri.getHost()).append(".*");
                        prop.putXML("links_" + count + "_link", uri.toNormalform(true));
                        ++count;
                    }
                    prop.put("links", count);
                    prop.put("hasMoreLinks", scraper.isPartiallyParsed() || count >= (long)maxLinks && urisIt.hasNext() ? "1" : "0");
                    prop.putXML("sitelist", links.length() > 0 ? links.substring(1) : "");
                    prop.putXML("filter", filter.length() > 0 ? filter.substring(1) : ".*");
                }
            }
            if (actions.indexOf("robots", 0) >= 0) {
                try {
                    theURL = new DigestURL(url);
                    ClientIdentification.Agent agent = ClientIdentification.getAgent(post.get("agentName", "YaCy Internet (cautious)"));
                    RobotsTxtEntry robotsEntry = sb.robots.getEntry(theURL, agent);
                    prop.put("robots-allowed", robotsEntry == null ? 1L : (robotsEntry.isDisallowed(theURL) ? 0L : 1L));
                    prop.putHTML("robotsInfo", robotsEntry == null ? "" : robotsEntry.getInfo());
                    List<Object> sitemaps = robotsEntry == null ? new ArrayList(0) : robotsEntry.getSitemaps();
                    int count = 0;
                    for (String string : sitemaps) {
                        if (count >= maxLinks) break;
                        prop.putXML("sitemaps_" + count + "_sitemap", string);
                        ++count;
                    }
                    prop.put("sitemaps", count);
                }
                catch (MalformedURLException e) {
                    ConcurrentLog.logException(e);
                }
            }
            if (actions.indexOf("oai", 0) >= 0) {
                try {
                    theURL = new DigestURL(url + "?verb=Identify");
                    String oairesult = getpageinfo_p.checkOAI(theURL.toNormalform(false));
                    prop.put("oai", oairesult == "" ? 0L : 1L);
                    if (oairesult != "") {
                        prop.putXML("title", oairesult);
                    }
                }
                catch (MalformedURLException malformedURLException) {
                    // empty catch block
                }
            }
        }
        return prop;
    }

    private static String checkOAI(String url) {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        try {
            DocumentBuilder builder = factory.newDocumentBuilder();
            return getpageinfo_p.parseXML(builder.parse(url));
        }
        catch (ParserConfigurationException ex) {
            ConcurrentLog.logException(ex);
        }
        catch (SAXException ex) {
            ConcurrentLog.logException(ex);
        }
        catch (IOException ex) {
            ConcurrentLog.logException(ex);
        }
        return "";
    }

    private static String parseXML(org.w3c.dom.Document doc) {
        String repositoryName = null;
        NodeList items = doc.getDocumentElement().getElementsByTagName("Identify");
        if (items.getLength() == 0) {
            return "";
        }
        int n = items.getLength();
        for (int i = 0; i < n; ++i) {
            if (!"Identify".equals(items.item(i).getNodeName())) continue;
            NodeList currentNodeChildren = items.item(i).getChildNodes();
            int m = currentNodeChildren.getLength();
            for (int j = 0; j < m; ++j) {
                Node currentNode = currentNodeChildren.item(j);
                if (!"repositoryName".equals(currentNode.getNodeName())) continue;
                repositoryName = currentNode.getFirstChild().getNodeValue();
            }
            if (repositoryName != null) continue;
            return "";
        }
        return repositoryName;
    }
}

