/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.document.parser;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.zip.GZIPInputStream;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.document.TextParser;
import net.yacy.document.VocabularyScraper;
import org.apache.commons.io.IOUtils;
import org.w3c.dom.CharacterData;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Parser for XML sitemap files and sitemap index files.
 *
 * <p>The XML is read by a {@link SitemapReader} thread which publishes every
 * {@code <url>} element as a {@link URLEntry} on a bounded queue. Entries of
 * sitemaps referenced through {@code <sitemap>} elements are downloaded and
 * forwarded to the same queue. The end of the stream is signalled with the
 * {@link #POISON_URLEntry} sentinel.</p>
 */
public class sitemapParser
extends AbstractParser
implements Parser {
    /**
     * End-of-stream marker placed on a reader's queue once it is finished.
     * Consumers must compare against it by identity ({@code !=}); all of its
     * fields are {@code null} because it is built from a {@code null} element.
     */
    public static final URLEntry POISON_URLEntry = new URLEntry(null);

    public sitemapParser() {
        super("sitemap Parser");
    }

    /**
     * Reads a sitemap from {@code source} and returns one (content-less)
     * {@link Document} per listed URL that can be turned into a {@link DigestURL}.
     *
     * @param location       the URL of the sitemap; used to derive the mime type of the created documents
     * @param mimeType       not used by this parser
     * @param charset        charset name handed on to every created document
     * @param scraper        not used by this parser
     * @param timezoneOffset not used by this parser
     * @param source         the sitemap XML; it is closed by the reader thread when parsing ends
     * @return the documents created for the sitemap entries, possibly an empty array
     * @throws Parser.Failure       declared by the interface; not thrown by this implementation
     * @throws InterruptedException declared by the interface; when the calling thread is interrupted
     *                              while waiting for entries, the entries collected so far are returned
     *                              and the interrupt flag of the thread stays set
     */
    @Override
    public Document[] parse(DigestURL location, String mimeType, String charset, VocabularyScraper scraper, int timezoneOffset, InputStream source) throws Parser.Failure, InterruptedException {
        final ArrayList<Document> docs = new ArrayList<Document>();
        final SitemapReader sitemap = new SitemapReader(source, ClientIdentification.yacyInternetCrawlerAgent);
        sitemap.start();

        URLEntry item;
        while ((item = sitemap.take()) != POISON_URLEntry) {
            try {
                final DigestURL uri = new DigestURL(item.loc);
                docs.add(new Document(uri, TextParser.mimeOf(location), charset, this, null, null, sitemapParser.singleList(""), null, "", null, null, 0.0, 0.0, null, null, null, null, false, new Date()));
            } catch (final MalformedURLException ignored) {
                // deliberately best-effort: a single unusable <loc> must not abort the whole sitemap
            }
        }
        return docs.toArray(new Document[docs.size()]);
    }

    /**
     * Downloads the sitemap at {@code sitemapURL} and wraps its (gunzipped, if
     * necessary) content in a {@link SitemapReader}.
     *
     * <p>The returned reader is <em>not</em> started; the caller has to call
     * {@link Thread#start()} and then drain it with {@link SitemapReader#take()}
     * until {@link #POISON_URLEntry} is returned. The complete response body is
     * buffered in memory before this method returns.</p>
     *
     * @param sitemapURL the URL of the sitemap to load
     * @param agent      the client identification used for the download (and for nested sitemaps)
     * @return a reader over the downloaded content, not yet started
     * @throws IOException if the download fails or the server does not answer with status 200
     */
    public static SitemapReader parse(DigestURL sitemapURL, ClientIdentification.Agent agent) throws IOException {
        ConcurrentLog.info("SitemapReader", "loading sitemap from " + sitemapURL.toNormalform(true));
        try (HTTPClient client = new HTTPClient(agent)) {
            final String url = sitemapURL.toNormalform(false);
            client.GET(url, false);
            if (client.getStatusCode() != 200) {
                throw new IOException("Unable to download the sitemap file " + sitemapURL + "\nServer returned status: " + client.getHttpResponse().getStatusLine());
            }

            final int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
            final ResponseHeader header = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
            final String contentMimeType = header.mime();
            final boolean gzipMime = contentMimeType != null
                    && (contentMimeType.equals("application/x-gzip") || contentMimeType.equals("application/gzip"));

            InputStream contentStream = client.getContentstream();
            if (gzipMime || url.endsWith(".gz")) {
                contentStream = new GZIPInputStream(contentStream);
            }

            // Copy everything into memory: the HTTP client (and with it the content
            // stream) is closed when this try block is left, but the reader is
            // consumed later by the caller.
            final byte[] bytes = IOUtils.toByteArray((InputStream) contentStream);
            return new SitemapReader(new ByteArrayInputStream(bytes), agent);
        }
    }

    /**
     * Returns the character data of the first child node of the first
     * descendant element of {@code parent} with tag name {@code label}.
     *
     * @param parent the element to search in; may be {@code null}
     * @param label  the tag name to look for
     * @param dflt   value returned when no such element exists or its first child is not character data
     * @return the character data, {@code dflt} as described above, or {@code null}
     *         (not {@code dflt}) when {@code parent} itself is {@code null}
     */
    private static String val(Element parent, String label, String dflt) {
        if (parent == null) {
            return null;
        }
        final Element e = (Element) parent.getElementsByTagName(label).item(0);
        if (e == null) {
            return dflt;
        }
        final Node child = e.getFirstChild();
        return child instanceof CharacterData ? ((CharacterData) child).getData() : dflt;
    }

    /**
     * Parses an ISO 8601 {@code lastmod} value.
     *
     * @param lastmod the raw value; may be {@code null} or empty
     * @param dflt    value returned when {@code lastmod} is missing or cannot be parsed
     * @return the parsed date or {@code dflt}
     */
    private static Date parseLastmod(String lastmod, Date dflt) {
        if (lastmod == null || lastmod.isEmpty()) {
            // entries built from a null element (the poison pill) carry null fields
            return dflt;
        }
        try {
            return ISO8601Formatter.FORMATTER.parse(lastmod, 0).getTime();
        } catch (final ParseException e) {
            return dflt;
        }
    }

    /**
     * Creates a {@link DocumentBuilderFactory} hardened against XML external
     * entity (XXE) attacks. Sitemaps are untrusted remote input, so external
     * entities, external DTDs and XInclude are switched off. DOCTYPE
     * declarations themselves stay allowed so that sitemaps carrying one are
     * still parsed.
     */
    private static DocumentBuilderFactory newHardenedDocumentBuilderFactory() {
        final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setXIncludeAware(false);
        setFeatureBestEffort(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
        setFeatureBestEffort(factory, "http://xml.org/sax/features/external-general-entities", false);
        setFeatureBestEffort(factory, "http://xml.org/sax/features/external-parameter-entities", false);
        setFeatureBestEffort(factory, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        return factory;
    }

    /**
     * Sets a parser feature and only logs when the underlying XML
     * implementation does not know it, so that an unusual parser on the
     * classpath does not make every sitemap unreadable.
     */
    private static void setFeatureBestEffort(DocumentBuilderFactory factory, String feature, boolean value) {
        try {
            factory.setFeature(feature, value);
        } catch (final ParserConfigurationException e) {
            ConcurrentLog.info("SitemapReader", "XML parser does not support feature " + feature + ": " + e.getMessage());
        }
    }

    /**
     * Producer thread which parses one sitemap document and publishes its
     * entries on a bounded queue. Consumers call {@link #take()} until it
     * returns {@link sitemapParser#POISON_URLEntry}.
     */
    public static class SitemapReader
    extends Thread {
        /** Maximum number of entries buffered before the producer blocks. */
        private static final int QUEUE_CAPACITY = 10000;

        private final InputStream source;
        private final BlockingQueue<URLEntry> queue;
        private final ClientIdentification.Agent agent;

        /**
         * @param source the sitemap XML; closed by {@link #run()} when parsing ends
         * @param agent  the client identification used to download nested sitemaps
         */
        public SitemapReader(InputStream source, ClientIdentification.Agent agent) {
            super(SitemapReader.class.getSimpleName());
            this.source = source;
            this.queue = new ArrayBlockingQueue<URLEntry>(QUEUE_CAPACITY);
            this.agent = agent;
        }

        /**
         * Parses the source, first forwarding the entries of every sitemap
         * referenced by a {@code <sitemap>} element and then publishing the
         * document's own {@code <url>} elements. Whatever happens, the source
         * is closed and the poison pill is queued at the end, so a consumer
         * blocked in {@link #take()} is always released.
         */
        @Override
        public void run() {
            try {
                final org.w3c.dom.Document doc = newHardenedDocumentBuilderFactory().newDocumentBuilder().parse(this.source);
                forwardReferencedSitemaps(doc);
                publishUrlEntries(doc);
            } catch (final InterruptedException e) {
                // stop producing; remember the interrupt for signalEnd()
                Thread.currentThread().interrupt();
            } catch (final Throwable e) {
                ConcurrentLog.logException(e);
            } finally {
                try {
                    if (this.source != null) {
                        this.source.close();
                    }
                } catch (final IOException e) {
                    ConcurrentLog.logException(e);
                } finally {
                    signalEnd();
                }
            }
        }

        /**
         * Downloads every sitemap listed in a {@code <sitemap>} element of
         * {@code doc} and copies its entries to this reader's queue. A sitemap
         * that cannot be loaded is logged and skipped.
         *
         * @throws InterruptedException if this thread is interrupted; the
         *         reader of the nested sitemap is interrupted as well
         */
        private void forwardReferencedSitemaps(org.w3c.dom.Document doc) throws InterruptedException {
            final NodeList sitemapNodes = doc.getElementsByTagName("sitemap");
            for (int i = 0; i < sitemapNodes.getLength(); ++i) {
                if (Thread.currentThread().isInterrupted()) {
                    throw new InterruptedException();
                }
                final String url = new SitemapEntry((Element) sitemapNodes.item(i)).url();
                if (url == null || url.isEmpty()) {
                    continue;
                }

                final SitemapReader child;
                try {
                    child = sitemapParser.parse(new DigestURL(url), this.agent);
                } catch (final IOException e) {
                    // best effort: one broken nested sitemap must not abort the others
                    ConcurrentLog.info("SitemapReader", "skipping sitemap " + url + ": " + e.getMessage());
                    continue;
                }

                child.start();
                boolean completed = false;
                try {
                    URLEntry item;
                    while ((item = child.take()) != POISON_URLEntry) {
                        this.queue.put(item);
                    }
                    // take() also returns the poison pill when this thread was interrupted
                    completed = !Thread.currentThread().isInterrupted();
                } finally {
                    if (!completed) {
                        child.interrupt();
                    }
                }
            }
        }

        /**
         * Publishes one {@link URLEntry} per {@code <url>} element of {@code doc}.
         *
         * @throws InterruptedException if this thread is interrupted while the queue is full
         */
        private void publishUrlEntries(org.w3c.dom.Document doc) throws InterruptedException {
            final NodeList urlEntryNodes = doc.getElementsByTagName("url");
            for (int i = 0; i < urlEntryNodes.getLength(); ++i) {
                this.queue.put(new URLEntry((Element) urlEntryNodes.item(i)));
            }
        }

        /**
         * Queues the poison pill. A pending interrupt is cleared first because
         * {@code put} would otherwise fail immediately and leave the consumer
         * waiting forever; the interrupt flag is restored afterwards.
         */
        private void signalEnd() {
            boolean interrupted = Thread.interrupted();
            try {
                this.queue.put(POISON_URLEntry);
            } catch (final InterruptedException e) {
                interrupted = true;
                // last resort without blocking; fails only when the queue is full
                this.queue.offer(POISON_URLEntry);
            }
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }

        /**
         * Waits for the next entry.
         *
         * @return the next entry, or {@link sitemapParser#POISON_URLEntry} when
         *         the reader is finished or the calling thread was interrupted
         *         while waiting (in which case its interrupt flag is set again)
         */
        public URLEntry take() {
            try {
                return this.queue.take();
            } catch (final InterruptedException e) {
                Thread.currentThread().interrupt();
                return POISON_URLEntry;
            }
        }
    }

    /**
     * One {@code <url>} element of a sitemap. All fields hold the raw text of
     * the corresponding child element, an empty string when that child is
     * missing, or {@code null} when the entry was built from a {@code null} element.
     */
    public static class URLEntry {
        public String loc;
        public String lastmod;
        public String changefreq;
        public String priority;

        /**
         * @param element the {@code <url>} element to read; may be {@code null}
         */
        public URLEntry(Element element) {
            this.loc = sitemapParser.val(element, "loc", "");
            this.lastmod = sitemapParser.val(element, "lastmod", "");
            this.changefreq = sitemapParser.val(element, "changefreq", "");
            this.priority = sitemapParser.val(element, "priority", "");
        }

        /**
         * @return the raw content of the {@code <loc>} element
         */
        public String url() {
            return this.loc;
        }

        /**
         * @param dflt value returned when no parseable {@code lastmod} is present
         * @return the parsed {@code lastmod} date or {@code dflt}
         */
        public Date lastmod(Date dflt) {
            return sitemapParser.parseLastmod(this.lastmod, dflt);
        }
    }

    /**
     * One {@code <sitemap>} element of a sitemap index. Field values follow
     * the same rules as those of {@link URLEntry}.
     */
    public static class SitemapEntry {
        public String loc;
        public String lastmod;

        /**
         * @param element the {@code <sitemap>} element to read; may be {@code null}
         */
        public SitemapEntry(Element element) {
            this.loc = sitemapParser.val(element, "loc", "");
            this.lastmod = sitemapParser.val(element, "lastmod", "");
        }

        /**
         * @return the raw content of the {@code <loc>} element
         */
        public String url() {
            return this.loc;
        }

        /**
         * @param dflt value returned when no parseable {@code lastmod} is present
         * @return the parsed {@code lastmod} date or {@code dflt}
         */
        public Date lastmod(Date dflt) {
            return sitemapParser.parseLastmod(this.lastmod, dflt);
        }
    }
}

