/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.crawler.retrieval;

import java.net.MalformedURLException;
import java.util.Date;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.HarvestProcess;
import net.yacy.crawler.data.CrawlProfile;
import net.yacy.crawler.retrieval.Request;
import net.yacy.document.parser.sitemapParser;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.search.Switchboard;

/**
 * Background thread that reads one sitemap and hands every URL entry it
 * yields to the crawl stacker of the given {@link Switchboard}.
 *
 * <p>Entries come from the reader returned by {@code sitemapParser.parse(...)};
 * the loop in {@link #run()} stops when the reader delivers
 * {@code sitemapParser.POISON_URLEntry}.
 */
public class SitemapImporter
extends Thread {
    private static final ConcurrentLog logger = new ConcurrentLog("SITEMAP");
    private final CrawlProfile crawlingProfile;
    private final DigestURL siteMapURL;
    private final Switchboard sb;

    /**
     * Creates the importer thread; it does not start it.
     *
     * @param sb switchboard used to look up already known URLs and to enqueue new requests
     * @param sitemapURL location of the sitemap to parse; expected to be non-null
     * @param profileEntry crawl profile that supplies the agent, handle and timezone offset;
     *        expected to be non-null
     */
    public SitemapImporter(Switchboard sb, DigestURL sitemapURL, CrawlProfile profileEntry) {
        super(SitemapImporter.threadName(sitemapURL));
        assert (sitemapURL != null);
        this.sb = sb;
        this.siteMapURL = sitemapURL;
        assert (profileEntry != null);
        this.crawlingProfile = profileEntry;
    }

    /**
     * Builds the thread name {@code SitemapImporter(<url>)}.
     *
     * <p>The conditional must be parenthesised: without the parentheses the string
     * concatenation is evaluated first and the null test is applied to the
     * concatenated string, which is never null.
     */
    private static String threadName(DigestURL sitemapURL) {
        return "SitemapImporter(" + (sitemapURL != null ? sitemapURL.toNormalform(false) : "") + ")";
    }

    /**
     * Parses the sitemap and calls {@link #process(sitemapParser.URLEntry)} for each entry
     * until the poison entry is taken. Any exception ends the import and is logged as a warning.
     */
    @Override
    public void run() {
        try {
            sitemapParser.URLEntry item;
            logger.info("Start parsing sitemap file " + this.siteMapURL.toNormalform(true));
            sitemapParser.SitemapReader parser = sitemapParser.parse(this.siteMapURL, this.crawlingProfile.getAgent());
            parser.start();
            // identity comparison is intended: the poison entry is a sentinel object
            while ((item = parser.take()) != sitemapParser.POISON_URLEntry) {
                this.process(item);
            }
        }
        catch (Exception e) {
            logger.warn("Unable to parse sitemap file " + this.siteMapURL, e);
        }
    }

    /**
     * Enqueues one sitemap entry for crawling unless the stored copy is newer.
     *
     * <p>An entry is skipped when it carries a last-modification date, the switchboard
     * reports the URL as {@code HarvestProcess.LOADED}, and the stored metadata has a
     * modification date after the one from the sitemap. An entry whose URL cannot be
     * parsed is logged and skipped.
     *
     * @param entry2 the sitemap entry to handle
     */
    public void process(sitemapParser.URLEntry entry2) {
        DigestURL url;
        try {
            url = new DigestURL(entry2.url());
        }
        catch (MalformedURLException e) {
            // without a valid URL there is neither a hash to look up nor anything to enqueue
            logger.warn("Skipping malformed sitemap URL '" + entry2.url() + "'", e);
            return;
        }
        byte[] nexturlhash = url.hash();

        // null default: only entries that actually declare a date take part in the freshness check
        Date lastMod = entry2.lastmod(null);
        if (lastMod != null && this.isStoredCopyNewer(nexturlhash, lastMod)) {
            return;
        }

        this.sb.crawlStacker.enqueueEntry(new Request(ASCII.getBytes(this.sb.peers.mySeed().hash), url, null, entry2.url(), entry2.lastmod(new Date()), this.crawlingProfile.handle(), 0, this.crawlingProfile.timezoneOffset()));
        logger.info("New URL '" + entry2.url() + "' added for loading.");
    }

    /**
     * Tells whether the URL is already loaded and its stored modification date is after
     * {@code lastMod}. A missing harvest state, missing metadata or missing stored date
     * yields {@code false}.
     */
    private boolean isStoredCopyNewer(byte[] urlhash, Date lastMod) {
        HarvestProcess dbocc = this.sb.getHarvestProcess(ASCII.String(urlhash));
        if (dbocc != HarvestProcess.LOADED) {
            return false;
        }
        URIMetadataNode oldEntry = this.sb.index.fulltext().getMetadata(urlhash);
        if (oldEntry == null) {
            return false;
        }
        Date modDate = oldEntry.moddate();
        return modDate != null && modDate.after(lastMod);
    }
}

