/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.document.parser.html;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.Properties;
import java.util.Set;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.util.CommonPattern;
import net.yacy.document.VocabularyScraper;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.ScraperListener;
import net.yacy.document.parser.html.TagValency;
import net.yacy.document.parser.html.TransformerWriter;

/**
 * Input stream wrapper that pre-scans the beginning of an HTML document for a
 * charset declaration in a {@code <meta http-equiv="Content-Type" ...>} tag.
 *
 * <p>The wrapped stream is buffered and marked in the constructor. During
 * {@link #detectCharset()} the bytes are decoded and written, one character at
 * a time, into a {@link TransformerWriter} built around a {@link ContentScraper}
 * on which this object is registered as listener; the listener callbacks
 * {@link #scrapeTag0} and {@link #scrapeTag1} record the charset and the end of
 * the head section. Afterwards the buffered stream is reset so that subsequent
 * {@link #read()} calls deliver the document from its first byte again.
 *
 * <p>{@link #detectCharset()} discards the internal writer when it finishes and
 * is therefore meant to be called at most once per instance.
 */
public class ScraperInputStream
extends InputStream
implements ScraperListener {
    /** Mode while {@link #detectCharset()} is scanning: reads are limited. */
    private static final int MODE_PRESCAN = 0;
    /** Mode outside of the pre-scan: reads pass straight through. */
    private static final int MODE_PRESCAN_FINISHED = 1;
    /** Size of the buffer and of the mark/reset window, in bytes. */
    private static final long preBufferSize = 4096L;
    /** Name (including '=') of the media type parameter that carries the charset. */
    private static final String CHARSET_PARAM = "charset=";

    private int mode = MODE_PRESCAN_FINISHED;
    /** Number of bytes handed out so far while in pre-scan mode. */
    private long preRead = 0L;
    private final BufferedInputStream bufferedIn;
    private String detectedCharset;
    private boolean charsetChanged = false;
    private boolean endOfHead = false;
    private Reader reader;
    private Writer writer;

    /**
     * Wraps the given stream and prepares the scraper used for the pre-scan.
     *
     * @param inStream the raw document stream
     * @param inputStreamCharset charset name used to decode the pre-scan; when
     *        {@code null} the platform default is used, when unsupported UTF-8 is used
     * @param valencySwitchTagNames passed through to the {@link ContentScraper}
     * @param defaultValency passed through to the {@link ContentScraper}
     * @param vocabularyScraper passed through to the {@link ContentScraper}
     * @param rooturl passed through to the {@link ContentScraper}
     * @param passbyIfBinarySuspect passed through to the {@link TransformerWriter}
     * @param maxLinks passed through to the {@link ContentScraper}
     * @param timezoneOffset passed through to the {@link ContentScraper}
     */
    public ScraperInputStream(InputStream inStream, String inputStreamCharset, Set<String> valencySwitchTagNames, TagValency defaultValency, VocabularyScraper vocabularyScraper, DigestURL rooturl, boolean passbyIfBinarySuspect, int maxLinks, int timezoneOffset) {
        this.bufferedIn = new BufferedInputStream(inStream, (int)preBufferSize);
        // remember the start of the stream so the pre-scan can be undone by reset()
        this.bufferedIn.mark((int)preBufferSize);
        ContentScraper scraper = new ContentScraper(rooturl, maxLinks, valencySwitchTagNames, defaultValency, vocabularyScraper, timezoneOffset);
        scraper.registerHtmlFilterEventListener(this);
        try {
            this.reader = inputStreamCharset == null ? new InputStreamReader(this) : new InputStreamReader((InputStream)this, inputStreamCharset);
        }
        catch (UnsupportedEncodingException e) {
            // unknown charset name: fall back to UTF-8 for the pre-scan
            this.reader = new InputStreamReader((InputStream)this, StandardCharsets.UTF_8);
        }
        this.writer = new TransformerWriter(null, null, scraper, passbyIfBinarySuspect);
    }

    /**
     * Extracts the value of the {@code charset} parameter from a media type
     * string such as {@code text/html; charset=iso-8859-1}.
     *
     * <p>The parameter name is matched case-insensitively and one surrounding
     * single or double quote on each side of the value is removed.
     *
     * @param mimeType the content type value, may be {@code null}
     * @return the trimmed charset value (possibly empty), or {@code null} when
     *         there is no charset parameter
     */
    private static String extractCharsetFromMimetypeHeader(String mimeType) {
        if (mimeType == null) {
            return null;
        }
        String[] parts = CommonPattern.SEMICOLON.split(mimeType);
        if (parts == null || parts.length <= 1) {
            return null;
        }
        // parts[0] is the media type itself; parameters start at index 1
        for (int i = 1; i < parts.length; ++i) {
            String param = parts[i].trim();
            if (!param.regionMatches(true, 0, CHARSET_PARAM, 0, CHARSET_PARAM.length())) {
                continue;
            }
            String charset = param.substring(CHARSET_PARAM.length()).trim();
            if (charset.length() > 0 && (charset.charAt(0) == '\"' || charset.charAt(0) == '\'')) {
                charset = charset.substring(1);
            }
            if (charset.endsWith("\"") || charset.endsWith("'")) {
                charset = charset.substring(0, charset.length() - 1);
            }
            return charset.trim();
        }
        return null;
    }

    /**
     * Listener callback; records the charset declared by a
     * {@code <meta http-equiv="Content-Type" ...>} tag.
     *
     * <p>The charset is taken from the {@code content} attribute; if that yields
     * nothing, a separate {@code charset} attribute on the same tag is tried.
     * State is only updated when a non-empty charset was found, so a tag without
     * a usable charset never erases an earlier detection.
     *
     * @param tagname name of the tag
     * @param tagopts attributes of the tag
     */
    @Override
    public void scrapeTag0(String tagname, Properties tagopts) {
        if (tagname == null || tagname.isEmpty() || tagopts == null) {
            return;
        }
        if (!tagname.equalsIgnoreCase("meta") || !tagopts.containsKey("http-equiv")) {
            return;
        }
        String httpEquiv = tagopts.getProperty("http-equiv");
        if (httpEquiv == null || !httpEquiv.equalsIgnoreCase("Content-Type")) {
            return;
        }
        String charset = ScraperInputStream.extractCharsetFromMimetypeHeader(tagopts.getProperty("content", ""));
        if ((charset == null || charset.isEmpty()) && tagopts.containsKey("charset")) {
            // the charset is sometimes given as an extra attribute of the same tag
            charset = tagopts.getProperty("charset");
        }
        if (charset != null && !charset.isEmpty()) {
            this.detectedCharset = charset;
            this.charsetChanged = true;
        }
    }

    /**
     * Listener callback; notes when a {@code head} tag has been reported so that
     * the pre-scan stops delivering bytes.
     *
     * @param tagname name of the tag
     * @param tagopts attributes of the tag (unused)
     * @param text text content of the tag (unused)
     */
    @Override
    public void scrapeTag1(String tagname, Properties tagopts, char[] text) {
        if (tagname == null || tagname.isEmpty()) {
            return;
        }
        if (tagname.equalsIgnoreCase("head")) {
            this.endOfHead = true;
        }
    }

    /**
     * Pre-scans the start of the document for a charset declaration and then
     * rewinds the underlying stream to its marked start.
     *
     * <p>Scanning stops as soon as a charset has been detected or the limited
     * pre-scan input is exhausted. The internal writer is dropped (not closed)
     * afterwards, so this method must not be called a second time.
     *
     * @return the detected charset name, or {@code null} if none was found
     * @throws IOException if reading, writing to the scraper or resetting the
     *         buffered stream fails
     */
    public String detectCharset() throws IOException {
        this.mode = MODE_PRESCAN;
        int c;
        while ((c = this.reader.read()) != -1) {
            this.writer.write(c);
            if (this.charsetChanged) {
                // first declaration wins; no need to look at the rest of the buffer
                break;
            }
        }
        // release the writer without closing it
        this.writer = null;
        if (this.mode != MODE_PRESCAN_FINISHED) {
            this.mode = MODE_PRESCAN_FINISHED;
            this.bufferedIn.reset();
        }
        return this.charsetChanged ? this.detectedCharset : null;
    }

    /**
     * Reads the next byte from the buffered stream.
     *
     * <p>While pre-scanning, end-of-stream is signalled once the head section
     * has ended, a charset was detected, or one byte less than the mark window
     * has been consumed (so that {@code reset()} stays valid).
     *
     * @return the next byte, or {@code -1} at (real or simulated) end of stream
     * @throws IOException if the underlying read fails
     */
    @Override
    public int read() throws IOException {
        if (this.mode == MODE_PRESCAN) {
            if (this.endOfHead || this.charsetChanged || this.preRead >= preBufferSize - 1L) {
                return -1;
            }
            ++this.preRead;
        }
        return this.bufferedIn.read();
    }

    /**
     * Closes the internal writer if it is still held, i.e. if
     * {@link #detectCharset()} has not discarded it yet. Nothing else is closed here.
     *
     * @throws IOException if closing the writer fails
     */
    @Override
    public synchronized void close() throws IOException {
        if (this.writer != null) {
            this.writer.close();
        }
    }
}

