/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.document.parser.rdfa.impl;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.document.VocabularyScraper;
import net.yacy.document.parser.htmlParser;
import net.yacy.document.parser.rdfa.IRDFaTriple;
import net.yacy.document.parser.rdfa.impl.RDFaTripleImpl;

/**
 * Parser that first delegates to the regular {@link htmlParser} and then, when
 * the input stream can be rewound, reads the same stream a second time to
 * extract RDFa triples into one additional {@link Document}.
 */
public class RDFaParser
extends AbstractParser
implements Parser {
    /** Tag used for every log line emitted by this parser. */
    private static final String LOG_NAME = "RDFA PARSER";

    /** Read limit, in bytes, handed to {@link InputStream#mark(int)} before the HTML pass. */
    private static final int MAX_RESET_BYTES = 10240;

    /** HTML parser used for the first pass over the stream. */
    private final htmlParser hp = new htmlParser();

    /**
     * Creates the parser and registers the file extensions and MIME types it accepts.
     */
    public RDFaParser() {
        super("RDFa Parser");
        this.SUPPORTED_EXTENSIONS.add("html");
        this.SUPPORTED_EXTENSIONS.add("htm");
        this.SUPPORTED_EXTENSIONS.add("xhtml");
        this.SUPPORTED_EXTENSIONS.add("php");
        this.SUPPORTED_MIME_TYPES.add("text/html");
        this.SUPPORTED_MIME_TYPES.add("text/xhtml+xml");
    }

    /**
     * Parses the stream as HTML and, if the stream can be reset afterwards,
     * parses it again for RDFa triples.
     *
     * @param url            location of the document
     * @param mimeType       MIME type forwarded to the HTML parser and stored in the RDFa document
     * @param charset        declared character set of the document; may be null or empty
     * @param scraper        vocabulary scraper forwarded to the HTML parser
     * @param timezoneOffset time zone offset forwarded to the HTML parser
     * @param source         document content; must support mark/reset
     * @return the documents produced by the HTML parser, followed by one RDFa
     *         document when the stream could be reset; only the HTML documents otherwise
     * @throws Parser.Failure       when {@code source} does not support mark/reset,
     *                              or when the HTML parser fails
     * @throws InterruptedException when the HTML parser is interrupted
     */
    @Override
    public Document[] parse(DigestURL url, String mimeType, String charset, VocabularyScraper scraper, int timezoneOffset, InputStream source) throws Parser.Failure, InterruptedException {
        if (!source.markSupported()) {
            throw new Parser.Failure("RDFaParser needs an input stream with mark/reset operations supported.", url);
        }
        source.mark(MAX_RESET_BYTES);
        Document[] htmlDocs = this.hp.parse(url, mimeType, charset, scraper, timezoneOffset, source);
        try {
            source.reset();
        }
        catch (IOException e) {
            // The mark was invalidated (more than MAX_RESET_BYTES were read): keep the HTML result only.
            ConcurrentLog.warn(LOG_NAME, "Could not reset stream to beginning : only HTML has been parsed. Document may be larger than limit (" + MAX_RESET_BYTES + " bytes.)");
            return htmlDocs;
        }
        Document rdfaDoc = RDFaParser.parseRDFa(url, mimeType, charset, source);
        Document[] retDocs = new Document[htmlDocs.length + 1];
        System.arraycopy(htmlDocs, 0, retDocs, 0, htmlDocs.length);
        retDocs[htmlDocs.length] = rdfaDoc;
        return retDocs;
    }

    /**
     * Extracts RDFa triples from the stream and wraps them in a document.
     * This is best effort: any failure is logged and an empty document is returned.
     *
     * @param url      location of the document, also passed to the triple extractor
     * @param mimeType MIME type stored in the resulting document
     * @param charset  declared character set, used to decode the stream when it is supported
     * @param source   stream positioned at the start of the content
     * @return a document holding the extracted triples as text, or an empty
     *         document when nothing could be extracted
     */
    private static Document parseRDFa(DigestURL url, String mimeType, String charset, InputStream source) {
        IRDFaTriple[] allTriples = null;
        try {
            // Decode with the declared charset instead of the platform default.
            RDFaTripleImpl triple = new RDFaTripleImpl(new InputStreamReader(source, RDFaParser.resolveCharset(charset)), url.toString());
            allTriples = triple.parse();
        }
        catch (Exception e) {
            ConcurrentLog.warn(LOG_NAME, "Triple extraction failed : " + e);
        }
        // allTriples stays null when extraction failed; do not dereference it blindly.
        if (allTriples != null && allTriples.length > 0) {
            try {
                return RDFaParser.convertAllTriplesToDocument(url, mimeType, charset, allTriples);
            }
            catch (Exception e) {
                ConcurrentLog.warn(LOG_NAME, "Conversion triple to document failed : " + e);
            }
        }
        return RDFaParser.newDocument(url, mimeType, charset, null);
    }

    /**
     * Resolves a charset name to a {@link Charset}.
     *
     * @param charset charset name; may be null, empty, illegal or unsupported
     * @return the named charset, or UTF-8 when the name cannot be resolved
     */
    private static Charset resolveCharset(String charset) {
        if (charset != null && !charset.isEmpty()) {
            try {
                return Charset.forName(charset);
            }
            catch (IllegalArgumentException ignored) {
                // Illegal or unsupported charset name: fall through to the default below.
            }
        }
        return StandardCharsets.UTF_8;
    }

    /**
     * Builds a document whose text is the comma-terminated list of distinct
     * "propertyURI + Z + value" strings of the given triples, with
     * {@code ':'} replaced by {@code 'X'} and {@code '_'}, space, {@code '.'}
     * and {@code ','} replaced by {@code 'Y'}.
     *
     * @param url        location of the document
     * @param mimeType   MIME type stored in the document
     * @param charset    charset stored in the document
     * @param allTriples triples to convert; must not be null
     * @return the document carrying the encoded triples as its text
     */
    private static Document convertAllTriplesToDocument(DigestURL url, String mimeType, String charset, IRDFaTriple[] allTriples) {
        Set<String> keywords = new HashSet<String>(allTriples.length);
        for (IRDFaTriple triple : allTriples) {
            RDFaParser.addNotEmptyValuesToSet(keywords, triple.getPropertyURI() + "Z" + triple.getValue());
        }
        StringBuilder text = new StringBuilder();
        for (String keyword : keywords) {
            String encoded = keyword
                    .replace(':', 'X')
                    .replace('_', 'Y')
                    .replace(' ', 'Y')
                    .replace('.', 'Y')
                    .replace(',', 'Y');
            text.append(encoded).append(',');
        }
        return RDFaParser.newDocument(url, mimeType, charset, text.toString());
    }

    /**
     * Creates a document with a single empty title, empty author and publisher,
     * zero coordinates, the current date and the given text; every other
     * optional part is left null.
     *
     * @param url      location of the document
     * @param mimeType MIME type stored in the document
     * @param charset  charset stored in the document
     * @param text     text content, or null for an empty document
     * @return the newly created document
     */
    private static Document newDocument(DigestURL url, String mimeType, String charset, Object text) {
        return new Document(url, mimeType, charset, null, null, null, RDFaParser.singleList(""), "", "", null, new ArrayList<String>(0), 0.0, 0.0, text, null, null, null, false, new Date());
    }

    /**
     * Adds {@code value} to {@code set} unless it is null.
     * NOTE(review): despite the name, empty strings are not filtered out, and the
     * only caller in this class passes a concatenation, which is never null.
     *
     * @param set   target set
     * @param value value to add; ignored when null
     */
    private static void addNotEmptyValuesToSet(Set<String> set, String value) {
        if (value != null) {
            set.add(value);
        }
    }

    /**
     * Command-line entry point: parses the file or URL given as the first
     * argument and prints any failure. The log system is shut down on every exit path.
     *
     * @param args exactly one element expected: a file path or a URL
     */
    public static void main(String[] args) {
        try {
            if (args.length < 1) {
                System.out.println("Usage: one and only one argument giving a file path or a URL.");
                return;
            }
            URL aURL = null;
            File aFile = new File(args[0]);
            if (aFile.exists()) {
                try {
                    aURL = aFile.getAbsoluteFile().toURI().toURL();
                }
                catch (MalformedURLException e) {
                    System.err.println("Could not convert file path to URL.");
                }
            } else {
                try {
                    aURL = new URL(args[0]);
                }
                catch (MalformedURLException e) {
                    System.err.println("URL is malformed.");
                }
            }
            if (aURL == null) {
                System.out.println("File or URL not recognized.");
                return;
            }
            RDFaParser aParser = new RDFaParser();
            // try-with-resources closes the stream even when parsing fails.
            try (InputStream in = aURL.openStream()) {
                aParser.parse(new DigestURL(args[0]), "", "", new VocabularyScraper(), 0, in);
            }
            catch (IOException | Parser.Failure | InterruptedException e) {
                e.printStackTrace();
            }
        }
        finally {
            ConcurrentLog.shutdown();
        }
    }
}

