/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.cora.protocol.http;

import java.net.MalformedURLException;
import java.util.WeakHashMap;
import java.util.regex.Pattern;
import net.yacy.cora.document.id.MultiProtocolURL;

/**
 * Scans plain text for absolute links that start with one of the schemes
 * {@code smb://}, {@code ftp://}, {@code http://} or {@code https://} and
 * collects every candidate that can be parsed into a {@link MultiProtocolURL}
 * and does not fully match the configured blacklist pattern.
 *
 * <p>Scheme detection is case-sensitive. Results accumulate across repeated
 * calls to {@link #scrape(String)}.
 *
 * <p>NOTE(review): the collected URLs are held as keys of a
 * {@link WeakHashMap} and nothing else in this class keeps a strong reference
 * to them, so entries may disappear after a garbage collection. This looks
 * like a deliberate memory bound, but confirm with the callers before relying
 * on {@link #getLinks()} returning every link that was scraped.
 */
public class LinkExtractor {
    private static final char lb = '<';
    private static final char rb = '>';
    private static final char dquotes = '\"';
    private static final char space = ' ';

    /** Dummy value; the map is only used as a set of its keys. */
    private static final Object PRESENT = new Object();

    private final WeakHashMap<MultiProtocolURL, Object> links = new WeakHashMap<>();
    private final Pattern blackpattern;

    /**
     * Creates a new extractor.
     *
     * @param blackpattern pattern that a candidate URL must <em>not</em> fully
     *                     match in order to be collected; {@code null} disables
     *                     blacklist filtering
     */
    public LinkExtractor(Pattern blackpattern) {
        this.blackpattern = blackpattern;
    }

    /**
     * Searches the given text for links and adds every accepted link to the
     * internal collection.
     *
     * <p>The characters {@code <}, {@code >} and {@code "} are treated like
     * blanks. A link starts at a known scheme prefix and ends at the next
     * whitespace character (or the end of the text); one trailing {@code '.'}
     * is stripped. The search resumes one character after the start of each
     * match, so a link embedded inside another link is found as well.
     *
     * @param text the text to scan; {@code null} or empty text is ignored
     */
    public void scrape(String text) {
        if (text == null || text.isEmpty()) {
            return;
        }
        // Markup delimiters and quotes end a URL just like blanks do.
        final String normalized = text.replace(lb, space).replace(rb, space).replace(dquotes, space);
        int s = 0;
        while (s < normalized.length()) {
            final int p = nextSchemeStart(normalized, s);
            if (p == Integer.MAX_VALUE) {
                break; // no further link in the remaining text
            }
            // URLs cannot contain raw whitespace, so line breaks and tabs end
            // the candidate as well, not only blanks.
            final int q = endOfToken(normalized, p + 1);
            String u = normalized.substring(p, q);
            if (u.endsWith(".")) {
                // most likely the full stop of the surrounding sentence
                u = u.substring(0, u.length() - 1);
            }
            s = p + 1;
            if (this.blackpattern != null && this.blackpattern.matcher(u).matches()) {
                continue;
            }
            try {
                this.links.put(new MultiProtocolURL(u), PRESENT);
            } catch (MalformedURLException ignored) {
                // Best effort: a candidate that is not a parseable URL is skipped.
            }
        }
    }

    /**
     * Returns the links collected so far.
     *
     * @return a newly allocated array holding the currently present links,
     *         never {@code null} and without {@code null} elements
     */
    public MultiProtocolURL[] getLinks() {
        // Let the key set size the array from the keys that are actually alive;
        // pre-sizing from size() could leave trailing nulls when weak entries
        // are cleared between the size() call and the iteration.
        return this.links.keySet().toArray(new MultiProtocolURL[0]);
    }

    /**
     * Finds the smallest index at or after {@code start} at which one of the
     * supported scheme prefixes begins.
     *
     * @return that index, or {@link Integer#MAX_VALUE} if none is present
     */
    private static int nextSchemeStart(String s, int start) {
        return Math.min(
                Math.min(find(s, "smb://", start), find(s, "ftp://", start)),
                Math.min(find(s, "http://", start), find(s, "https://", start)));
    }

    /**
     * Returns the index of the first whitespace character at or after
     * {@code from}, or {@code s.length()} if there is none.
     */
    private static int endOfToken(String s, int from) {
        int i = from;
        while (i < s.length() && !Character.isWhitespace(s.charAt(i))) {
            i++;
        }
        return i;
    }

    /**
     * Like {@link String#indexOf(String, int)}, but reports a miss as
     * {@link Integer#MAX_VALUE} so that results can be combined with
     * {@link Math#min(int, int)}.
     */
    private static int find(String s, String m, int start) {
        int p = s.indexOf(m, start);
        return p < 0 ? Integer.MAX_VALUE : p;
    }
}

