搜索|收藏|地图|订阅|图片|论坛|商城

从网页源文件中得到链接

来源: 编程中国 编辑:若水 时间:2008-03-27
import java.net.*;
import java.io.*;
import java.util.*;

class ScanPage{
    private static String strPage;
    private String strUrl;
    private String fileName;
    public void setURL(String strUrl){
        this.strUrl=strUrl;
    }
    //从地址中得到文件名
    public void setFileName(){
        int i;
        char ch;
        i=strUrl.length();
        ch=strUrl.charAt(--i);
        while(ch!='/' && ch>0)
            ch=strUrl.charAt(--i);
        fileName=strUrl.substring(i);
    }
    //下载网页
    public void downFile()throws IOException{
        URL url =new URL(strUrl);
        InputStream is =url.openStream();
        OutputStream os =new FileOutputStream(fileName);
        byte[] buffer =new byte[512];
        int len;
        while((len =is.read(buffer))!=-1)
            os.write(buffer,0,len);
        is.close();
        os.close();
    }
    //读文件
    public void readFile() throws IOException {
        StringBuffer sb = new StringBuffer();
        BufferedReader in =new BufferedReader(new FileReader(fileName));
        String s;
        while((s = in.readLine()) != null) {
            sb.append(s);
            sb.append("\n");
        }
        in.close();
        strPage=sb.toString();
    }
    public String getTitle(){
        
        return "";
    }
    //扫描标签,得到资源
    public void scanLabel(ArrayList al,String strLabel,String strType){
        int idx;   //返回下标
        String strTmp;
        strPage=strPage.toLowerCase();  //转换为小写,以便后面比较
        idx = strPage.indexOf("<body");
        while(idx!=-1){
            idx=strPage.indexOf(strLabel,idx);
            if (idx==-1)
                break;
            else{
                int i=0;
                idx=idx+strLabel.length();
                for(;strPage.charAt(idx+i)!='>' && strPage.charAt(idx+i)!=32;++i);
                strTmp=strPage.substring(idx,idx+i);
                idx=idx+i;
                //去掉首尾引号
                if (strTmp.charAt(0)=='\"')
                    strTmp=strTmp.substring(1);
最新评论共有 0 位网友发表了评论
发表评论
评论内容:不能超过250字,需审核,请自觉遵守互联网相关政策法规。
用户名: 密码:
匿名?