Java读取网页内容并下载图片的实例
Java读取网页内容并下载图片的实例
很多人第一次接触数据采集时可能无从下手,尤其是新手,更会感到茫然。所以我在这里分享一下自己的心得,希望能和大家一起交流技术;如果有什么不足,还请大家指正。写这篇文章的目的,就是希望大家一起成长。我也相信技术没有高低之分,只有互补,只有分享,才能使彼此更好地成长。
示例代码:
importjava.io.BufferedInputStream;
importjava.io.BufferedReader;
importjava.io.File;
importjava.io.FileNotFoundException;
importjava.io.FileOutputStream;
importjava.io.IOException;
importjava.io.InputStreamReader;
importjava.net.MalformedURLException;
importjava.net.URL;
importjava.util.regex.Matcher;
importjava.util.regex.Pattern;
/**
 * Fetches a web page, finds the jpg/png/gif images referenced by its
 * {@code src} / {@code background} attributes and saves each of them into
 * the {@code ./pic/} directory.
 */
public class GetContentPicture {

    /** Directory, relative to the working directory, that images are saved into. */
    private static final String PICTURE_DIR = "./pic/";

    /**
     * Matches image references that carry no scheme/host, e.g. {@code src="img/a.png"}
     * or {@code src="/img/a.png"}. Group 3 is the path WITHOUT the optional leading slash.
     */
    private static final Pattern RELATIVE_IMAGE = Pattern.compile(
            "(?x)(src|SRC|background|BACKGROUND)=('|\")/?(([\\w-]+/)*([\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))('|\")");

    /**
     * Matches absolute image references, e.g. {@code src="http://host/img/a.png"}.
     * Group 3 is the complete URL. Both http and https are accepted.
     */
    private static final Pattern ABSOLUTE_IMAGE = Pattern.compile(
            "(?x)(src|SRC|background|BACKGROUND)=('|\")(https?://([\\w-]+\\.)+[\\w-]+(:[0-9]+)*(/[\\w-]+)*(/[\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))('|\")");

    /**
     * Downloads the resource at {@code httpUrl} and stores it in {@code ./pic/}
     * under the name that follows the last {@code '/'} of the URL. The target
     * directory is created when it does not exist yet.
     *
     * <p>I/O failures are reported on {@code System.err} and are not propagated,
     * so one broken image does not abort a whole crawl.
     *
     * @param httpUrl absolute URL of the image to download
     */
    public void getHtmlPicture(String httpUrl) {
        System.out.println("取网络图片");

        // "+ 1" drops the slash itself and also copes with a URL that has no '/' at all.
        String fileName = httpUrl.substring(httpUrl.lastIndexOf('/') + 1);
        if (fileName.isEmpty()) {
            System.err.println("No file name in URL: " + httpUrl);
            return;
        }

        File directory = new File(PICTURE_DIR);
        if (!directory.isDirectory() && !directory.mkdirs()) {
            System.err.println("Cannot create directory: " + directory);
            return;
        }

        // try-with-resources closes both streams even when the copy fails half-way.
        try (BufferedInputStream in = new BufferedInputStream(new URL(httpUrl).openStream());
             FileOutputStream out = new FileOutputStream(new File(directory, fileName))) {
            byte[] buffer = new byte[8192];
            int count;
            while ((count = in.read(buffer)) != -1) {
                out.write(buffer, 0, count);
            }
        } catch (IOException e) {
            // Also covers MalformedURLException and FileNotFoundException.
            System.err.println("Failed to download " + httpUrl + ": " + e);
            return;
        }
        System.out.println("图片获取成功");
    }

    /**
     * Reads the document at {@code httpUrl} line by line and returns all lines
     * joined together without line separators. The bytes are decoded with the
     * platform default charset.
     *
     * @param httpUrl absolute URL of the page to read
     * @return the page text with line breaks removed
     * @throws IOException if the URL is malformed or the page cannot be read
     */
    public String getHtmlCode(String httpUrl) throws IOException {
        StringBuilder content = new StringBuilder();
        URL page = new URL(httpUrl);
        // Wrap the stream returned by openStream() in a BufferedReader.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(page.openStream()))) {
            String line;
            // Keep reading until the end of the stream.
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        }
        return content.toString();
    }

    /**
     * Prints the page at {@code url}, then downloads every image it references:
     * first the relative references (resolved against {@code url}), then the
     * absolute ones. Each matched reference is printed before it is downloaded.
     *
     * @param url absolute URL of the page to scan
     * @throws IOException if the page cannot be read or a reference cannot be
     *                     resolved into a URL
     */
    public void get(String url) throws IOException {
        String content = this.getHtmlCode(url);
        System.out.println(content);

        URL base = new URL(url);
        Matcher matcher = RELATIVE_IMAGE.matcher(content);
        while (matcher.find()) {
            String path = matcher.group(3);
            System.out.println(path);
            // The pattern's optional '/' sits just before group 3; put it back so
            // that root-relative references resolve against the host, not the page.
            if (content.charAt(matcher.start(3) - 1) == '/') {
                path = "/" + path;
            }
            this.getHtmlPicture(new URL(base, path).toString());
        }

        matcher = ABSOLUTE_IMAGE.matcher(content);
        while (matcher.find()) {
            System.out.println(matcher.group(3));
            this.getHtmlPicture(matcher.group(3));
        }
    }

    /**
     * Demo entry point: downloads the images referenced by the Baidu home page.
     *
     * @param args ignored
     * @throws IOException if the page cannot be read
     */
    public static void main(String[] args) throws IOException {
        String url = "http://www.baidu.com/";
        GetContentPicture gcp = new GetContentPicture();
        gcp.get(url);
    }
}
如有疑问请留言或者到本站社区交流讨论,感谢阅读,希望能帮助到大家,谢谢大家对本站的支持!