Skip to content

Commit fe2a2c7

Browse files
committed
Merge branch 'master' of https://github.com/silence940109/Java
2 parents c132ade + 9da7132 commit fe2a2c7

1 file changed

Lines changed: 138 additions & 0 deletions

File tree

net/GrabPicture.java

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
package com.silence.util;
2+
3+
import java.io.BufferedReader;
4+
import java.io.File;
5+
import java.io.FileOutputStream;
6+
import java.io.IOException;
7+
import java.io.InputStream;
8+
import java.io.InputStreamReader;
9+
import java.net.HttpURLConnection;
10+
import java.net.URL;
11+
import java.net.URLConnection;
12+
13+
import org.jsoup.Jsoup;
14+
import org.jsoup.nodes.Document;
15+
import org.jsoup.nodes.Element;
16+
import org.jsoup.select.Elements;
17+
18+
/**
19+
* Java开发图片批量采集
20+
* @author cx112
21+
* @version v1.0
22+
*/
23+
24+
public class GrabPicture {
25+
/**
26+
* 根据网站的地址和页面的编码集来获取网页的源代码
27+
*
28+
* @author cx112
29+
* @param url
30+
* 网址路径
31+
* @param encoding
32+
* 编码集
33+
* @return String 网页的源代码
34+
*/
35+
public static String gethtmlResourceByURL(String url, String encoding) {
36+
// 用于存储网页源代码
37+
StringBuffer buf = new StringBuffer();
38+
URL urlObj = null;
39+
URLConnection uc = null;
40+
InputStreamReader isr = null;
41+
BufferedReader buffer = null;
42+
try {
43+
// 建立网络连接
44+
urlObj = new URL(url);
45+
// 打开网络连接
46+
uc = urlObj.openConnection();
47+
// 将连接网络的输入流转换
48+
isr = new InputStreamReader(uc.getInputStream(), encoding);
49+
// 建立缓冲写入流
50+
buffer = new BufferedReader(isr);
51+
String line = null;
52+
while ((line = buffer.readLine()) != null) {
53+
buf.append(line + "\n");// 一行一行的追加代码
54+
}
55+
} catch (Exception e) {
56+
System.out.println("test");
57+
e.printStackTrace();
58+
} finally {
59+
try {
60+
if (isr != null) {
61+
isr.close();
62+
}
63+
} catch (IOException e) {
64+
// TODO Auto-generated catch block
65+
e.printStackTrace();
66+
}
67+
}
68+
return buf.toString();
69+
}
70+
71+
/**
72+
* 根据图片的网络地址,下载图片带本地服务器
73+
*
74+
* @author cx112
75+
* @param filePath
76+
* 文件保存的路径
77+
* @param imgURL
78+
* 图片的网络地址
79+
*/
80+
public static void DownImages(String filePath, String imgURL) {
81+
String fileName = imgURL.substring(imgURL.lastIndexOf("/"));
82+
83+
try {
84+
// 创建文件目录
85+
File files = new File(filePath);
86+
// 判断是否存在文件夹
87+
if (!files.exists()) {
88+
files.mkdirs();
89+
}
90+
// 获取下载地址
91+
URL url = new URL(imgURL);
92+
// 连接网络地址
93+
HttpURLConnection huc = (HttpURLConnection) url.openConnection();
94+
// 获取连接的输出流
95+
InputStream is = huc.getInputStream();
96+
// 创建文件
97+
File file = new File(filePath + fileName);
98+
// 创建输入流,写入文件
99+
FileOutputStream out = null;
100+
if (file.getName().endsWith("jpg") || file.getName().endsWith("png")
101+
|| file.getName().endsWith("jpeg") || file.getName().endsWith("jpg") ){
102+
out = new FileOutputStream(file);
103+
int i = 0;
104+
while ((i = is.read()) != -1) {
105+
out.write(i);
106+
}
107+
is.close();
108+
out.close();
109+
}
110+
111+
} catch (Exception e) {
112+
e.printStackTrace();
113+
}
114+
}
115+
116+
public static void getImage(String url, String encoding,String path){
117+
String htmlResouce = gethtmlResourceByURL(url, encoding);
118+
// 解析网页源代码
119+
Document document = Jsoup.parse(htmlResouce);
120+
// 获取所以图片的地址<img src="" alt= "" width= "" height=""/>
121+
Elements elements = document.getElementsByTag("img");
122+
for (Element element : elements) {
123+
String imgSrc = element.attr("src");
124+
if (!"".equals(imgSrc) && imgSrc.startsWith("http://")) {
125+
System.out.println("下载图片的地址===" + imgSrc);
126+
DownImages(path, imgSrc);
127+
}
128+
}
129+
}
130+
131+
public static void main(String[] args) {
132+
// 根据网页地址和网页的编码集 获取网页的内容
133+
String url = "http://www.tripadvisor.cn";
134+
String encoding = "gb2312";
135+
getImage(url, encoding, "e:\\test");
136+
}
137+
138+
}

0 commit comments

Comments
 (0)