Merge branch 'master' of https://github.com/silence940109/Java

scalad · scalad · commit fe2a2c71438e · 2016-09-07T08:49:11.000+08:00
diff --git a/net/GrabPicture.java b/net/GrabPicture.java
@@ -0,0 +1,138 @@
+﻿package com.silence.util;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLConnection;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+/**
+ * Batch image grabber written in Java: downloads the images referenced by a web page.
+ * @author cx112
+ * @version v1.0
+ */
+
+public class GrabPicture {
+	/**
+	 * 根据网站的地址和页面的编码集来获取网页的源代码
+	 * 
+	 * @author cx112
+	 * @param url
+	 *            网址路径
+	 * @param encoding
+	 *            编码集
+	 * @return String 网页的源代码
+	 */
+	public static String gethtmlResourceByURL(String url, String encoding) {
+		// 用于存储网页源代码
+		StringBuffer buf = new StringBuffer();
+		URL urlObj = null;
+		URLConnection uc = null;
+		InputStreamReader isr = null;
+		BufferedReader buffer = null;
+		try {
+			// 建立网络连接
+			urlObj = new URL(url);
+			// 打开网络连接
+			uc = urlObj.openConnection();
+			// 将连接网络的输入流转换
+			isr = new InputStreamReader(uc.getInputStream(), encoding);
+			// 建立缓冲写入流
+			buffer = new BufferedReader(isr);
+			String line = null;
+			while ((line = buffer.readLine()) != null) {
+				buf.append(line + "\n");// 一行一行的追加代码
+			}
+		} catch (Exception e) {
+			System.out.println("test");
+			e.printStackTrace();
+		} finally {
+			try {
+				if (isr != null) {
+					isr.close();
+				}
+			} catch (IOException e) {
+				// TODO Auto-generated catch block
+				e.printStackTrace();
+			}
+		}
+		return buf.toString();
+	}
+
+	/**
+	 * 根据图片的网络地址，下载图片带本地服务器
+	 * 
+	 * @author cx112
+	 * @param filePath
+	 *            文件保存的路径
+	 * @param imgURL
+	 *            图片的网络地址
+	 */
+	public static void DownImages(String filePath, String imgURL) {
+		String fileName = imgURL.substring(imgURL.lastIndexOf("/"));
+
+		try {
+			// 创建文件目录
+			File files = new File(filePath);
+			// 判断是否存在文件夹
+			if (!files.exists()) {
+				files.mkdirs();
+			}
+			// 获取下载地址
+			URL url = new URL(imgURL);
+			// 连接网络地址
+			HttpURLConnection huc = (HttpURLConnection) url.openConnection();
+			// 获取连接的输出流
+			InputStream is = huc.getInputStream();
+			// 创建文件
+			File file = new File(filePath + fileName);
+			// 创建输入流，写入文件
+			FileOutputStream out = null;
+			if (file.getName().endsWith("jpg") || file.getName().endsWith("png") 
+					|| file.getName().endsWith("jpeg") || file.getName().endsWith("jpg") ){				
+				 out = new FileOutputStream(file);				 
+				 int i = 0;
+				 while ((i = is.read()) != -1) {
+					 out.write(i);
+				 }
+				 is.close();
+				 out.close();
+			}
+
+		} catch (Exception e) {
+			e.printStackTrace();
+		}
+	}
+
+	public static void getImage(String url, String encoding,String path){
+		String htmlResouce = gethtmlResourceByURL(url, encoding);		
+		// 解析网页源代码
+		Document document = Jsoup.parse(htmlResouce);
+		// 获取所以图片的地址<img src="" alt= "" width= "" height=""/>
+		Elements elements = document.getElementsByTag("img");
+		for (Element element : elements) {
+			String imgSrc = element.attr("src");
+			if (!"".equals(imgSrc) && imgSrc.startsWith("http://")) {
+				System.out.println("下载图片的地址===" + imgSrc);
+				DownImages(path, imgSrc);
+			}
+		}
+	}
+	
+	public static void main(String[] args) {
+		// 根据网页地址和网页的编码集 获取网页的内容
+		String url = "http://www.tripadvisor.cn";
+		String encoding = "gb2312";
+		getImage(url, encoding, "e:\\test");
+	}
+
+}