支持SSL,Proxy,POST/GET的WebClient (使用HttpClient 4.0.1)

2010-08-11 23:29 by hackerzhou

具体实现代码如下,点击展开(import那段很重要,容易搞混,故不略去):

import java.io.IOException;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;

import org.apache.http.Header;
import org.apache.http.HttpException;
import org.apache.http.HttpHost;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.params.ClientPNames;
import org.apache.http.client.params.CookiePolicy;
import org.apache.http.conn.routing.HttpRoute;
import org.apache.http.conn.routing.HttpRoutePlanner;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.params.CoreProtocolPNames;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;
import org.apache.http.protocol.HTTP;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;

/**
 * Small convenience wrapper around HttpClient 4.0.x's {@link DefaultHttpClient}
 * that supports GET and form-encoded POST requests, HTTPS, and an HTTP proxy
 * with optional authentication and a "non-proxy hosts" exclusion pattern.
 *
 * <p>The request is executed lazily: the first call to one of the
 * {@code getResponse*} / {@code get*Content} accessors after
 * {@link #setUrl(String)} performs the fetch and caches the result.
 *
 * <p><b>Security note:</b> for {@code https://} URLs this class installs a
 * trust manager that accepts <em>every</em> certificate and disables host name
 * verification. That is the documented purpose of the class (fetching from
 * untrusted HTTPS sites), but it offers no protection against
 * man-in-the-middle attacks.
 *
 * <p>A single map is used for request headers before a fetch and for response
 * headers after it; multi-valued response headers keep only the last value.
 * Instances are not thread-safe.
 */
public class WebClient {
	private final DefaultHttpClient httpClient = new DefaultHttpClient();
	private String url;
	private HTTPMethod method;
	private byte[] content;
	// Request headers before a fetch, response headers afterwards.
	private final Map<String, String> headers = new HashMap<String, String>();
	private int responseCode;
	private final List<NameValuePair> postParameter = new ArrayList<NameValuePair>();

	// Charset declared in the document itself: either the classic
	// http-equiv="Content-Type" meta tag or the HTML5 <meta charset=...> form.
	// The capture stops at quotes, whitespace and tag delimiters so that only
	// the bare charset label is returned.
	private static final Pattern PAGE_CHARSET_PATTERN = Pattern.compile(
			"(?:content-type.*?|<meta\\s+)charset\\s*=\\s*[\"']?([^\\s\"'>;/\\\\]+)",
			Pattern.CASE_INSENSITIVE);
	// Charset parameter of a Content-Type header value; tolerates optional
	// quotes and trailing parameters such as "; boundary=...".
	private static final Pattern HEADER_CHARSET_PATTERN = Pattern.compile(
			"charset\\s*=\\s*[\"']?([^\\s\"';,]+)", Pattern.CASE_INSENSITIVE);

	/** Demo entry point: fetches a few pages through a sample proxy. */
	public static void main(String[] args) throws Exception {
		WebClient web = new WebClient("http://www.baidu.com/", HTTPMethod.GET);
		web.enableProxy("10.58.32.51", 8080, false, null, null, "127.0.0.1");
		System.out.println(web.getTextContent());
		System.out.println("------------------------------------------");
		web.setUrl("https://mail.google.com/mail/");
		System.out.println(web.getTextContent());
		System.out.println("------------------------------------------");
		web.setUrl("http://www.snee.com/xml/crud/posttest.cgi");
		web.setMethod(HTTPMethod.POST);
		web.addPostParameter("fname", "ababab");
		web.addPostParameter("lname", "cdcdcd");
		System.out.println(web.getTextContent());
		System.out.println("------------------------------------------");
	}

	/**
	 * Creates a client that connects directly (no proxy).
	 *
	 * @param url    target URL, must not be empty
	 * @param method HTTP method to use
	 */
	public WebClient(String url, HTTPMethod method) {
		this(url, method, false, null, 0, false, null, null, null);
	}

	/**
	 * Creates a client that sends every request through an unauthenticated
	 * proxy.
	 *
	 * @param url       target URL, must not be empty
	 * @param method    HTTP method to use
	 * @param proxyHost proxy host name
	 * @param proxyPort proxy port
	 */
	public WebClient(String url, HTTPMethod method, String proxyHost,
			int proxyPort) {
		this(url, method, true, proxyHost, proxyPort, false, null, null, null);
	}

	/**
	 * Creates a client with every setting specified explicitly.
	 *
	 * @param url         target URL, must not be empty
	 * @param method      HTTP method to use
	 * @param useProxy    whether to route requests through the proxy
	 * @param proxyHost   proxy host name (used only when {@code useProxy})
	 * @param proxyPort   proxy port (used only when {@code useProxy})
	 * @param needAuth    whether the proxy requires credentials
	 * @param username    proxy user name (used only when {@code needAuth})
	 * @param password    proxy password (used only when {@code needAuth})
	 * @param nonProxyReg regular expression matching host names that bypass
	 *                    the proxy; {@code null} means no exceptions
	 */
	public WebClient(String url, HTTPMethod method, boolean useProxy,
			String proxyHost, int proxyPort, boolean needAuth, String username,
			String password, String nonProxyReg) {
		setUrl(url);
		setMethod(method);
		if (useProxy) {
			enableProxy(proxyHost, proxyPort, needAuth, username, password,
					nonProxyReg);
		}
	}

	/** Sets the HTTP method used by the next fetch. */
	public void setMethod(HTTPMethod method) {
		this.method = method;
	}

	/**
	 * Points the client at a new URL and resets all per-request state
	 * (headers, POST parameters, cached response code and content).
	 *
	 * @param url target URL
	 * @throws RuntimeException if {@code url} is null or empty
	 */
	public void setUrl(String url) {
		if (isStringEmpty(url)) {
			throw new RuntimeException("[Error] url is empty!");
		}
		this.url = url;
		headers.clear();
		responseCode = 0;
		postParameter.clear();
		content = null;
		if (url.startsWith("https://")) {
			enableSSL();
		} else {
			disableSSL();
		}
	}

	/**
	 * Returns the live header map. Entries added before the fetch are sent as
	 * request headers; after the fetch the same map holds the response
	 * headers.
	 */
	public Map<String, String> getRequestHeaders() {
		return headers;
	}

	/** Adds a form field that is sent when the method is POST. */
	public void addPostParameter(String name, String value) {
		this.postParameter.add(new BasicNameValuePair(name, value));
	}

	/**
	 * Sets the connection and socket read timeouts.
	 *
	 * @param connectTimeout connection timeout in milliseconds
	 * @param readTimeout    socket read timeout in milliseconds
	 */
	public void setTimeout(int connectTimeout, int readTimeout) {
		HttpParams params = httpClient.getParams();
		HttpConnectionParams.setConnectionTimeout(params, connectTimeout);
		HttpConnectionParams.setSoTimeout(params, readTimeout);
	}

	/**
	 * Registers an "https" scheme that trusts every certificate and accepts
	 * every host name.
	 *
	 * @throws IllegalStateException if the TLS context cannot be created;
	 *         silently continuing would leave the client with a scheme the
	 *         caller did not ask for
	 */
	private void enableSSL() {
		try {
			SSLContext sslContext = SSLContext.getInstance("TLS");
			sslContext.init(null, new TrustManager[] { TRUST_ALL_MANAGER }, null);
			SSLSocketFactory socketFactory = new SSLSocketFactory(sslContext);
			socketFactory
					.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
			httpClient.getConnectionManager().getSchemeRegistry()
					.register(new Scheme("https", socketFactory, 443));
		} catch (KeyManagementException e) {
			throw new IllegalStateException(
					"Unable to initialise the trust-all SSL context", e);
		} catch (NoSuchAlgorithmException e) {
			throw new IllegalStateException(
					"TLS is not available on this platform", e);
		}
	}

	/**
	 * Removes the trust-all "https" scheme by putting the stock, certificate
	 * verifying scheme back in its place. The scheme is deliberately not
	 * unregistered: a plain-http request that is redirected to an https URL
	 * would otherwise fail because no "https" scheme is registered.
	 */
	private void disableSSL() {
		SchemeRegistry registry = httpClient.getConnectionManager()
				.getSchemeRegistry();
		registry.register(new Scheme("https", SSLSocketFactory
				.getSocketFactory(), 443));
	}

	/** Drops any proxy credentials and reverts to the default route planner. */
	public void disableProxy() {
		httpClient.getCredentialsProvider().clear();
		httpClient.setRoutePlanner(null);
	}

	/**
	 * Routes requests through an HTTP proxy, optionally bypassing it for hosts
	 * whose name matches a regular expression.
	 *
	 * @param proxyHost proxy host name
	 * @param proxyPort proxy port
	 * @param needAuth  whether to register credentials for the proxy
	 * @param username  proxy user name (used only when {@code needAuth})
	 * @param password  proxy password (used only when {@code needAuth})
	 * @param nonProxyHostRegularExpression case-insensitive regular expression
	 *        searched for in the target host name; matching hosts are
	 *        contacted directly. {@code null} or empty means every request
	 *        goes through the proxy.
	 * @throws java.util.regex.PatternSyntaxException if the expression is
	 *         not a valid regular expression
	 */
	public void enableProxy(final String proxyHost, final int proxyPort,
			boolean needAuth, String username, String password,
			final String nonProxyHostRegularExpression) {
		if (needAuth) {
			httpClient.getCredentialsProvider().setCredentials(
					new AuthScope(proxyHost, proxyPort),
					new UsernamePasswordCredentials(username, password));
		}
		final HttpHost proxy = new HttpHost(proxyHost, proxyPort);
		// Compile once here rather than on every request.
		final Pattern nonProxyHosts = isStringEmpty(nonProxyHostRegularExpression)
				? null
				: Pattern.compile(nonProxyHostRegularExpression,
						Pattern.CASE_INSENSITIVE);
		// ConnRoutePNames.DEFAULT_PROXY would be simpler but cannot express
		// "non-proxy hosts", hence the custom route planner.
		httpClient.setRoutePlanner(new HttpRoutePlanner() {
			@Override
			public HttpRoute determineRoute(HttpHost target,
					HttpRequest request, HttpContext context)
					throws HttpException {
				boolean secure = "https".equalsIgnoreCase(target
						.getSchemeName());
				if (nonProxyHosts != null
						&& nonProxyHosts.matcher(target.getHostName()).find()) {
					// Direct route. The four-argument constructor always
					// builds a proxied route, so it must not be used here.
					return new HttpRoute(target, null, secure);
				}
				return new HttpRoute(target, null, proxy, secure);
			}
		});
	}

	/**
	 * Executes the request and caches status code, headers and body. The
	 * cached state is only replaced once the whole response has been read, so
	 * a failed fetch leaves the request headers intact for a retry.
	 *
	 * @throws IOException      if the request fails or the body cannot be read
	 * @throws RuntimeException if URL or method has not been set
	 */
	private void fetch() throws IOException {
		if (url == null || method == null) {
			throw new RuntimeException(
					"Fetch exception: URL and Method is null");
		}
		httpClient.getParams().setParameter(ClientPNames.COOKIE_POLICY,
				CookiePolicy.BROWSER_COMPATIBILITY);
		HttpUriRequest req;
		if (method == HTTPMethod.GET) {
			req = new HttpGet(url);
		} else {
			HttpPost post = new HttpPost(url);
			post.setEntity(new UrlEncodedFormEntity(this.postParameter,
					HTTP.UTF_8));
			req = post;
		}
		for (Entry<String, String> e : headers.entrySet()) {
			req.addHeader(e.getKey(), e.getValue());
		}

		// Do not send the "Expect: 100-continue" header: some proxies and web
		// servers do not support it and answer "417 Expectation Failed".
		// HttpClient's documentation likewise advises caution with the
		// 100-continue handshake for servers and proxies that do not support
		// HTTP/1.1.
		req.getParams().setBooleanParameter(
				CoreProtocolPNames.USE_EXPECT_CONTINUE, false);
		HttpResponse response = httpClient.execute(req);

		Map<String, String> responseHeaders = new HashMap<String, String>();
		for (Header h : response.getAllHeaders()) {
			responseHeaders.put(h.getName(), h.getValue());
		}
		// Responses without a body have no entity; represent that as an empty
		// array instead of passing null to EntityUtils.
		byte[] body = response.getEntity() == null
				? new byte[0]
				: EntityUtils.toByteArray(response.getEntity());
		int status = response.getStatusLine().getStatusCode();

		// Commit the new state only after everything was read successfully.
		headers.clear();
		headers.putAll(responseHeaders);
		content = body;
		responseCode = status;
	}

	private boolean isStringEmpty(String s) {
		return s == null || s.length() == 0;
	}

	/**
	 * Returns the value of the header with the given name, compared without
	 * regard to case, or {@code null} if there is none.
	 */
	private String findHeader(String name) {
		for (Entry<String, String> e : headers.entrySet()) {
			if (name.equalsIgnoreCase(e.getKey())) {
				return e.getValue();
			}
		}
		return null;
	}

	/**
	 * Returns the first capture group of {@code pattern} found in
	 * {@code text}, or {@code null} if the text is null or does not match.
	 */
	private static String extractCharset(Pattern pattern, String text) {
		if (text == null) {
			return null;
		}
		Matcher m = pattern.matcher(text);
		return m.find() ? m.group(1) : null;
	}

	/**
	 * Decodes the cached body with the named charset, or returns {@code null}
	 * when the name is null or not a charset this JVM supports.
	 */
	private String decodeContent(String charsetName) {
		if (charsetName == null) {
			return null;
		}
		try {
			return new String(content, charsetName);
		} catch (java.io.UnsupportedEncodingException unsupported) {
			// Unknown or malformed label: let the caller try the next source.
			return null;
		}
	}

	/**
	 * Returns the HTTP status code, fetching first if necessary.
	 *
	 * @throws IOException if the fetch fails
	 */
	public int getResponseCode() throws IOException {
		if (responseCode == 0) {
			fetch();
		}
		return responseCode;
	}

	/**
	 * Returns the response headers, fetching first if necessary.
	 *
	 * @throws IOException if the fetch fails
	 */
	public Map<String, String> getResponseHeaders() throws IOException {
		if (responseCode == 0) {
			fetch();
		}
		return headers;
	}

	/**
	 * Returns the raw response body, fetching first if necessary. The array
	 * is empty when the response carried no body.
	 *
	 * @throws IOException if the fetch fails
	 */
	public byte[] getByteArrayContent() throws IOException {
		if (content == null) {
			fetch();
		}
		return content;
	}

	/**
	 * Returns the response body as text, fetching first if necessary. The
	 * charset is taken, in order of preference, from the Content-Type response
	 * header, from a charset declaration inside the document, and finally
	 * from the platform default. A declared charset that this JVM does not
	 * support is skipped.
	 *
	 * @throws IOException      if the fetch fails
	 * @throws RuntimeException if no content is available after fetching
	 */
	public String getTextContent() throws IOException {
		if (content == null) {
			fetch();
		}
		if (content == null) {
			throw new RuntimeException("[Error] Can't fetch content!");
		}
		// 1. Charset announced in the HTTP header.
		String text = decodeContent(extractCharset(HEADER_CHARSET_PATTERN,
				findHeader("Content-Type")));
		if (text != null) {
			return text;
		}
		// 2. Charset announced in the document, located by decoding with the
		// platform default first.
		String html = new String(content);
		text = decodeContent(extractCharset(PAGE_CHARSET_PATTERN, html));
		// 3. Otherwise keep the platform-default decoding.
		return text != null ? text : html;
	}

	/** Exposes the underlying HttpClient for settings not covered here. */
	public DefaultHttpClient getHttpClient() {
		return httpClient;
	}

	/** HTTP methods supported by this client. */
	public enum HTTPMethod {
		GET, POST
	}

	// Trust manager that accepts every certificate chain (used to reach
	// untrusted HTTPS hosts). The check methods accept by not throwing.
	private static final TrustManager TRUST_ALL_MANAGER = new X509TrustManager() {
		@Override
		public X509Certificate[] getAcceptedIssuers() {
			// The interface contract requires a non-null, possibly empty array.
			return new X509Certificate[0];
		}

		@Override
		public void checkServerTrusted(X509Certificate[] chain, String authType)
				throws CertificateException {
		}

		@Override
		public void checkClientTrusted(X509Certificate[] chain, String authType)
				throws CertificateException {
		}
	};
}

最近研究了下HttpClient 4.0.1,主要是因为Java自己的HttpURLConnection对SSL支持的不好,而且控制起来不太方便,而且HttpClient还支持抓取非信任的站点,别的实现方式貌似需要在代码中显式导入证书。

需要的jar包:commons-logging-1.1.1.jar,httpclient-4.0.1.jar,httpcore-4.0.1.jar

Coding的时候遇到了些非常规问题:

1.HttpClient支持使用Java默认的Properties方式设置代理,不过我还是使用了HttpClient的代理设置方式。因而遇到了一个很诡异的问题,Properties方式设置的代理可以设置代理例外,即本地地址不通过代理访问,HttpClient没有简单的一句话设置的方法,必须写HttpRoutePlanner来自定义,比较繁琐。
如果使用HttpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY,new HttpHost(proxyHost, proxyPort));来设置代理,则所有的请求都会往这个代理发送,没有例外,故弃之。

2.HTTP Header中的“Expect”字段引起的问题,我向一些网页直接提交POST没有问题,但如果使用squid proxy进行post的话就会出现417 Expectation Failed错误,网上查了http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html 发现这个问题是因为proxy server/web server不能理解或满足Expect字段中指定的值,则会引发这个错误,而HttpClient默认会发送这个字段,只要指示request不发送这个字段即可。

3.不受信任的HTTPS站点的访问问题,通过X509TrustManager来完成,将方法重写成返回null的或者是啥都不做的,理论上要是checkServerTrusted或checkClientTrusted方法检测到不受信任的站点,会抛出异常,但如果什么都不做,则被视为通过检查。

4.写了个getTextContent方法,用来获取返回的文本,解决乱码问题的方法其实很简单,首先用正则提取HTTP Header中Content-Type里的charset,如果没有,使用默认编码分析html head中Content-Type里的charset,如果没有,使用系统默认编码。

本文基于 署名 2.5 中国大陆 许可协议发布,欢迎转载,演绎或用于商业目的,但是必须保留本文的署名 hackerzhou 并包含 原文链接
发表评论

本文有 9 条评论

  1. iamk
    2012-03-03 23:02

    楼主,怎样实现代理例外呢,能提供一下吗?谢谢

  2. per
    2011-09-16 23:59

    你好,你这个对httpclient的封装不错。

    不过,对于有些web程序,进行的ssl安全登录是双向验证的,不能像如上你说的返回null等方法,那该如何处理呢?

    • hackerzhou
      2011-09-17 08:30

      那就只能进行验证了= =b,比较麻烦应该。我没有试过

  3. 啊啊
    2011-08-17 21:03

    org.apache.http.conn.scheme.Scheme 这个对象的构造方法(String,SocketFactory,int)
    传入SSLSocketFactory报错。
    你下面的语句是怎么通过的?用的包也是httpclient-4.0.1.jar

    SSLSocketFactory sf = new SSLSocketFactory(sslcontext);
    sf.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
    Scheme https = new Scheme(“https”, sf, 443);

    • hackerzhou
      2011-08-18 07:48

      对的,我不太清楚是不是后续版本的httpclient这里的api改变过了

  4. dddd
    2011-02-23 18:54

    只要指示request不发送这个字段即可 请问具体要怎么设置? 谢谢!

  5. shirleyjwilder
    2010-10-18 20:05

    一个问题。如何让httpclient4通过socks代理访问https的页面。

    • hackerzhou
      2010-10-19 00:01

      没试过,不过应该不行。为啥要通过socks代理?socks代理用的很少的吧,普通的http代理是可以支持https的,比如squid

发表评论