1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.archive.crawler.fetcher;
24
25 import java.io.IOException;
26 import java.net.SocketTimeoutException;
27
28 import org.apache.commons.httpclient.HttpMethod;
29 import org.apache.commons.httpclient.HttpMethodRetryHandler;
30 import org.apache.commons.httpclient.NoHttpResponseException;
31 import org.apache.commons.httpclient.methods.PostMethod;
32
33 /***
34 * Retry handler that tries ten times to establish connection and then once
35 * established, if a GET method, tries ten times to get response (If POST,
36 * it tries once only).
37 *
38 * Its unsafe retrying POSTs. See 'Rule of Thumb' under 'Method Recovery'
39 * here: <a href="http://jakarta.apache.org/commons/httpclient/tutorial.html">
40 * HttpClient Tutorial</a>.
41 *
42 * @author stack
43 * @version $Date: 2005-06-12 17:59:14 +0000 (Sun, 12 Jun 2005) $, $Revision: 3560 $
44 */
45 public class HeritrixHttpMethodRetryHandler implements HttpMethodRetryHandler {
46 private static final int DEFAULT_RETRY_COUNT = 10;
47
48 private final int maxRetryCount;
49
50 /***
51 * Constructor.
52 */
53 public HeritrixHttpMethodRetryHandler() {
54 this(DEFAULT_RETRY_COUNT);
55 }
56
57 /***
58 * Constructor.
59 * @param maxRetryCount Maximum amount of times to retry.
60 */
61 public HeritrixHttpMethodRetryHandler(int maxRetryCount) {
62 this.maxRetryCount = maxRetryCount;
63 }
64
65 public boolean retryMethod(HttpMethod method, IOException exception,
66 int executionCount) {
67 if(exception instanceof SocketTimeoutException) {
68
69
70 return false;
71 }
72 if (executionCount >= this.maxRetryCount) {
73
74 return false;
75 }
76 if (exception instanceof NoHttpResponseException) {
77
78 return true;
79 }
80 if (!method.isRequestSent() && (!(method instanceof PostMethod))) {
81
82
83 return true;
84 }
85
86 return false;
87 }
88 }