/* HeritrixHttpMethodRetryHandler
2   *
3    * Created on Sep 29, 2004
4   *
5   * Copyright (C) 2003 Internet Archive.
6   *
7   * This file is part of the Heritrix web crawler (crawler.archive.org).
8   *
9   * Heritrix is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser Public License as published by
11  * the Free Software Foundation; either version 2.1 of the License, or
12  * any later version.
13  *
14  * Heritrix is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU Lesser Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser Public License
20  * along with Heritrix; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22  */
23  package org.archive.crawler.fetcher;
24  
25  import java.io.IOException;
26  import java.net.SocketTimeoutException;
27  
28  import org.apache.commons.httpclient.HttpMethod;
29  import org.apache.commons.httpclient.HttpMethodRetryHandler;
30  import org.apache.commons.httpclient.NoHttpResponseException;
31  import org.apache.commons.httpclient.methods.PostMethod;
32  
33  /***
34   * Retry handler that tries ten times to establish connection and then once
35   * established, if a GET method, tries ten times to get response (If POST,
36   * it tries once only).
37   * 
38   * Its unsafe retrying POSTs.  See 'Rule of Thumb' under 'Method Recovery'
39   * here: <a href="http://jakarta.apache.org/commons/httpclient/tutorial.html">
40   * HttpClient Tutorial</a>.
41   * 
42   * @author stack
43   * @version $Date: 2005-06-12 17:59:14 +0000 (Sun, 12 Jun 2005) $, $Revision: 3560 $
44   */
45  public class HeritrixHttpMethodRetryHandler implements HttpMethodRetryHandler {
46      private static final int DEFAULT_RETRY_COUNT = 10;
47      
48      private final int maxRetryCount;
49      
50      /***
51       * Constructor.
52       */
53      public HeritrixHttpMethodRetryHandler() {
54          this(DEFAULT_RETRY_COUNT);
55      }
56      
57      /***
58       * Constructor.
59       * @param maxRetryCount Maximum amount of times to retry.
60       */
61      public HeritrixHttpMethodRetryHandler(int maxRetryCount) {
62      	this.maxRetryCount = maxRetryCount;
63      }
64      
65      public boolean retryMethod(HttpMethod method, IOException exception,
66  			int executionCount) {
67          if(exception instanceof SocketTimeoutException) {
68              // already waited for the configured amount of time with no reply; 
69              // do not retry further until next go round
70              return false; 
71          }
72  		if (executionCount >= this.maxRetryCount) {
73  			// Do not retry if over max retry count
74  			return false;
75  		}
76  		if (exception instanceof NoHttpResponseException) {
77  			// Retry if the server dropped connection on us
78  			return true;
79  		}
80  		if (!method.isRequestSent() && (!(method instanceof PostMethod))) {
81  			// Retry if the request has not been sent fully or
82  			// if it's OK to retry methods that have been sent
83  			return true;
84  		}
85  		// otherwise do not retry
86  		return false;
87  	}
88  }