1   /* Copyright (C) 2003 Internet Archive.
2    *
3    * This file is part of the Heritrix web crawler (crawler.archive.org).
4    *
5    * Heritrix is free software; you can redistribute it and/or modify
6    * it under the terms of the GNU Lesser Public License as published by
7    * the Free Software Foundation; either version 2.1 of the License, or
8    * any later version.
9    *
10   * Heritrix is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU Lesser Public License for more details.
14   *
15   * You should have received a copy of the GNU Lesser Public License
16   * along with Heritrix; if not, write to the Free Software
17   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18   *
19   * FetchStatusCodes.java
20   * Created on Jun 19, 2003
21   *
22   * $Header$
23   */
24  package org.archive.crawler.datamodel;
25  
/**
 * Constant flag codes to be used, in lieu of per-protocol
 * codes (like HTTP's 200, 404, etc.), when network/internal/
 * out-of-band conditions occur.
 *
 * <p>The URISelector may use such codes, along with user-configured
 * options, to determine whether, when, and how many times
 * a CrawlURI might be reattempted.
 *
 * <p>Every failure/out-of-band code declared here is zero or negative;
 * the two success codes ({@link #S_DNS_SUCCESS} and
 * {@link #S_GETBYNAME_SUCCESS}) are positive.
 *
 * <p>Fields of an interface are implicitly {@code public static final},
 * so the modifiers are not repeated on each declaration.
 *
 * @author gojomo
 */
public interface FetchStatusCodes {

    // ---- basic fetch failures ------------------------------------------

    /** Fetch never tried (perhaps protocol unsupported or illegal URI). */
    int S_UNATTEMPTED = 0;

    /** DNS lookup failed. */
    int S_DOMAIN_UNRESOLVABLE = -1;

    /** HTTP connect failed. */
    int S_CONNECT_FAILED = -2;

    /** HTTP connect broken. */
    int S_CONNECT_LOST = -3;

    /** HTTP timeout (before any meaningful response received). */
    int S_TIMEOUT = -4;

    /** Unexpected runtime exception; see runtime-errors.log. */
    int S_RUNTIME_EXCEPTION = -5;

    /** DNS prerequisite failed, precluding attempt. */
    int S_DOMAIN_PREREQUISITE_FAILURE = -6;

    /** URI recognized as unsupported or illegal. */
    int S_UNFETCHABLE_URI = -7;

    /** Multiple retries all failed. */
    int S_TOO_MANY_RETRIES = -8;

    // ---- scheduling / prerequisite conditions --------------------------

    /**
     * Temporary status assigned URIs awaiting preconditions; appearance in
     * logs is a bug.
     */
    int S_DEFERRED = -50;

    /**
     * URI could not be queued in Frontier; when URIs are properly
     * filtered for format, should never occur.
     */
    int S_UNQUEUEABLE = -60;

    /** Robots prerequisite failed, precluding attempt. */
    int S_ROBOTS_PREREQUISITE_FAILURE = -61;

    /** Some other prerequisite (neither DNS nor robots) failed, precluding attempt. */
    int S_OTHER_PREREQUISITE_FAILURE = -62;

    /** A prerequisite (of any type) could not be scheduled, precluding attempt. */
    int S_PREREQUISITE_UNSCHEDULABLE_FAILURE = -63;

    // ---- severe errors -------------------------------------------------

    /**
     * Severe java 'Error' conditions (OutOfMemoryError, StackOverflowError,
     * etc.) during URI processing.
     */
    int S_SERIOUS_ERROR = -3000;

    // ---- policy / scope exclusions -------------------------------------

    /** 'Chaff' detection of traps/content of negligible value applied. */
    int S_DEEMED_CHAFF = -4000;

    /** Overstepped link hops. */
    int S_TOO_MANY_LINK_HOPS = -4001;

    /** Overstepped embed/trans hops. */
    int S_TOO_MANY_EMBED_HOPS = -4002;

    /**
     * Out-of-scope upon reexamination (only when scope changes during
     * crawl).
     */
    int S_OUT_OF_SCOPE = -5000;

    /** Blocked from fetch by user setting. */
    int S_BLOCKED_BY_USER = -5001;

    /**
     * Blocked by custom prefetcher processor.
     * A check against scope or against filters in a custom prefetch
     * processor rules CrawlURI should not be crawled.
     * TODO: Add to documentation and help page.
     */
    int S_BLOCKED_BY_CUSTOM_PROCESSOR = -5002;

    /**
     * Blocked due to exceeding an established quota.
     * TODO: Add to documentation and help page.
     */
    int S_BLOCKED_BY_QUOTA = -5003;

    /**
     * Blocked due to exceeding an established runtime.
     * TODO: Add to documentation and help page.
     */
    int S_BLOCKED_BY_RUNTIME_LIMIT = -5004;

    /** Deleted from frontier by user. */
    int S_DELETED_BY_USER = -6000;

    /** Processing thread was killed. */
    int S_PROCESSING_THREAD_KILLED = -7000;

    /** Robots rules precluded fetch. */
    int S_ROBOTS_PRECLUDED = -9998;

    // ---- success codes -------------------------------------------------

    /** DNS success. */
    int S_DNS_SUCCESS = 1;

    /** InetAddress.getByName success. */
    int S_GETBYNAME_SUCCESS = 1001;
}
118 
119