package org.archive.crawler.datamodel;

/**
 * Constant flag codes to be used, in lieu of per-protocol
 * codes (like HTTP's 200, 404, etc.), when network/internal/
 * out-of-band conditions occur.
 *
 * The URISelector may use such codes, along with user-configured
 * options, to determine whether, when, and how many times
 * a CrawlURI might be reattempted.
 *
 * @author gojomo
 */
public interface FetchStatusCodes {
    /** fetch never tried (perhaps protocol unsupported or illegal URI) */
    public static final int S_UNATTEMPTED = 0;
    /** DNS lookup failed */
    public static final int S_DOMAIN_UNRESOLVABLE = -1;
    /** HTTP connect failed */
    public static final int S_CONNECT_FAILED = -2;
    /** HTTP connect broken */
    public static final int S_CONNECT_LOST = -3;
    /** HTTP timeout (before any meaningful response received) */
    public static final int S_TIMEOUT = -4;
    /** Unexpected runtime exception; see runtime-errors.log */
    public static final int S_RUNTIME_EXCEPTION = -5;
    /** DNS prerequisite failed, precluding attempt */
    public static final int S_DOMAIN_PREREQUISITE_FAILURE = -6;
    /** URI recognized as unsupported or illegal */
    public static final int S_UNFETCHABLE_URI = -7;
    /** multiple retries all failed */
    public static final int S_TOO_MANY_RETRIES = -8;

    /** temporary status assigned to URIs awaiting preconditions; appearance
     * in logs is a bug */
    public static final int S_DEFERRED = -50;
    /** URI could not be queued in the Frontier; when URIs are properly
     * filtered for format, this should never occur */
    public static final int S_UNQUEUEABLE = -60;

    /** Robots prerequisite failed, precluding attempt */
    public static final int S_ROBOTS_PREREQUISITE_FAILURE = -61;
    /** Other prerequisite failed, precluding attempt */
    public static final int S_OTHER_PREREQUISITE_FAILURE = -62;
    /** Prerequisite could not be scheduled, precluding attempt */
    public static final int S_PREREQUISITE_UNSCHEDULABLE_FAILURE = -63;

    /** severe java 'Error' conditions (OutOfMemoryError, StackOverflowError,
     * etc.) during URI processing */
    public static final int S_SERIOUS_ERROR = -3000;

    /** 'chaff' detection of traps/content of negligible value applied */
    public static final int S_DEEMED_CHAFF = -4000;
    /** overstepped link hops */
    public static final int S_TOO_MANY_LINK_HOPS = -4001;
    /** overstepped embed/trans hops */
    public static final int S_TOO_MANY_EMBED_HOPS = -4002;
    /** out-of-scope upon reexamination (only when scope changes during
     * crawl) */
    public static final int S_OUT_OF_SCOPE = -5000;
    /** blocked from fetch by user setting. */
    public static final int S_BLOCKED_BY_USER = -5001;
    /**
     * Blocked by custom prefetch processor.
     * A check against scope or against filters in a custom prefetch
     * processor rules that the CrawlURI should not be crawled.
     * TODO: Add to documentation and help page.
     */
    public static final int S_BLOCKED_BY_CUSTOM_PROCESSOR = -5002;
    /**
     * Blocked due to exceeding an established quota.
     * TODO: Add to documentation and help page.
     */
    public static final int S_BLOCKED_BY_QUOTA = -5003;
    /**
     * Blocked due to exceeding an established runtime limit.
     * TODO: Add to documentation and help page.
     */
    public static final int S_BLOCKED_BY_RUNTIME_LIMIT = -5004;
    /** deleted from frontier by user */
    public static final int S_DELETED_BY_USER = -6000;

    /** Processing thread was killed */
    public static final int S_PROCESSING_THREAD_KILLED = -7000;

    /** robots rules precluded fetch */
    public static final int S_ROBOTS_PRECLUDED = -9998;

    /** DNS success */
    public static final int S_DNS_SUCCESS = 1;
    /** InetAddress.getByName success */
    public static final int S_GETBYNAME_SUCCESS = 1001;
}
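
/*
 * Illustrative sketch only, not part of the original interface: one way a
 * retry decision might consult these codes, roughly as described in the
 * class comment above. The RetryPolicySketch class, its shouldRetry method,
 * and the maxRetries parameter are hypothetical names chosen for this
 * example; they are not Heritrix API.
 */
class RetryPolicySketch {
    /**
     * Returns true if a CrawlURI with the given fetch status looks like a
     * transient condition worth reattempting and the retry budget remains.
     */
    static boolean shouldRetry(int fetchStatus, int retriesSoFar, int maxRetries) {
        if (retriesSoFar >= maxRetries) {
            // budget exhausted; a real policy might record S_TOO_MANY_RETRIES
            return false;
        }
        switch (fetchStatus) {
            // transient network/precondition conditions: worth another attempt
            case FetchStatusCodes.S_CONNECT_FAILED:
            case FetchStatusCodes.S_CONNECT_LOST:
            case FetchStatusCodes.S_TIMEOUT:
            case FetchStatusCodes.S_DEFERRED:
                return true;
            // policy or permanent conditions: retrying will not help
            case FetchStatusCodes.S_UNFETCHABLE_URI:
            case FetchStatusCodes.S_ROBOTS_PRECLUDED:
            case FetchStatusCodes.S_BLOCKED_BY_USER:
            case FetchStatusCodes.S_OUT_OF_SCOPE:
                return false;
            default:
                // unknown or other codes: do not retry in this sketch
                return false;
        }
    }
}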