1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 package org.archive.crawler.datamodel;
27
/**
 * Keys under which the core crawler classes store attributes on a
 * CrawlURI.
 *
 * <p>Fields declared in an interface are implicitly
 * {@code public static final}, so those modifiers are left out below.
 *
 * @author gojomo
 */
public interface CoreAttributeConstants {

    /**
     * Extracted MIME type of the fetched content. The fetching module
     * should set it right away when it can, instead of leaving it to a
     * later analyzer.
     */
    String A_CONTENT_TYPE = "content-type";

    /**
     * Multiplier applied to the last fetch duration to get the wait before
     * fetching another item of the same class (e.g. host).
     */
    String A_DELAY_FACTOR = "delay-factor";

    /**
     * Minimum delay before fetching another item of the same class
     * (e.g. host). This period is waited even when
     * lastFetchTime*delayFactor comes out smaller.
     */
    String A_MINIMUM_DELAY = "minimum-delay";

    // Keys related to DNS lookups and fetch bookkeeping. The types of the
    // stored values are decided by the code that sets them (not visible
    // from this interface).
    String A_RRECORD_SET_LABEL = "dns-records";
    String A_DNS_FETCH_TIME = "dns-fetch-time";
    String A_DNS_SERVER_IP_LABEL = "dns-server-ip";
    String A_FETCH_BEGAN_TIME = "fetch-began-time";
    String A_FETCH_COMPLETED_TIME = "fetch-completed-time";
    String A_HTTP_TRANSACTION = "http-transaction";

    // Keys for error information attached to a URI.
    String A_RUNTIME_EXCEPTION = "runtime-exception";
    String A_LOCALIZED_ERRORS = "localized-errors";

    /**
     * Shorthand string tokens indicating notable occurrences, separated
     * by commas.
     */
    String A_ANNOTATIONS = "annotations";

    // Further per-URI keys; see the classes that read and write them for
    // the exact meaning of each value.
    String A_PREREQUISITE_URI = "prerequisite-uri";
    String A_DISTANCE_FROM_SEED = "distance-from-seed";
    String A_HTML_BASE = "html-base-href";
    String A_RETRY_DELAY = "retry-delay";

    String A_META_ROBOTS = "meta-robots";

    /**
     * Defined for org.archive.crawler.writer.MirrorWriterProcessor.
     */
    String A_MIRROR_PATH = "mirror-path";

    /**
     * Key to get credential avatars from A_LIST.
     */
    String A_CREDENTIAL_AVATARS_KEY = "credential-avatars";

    /** A 'source' (usually a URI) that is inherited by discovered URIs. */
    String A_SOURCE_TAG = "source";

    /**
     * Key to an (optional) attribute holding a list of keys that are
     * passed on to CandidateURIs that 'descend' from (are discovered via)
     * this URI.
     */
    String A_HERITABLE_KEYS = "heritable";

    /** Flag indicating the containing queue should be retired. */
    String A_FORCE_RETIRE = "force-retire";

    /** Local override of proxy host. */
    String A_HTTP_PROXY_HOST = "http-proxy-host";

    /** Local override of proxy port. */
    String A_HTTP_PROXY_PORT = "http-proxy-port";

    /** Local override of origin bind address. */
    String A_HTTP_BIND_ADDRESS = "http-bind-address";

    /**
     * Suffix shared by the fetch truncation codes that appear in
     * {@link CrawlURI} annotations: every truncation annotation ends with
     * <code>TRUNC_SUFFIX</code>. (TODO: Make for-sure unique or redo
     * truncation so a definitive flag is marked against {@link CrawlURI}.)
     */
    String TRUNC_SUFFIX = "Trunc";

    /** Truncation code built from "header" and {@link #TRUNC_SUFFIX}. */
    String HEADER_TRUNC = "header" + TRUNC_SUFFIX;

    /** Truncation code built from "time" and {@link #TRUNC_SUFFIX}. */
    String TIMER_TRUNC = "time" + TRUNC_SUFFIX;

    /** Truncation code built from "len" and {@link #TRUNC_SUFFIX}. */
    String LENGTH_TRUNC = "len" + TRUNC_SUFFIX;

    /** Fetch history array. */
    String A_FETCH_HISTORY = "fetch-history";

    /** Content digest. */
    String A_CONTENT_DIGEST = "content-digest";

    /** Header name (and AList key) for the last-modified timestamp. */
    String A_LAST_MODIFIED_HEADER = "last-modified";

    /** Header name (and AList key) for the ETag. */
    String A_ETAG_HEADER = "etag";

    /** Key for status (when in history). */
    String A_STATUS = "status";

    /** Reference length (content length or virtual length). */
    String A_REFERENCE_LENGTH = "reference-length";

}