View Javadoc

1   /* Copyright (C) 2003 Internet Archive.
2    *
3    * This file is part of the Heritrix web crawler (crawler.archive.org).
4    *
5    * Heritrix is free software; you can redistribute it and/or modify
6    * it under the terms of the GNU Lesser Public License as published by
7    * the Free Software Foundation; either version 2.1 of the License, or
8    * any later version.
9    *
10   * Heritrix is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU Lesser Public License for more details.
14   *
15   * You should have received a copy of the GNU Lesser Public License
16   * along with Heritrix; if not, write to the Free Software
17   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18   *
19   * CoreAttributeConstants.java
20   * Created on Jun 17, 2003
21   *
22   * $Header: /cvsroot/archive-crawler/ArchiveOpenCrawler/src//**
23   * @author gojomo
24   *
25   */
26  package org.archive.crawler.datamodel;
27  
/**
 * CrawlURI attribute keys used by the core crawler classes.
 *
 * <p>Every field declared here is a compile-time {@code String} constant.
 * Interface fields are implicitly {@code public static final}; the modifiers
 * are spelled out on every declaration so that all keys read uniformly.
 *
 * @author gojomo
 */
public interface CoreAttributeConstants {

    /**
     * Extracted MIME type of fetched content; should be
     * set immediately by fetching module if possible
     * (rather than waiting for a later analyzer).
     */
    public static final String A_CONTENT_TYPE = "content-type";

    /**
     * Multiplier of last fetch duration to wait before
     * fetching another item of the same class (eg host).
     */
    public static final String A_DELAY_FACTOR = "delay-factor";

    /**
     * Minimum delay before fetching another item of the
     * same class (eg host). Even if lastFetchTime*delayFactor
     * is less than this, this period will be waited.
     */
    public static final String A_MINIMUM_DELAY = "minimum-delay";

    /** Key for the "dns-records" attribute. */
    public static final String A_RRECORD_SET_LABEL = "dns-records";

    /** Key for the "dns-fetch-time" attribute. */
    public static final String A_DNS_FETCH_TIME = "dns-fetch-time";

    /** Key for the "dns-server-ip" attribute. */
    public static final String A_DNS_SERVER_IP_LABEL = "dns-server-ip";

    /** Key for the "fetch-began-time" attribute. */
    public static final String A_FETCH_BEGAN_TIME = "fetch-began-time";

    /** Key for the "fetch-completed-time" attribute. */
    public static final String A_FETCH_COMPLETED_TIME = "fetch-completed-time";

    /** Key for the "http-transaction" attribute. */
    public static final String A_HTTP_TRANSACTION = "http-transaction";

    /** Key for the "runtime-exception" attribute. */
    public static final String A_RUNTIME_EXCEPTION = "runtime-exception";

    /** Key for the "localized-errors" attribute. */
    public static final String A_LOCALIZED_ERRORS = "localized-errors";

    /**
     * Shorthand string tokens indicating notable occurrences,
     * separated by commas.
     */
    public static final String A_ANNOTATIONS = "annotations";

    /** Key for the "prerequisite-uri" attribute. */
    public static final String A_PREREQUISITE_URI = "prerequisite-uri";

    /** Key for the "distance-from-seed" attribute. */
    public static final String A_DISTANCE_FROM_SEED = "distance-from-seed";

    /** Key for the "html-base-href" attribute. */
    public static final String A_HTML_BASE = "html-base-href";

    /** Key for the "retry-delay" attribute. */
    public static final String A_RETRY_DELAY = "retry-delay";

    /** Key for the "meta-robots" attribute. */
    public static final String A_META_ROBOTS = "meta-robots";

    /**
     * Define for org.archive.crawler.writer.MirrorWriterProcessor.
     */
    public static final String A_MIRROR_PATH = "mirror-path";

    /**
     * Key to get credential avatars from A_LIST.
     */
    public static final String A_CREDENTIAL_AVATARS_KEY = "credential-avatars";

    /** A 'source' (usu. URI) that's inherited by discovered URIs. */
    public static final String A_SOURCE_TAG = "source";

    /**
     * Key to (optional) attribute specifying a list of keys that
     * are passed to CandidateURIs that 'descend' (are discovered
     * via) this URI.
     */
    public static final String A_HERITABLE_KEYS = "heritable";

    /** Flag indicating the containing queue should be retired. */
    public static final String A_FORCE_RETIRE = "force-retire";

    /** Local override of proxy host. */
    public static final String A_HTTP_PROXY_HOST = "http-proxy-host";

    /** Local override of proxy port. */
    public static final String A_HTTP_PROXY_PORT = "http-proxy-port";

    /** Local override of origin bind address. */
    public static final String A_HTTP_BIND_ADDRESS = "http-bind-address";

    /**
     * Fetch truncation codes present in {@link CrawlURI} annotations.
     * All truncation annotations have a <code>TRUNC_SUFFIX</code> suffix (TODO:
     * Make for-sure unique or redo truncation so definitive flag marked
     * against {@link CrawlURI}).
     */
    public static final String TRUNC_SUFFIX = "Trunc";

    /** Truncation annotation; evaluates to {@code "headerTrunc"}. */
    public static final String HEADER_TRUNC = "header" + TRUNC_SUFFIX;

    /** Truncation annotation; evaluates to {@code "timeTrunc"}. */
    public static final String TIMER_TRUNC = "time" + TRUNC_SUFFIX;

    /** Truncation annotation; evaluates to {@code "lenTrunc"}. */
    public static final String LENGTH_TRUNC = "len" + TRUNC_SUFFIX;

    /* Duplication-reduction / recrawl / history constants */

    /** Fetch history array. */
    public static final String A_FETCH_HISTORY = "fetch-history";

    /** Content digest. */
    public static final String A_CONTENT_DIGEST = "content-digest";

    /** Header name (and AList key) for last-modified timestamp. */
    public static final String A_LAST_MODIFIED_HEADER = "last-modified";

    /** Header name (and AList key) for ETag. */
    public static final String A_ETAG_HEADER = "etag";

    /** Key for status (when in history). */
    public static final String A_STATUS = "status";

    /** Reference length (content length or virtual length). */
    public static final String A_REFERENCE_LENGTH = "reference-length";

}