package org.archive.crawler.datamodel;

import java.util.Map;
import java.util.Hashtable;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.commons.collections.Closure;
import org.apache.commons.httpclient.URIException;
import org.archive.crawler.framework.CrawlController;
import org.archive.crawler.settings.SettingsHandler;

/**
 * Server and Host cache.
 * @author stack
 * @version $Date: 2007-08-28 05:15:25 +0000 (Tue, 28 Aug 2007) $, $Revision: 5439 $
 */
public class ServerCache {
    private static Logger logger =
        Logger.getLogger(ServerCache.class.getName());

    protected SettingsHandler settingsHandler = null;

    /**
     * hostname[:port] -> CrawlServer.
     * Set in the initialization.
     */
    protected Map<String,CrawlServer> servers = null;

    /**
     * hostname -> CrawlHost.
     * Set in the initialization.
     */
    protected Map<String,CrawlHost> hosts = null;

    /**
     * Constructor.
     * Restricts access to the default constructor by making it protected.
     */
    protected ServerCache() {
        super();
    }

    /**
     * This constructor creates a ServerCache that is entirely memory-based,
     * using Hashtables. It is intended for unit testing only
     * (use {@link #ServerCache(CrawlController)} when crawling).
     * @param sh SettingsHandler to use.
     * @throws Exception
     */
    public ServerCache(final SettingsHandler sh)
    throws Exception {
        this.settingsHandler = sh;
        this.servers = new Hashtable<String,CrawlServer>();
        this.hosts = new Hashtable<String,CrawlHost>();
    }

    /**
     * This constructor creates a ServerCache backed by the passed
     * CrawlController (use this constructor when crawling).
     * @param c CrawlController to query for settings and backing maps.
     * @throws Exception
     */
    public ServerCache(final CrawlController c)
    throws Exception {
        this.settingsHandler = c.getSettingsHandler();
        this.servers = c.getBigMap("servers", String.class, CrawlServer.class);
        this.hosts = c.getBigMap("hosts", String.class, CrawlHost.class);
    }
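
    /*
     * Usage sketch (illustrative only; "sh" stands in for a configured
     * SettingsHandler and is not part of this class): the SettingsHandler
     * constructor yields a purely in-memory cache for tests, while the
     * CrawlController constructor wires the cache to the controller's
     * settings and BigMap-backed maps for real crawls.
     *
     *   ServerCache testCache = new ServerCache(sh);
     *   CrawlServer server = testCache.getServerFor("example.com:8080");
     */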

    /**
     * Get the {@link CrawlServer} associated with <code>serverKey</code>.
     * @param serverKey Server name we're to return a server for.
     * @return CrawlServer instance that matches the passed server name.
     */
    public synchronized CrawlServer getServerFor(String serverKey) {
        CrawlServer cserver = this.servers.get(serverKey);
        return (cserver != null)? cserver: createServerFor(serverKey);
    }

    protected CrawlServer createServerFor(String s) {
        CrawlServer cserver = this.servers.get(s);
        if (cserver != null) {
            return cserver;
        }
        // Store a defensive copy of the key string.
        String skey = new String(s);
        cserver = new CrawlServer(skey);
        cserver.setSettingsHandler(settingsHandler);
        servers.put(skey, cserver);
        if (logger.isLoggable(Level.FINER)) {
            logger.finer("Created server " + s);
        }
        return cserver;
    }
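
    /*
     * Illustrative sketch only: getServerFor is a read-through cache, so
     * repeated lookups for the same hostname[:port] key return the same
     * CrawlServer instance; createServerFor only builds a new one when the
     * key is absent.
     *
     *   CrawlServer first  = cache.getServerFor("example.com:8080");
     *   CrawlServer second = cache.getServerFor("example.com:8080");
     *   assert first == second;   // second call is served from the map
     */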

    /**
     * Get the {@link CrawlServer} associated with <code>cauri</code>.
     * @param cauri CandidateURI we're to get a server for.
     * @return CrawlServer instance that matches the passed CandidateURI.
     */
    public CrawlServer getServerFor(CandidateURI cauri) {
        CrawlServer cs = null;
        try {
            String key = CrawlServer.getServerKey(cauri);
            // A null key means no server could be associated with this URI.
            if (key != null) {
                cs = getServerFor(key);
            }
        } catch (URIException e) {
            logger.severe(e.getMessage() + ": " + cauri);
            e.printStackTrace();
        } catch (NullPointerException npe) {
            logger.severe(npe.getMessage() + ": " + cauri);
            npe.printStackTrace();
        }
        return cs;
    }

    /**
     * Get the {@link CrawlHost} associated with <code>hostname</code>.
     * @param hostname Host name we're to return a Host for.
     * @return CrawlHost instance that matches the passed host name.
     */
    public synchronized CrawlHost getHostFor(String hostname) {
        if (hostname == null || hostname.length() == 0) {
            return null;
        }
        CrawlHost host = this.hosts.get(hostname);
        return (host != null)? host: createHostFor(hostname);
    }

    protected CrawlHost createHostFor(String hostname) {
        if (hostname == null || hostname.length() == 0) {
            return null;
        }
        CrawlHost host = this.hosts.get(hostname);
        if (host != null) {
            return host;
        }
        // Store a defensive copy of the key string.
        String hkey = new String(hostname);
        host = new CrawlHost(hkey);
        this.hosts.put(hkey, host);
        if (logger.isLoggable(Level.FINE)) {
            logger.fine("Created host " + hostname);
        }
        return host;
    }
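
    /*
     * Illustrative sketch only: getHostFor behaves the same way for hosts,
     * and returns null rather than creating an entry when the host name is
     * null or empty.
     *
     *   CrawlHost a = cache.getHostFor("example.com");
     *   CrawlHost b = cache.getHostFor("example.com");
     *   assert a == b;                       // cached instance is reused
     *   assert cache.getHostFor("") == null; // empty names are rejected
     */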

    /**
     * Get the {@link CrawlHost} associated with <code>cauri</code>.
     * @param cauri CandidateURI we're to return a Host for.
     * @return CrawlHost instance that matches the host of the passed
     * CandidateURI.
     */
    public CrawlHost getHostFor(CandidateURI cauri) {
        CrawlHost h = null;
        try {
            h = getHostFor(cauri.getUURI().getReferencedHost());
        } catch (URIException e) {
            e.printStackTrace();
        }
        return h;
    }

    /**
     * @param serverKey Key to use doing lookup.
     * @return True if a server instance exists.
     */
    public boolean containsServer(String serverKey) {
        return servers.get(serverKey) != null;
    }

    /**
     * @param hostKey Key to use doing lookup.
     * @return True if a host instance exists.
     */
    public boolean containsHost(String hostKey) {
        return hosts.get(hostKey) != null;
    }
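
    /*
     * Illustrative sketch only: unlike getServerFor/getHostFor, the
     * contains* methods are pure lookups and never add entries.
     *
     *   boolean known = cache.containsHost("example.com"); // false until...
     *   cache.getHostFor("example.com");                    // ...a lookup creates it
     *   known = cache.containsHost("example.com");          // now true
     */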

    /**
     * Called when shutting down the cache so we can do clean up.
     */
    public void cleanup() {
        if (this.hosts != null) {
            this.hosts.clear();
            this.hosts = null;
        }
        if (this.servers != null) {
            this.servers.clear();
            this.servers = null;
        }
    }

    public void forAllHostsDo(Closure c) {
        for (CrawlHost host : hosts.values()) {
            c.execute(host);
        }
    }
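
    /*
     * Illustrative sketch only: forAllHostsDo applies an Apache Commons
     * Collections Closure to every cached CrawlHost, e.g. to report each
     * cached host at the end of a crawl.
     *
     *   serverCache.forAllHostsDo(new Closure() {
     *       public void execute(Object o) {
     *           CrawlHost host = (CrawlHost) o;
     *           System.out.println("cached host: " + host);
     *       }
     *   });
     */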
}