/* Copyright (C) 2003 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * CrawlController.java
 * Created on May 14, 2003
 *
 * $Id: CrawlController.java 5507 2007-10-05 21:31:54Z gojomo $
 */
package org.archive.crawler.framework;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.PrintWriter;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EventObject;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.locks.ReentrantLock;
import java.util.logging.FileHandler;
import java.util.logging.Formatter;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.management.AttributeNotFoundException;
import javax.management.InvalidAttributeValueException;
import javax.management.MBeanException;
import javax.management.ReflectionException;

import org.apache.commons.httpclient.URIException;
import org.archive.crawler.admin.CrawlJob;
import org.archive.crawler.admin.StatisticsTracker;
import org.archive.crawler.datamodel.Checkpoint;
import org.archive.crawler.datamodel.CrawlOrder;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.datamodel.ServerCache;
import org.archive.crawler.event.CrawlStatusListener;
import org.archive.crawler.event.CrawlURIDispositionListener;
import org.archive.crawler.framework.exceptions.FatalConfigurationException;
import org.archive.crawler.framework.exceptions.InitializationException;
import org.archive.crawler.io.LocalErrorFormatter;
import org.archive.crawler.io.RuntimeErrorFormatter;
import org.archive.crawler.io.StatisticsLogFormatter;
import org.archive.crawler.io.UriErrorFormatter;
import org.archive.crawler.io.UriProcessingFormatter;
import org.archive.crawler.settings.MapType;
import org.archive.crawler.settings.SettingsHandler;
import org.archive.crawler.util.CheckpointUtils;
import org.archive.io.GenerationFileHandler;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
import org.archive.util.ArchiveUtils;
import org.archive.util.CachedBdbMap;
import org.archive.util.FileUtils;
import org.archive.util.Reporter;
import org.archive.util.bdbje.EnhancedEnvironment;
import org.xbill.DNS.DClass;
import org.xbill.DNS.Lookup;

import com.sleepycat.bind.serial.StoredClassCatalog;
import com.sleepycat.je.CheckpointConfig;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.DbInternal;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.utilint.DbLsn;

/**
 * CrawlController collects all the classes which cooperate to
 * perform a crawl and provides a high-level interface to the
 * running crawl.
 *
 * As the "global context" for a crawl, subcomponents will
 * often reach each other through the CrawlController.
 *
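 * <p>For orientation, a minimal sketch of the expected lifecycle, assuming a
 * job's SettingsHandler has already been constructed and loaded (the handler
 * setup is elided; the variable names are illustrative):
 * <pre>
 * SettingsHandler handler = ...; // e.g. an XMLSettingsHandler for the job's order.xml
 * CrawlController controller = new CrawlController();
 * controller.initialize(handler);   // wires up scope, frontier, logs, bdb, etc.
 * controller.requestCrawlStart();   // starts toe threads working the frontier
 * </pre>
 *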
 * @author Gordon Mohr
 */
public class CrawlController implements Serializable, Reporter {
    // be robust against trivial implementation changes
    private static final long serialVersionUID =
        ArchiveUtils.classnameBasedUID(CrawlController.class,1);

    /**
     * Messages from the CrawlController.
     *
     * They appear on the console.
     */
    private final static Logger LOGGER =
        Logger.getLogger(CrawlController.class.getName());

    // manifest support
    /** abbreviation label for config files in manifest */
    public static final char MANIFEST_CONFIG_FILE = 'C';
    /** abbreviation label for report files in manifest */
    public static final char MANIFEST_REPORT_FILE = 'R';
    /** abbreviation label for log files in manifest */
    public static final char MANIFEST_LOG_FILE = 'L';

    // key log names
    private static final String LOGNAME_PROGRESS_STATISTICS =
        "progress-statistics";
    private static final String LOGNAME_URI_ERRORS = "uri-errors";
    private static final String LOGNAME_RUNTIME_ERRORS = "runtime-errors";
    private static final String LOGNAME_LOCAL_ERRORS = "local-errors";
    private static final String LOGNAME_CRAWL = "crawl";

    // key subcomponents which define and implement a crawl in progress
    private transient CrawlOrder order;
    private transient CrawlScope scope;
    private transient ProcessorChainList processorChains;

    private transient Frontier frontier;

    private transient ToePool toePool;

    private transient ServerCache serverCache;

    // This gets passed into the initialize method.
    private transient SettingsHandler settingsHandler;


    // Used to enable/disable single-threaded operation after OOM
    private volatile transient boolean singleThreadMode = false;
    private transient ReentrantLock singleThreadLock = null;

    // emergency reserve of memory to allow some progress/reporting after OOM
    private transient LinkedList<char[]> reserveMemory;
    private static final int RESERVE_BLOCKS = 1;
    // 6MB. Note '^' is XOR in Java, so the earlier "6*2^20" actually
    // evaluated to (6*2)^20 == 24; a shift expresses the intended size.
    private static final int RESERVE_BLOCK_SIZE = 6 << 20; // 6MB

    // crawl state: as requested or actual

    /**
     * Crawl exit status.
     */
    private transient String sExit;

    private static final Object NASCENT = "NASCENT".intern();
    private static final Object RUNNING = "RUNNING".intern();
    private static final Object PAUSED = "PAUSED".intern();
    private static final Object PAUSING = "PAUSING".intern();
    private static final Object CHECKPOINTING = "CHECKPOINTING".intern();
    private static final Object STOPPING = "STOPPING".intern();
    private static final Object FINISHED = "FINISHED".intern();
    private static final Object STARTED = "STARTED".intern();
    private static final Object PREPARING = "PREPARING".intern();

    transient private Object state = NASCENT;

    // disk paths
    private transient File disk;        // overall disk path
    private transient File logsDisk;    // for log files

    /**
     * For temp files representing state of crawler (eg queues)
     */
    private transient File stateDisk;

    /**
     * For discardable temp files (eg fetch buffers).
     */
    private transient File scratchDisk;

    /**
     * Directory that holds checkpoint.
     */
    private transient File checkpointsDisk;

    /**
     * Checkpointer.
     * Knows if a checkpoint is in progress and what the name of the
     * checkpoint is.  Also runs checkpoints.
     */
    private Checkpointer checkpointer;

    /**
     * Set to the checkpoint we are recovering from, if we are in
     * checkpoint-recover mode.  Gets set up by {@link #getCheckpointRecover()}.
     */
    private transient Checkpoint checkpointRecover = null;

    // crawl limits
    private long maxBytes;
    private long maxDocument;
    private long maxTime;

    /**
     * A manifest of all files used/created during this crawl. Written to file
     * at the end of the crawl (the absolute last thing done).
     */
    private StringBuffer manifest;

    /**
     * Record of fileHandlers established for loggers,
     * assisting file rotation.
     */
    transient private Map<Logger,FileHandler> fileHandlers;

    /** suffix to use on active logs */
    public static final String CURRENT_LOG_SUFFIX = ".log";

    /**
     * Crawl progress logger.
     *
     * No exceptions.  Logs the summary result of each URI's processing.
     */
    public transient Logger uriProcessing;

    /**
     * This logger contains unexpected runtime errors.
     *
     * Would contain errors trying to set up a job or failures inside
     * processors that they are not prepared to recover from.
     */
    public transient Logger runtimeErrors;

    /**
     * This logger is for job-scoped logging, specifically errors which
     * happen and are handled within a particular processor.
     *
     * Examples would be socket timeouts, exceptions thrown by extractors, etc.
     */
    public transient Logger localErrors;

    /**
     * Special log for URI format problems, wherever they may occur.
     */
    public transient Logger uriErrors;

    /**
     * Statistics tracker writes here at regular intervals.
     */
    private transient Logger progressStats;

    /**
     * Logger to hold job summary report.
     *
     * Large state reports made at infrequent intervals (e.g. job ending) go
     * here.
     */
    public transient Logger reports;

    protected StatisticsTracking statistics = null;

    /**
     * List of crawl status listeners.
     *
     * All iterations need to synchronize on this object if they're to avoid
     * concurrent modification exceptions.
     * See {@link java.util.Collections#synchronizedList(List)}.
     */
    private transient List<CrawlStatusListener> registeredCrawlStatusListeners =
        Collections.synchronizedList(new ArrayList<CrawlStatusListener>());

    // Since there is a high probability that there will only ever be one
    // CrawlURIDispositionListener, we use this field while there is only one:
    private transient CrawlURIDispositionListener
        registeredCrawlURIDispositionListener;

    // And then switch to the list once there is more than one.
    protected transient ArrayList<CrawlURIDispositionListener>
        registeredCrawlURIDispositionListeners;

    /** Shared bdb Environment for Frontier subcomponents */
    // TODO: investigate using multiple environments to split disk accesses
    // across separate physical disks
    private transient EnhancedEnvironment bdbEnvironment = null;

    /**
     * Keep a list of all BigMap instances made -- shouldn't be many -- so that
     * we can checkpoint them.
     */
    private transient Map<String,CachedBdbMap<?,?>> bigmaps = null;

    /**
     * Default constructor
     */
    public CrawlController() {
        super();
        // Defer most setup to initialize methods
    }

    /**
     * Starting from nothing, set up CrawlController and associated
     * classes to be ready for a first crawl.
     *
     * @param sH Settings handler.
     * @throws InitializationException
     */
    public void initialize(SettingsHandler sH)
    throws InitializationException {
        sendCrawlStateChangeEvent(PREPARING, CrawlJob.STATUS_PREPARING);

        this.singleThreadLock = new ReentrantLock();
        this.settingsHandler = sH;
        installThreadContextSettingsHandler();
        this.order = settingsHandler.getOrder();
        this.order.setController(this);
        this.bigmaps = new Hashtable<String,CachedBdbMap<?,?>>();
        sExit = "";
        this.manifest = new StringBuffer();
        String onFailMessage = "";
        try {
            onFailMessage = "You must set the User-Agent and From HTTP" +
            " header values to acceptable strings. \n" +
            " User-Agent: [software-name](+[info-url])[misc]\n" +
            " From: [email-address]\n";
            order.checkUserAgentAndFrom();

            onFailMessage = "Unable to setup disk";
            if (disk == null) {
                setupDisk();
            }

            onFailMessage = "Unable to create log file(s)";
            setupLogs();

            // Figure whether we're to do a checkpoint restore. If so, get the
            // checkpointRecover instance and then put into place the old bdb
            // log files. If any of the log files already exist in the target
            // state directory, WE DO NOT OVERWRITE (makes for faster
            // recovery). CrawlController checkpoint-recovery code manages
            // restoration of the old StatisticsTracker, any BigMaps used by
            // the crawler, and the moving of bdb log files into place only.
            // Other objects interested in recovery need to ask
            // CrawlController#isCheckpointRecover to figure whether we're in
            // recovery, and then take appropriate recovery action
            // (these objects can call CrawlController#getCheckpointRecover
            // to get the directory that might hold the files/objects dropped
            // at checkpointing time).  Such objects will need to use a
            // technique other than object serialization for restoring
            // settings, because they'll have already been constructed by the
            // time each object is asked whether it is to recover itself.
            // See ARCWriterProcessor for an example.
            onFailMessage = "Unable to test/run checkpoint recover";
            this.checkpointRecover = getCheckpointRecover();
            if (this.checkpointRecover == null) {
                this.checkpointer =
                    new Checkpointer(this, this.checkpointsDisk);
            } else {
                setupCheckpointRecover();
            }

            onFailMessage = "Unable to setup bdb environment.";
            setupBdb();

            onFailMessage = "Unable to setup statistics";
            setupStatTracking();

            onFailMessage = "Unable to setup crawl modules";
            setupCrawlModules();
        } catch (Exception e) {
            String tmp = "On crawl: "
                + settingsHandler.getSettingsObject(null).getName() + " " +
                onFailMessage;
            LOGGER.log(Level.SEVERE, tmp, e);
            throw new InitializationException(tmp, e);
        }

        // Force creation of the DNS cache now -- avoids CacheCleaner in the
        // toe-threads group; also cap its size at 1 (we never want a cached
        // value; 0 is non-operative).
        Lookup.getDefaultCache(DClass.IN).setMaxEntries(1);
        //dns.getRecords("localhost", Type.A, DClass.IN);

        setupToePool();
        setThresholds();

        reserveMemory = new LinkedList<char[]>();
        // Start at 0 so RESERVE_BLOCKS blocks are actually allocated
        // (starting at 1 would allocate none when RESERVE_BLOCKS is 1).
        for (int i = 0; i < RESERVE_BLOCKS; i++) {
            reserveMemory.add(new char[RESERVE_BLOCK_SIZE]);
        }
    }

    /**
     * Utility method to install this crawl's SettingsHandler into the
     * 'global' (for this thread) holder, so that any subsequent
     * deserialization operations in this thread can find it.
     */
    public void installThreadContextSettingsHandler() {
        SettingsHandler.setThreadContextSettingsHandler(settingsHandler);
    }

    /**
     * Does setup of checkpoint recover.
     * Copies bdb log files into state dir.
     * @throws IOException
     */
    protected void setupCheckpointRecover()
    throws IOException {
        long started = System.currentTimeMillis();
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("Starting recovery setup -- copying into place " +
                "bdbje log files -- for checkpoint named " +
                this.checkpointRecover.getDisplayName());
        }
        // Mark context we're in a recovery.
        this.checkpointer.recover(this);
        this.progressStats.info("CHECKPOINT RECOVER " +
            this.checkpointRecover.getDisplayName());
        // Copy the bdb log files to the state dir so we don't damage the
        // old checkpoint.  With thousands of log files, this can take
        // tens of minutes (1000 logs take ~5 minutes to copy in java,
        // dependent upon hardware).  If a log file already exists over in the
        // target state directory, we do not overwrite -- we assume the log
        // file in the target is the same as the one we'd copy from the
        // checkpoint dir.
        File bdbSubDir = CheckpointUtils.
            getBdbSubDirectory(this.checkpointRecover.getDirectory());
        List<IOException> errs = new ArrayList<IOException>();
        FileUtils.copyFiles(bdbSubDir, CheckpointUtils.getJeLogsFilter(),
            getStateDisk(), true, false, errs);
        for (IOException ioe : errs) {
            LOGGER.log(Level.SEVERE, "Problem copying checkpoint files: " +
                "checkpoint may be corrupt", ioe);
        }
        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.info("Finished recovery setup for checkpoint named " +
                this.checkpointRecover.getDisplayName() + " in " +
                (System.currentTimeMillis() - started) + "ms.");
        }
    }

    protected boolean getCheckpointCopyBdbjeLogs() {
        return ((Boolean)this.order.getUncheckedAttribute(null,
            CrawlOrder.ATTR_CHECKPOINT_COPY_BDBJE_LOGS)).booleanValue();
    }

    private void setupBdb()
    throws FatalConfigurationException, AttributeNotFoundException {
        EnvironmentConfig envConfig = new EnvironmentConfig();
        envConfig.setAllowCreate(true);
        int bdbCachePercent = ((Integer)this.order.
            getAttribute(null, CrawlOrder.ATTR_BDB_CACHE_PERCENT)).intValue();
        if (bdbCachePercent > 0) {
            // Operator has expressed a preference; override BDB default or
            // je.properties value
            envConfig.setCachePercent(bdbCachePercent);
        }
        envConfig.setLockTimeout(5000000); // 5 seconds (value is microseconds)
        if (LOGGER.isLoggable(Level.FINEST)) {
            envConfig.setConfigParam("java.util.logging.level", "SEVERE");
            envConfig.setConfigParam("java.util.logging.level.evictor",
                "SEVERE");
            envConfig.setConfigParam("java.util.logging.ConsoleHandler.on",
                "true");
        }

        if (!getCheckpointCopyBdbjeLogs()) {
            // If we are not copying files on checkpoint, then set bdbje to not
            // remove its log files so that it's possible to later assemble
            // (manually) everything needed to run a recovery using a mix of
            // current bdbje logs and those marked for deletion.
            envConfig.setConfigParam("je.cleaner.expunge", "false");
        }

        try {
            this.bdbEnvironment = new EnhancedEnvironment(getStateDisk(), envConfig);
            if (LOGGER.isLoggable(Level.FINE)) {
                // Write out the bdb configuration.
                envConfig = bdbEnvironment.getConfig();
                LOGGER.fine("BdbConfiguration: Cache percentage " +
                    envConfig.getCachePercent() +
                    ", cache size " + envConfig.getCacheSize());
            }
        } catch (DatabaseException e) {
            e.printStackTrace();
            throw new FatalConfigurationException(e.getMessage());
        }
    }

    /**
     * @return the shared EnhancedEnvironment
     */
    public EnhancedEnvironment getBdbEnvironment() {
        return this.bdbEnvironment;
    }

    /**
     * @deprecated use EnhancedEnvironment's getClassCatalog() instead
     */
    public StoredClassCatalog getClassCatalog() {
        return this.bdbEnvironment.getClassCatalog();
    }

    /**
     * Register for CrawlStatus events.
     *
     * @param cl a class implementing the CrawlStatusListener interface
     *
     * @see CrawlStatusListener
     */
    public void addCrawlStatusListener(CrawlStatusListener cl) {
        synchronized (this.registeredCrawlStatusListeners) {
            this.registeredCrawlStatusListeners.add(cl);
        }
    }

    /**
     * Register for CrawlURIDisposition events.
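     *
     * <p>A minimal registration sketch, assuming the interface consists of the
     * four disposition callbacks invoked by the fire* methods below (the
     * counter fields are illustrative):
     * <pre>
     * controller.addCrawlURIDispositionListener(new CrawlURIDispositionListener() {
     *     public void crawledURISuccessful(CrawlURI curi) { successes++; }
     *     public void crawledURINeedRetry(CrawlURI curi) { retries++; }
     *     public void crawledURIDisregard(CrawlURI curi) { }
     *     public void crawledURIFailure(CrawlURI curi) { failures++; }
     * });
     * </pre>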
     *
     * @param cl a class implementing the CrawlURIDispositionListener interface
     *
     * @see CrawlURIDispositionListener
     */
    public void addCrawlURIDispositionListener(CrawlURIDispositionListener cl) {
        registeredCrawlURIDispositionListener = null;
        if (registeredCrawlURIDispositionListeners == null) {
            // First listener: keep a direct reference to it while it is
            // the only one.
            registeredCrawlURIDispositionListener = cl;
            // We expect the list to stay very small.
            registeredCrawlURIDispositionListeners
                = new ArrayList<CrawlURIDispositionListener>(1);
        }
        registeredCrawlURIDispositionListeners.add(cl);
    }

    /**
     * Allows an external class to raise a CrawlURIDisposition
     * crawledURISuccessful event that will be broadcast to all listeners that
     * have registered with the CrawlController.
     *
     * @param curi - The CrawlURI that will be sent with the event notification.
     *
     * @see CrawlURIDispositionListener#crawledURISuccessful(CrawlURI)
     */
    public void fireCrawledURISuccessfulEvent(CrawlURI curi) {
        if (registeredCrawlURIDispositionListener != null) {
            // Then we'll just use that.
            registeredCrawlURIDispositionListener.crawledURISuccessful(curi);
        } else {
            // Go through the list.
            if (registeredCrawlURIDispositionListeners != null
                    && registeredCrawlURIDispositionListeners.size() > 0) {
                for (Iterator it =
                        registeredCrawlURIDispositionListeners.iterator();
                        it.hasNext();) {
                    ((CrawlURIDispositionListener)it.next())
                        .crawledURISuccessful(curi);
                }
            }
        }
    }

    /**
     * Allows an external class to raise a CrawlURIDisposition
     * crawledURINeedRetry event that will be broadcast to all listeners that
     * have registered with the CrawlController.
     *
     * @param curi - The CrawlURI that will be sent with the event notification.
     *
     * @see CrawlURIDispositionListener#crawledURINeedRetry(CrawlURI)
     */
    public void fireCrawledURINeedRetryEvent(CrawlURI curi) {
        if (registeredCrawlURIDispositionListener != null) {
            // Then we'll just use that.
            registeredCrawlURIDispositionListener.crawledURINeedRetry(curi);
            return;
        }

        // Go through the list.
        if (registeredCrawlURIDispositionListeners != null
                && registeredCrawlURIDispositionListeners.size() > 0) {
            for (Iterator i = registeredCrawlURIDispositionListeners.iterator();
                    i.hasNext();) {
                ((CrawlURIDispositionListener)i.next()).crawledURINeedRetry(curi);
            }
        }
    }

    /**
     * Allows an external class to raise a CrawlURIDisposition
     * crawledURIDisregard event that will be broadcast to all listeners that
     * have registered with the CrawlController.
     *
     * @param curi -
     *            The CrawlURI that will be sent with the event notification.
     *
     * @see CrawlURIDispositionListener#crawledURIDisregard(CrawlURI)
     */
    public void fireCrawledURIDisregardEvent(CrawlURI curi) {
        if (registeredCrawlURIDispositionListener != null) {
            // Then we'll just use that.
            registeredCrawlURIDispositionListener.crawledURIDisregard(curi);
        } else {
            // Go through the list.
            if (registeredCrawlURIDispositionListeners != null
                    && registeredCrawlURIDispositionListeners.size() > 0) {
                for (Iterator it =
                        registeredCrawlURIDispositionListeners.iterator();
                        it.hasNext();) {
                    ((CrawlURIDispositionListener)it.next())
                        .crawledURIDisregard(curi);
                }
            }
        }
    }

    /**
     * Allows an external class to raise a CrawlURIDisposition
     * crawledURIFailure event that will be broadcast to all listeners that
     * have registered with the CrawlController.
     *
     * @param curi - The CrawlURI that will be sent with the event notification.
     *
     * @see CrawlURIDispositionListener#crawledURIFailure(CrawlURI)
     */
    public void fireCrawledURIFailureEvent(CrawlURI curi) {
        if (registeredCrawlURIDispositionListener != null) {
            // Then we'll just use that.
            registeredCrawlURIDispositionListener.crawledURIFailure(curi);
        } else {
            // Go through the list.
            if (registeredCrawlURIDispositionListeners != null
                    && registeredCrawlURIDispositionListeners.size() > 0) {
                for (Iterator it =
                        registeredCrawlURIDispositionListeners.iterator();
                        it.hasNext();) {
                    ((CrawlURIDispositionListener)it.next())
                        .crawledURIFailure(curi);
                }
            }
        }
    }

    private void setupCrawlModules() throws FatalConfigurationException,
             AttributeNotFoundException, MBeanException, ReflectionException {
        if (scope == null) {
            scope = (CrawlScope) order.getAttribute(CrawlScope.ATTR_NAME);
            scope.initialize(this);
        }
        try {
            this.serverCache = new ServerCache(this);
        } catch (Exception e) {
            throw new FatalConfigurationException("Unable to" +
               " initialize frontier (Failed setup of ServerCache) " + e);
        }

        if (this.frontier == null) {
            this.frontier = (Frontier)order.getAttribute(Frontier.ATTR_NAME);
            try {
                frontier.initialize(this);
                frontier.pause(); // Pause until begun
                // Run recovery if recoverPath points to a file (if it points
                // to a directory, it's a checkpoint recovery).
                // TODO: make recover path relative to job root dir.
                if (!isCheckpointRecover()) {
                    runFrontierRecover((String)order.
                        getAttribute(CrawlOrder.ATTR_RECOVER_PATH));
                }
            } catch (IOException e) {
                throw new FatalConfigurationException(
                    "unable to initialize frontier: " + e);
            }
        }

        // Setup processors
        if (processorChains == null) {
            processorChains = new ProcessorChainList(order);
        }
    }

    protected void runFrontierRecover(String recoverPath)
            throws AttributeNotFoundException, MBeanException,
            ReflectionException, FatalConfigurationException {
        if (recoverPath == null || recoverPath.length() <= 0) {
            return;
        }
        File f = new File(recoverPath);
        if (!f.exists()) {
            LOGGER.severe("Recover file does not exist " + recoverPath);
            return;
        }
        if (!f.isFile()) {
            // It's a directory, so we're supposed to be doing a checkpoint
            // recover instead.
            return;
        }
        boolean retainFailures = ((Boolean)order.
          getAttribute(CrawlOrder.ATTR_RECOVER_RETAIN_FAILURES)).booleanValue();
        try {
            frontier.importRecoverLog(recoverPath, retainFailures);
        } catch (IOException e) {
            e.printStackTrace();
            throw (FatalConfigurationException) new FatalConfigurationException(
                "Recover.log " + recoverPath + " problem: " + e).initCause(e);
        }
    }

    private void setupDisk() throws AttributeNotFoundException {
        String diskPath
            = (String) order.getAttribute(null, CrawlOrder.ATTR_DISK_PATH);
        this.disk = getSettingsHandler().
            getPathRelativeToWorkingDirectory(diskPath);
        this.disk.mkdirs();
        this.logsDisk = getSettingsDir(CrawlOrder.ATTR_LOGS_PATH);
        this.checkpointsDisk = getSettingsDir(CrawlOrder.ATTR_CHECKPOINTS_PATH);
        this.stateDisk = getSettingsDir(CrawlOrder.ATTR_STATE_PATH);
        this.scratchDisk = getSettingsDir(CrawlOrder.ATTR_SCRATCH_PATH);
    }

    /**
     * @return The logging directory, or null if there is a problem reading
     * the settings.
     */
    public File getLogsDir() {
        File f = null;
        try {
            f = getSettingsDir(CrawlOrder.ATTR_LOGS_PATH);
        } catch (AttributeNotFoundException e) {
            LOGGER.severe("Failed get of logs directory: " + e.getMessage());
        }
        return f;
    }

    /**
     * Return the full path to the directory named by <code>key</code>
     * in settings.
     * If the directory does not exist, it and all intermediary dirs
     * will be created.
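     *
     * <p>For example, as {@link #getLogsDir()} does for the logs directory:
     * <pre>
     * File logs = getSettingsDir(CrawlOrder.ATTR_LOGS_PATH);
     * // A relative setting resolves against this crawl's overall disk path.
     * </pre>
     *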
     * @param key Key of the directory-path setting to look up.
     * @return Full path to directory named by <code>key</code>.
     * @throws AttributeNotFoundException
     */
    public File getSettingsDir(String key)
    throws AttributeNotFoundException {
        String path = (String)order.getAttribute(null, key);
        File f = new File(path);
        if (!f.isAbsolute()) {
            f = new File(disk.getPath(), path);
        }
        if (!f.exists()) {
            f.mkdirs();
        }
        return f;
    }

    /**
     * Setup the statistics tracker.
     * The statistics object must be created before modules can use it.
     * Do it here now so that when modules retrieve the object from the
     * controller during initialization (which some do), it's in place.
     * @throws InvalidAttributeValueException
     * @throws FatalConfigurationException
     */
    private void setupStatTracking()
    throws InvalidAttributeValueException, FatalConfigurationException {
        MapType loggers = order.getLoggers();
        final String cstName = "crawl-statistics";
        if (loggers.isEmpty(null)) {
            if (!isCheckpointRecover() && this.statistics == null) {
                this.statistics = new StatisticsTracker(cstName);
            }
            loggers.addElement(null, (StatisticsTracker)this.statistics);
        }

        if (isCheckpointRecover()) {
            restoreStatisticsTracker(loggers, cstName);
        }

        for (Iterator it = loggers.iterator(null); it.hasNext();) {
            StatisticsTracking tracker = (StatisticsTracking)it.next();
            tracker.initialize(this);
            if (this.statistics == null) {
                this.statistics = tracker;
            }
        }
    }

    protected void restoreStatisticsTracker(MapType loggers,
        String replaceName)
    throws FatalConfigurationException {
        try {
            // Add the deserialized stats tracker to the settings system.
            loggers.removeElement(loggers.globalSettings(), replaceName);
            loggers.addElement(loggers.globalSettings(),
                (StatisticsTracker)this.statistics);
        } catch (Exception e) {
            throw convertToFatalConfigurationException(e);
        }
    }

    protected FatalConfigurationException
            convertToFatalConfigurationException(Exception e) {
        FatalConfigurationException fce =
            new FatalConfigurationException("Converted exception: " +
               e.getMessage());
        fce.setStackTrace(e.getStackTrace());
        return fce;
    }

    private void setupLogs() throws IOException {
        String logsPath = logsDisk.getAbsolutePath() + File.separatorChar;
        uriProcessing = Logger.getLogger(LOGNAME_CRAWL + "." + logsPath);
        runtimeErrors = Logger.getLogger(LOGNAME_RUNTIME_ERRORS + "." +
            logsPath);
        localErrors = Logger.getLogger(LOGNAME_LOCAL_ERRORS + "." + logsPath);
        uriErrors = Logger.getLogger(LOGNAME_URI_ERRORS + "." + logsPath);
        progressStats = Logger.getLogger(LOGNAME_PROGRESS_STATISTICS + "." +
            logsPath);

        this.fileHandlers = new HashMap<Logger,FileHandler>();

        setupLogFile(uriProcessing,
            logsPath + LOGNAME_CRAWL + CURRENT_LOG_SUFFIX,
            new UriProcessingFormatter(), true);

        setupLogFile(runtimeErrors,
            logsPath + LOGNAME_RUNTIME_ERRORS + CURRENT_LOG_SUFFIX,
            new RuntimeErrorFormatter(), true);

        setupLogFile(localErrors,
            logsPath + LOGNAME_LOCAL_ERRORS + CURRENT_LOG_SUFFIX,
            new LocalErrorFormatter(), true);

        setupLogFile(uriErrors,
            logsPath + LOGNAME_URI_ERRORS + CURRENT_LOG_SUFFIX,
            new UriErrorFormatter(), true);

        setupLogFile(progressStats,
            logsPath + LOGNAME_PROGRESS_STATISTICS + CURRENT_LOG_SUFFIX,
            new StatisticsLogFormatter(), true);
    }

    private void setupLogFile(Logger logger, String filename, Formatter f,
            boolean shouldManifest) throws IOException, SecurityException {
        GenerationFileHandler fh = new GenerationFileHandler(filename, true,
            shouldManifest);
        fh.setFormatter(f);
        logger.addHandler(fh);
        addToManifest(filename, MANIFEST_LOG_FILE, shouldManifest);
        logger.setUseParentHandlers(false);
        this.fileHandlers.put(logger, fh);
    }

    protected void rotateLogFiles(String generationSuffix)
    throws IOException {
        if (this.state != PAUSED && this.state != CHECKPOINTING) {
            throw new IllegalStateException("Pause crawl before requesting " +
                "log rotation.");
        }
        for (Iterator i = fileHandlers.keySet().iterator(); i.hasNext();) {
            Logger l = (Logger)i.next();
            GenerationFileHandler gfh =
                (GenerationFileHandler)fileHandlers.get(l);
            GenerationFileHandler newGfh =
                gfh.rotate(generationSuffix, CURRENT_LOG_SUFFIX);
            if (gfh.shouldManifest()) {
                addToManifest((String) newGfh.getFilenameSeries().get(1),
                    MANIFEST_LOG_FILE, newGfh.shouldManifest());
            }
            l.removeHandler(gfh);
            l.addHandler(newGfh);
            fileHandlers.put(l, newGfh);
        }
    }

    /**
     * Close all log files and remove handlers from loggers.
     */
    public void closeLogFiles() {
        for (Iterator i = fileHandlers.keySet().iterator(); i.hasNext();) {
            Logger l = (Logger)i.next();
            GenerationFileHandler gfh =
                (GenerationFileHandler)fileHandlers.get(l);
            gfh.close();
            l.removeHandler(gfh);
        }
    }

    /**
     * Sets the values for max bytes, docs and time based on crawl order.
     */
    private void setThresholds() {
        try {
            maxBytes =
                ((Long) order.getAttribute(CrawlOrder.ATTR_MAX_BYTES_DOWNLOAD))
                    .longValue();
        } catch (Exception e) {
            maxBytes = 0;
        }
        try {
            maxDocument =
                ((Long) order
                    .getAttribute(CrawlOrder.ATTR_MAX_DOCUMENT_DOWNLOAD))
                    .longValue();
        } catch (Exception e) {
            maxDocument = 0;
        }
        try {
            maxTime =
                ((Long) order.getAttribute(CrawlOrder.ATTR_MAX_TIME_SEC))
                    .longValue();
        } catch (Exception e) {
            maxTime = 0;
        }
    }

    /**
     * @return Object this controller is using to track crawl statistics
     */
    public StatisticsTracking getStatistics() {
        return statistics == null ?
            new StatisticsTracker("crawl-statistics") : this.statistics;
    }

    /**
     * Send crawl change event to all listeners.
     * @param newState State change we're to tell listeners about.
     * @param message Message on state change.
     * @see #sendCheckpointEvent(File) for special case event sending
     * telling listeners to checkpoint.
     */
    protected void sendCrawlStateChangeEvent(Object newState, String message) {
        synchronized (this.registeredCrawlStatusListeners) {
            this.state = newState;
            for (Iterator i = this.registeredCrawlStatusListeners.iterator();
                    i.hasNext();) {
                CrawlStatusListener l = (CrawlStatusListener)i.next();
                if (newState.equals(PAUSED)) {
                    l.crawlPaused(message);
                } else if (newState.equals(RUNNING)) {
                    l.crawlResuming(message);
                } else if (newState.equals(PAUSING)) {
                    l.crawlPausing(message);
                } else if (newState.equals(STARTED)) {
                    l.crawlStarted(message);
                } else if (newState.equals(STOPPING)) {
                    l.crawlEnding(message);
                } else if (newState.equals(FINISHED)) {
                    l.crawlEnded(message);
                } else if (newState.equals(PREPARING)) {
                    l.crawlResuming(message);
                } else {
                    throw new RuntimeException("Unknown state: " + newState);
                }
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.fine("Sent " + newState + " to " + l);
                }
            }
            LOGGER.fine("Sent " + newState);
        }
    }

    /**
     * Send the checkpoint event.
     * Has its own method apart from
     * {@link #sendCrawlStateChangeEvent(Object, String)} because checkpointing
     * throws an Exception (didn't want to have to wrap all of the
     * sendCrawlStateChangeEvent calls in try/catches).
     * @param checkpointDir Where to write checkpoint state to.
     * @throws Exception
     */
    protected void sendCheckpointEvent(File checkpointDir) throws Exception {
        synchronized (this.registeredCrawlStatusListeners) {
            if (this.state != PAUSED) {
                throw new IllegalStateException("Crawler must be completely " +
                    "paused before checkpointing can start");
            }
            this.state = CHECKPOINTING;
            for (Iterator i = this.registeredCrawlStatusListeners.iterator();
                    i.hasNext();) {
                CrawlStatusListener l = (CrawlStatusListener)i.next();
                l.crawlCheckpoint(checkpointDir);
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.fine("Sent " + CHECKPOINTING + " to " + l);
                }
            }
            LOGGER.fine("Sent " + CHECKPOINTING);
        }
    }

    /**
     * Operator requested crawl begin.
     */
    public void requestCrawlStart() {
        runProcessorInitialTasks();

        sendCrawlStateChangeEvent(STARTED, CrawlJob.STATUS_PENDING);
        String jobState;
        state = RUNNING;
        jobState = CrawlJob.STATUS_RUNNING;
        sendCrawlStateChangeEvent(this.state, jobState);

        // A proper exit will change this value.
        this.sExit = CrawlJob.STATUS_FINISHED_ABNORMAL;

        Thread statLogger = new Thread(statistics);
        statLogger.setName("StatLogger");
        statLogger.start();

        frontier.start();
    }

    /**
     * Called when the last toethread exits.
     */
    protected void completeStop() {
        LOGGER.fine("Entered complete stop.");
        // Run processors' final tasks
        runProcessorFinalTasks();
        // Ok, now we are ready to exit.
        sendCrawlStateChangeEvent(FINISHED, this.sExit);
        synchronized (this.registeredCrawlStatusListeners) {
            // Remove all listeners now we're done with them.
            this.registeredCrawlStatusListeners.
                removeAll(this.registeredCrawlStatusListeners);
            this.registeredCrawlStatusListeners = null;
        }

        closeLogFiles();

        // Release reference to logger file handler instances.
        this.fileHandlers = null;
        this.uriErrors = null;
        this.uriProcessing = null;
        this.localErrors = null;
        this.runtimeErrors = null;
        this.progressStats = null;
        this.reports = null;
        this.manifest = null;

        // Do cleanup.
        this.statistics = null;
        this.frontier = null;
        this.disk = null;
        this.scratchDisk = null;
        this.order = null;
        this.scope = null;
        if (this.settingsHandler != null) {
            this.settingsHandler.cleanup();
        }
        this.settingsHandler = null;
        this.reserveMemory = null;
        this.processorChains = null;
        if (this.serverCache != null) {
            this.serverCache.cleanup();
            this.serverCache = null;
        }
        if (this.checkpointer != null) {
            this.checkpointer.cleanup();
            this.checkpointer = null;
        }
        if (this.bdbEnvironment != null) {
            try {
                this.bdbEnvironment.sync();
                this.bdbEnvironment.close();
            } catch (DatabaseException e) {
                e.printStackTrace();
            }
            this.bdbEnvironment = null;
        }
        this.bigmaps = null;
        if (this.toePool != null) {
            this.toePool.cleanup();
            // I played with launching a thread here to do cleanup of the
            // ToePool ThreadGroup (making sure the cleanup thread was not
            // in the ToePool ThreadGroup).  Did this because ToePools seemed
            // to be sticking around holding references to CrawlController at
            // least.  Need to spend more time looking to see that this is
            // still the case even after adding the above toePool#cleanup call.
        }
        this.toePool = null;
        LOGGER.fine("Finished crawl.");
    }

    synchronized void completePause() {
        // Send a notifyAll. At least the checkpointing thread may be waiting
        // on a complete pause.
        notifyAll();
        sendCrawlStateChangeEvent(PAUSED, CrawlJob.STATUS_PAUSED);
    }

    private boolean shouldContinueCrawling() {
        if (frontier.isEmpty()) {
            this.sExit = CrawlJob.STATUS_FINISHED;
            return false;
        }

        if (maxBytes > 0 && frontier.totalBytesWritten() >= maxBytes) {
            // Hit the max byte download limit!
            sExit = CrawlJob.STATUS_FINISHED_DATA_LIMIT;
            return false;
        } else if (maxDocument > 0
                && frontier.succeededFetchCount() >= maxDocument) {
            // Hit the max document download limit!
            this.sExit = CrawlJob.STATUS_FINISHED_DOCUMENT_LIMIT;
            return false;
        } else if (maxTime > 0 &&
                statistics.crawlDuration() >= maxTime * 1000) {
            // Hit the max crawl-time limit!
            this.sExit = CrawlJob.STATUS_FINISHED_TIME_LIMIT;
            return false;
        }
        return state == RUNNING;
    }

    /**
     * Request a checkpoint.
     * Sets a checkpointing thread running.
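     *
     * <p>A sketch of the expected operator call sequence; the pause request
     * shown is defined elsewhere on this class and is assumed here:
     * <pre>
     * controller.requestCrawlPause();
     * // ... wait until the crawl reports the PAUSED state ...
     * controller.requestCrawlCheckpoint();
     * </pre>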
     *
     * @throws IllegalStateException Thrown if crawl is not in paused state
     * (crawl must first be paused before checkpointing).
     */
    public synchronized void requestCrawlCheckpoint()
    throws IllegalStateException {
        if (this.checkpointer == null) {
            return;
        }
        if (this.checkpointer.isCheckpointing()) {
            throw new IllegalStateException("Checkpoint already running.");
        }
        this.checkpointer.checkpoint();
    }

    /**
     * @return True if checkpointing.
     */
    public boolean isCheckpointing() {
        return this.state == CHECKPOINTING;
    }

    /**
     * Run checkpointing.
     * CrawlController takes care of managing the checkpointing/serializing
     * of bdb, the StatisticsTracker, and the CheckpointContext.  Other
     * modules that want to revive themselves on checkpoint recovery need to
     * save state during their {@link CrawlStatusListener#crawlCheckpoint(File)}
     * invocation and then, in their #initialize if a module or in their
     * #initialTasks if a processor, check with the CrawlController whether
     * this is a checkpoint recovery. If it is, read their old state back in
     * from the pointed-to checkpoint directory.
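     *
     * <p>A sketch of the module side of that contract; the getController()
     * accessor, the state field, and the deserialization step are
     * illustrative, while writeObjectToFile is the helper this class itself
     * uses below:
     * <pre>
     * public void crawlCheckpoint(File checkpointDir) throws Exception {
     *     // Save this module's state into the checkpoint directory.
     *     CheckpointUtils.writeObjectToFile(this.state, checkpointDir);
     * }
     *
     * // Later, in #initialTasks:
     * if (getController().isCheckpointRecover()) {
     *     File dir = getController().getCheckpointRecover().getDirectory();
     *     this.state = ...; // deserialize previously saved state from dir
     * }
     * </pre>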
     * <p>Default access; only to be called by Checkpointer.
     * @throws Exception
     */
    void checkpoint()
    throws Exception {
        // Tell registered listeners to checkpoint.
        sendCheckpointEvent(this.checkpointer.
            getCheckpointInProgressDirectory());

        // Rotate off crawler logs.
        LOGGER.fine("Rotating log files.");
        rotateLogFiles(CURRENT_LOG_SUFFIX + "." +
            this.checkpointer.getNextCheckpointName());

        // Sync the BigMap contents to bdb, if they're bdb bigmaps.
        LOGGER.fine("BigMaps.");
        checkpointBigMaps(this.checkpointer.getCheckpointInProgressDirectory());

        // Note, on deserialization, the super CrawlType#parent
        // needs to be restored. Parent is '/crawl-order/loggers'.
        // The settings handler for this module also needs to be
        // restored. Both of these fields are private in the
        // super class. Adding the restored ST to crawl order should take
        // care of this.

        // Checkpoint bdb environment.
        LOGGER.fine("Bdb environment.");
        checkpointBdb(this.checkpointer.getCheckpointInProgressDirectory());

        // Make copy of order, seeds, and settings.
        LOGGER.fine("Copying settings.");
        copySettings(this.checkpointer.getCheckpointInProgressDirectory());

        // Checkpoint this crawlcontroller.
        CheckpointUtils.writeObjectToFile(this,
            this.checkpointer.getCheckpointInProgressDirectory());
    }

    /**
     * Copy off the settings.
     * @param checkpointDir Directory to write checkpoint to.
     * @throws IOException
     */
    protected void copySettings(final File checkpointDir) throws IOException {
        final List files = this.settingsHandler.getListOfAllFiles();
        boolean copiedSettingsDir = false;
        final File settingsDir = new File(this.disk, "settings");
        for (final Iterator i = files.iterator(); i.hasNext();) {
            File f = new File((String)i.next());
            if (f.getAbsolutePath().startsWith(settingsDir.getAbsolutePath())) {
                if (copiedSettingsDir) {
                    // Skip.  We've already copied this member of the
                    // settings directory.
                    continue;
                }
                // Copy 'settings' dir all in one lump, not a file at a time.
                copiedSettingsDir = true;
                FileUtils.copyFiles(settingsDir,
                    new File(checkpointDir, settingsDir.getName()));
                continue;
            }
            FileUtils.copyFiles(f, f.isDirectory()? checkpointDir:
                new File(checkpointDir, f.getName()));
        }
    }

    /**
     * Checkpoint bdb.
     * I used to do a call to log cleaning as suggested in the je-2.0 javadoc
     * but it takes way too much time (20 minutes for a crawl of 1 million
     * items). Assume the cleaner is keeping up. Below was the log-cleaning
     * loop:
     * <pre>int totalCleaned = 0;
     * for (int cleaned = 0; (cleaned = this.bdbEnvironment.cleanLog()) != 0;
     *  totalCleaned += cleaned) {
     *      LOGGER.fine("Cleaned " + cleaned + " log files.");
     * }
     * </pre>
     * <p>I also used to do a sync. But, from Mark Hayes, sync and checkpoint
     * are effectively the same thing, only sync is not configurable.  He
     * suggests doing one or the other:
     * <p>MS: Reading code, Environment.sync() is a checkpoint.  Looks like
     * I don't need to call a checkpoint after calling a sync?
     * <p>MH: Right, they're almost the same thing -- just do one or the other,
     * not both.  With the new API, you'll need to do a checkpoint not a
     * sync, because the sync() method has no config parameter.  Don't worry
     * -- it's fine to do a checkpoint even though you're not using.
     * @param checkpointDir Directory to write checkpoint to.
     * @throws DatabaseException
     * @throws IOException
     * @throws RuntimeException Thrown if failed setup of new bdb environment.
     */
    protected void checkpointBdb(File checkpointDir)
    throws DatabaseException, IOException, RuntimeException {
        EnvironmentConfig envConfig = this.bdbEnvironment.getConfig();
        final List bkgrdThreads = Arrays.asList(new String []
            {"je.env.runCheckpointer", "je.env.runCleaner",
                "je.env.runINCompressor"});
        try {
            // Disable background threads
            setBdbjeBkgrdThreads(envConfig, bkgrdThreads, "false");
            // Do a force checkpoint.  That's what a sync does (i.e. doSync).
            CheckpointConfig chkptConfig = new CheckpointConfig();
            chkptConfig.setForce(true);

            // Mark Hayes of sleepycat says:
            // "The default for this property is false, which gives the current
            // behavior (allow deltas).  If this property is true, deltas are
            // prohibited -- full versions of internal nodes are always logged
            // during the checkpoint. When a full version of an internal node
            // is logged during a checkpoint, recovery does not need to process
            // it at all.  It is only fetched if needed by the application,
            // during normal DB operations after recovery. When a delta of an
            // internal node is logged during a checkpoint, recovery must
            // process it by fetching the full version of the node from earlier
            // in the log, and then applying the delta to it.  This can be
            // pretty slow, since it is potentially a large amount of
            // random I/O."
            chkptConfig.setMinimizeRecoveryTime(true);
            this.bdbEnvironment.checkpoint(chkptConfig);
            LOGGER.fine("Finished bdb checkpoint.");

            // From the sleepycat folks: A trick for flipping db logs.
            EnvironmentImpl envImpl =
                DbInternal.envGetEnvironmentImpl(this.bdbEnvironment);
            long firstFileInNextSet =
                DbLsn.getFileNumber(envImpl.forceLogFileFlip());
            // So the last file in the checkpoint is firstFileInNextSet - 1.
            // Write manifest of all log files into the bdb directory.
            final String lastBdbCheckpointLog =
                getBdbLogFileName(firstFileInNextSet - 1);
            processBdbLogs(checkpointDir, lastBdbCheckpointLog);
            LOGGER.fine("Finished processing bdb log files.");
        } finally {
            // Restore background threads.
            setBdbjeBkgrdThreads(envConfig, bkgrdThreads, "true");
        }
    }
1298     
1299     protected void processBdbLogs(final File checkpointDir,
1300             final String lastBdbCheckpointLog) throws IOException {
1301         File bdbDir = CheckpointUtils.getBdbSubDirectory(checkpointDir);
1302         if (!bdbDir.exists()) {
1303             bdbDir.mkdir();
1304         }
1305         PrintWriter pw = new PrintWriter(new FileOutputStream(new File(
1306              checkpointDir, "bdbje-logs-manifest.txt")));
1307         try {
1308             // Don't copy any beyond the last bdb log file (bdbje can keep
1309             // writing logs after checkpoint).
1310             boolean pastLastLogFile = false;
1311             Set<String> srcFilenames = null;
1312             final boolean copyFiles = getCheckpointCopyBdbjeLogs();
1313             do {
1314                 FilenameFilter filter = CheckpointUtils.getJeLogsFilter();
1315                 srcFilenames =
1316                     new HashSet<String>(Arrays.asList(
1317                             getStateDisk().list(filter)));
1318                 String[] tgtFilenames = bdbDir.list(filter);
1319                 if (tgtFilenames != null && tgtFilenames.length > 0) {
1320                     srcFilenames.removeAll(Arrays.asList(tgtFilenames));
1321                 }
1322                 if (srcFilenames.size() > 0) {
1323                     // Sort files.
1324                     srcFilenames = new TreeSet<String>(srcFilenames);
1325                     int count = 0;
1326                     for (final Iterator i = srcFilenames.iterator();
1327                             i.hasNext() && !pastLastLogFile;) {
1328                         String name = (String) i.next();
1329                         if (copyFiles) {
1330                             FileUtils.copyFiles(new File(getStateDisk(), name),
1331                                 new File(bdbDir, name));
1332                         }
1333                         pw.println(name);
1334                         if (name.equals(lastBdbCheckpointLog)) {
1335                             // We're done.
1336                             pastLastLogFile = true;
1337                         }
1338                         count++;
1339                     }
1340                     if (LOGGER.isLoggable(Level.FINE)) {
1341                         LOGGER.fine((copyFiles? "Copied ": "Listed ") + count);
1342                     }
1343                 }
1344             } while (!pastLastLogFile && srcFilenames != null &&
1345                 srcFilenames.size() > 0);
1346         } finally {
1347             pw.close();
1348         }
1349     }
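
         // Illustrative sketch (not part of the original source): the
         // "bdbje-logs-manifest.txt" written above ends up holding one bdbje
         // log filename per line, in sorted order, stopping at the last
         // checkpoint log, e.g.:
         //
         //   00000000.jdb
         //   00000001.jdb
         //   00000002.jdb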
1350  
1351     protected String getBdbLogFileName(final long index) {
1352         String lastBdbLogFileHex = Long.toHexString(index);
1353         StringBuffer buffer = new StringBuffer();
1354         for (int i = 0; i < (8 - lastBdbLogFileHex.length()); i++) {
1355             buffer.append('0');
1356         }
1357         buffer.append(lastBdbLogFileHex);
1358         buffer.append(".jdb");
1359         return buffer.toString();
1360     }
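
         // Worked example (illustrative): getBdbLogFileName(26) returns
         // "0000001a.jdb" -- 26 is 1a in hex, zero-padded out to eight
         // digits and given bdbje's ".jdb" log-file extension.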
1361     
1362     protected void setBdbjeBkgrdThreads(final EnvironmentConfig config,
1363             final List threads, final String setting) {
1364         for (final Iterator i = threads.iterator(); i.hasNext();) {
1365             config.setConfigParam((String)i.next(), setting);
1366         }
1367     }
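
         // For clarity, the loop above is equivalent to these direct calls
         // when passed the background-thread list used by checkpointBdb
         // (a sketch only; nothing here invokes it):
         //
         //   config.setConfigParam("je.env.runCheckpointer", "false");
         //   config.setConfigParam("je.env.runCleaner", "false");
         //   config.setConfigParam("je.env.runINCompressor", "false");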
1368     
1369     /***
1370      * Get the recover checkpoint.
1371      * Returns null if we're NOT in recover mode.
1372      * Looks at ATTR_RECOVER_PATH and, if it points at a directory, assumes
1373      * checkpoint recover. In checkpoint-recover mode, returns a Checkpoint
1374      * instance if the checkpoint was VALID (else null).
1375      * @return Checkpoint instance if we're in recover checkpoint
1376      * mode and the pointed-to checkpoint was valid.
1377      * @see #isCheckpointRecover()
1378      */
1379     public synchronized Checkpoint getCheckpointRecover() {
1380         if (this.checkpointRecover != null) {
1381             return this.checkpointRecover;
1382         }
1383         return getCheckpointRecover(this.order);
1384     }
1385     
1386     public static Checkpoint getCheckpointRecover(final CrawlOrder order) {
1387         String path = (String)order.getUncheckedAttribute(null,
1388             CrawlOrder.ATTR_RECOVER_PATH);
1389         if (path == null || path.length() <= 0) {
1390             return null;
1391         }
1392         File rp = new File(path);
1393         // Assume that if the path is a directory, it's a checkpoint recover.
1394         Checkpoint result = null;
1395         if (rp.exists() && rp.isDirectory()) {
1396             Checkpoint cp = new Checkpoint(rp);
1397             if (cp.isValid()) {
1398                 // if valid, set as result.
1399                 result = cp;
1400             }
1401         }
1402         return result;
1403     }
1404     
1405     public static boolean isCheckpointRecover(final CrawlOrder order) {
1406         return getCheckpointRecover(order) != null;
1407     }
1408     
1409     /***
1410      * @return True if we're in checkpoint recover mode. Call
1411      * {@link #getCheckpointRecover()} to get the Checkpoint instance
1412      * that has info on the checkpoint directory being recovered from.
1413      */
1414     public boolean isCheckpointRecover() {
1415         return this.checkpointRecover != null;
1416     }
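
         // Usage sketch (hypothetical caller): components restoring state
         // from a checkpoint can branch on recover mode like this:
         //
         //   if (controller.isCheckpointRecover()) {
         //       File recoverDir =
         //           controller.getCheckpointRecover().getDirectory();
         //       // ...reinstantiate serialized state from recoverDir...
         //   }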
1417 
1418     /***
1419      * Operator requested that the crawl stop.
1420      */
1421     public synchronized void requestCrawlStop() {
1422         requestCrawlStop(CrawlJob.STATUS_ABORTED);
1423     }
1424     
1425     /***
1426      * Operator requested that the crawl stop.
1427      * @param message Exit status message.
1428      */
1429     public synchronized void requestCrawlStop(String message) {
1430         if (state == STOPPING || state == FINISHED) {
1431             return;
1432         }
1433         if (message == null) {
1434             throw new IllegalArgumentException("Message cannot be null.");
1435         }
1436         this.sExit = message;
1437         beginCrawlStop();
1438     }
1439 
1440     /***
1441      * Start the process of stopping the crawl. 
1442      */
1443     public void beginCrawlStop() {
1444         LOGGER.fine("Started.");
1445         sendCrawlStateChangeEvent(STOPPING, this.sExit);
1446         if (this.frontier != null) {
1447             this.frontier.terminate();
1448             this.frontier.unpause();
1449         }
1450         LOGGER.fine("Finished."); 
1451     }
1452     
1453     /***
1454      * Stop the crawl temporarily.
1455      */
1456     public synchronized void requestCrawlPause() {
1457         if (state == PAUSING || state == PAUSED) {
1458             // Already about to pause
1459             return;
1460         }
1461         sExit = CrawlJob.STATUS_WAITING_FOR_PAUSE;
1462         frontier.pause();
1463         sendCrawlStateChangeEvent(PAUSING, this.sExit);
1464         if (toePool.getActiveToeCount() == 0) {
1465             // If all threads are already held, complete the pause now;
1466             // no later thread-paused event will arrive to trigger it.
1467             completePause();
1468         }
1469     }
1470 
1471     /***
1472      * Tell if the controller is paused
1473      * @return true if paused
1474      */
1475     public boolean isPaused() {
1476         return state == PAUSED;
1477     }
1478     
1479     public boolean isPausing() {
1480         return state == PAUSING;
1481     }
1482     
1483     public boolean isRunning() {
1484         return state == RUNNING;
1485     }
1486 
1487     /***
1488      * Resume crawl from paused state
1489      */
1490     public synchronized void requestCrawlResume() {
1491         if (state != PAUSING && state != PAUSED && state != CHECKPOINTING) {
1492             // Can only resume if we've been told to pause or are in the
1493             // middle of a checkpoint.
1494             return;
1495         }
1496         multiThreadMode();
1497         frontier.unpause();
1498         LOGGER.fine("Crawl resumed.");
1499         sendCrawlStateChangeEvent(RUNNING, CrawlJob.STATUS_RUNNING);
1500     }
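
         // Lifecycle sketch (hypothetical operator sequence against a
         // running crawl): pause, wait until paused, then resume.
         //
         //   controller.requestCrawlPause();   // state -> PAUSING, then PAUSED
         //   // ...inspect or adjust the crawl while paused...
         //   controller.requestCrawlResume();  // state -> RUNNING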
1501 
1502     /***
1503      * @return Active toe thread count.
1504      */
1505     public int getActiveToeCount() {
1506         if (toePool == null) {
1507             return 0;
1508         }
1509         return toePool.getActiveToeCount();
1510     }
1511 
1512     private void setupToePool() {
1513         toePool = new ToePool(this);
1514         // TODO: make # of toes self-optimizing
1515         toePool.setSize(order.getMaxToes());
1516     }
1517 
1518     /***
1519      * @return The order file instance.
1520      */
1521     public CrawlOrder getOrder() {
1522         return order;
1523     }
1524 
1525     /***
1526      * @return The server cache instance.
1527      */
1528     public ServerCache getServerCache() {
1529         return serverCache;
1530     }
1531 
1532     /***
1533      * @param o The crawl order to use.
1534      */
1535     public void setOrder(CrawlOrder o) {
1536         order = o;
1537     }
1538 
1539 
1540     /***
1541      * @return The frontier.
1542      */
1543     public Frontier getFrontier() {
1544         return frontier;
1545     }
1546 
1547     /***
1548      * @return This crawl scope.
1549      */
1550     public CrawlScope getScope() {
1551         return scope;
1552     }
1553 
1554     /*** Get the list of processor chains.
1555      *
1556      * @return the list of processor chains.
1557      */
1558     public ProcessorChainList getProcessorChainList() {
1559         return processorChains;
1560     }
1561 
1562     /*** Get the first processor chain.
1563      *
1564      * @return the first processor chain.
1565      */
1566     public ProcessorChain getFirstProcessorChain() {
1567         return processorChains.getFirstChain();
1568     }
1569 
1570     /*** Get the postprocessor chain.
1571      *
1572      * @return the postprocessor chain.
1573      */
1574     public ProcessorChain getPostprocessorChain() {
1575         return processorChains.getLastChain();
1576     }
1577 
1578     /***
1579      * Get the 'working' directory of the current crawl.
1580      * @return the 'working' directory of the current crawl.
1581      */
1582     public File getDisk() {
1583         return disk;
1584     }
1585 
1586     /***
1587      * @return Scratch disk location.
1588      */
1589     public File getScratchDisk() {
1590         return scratchDisk;
1591     }
1592 
1593     /***
1594      * @return State disk location.
1595      */
1596     public File getStateDisk() {
1597         return stateDisk;
1598     }
1599 
1600     /***
1601      * @return The number of ToeThreads
1602      *
1603      * @see ToePool#getToeCount()
1604      */
1605     public int getToeCount() {
1606         return this.toePool == null? 0: this.toePool.getToeCount();
1607     }
1608 
1609     /***
1610      * @return The ToePool
1611      */
1612     public ToePool getToePool() {
1613         return toePool;
1614     }
1615     
1616     /***
1617      * @return toepool one-line report
1618      */
1619     public String oneLineReportThreads() {
1620         return toePool.singleLineReport();
1621     }
1623 
1624     /***
1625      * While many settings will update automatically when the SettingsHandler is
1626      * modified, some settings need to be explicitly changed to reflect new
1627      * settings. This includes the number of toe threads and the seeds.
1628      */
1629     public void kickUpdate() {
1630         
1631         installThreadContextSettingsHandler();
1632  
1633         toePool.setSize(order.getMaxToes());
1634         
1635         this.scope.kickUpdate();
1636         this.frontier.kickUpdate();
1637         this.processorChains.kickUpdate();
1638         
1639         // TODO: continue to generalize this, so that any major 
1640         // component can get a kick when it may need to refresh its data
1641 
1642         setThresholds();
1643     }
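
         // Usage sketch (hypothetical): after an operator edits settings --
         // say, raising the maximum number of toe threads in the order --
         // a single call picks the changes up:
         //
         //   controller.kickUpdate();  // e.g. resizes toePool to order.getMaxToes()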
1644 
1645     /***
1646      * @return The settings handler.
1647      */
1648     public SettingsHandler getSettingsHandler() {
1649         return settingsHandler;
1650     }
1651 
1652     /***
1653      * This method iterates through processor chains to run processors' initial
1654      * tasks.
1655      *
1656      */
1657     private void runProcessorInitialTasks(){
1658         for (Iterator ic = processorChains.iterator(); ic.hasNext(); ) {
1659             for (Iterator ip = ((ProcessorChain) ic.next()).iterator();
1660                     ip.hasNext(); ) {
1661                 ((Processor) ip.next()).initialTasks();
1662             }
1663         }
1664     }
1665 
1666     /***
1667      * This method iterates through processor chains to run processors' final
1668      * tasks.
1669      *
1670      */
1671     private void runProcessorFinalTasks(){
1672         for (Iterator ic = processorChains.iterator(); ic.hasNext(); ) {
1673             for (Iterator ip = ((ProcessorChain) ic.next()).iterator();
1674                     ip.hasNext(); ) {
1675                 ((Processor) ip.next()).finalTasks();
1676             }
1677         }
1678     }
1679 
1680     /***
1681      * Kills a thread. For details see
1682      * {@link org.archive.crawler.framework.ToePool#killThread(int, boolean)
1683      * ToePool.killThread(int, boolean)}.
1684      * @param threadNumber Thread to kill.
1685      * @param replace Should thread be replaced.
1686      * @see org.archive.crawler.framework.ToePool#killThread(int, boolean)
1687      */
1688     public void killThread(int threadNumber, boolean replace){
1689         toePool.killThread(threadNumber, replace);
1690     }
1691 
1692     /***
1693      * Add a file to the manifest of files used/generated by the current
1694      * crawl.
1695      * 
1696      * TODO: It's possible for a file to be added twice if reports are
1697      * force-generated mid-crawl.  Fix.
1698      *
1699      * @param file The filename (with absolute path) of the file to add
1700      * @param type The type of the file
1701      * @param bundle Should the file be included in a typical bundling of
1702      *           crawler files.
1703      *
1704      * @see #MANIFEST_CONFIG_FILE
1705      * @see #MANIFEST_LOG_FILE
1706      * @see #MANIFEST_REPORT_FILE
1707      */
1708     public void addToManifest(String file, char type, boolean bundle) {
1709         manifest.append(type + (bundle? "+": "-") + " " + file + "\n");
1710     }
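
         // Worked example (path and type are illustrative): a call such as
         //
         //   addToManifest("/crawls/job/logs/crawl.log", MANIFEST_LOG_FILE, true);
         //
         // appends a line of the form "<type>+ /crawls/job/logs/crawl.log",
         // where <type> is the MANIFEST_LOG_FILE character and "+" marks the
         // file for inclusion in a typical bundling of crawler files.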
1711 
1712     /***
1713      * Evaluate if the crawl should stop because it is finished.
1714      */
1715     public void checkFinish() {
1716         if(atFinish()) {
1717             beginCrawlStop();
1718         }
1719     }
1720 
1721     /***
1722      * Evaluate if the crawl should stop because it is finished,
1723      * without actually stopping the crawl.
1724      * 
1725      * @return true if crawl is at a finish-possible state
1726      */
1727     public boolean atFinish() {
1728         return state == RUNNING && !shouldContinueCrawling();
1729     }
1730     
1731     private void readObject(ObjectInputStream stream)
1732     throws IOException, ClassNotFoundException {
1733         stream.defaultReadObject();
1734         // Setup status listeners
1735         this.registeredCrawlStatusListeners =
1736             Collections.synchronizedList(new ArrayList<CrawlStatusListener>());
1737         // Ensure no holdover singleThreadMode
1738         singleThreadMode = false; 
1739     }
1740 
1741     /***
1742      * Go to single thread mode, where only one ToeThread may
1743      * proceed at a time. Also acquires the single lock, so 
1744      * no further threads will proceed past an 
1745      * acquireContinuePermission. Caller must be sure to release the
1746      * lock to allow other threads to proceed one at a time.
1747      */
1748     public void singleThreadMode() {
1749         this.singleThreadLock.lock();
1750         singleThreadMode = true; 
1751     }
1752 
1753     /***
1754      * Go back to regular multi-thread mode, where all
1755      * ToeThreads may proceed at once.
1756      */
1757     public void multiThreadMode() {
1758         this.singleThreadLock.lock();
1759         singleThreadMode = false; 
1760         while(this.singleThreadLock.isHeldByCurrentThread()) {
1761             this.singleThreadLock.unlock();
1762         }
1763     }
1764     
1765     /***
1766      * Proceed only if allowed, giving CrawlController a chance
1767      * to enforce single-thread mode.
1768      */
1769     public void acquireContinuePermission() {
1770         if (singleThreadMode) {
1771             this.singleThreadLock.lock();
1772             if(!singleThreadMode) {
1773                 // If changed while waiting, ignore
1774                 while(this.singleThreadLock.isHeldByCurrentThread()) {
1775                     this.singleThreadLock.unlock();
1776                 }
1777             }
1778         } // else, permission is automatic
1779     }
1780 
1781     /***
1782      * Relinquish continue permission at end of processing (allowing
1783      * another thread to proceed if in single-thread mode). 
1784      */
1785     public void releaseContinuePermission() {
1786         if (singleThreadMode) {
1787             while(this.singleThreadLock.isHeldByCurrentThread()) {
1788                 this.singleThreadLock.unlock();
1789             }
1790         } // else do nothing; 
1791     }
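
         // Protocol sketch (hypothetical ToeThread loop): bracketing each
         // unit of work with the acquire/release pair lets single-thread
         // mode serialize the threads:
         //
         //   controller.acquireContinuePermission();
         //   try {
         //       // ...process one CrawlURI...
         //   } finally {
         //       controller.releaseContinuePermission();
         //   }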
1792     
1793     public void freeReserveMemory() {
1794         if(!reserveMemory.isEmpty()) {
1795             reserveMemory.removeLast();
1796             System.gc();
1797         }
1798     }
1799 
1800     /***
1801      * Note that a ToeThread reached paused condition, possibly
1802      * completing the crawl-pause. 
1803      */
1804     public synchronized void toePaused() {
1805         releaseContinuePermission();
1806         if (state ==  PAUSING && toePool.getActiveToeCount() == 0) {
1807             completePause();
1808         }
1809     }
1810     
1811     /***
1812      * Note that a ToeThread ended, possibly completing the crawl-stop. 
1813      */
1814     public synchronized void toeEnded() {
1815         if (state == STOPPING && toePool.getActiveToeCount() == 0) {
1816             completeStop();
1817         }
1818     }
1819 
1820     /***
1821      * Add order file contents to manifest.
1822      * Writes configuration files and any files managed by CrawlController to
1823      * it. Classes managing other files, outside of the settings framework,
1824      * are responsible for adding those files to the manifest themselves
1825      * by calling addToManifest.
1826      * Call before writing out reports.
1827      */
1828     public void addOrderToManifest() {
1829         for (Iterator it = getSettingsHandler().getListOfAllFiles().iterator();
1830                 it.hasNext();) {
1831             addToManifest((String)it.next(),
1832                 CrawlController.MANIFEST_CONFIG_FILE, true);
1833         }
1834     }
1835     
1836     /***
1837      * Log a URIException from deep inside other components to the crawl's
1838      * shared log. 
1839      * 
1840      * @param e URIException encountered
1841      * @param u CrawlURI where problem occurred
1842      * @param l String that could not be interpreted as a URI without exception
1843      */
1844     public void logUriError(URIException e, UURI u, CharSequence l) {
1845         if (e.getReasonCode() == UURIFactory.IGNORED_SCHEME) {
1846             // don't log those that are intentionally ignored
1847             return; 
1848         }
1849         Object[] array = {u, l};
1850         uriErrors.log(Level.INFO, e.getMessage(), array);
1851     }
1852     
1853     // 
1854     // Reporter
1855     //
1856     public final static String PROCESSORS_REPORT = "processors";
1857     public final static String MANIFEST_REPORT = "manifest";
1858     protected final static String[] REPORTS = {PROCESSORS_REPORT, MANIFEST_REPORT};
1859     
1860     /* (non-Javadoc)
1861      * @see org.archive.util.Reporter#getReports()
1862      */
1863     public String[] getReports() {
1864         return REPORTS;
1865     }
1866 
1867     /* (non-Javadoc)
1868      * @see org.archive.util.Reporter#reportTo(java.io.Writer)
1869      */
1870     public void reportTo(PrintWriter writer) {
1871         reportTo(null,writer);
1872     }
1873 
1874     public String singleLineReport() {
1875         return ArchiveUtils.singleLineReport(this);
1876     }
1877 
1878     public void reportTo(String name, PrintWriter writer) {
1879         if(PROCESSORS_REPORT.equals(name)) {
1880             reportProcessorsTo(writer);
1881             return;
1882         } else if (MANIFEST_REPORT.equals(name)) {
1883             reportManifestTo(writer);
1884             return;
1885         } else if (name!=null) {
1886             writer.println("requested report unknown: "+name);
1887         }
1888         singleLineReportTo(writer);
1889     }
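
         // Usage sketch (hypothetical caller): capture a named report as a
         // String via a StringWriter.
         //
         //   StringWriter sw = new StringWriter();
         //   PrintWriter pw = new PrintWriter(sw);
         //   controller.reportTo(CrawlController.MANIFEST_REPORT, pw);
         //   pw.flush();
         //   String manifestReport = sw.toString();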
1890 
1891     /***
1892      * @param writer Where to write report to.
1893      */
1894     protected void reportManifestTo(PrintWriter writer) {
1895         writer.print(manifest.toString());
1896     }
1897 
1898     /***
1899      * Compiles and writes a human-readable report on the active processors.
1900      * @param writer Where to write to.
1901      * @see org.archive.crawler.framework.Processor#report()
1902      */
1903     protected void reportProcessorsTo(PrintWriter writer) {
1904         writer.print(
1905             "Processors report - "
1906                 + ArchiveUtils.get12DigitDate()
1907                 + "\n");
1908         writer.print("  Job being crawled:    " + getOrder().getCrawlOrderName()
1909                 + "\n");
1910 
1911         writer.print("  Number of Processors: " +
1912             processorChains.processorCount() + "\n");
1913         writer.print("  NOTE: Some processors may not return a report!\n\n");
1914 
1915         for (Iterator ic = processorChains.iterator(); ic.hasNext(); ) {
1916             for (Iterator ip = ((ProcessorChain) ic.next()).iterator();
1917                     ip.hasNext(); ) {
1918                 writer.print(((Processor) ip.next()).report());
1919             }
1920         }
1921     }
1922 
1923     public void singleLineReportTo(PrintWriter writer) {
1924         // TODO: improve to be a summary of crawl state
1925         writer.write("[Crawl Controller]\n");
1926     }
1927 
1928     public String singleLineLegend() {
1929         // TODO improve
1930         return "nothingYet";
1931     }
1932     
1933     /***
1934      * Call this method to get an instance of the crawler BigMap implementation.
1935      * A "BigMap" is a Map that knows how to manage ever-growing sets of
1936      * key/value pairs. If we're in a checkpoint recovery, this method will
1937      * manage reinstantiation of checkpointed bigmaps.
1938      * @param dbName Name to give any associated database.  Also used
1939      * as part of the name when serializing out the bigmap.  Must be unique to a crawl.
1940      * @param keyClass Class of keys we'll be using.
1941      * @param valueClass Class of values we'll be using.
1942      * @return Map that knows how to carry large sets of key/value pairs
1943      * (currently backed by a CachedBdbMap).
1944      * @throws Exception
1945      */
1946     public <K,V> Map<K,V> getBigMap(final String dbName, 
1947             final Class<? super K> keyClass,
1948             final Class<? super V> valueClass)
1949     throws Exception {
1950         CachedBdbMap<K,V> result = new CachedBdbMap<K,V>(dbName);
1951         if (isCheckpointRecover()) {
1952             File baseDir = getCheckpointRecover().getDirectory();
1953             @SuppressWarnings("unchecked")
1954             CachedBdbMap<K,V> temp = CheckpointUtils.
1955                 readObjectFromFile(result.getClass(), dbName, baseDir);
1956             result = temp;
1957         }
1958         result.initialize(getBdbEnvironment(), keyClass, valueClass,
1959                 getBdbEnvironment().getClassCatalog());
1960         // Save a reference to each big map made so we can manage their
1961         // checkpointing.
1962         this.bigmaps.put(dbName, result);
1963         return result;
1964     }
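
         // Usage sketch (hypothetical; map name and types are illustrative,
         // and the call throws Exception, so real callers must handle it):
         //
         //   Map<String,CrawlURI> held =
         //       controller.getBigMap("heldUris", String.class, CrawlURI.class);
         //   held.put(curi.toString(), curi);
         //
         // In a checkpoint recover, the same call transparently reinstantiates
         // the serialized CachedBdbMap from the checkpoint directory.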
1965     
1966     protected void checkpointBigMaps(final File cpDir)
1967     throws Exception {
1968         for (final Iterator i = this.bigmaps.keySet().iterator(); i.hasNext();) {
1969             Object key = i.next();
1970             Object obj = this.bigmaps.get(key);
1971             // TODO: I tried adding sync to custom serialization of the
1972             // BigMap implementation, but data member counts of the BigMap
1973             // implementation were not being persisted properly.  Look into
1974             // why.  For now, sync in advance of serialization.
1975             ((CachedBdbMap)obj).sync();
1976             CheckpointUtils.writeObjectToFile(obj, (String)key, cpDir);
1977         }
1978     }
1979 
1980     /***
1981      * Called whenever a progress statistics logging event occurs.
1982      * @param e Progress statistics event.
1983      */
1984     public void progressStatisticsEvent(final EventObject e) {
1985         // Default is to do nothing.  Subclass if you want to catch this event.
1986         // Later, if there's demand, add publisher/listener support.  Currently
1987         // hacked in so the subclass added in CrawlJob to support JMX can send
1988         // notifications of progressStatistics changes.
1989     }
1990     
1991     /***
1992      * Log to the progress statistics log.
1993      * @param msg Message to write to the progress statistics log.
1994      */
1995     public void logProgressStatistics(final String msg) {
1996         this.progressStats.info(msg);
1997     }
1998 
1999     /***
2000      * @return CrawlController state.
2001      */
2002     public Object getState() {
2003         return this.state;
2004     }
2005 
2006     public File getCheckpointsDisk() {
2007         return this.checkpointsDisk;
2008     }
2009 }