/* CrawlJobHandler
 *
 * $Id: CrawlJobHandler.java 5384 2007-08-09 00:41:35Z gojomo $
 *
 * Copyright (C) 2003 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package org.archive.crawler.admin;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.management.Attribute;
import javax.management.AttributeNotFoundException;
import javax.management.InvalidAttributeValueException;
import javax.management.MBeanException;
import javax.management.ReflectionException;

import org.apache.commons.httpclient.URIException;
import org.archive.crawler.Heritrix;
import org.archive.crawler.datamodel.CrawlOrder;
import org.archive.crawler.event.CrawlStatusListener;
import org.archive.crawler.framework.FrontierMarker;
import org.archive.crawler.framework.exceptions.FatalConfigurationException;
import org.archive.crawler.framework.exceptions.InitializationException;
import org.archive.crawler.framework.exceptions.InvalidFrontierMarkerException;
import org.archive.crawler.frontier.FrontierJournal;
import org.archive.crawler.settings.ComplexType;
import org.archive.crawler.settings.CrawlerSettings;
import org.archive.crawler.settings.SettingsHandler;
import org.archive.crawler.settings.XMLSettingsHandler;
import org.archive.util.ArchiveUtils;
import org.archive.util.FileUtils;

/**
 * This class manages CrawlJobs. Submitted crawl jobs are queued up and run
 * in order when the crawler is running.
 * <p>Basically this provides a layer between any potential user interface and
 * the CrawlJobs.  It keeps the lists of completed jobs, pending jobs, etc.
 * <p>
 * The jobs managed by the handler can be divided into the following:
 * <ul>
 *  <li><code>Pending</code> - Jobs that are ready to run and are waiting their
 *                             turn. These can be edited, viewed, deleted etc.
 *  <li><code>Running</code> - Only one job can be running at a time. There may
 *                             be no job running. The running job can be viewed
 *                             and edited to some extent. It can also be
 *                             terminated. This job should have a
 *                             StatisticsTracking module attached to it for more
 *                             details on the crawl.
 *  <li><code>Completed</code> - Jobs that have finished crawling or have been
 *                             deleted from the pending queue or terminated
 *                             while running. They cannot be edited but can be
 *                             viewed. They retain the StatisticsTracking
 *                             module from their run.
 *  <li><code>New job</code> - At any given time there can be one 'new job'.
 *                             The new job is not considered ready to run. It
 *                             can be edited or discarded (in which case it will
 *                             be totally destroyed, including any files on
 *                             disk). Once an operator deems the job ready to
 *                             run it can be moved to the pending queue.
 *  <li><code>Profiles</code> - Jobs under profiles are not actual jobs. They
 *                             can be edited normally but cannot be submitted
 *                             to the pending queue. New jobs can be created
 *                             using a profile as their template.
 * </ul>
 *
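 * <p>A minimal usage sketch (the jobs directory and seed value below are
 * hypothetical, for illustration only):
 * <pre>{@code
 * CrawlJobHandler handler = new CrawlJobHandler(new File("jobs"));
 * CrawlJob profile = handler.getDefaultProfile();
 * CrawlJob job = handler.newJob(profile, null, "example-job",
 *     "An example crawl", "http://example.com/", CrawlJob.PRIORITY_AVERAGE);
 * handler.addJob(job);    // moves the 'new job' onto the pending queue
 * handler.startCrawler(); // crawl pending jobs in order
 * }</pre>
 *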
 * @author Kristinn Sigurdsson
 *
 * @see org.archive.crawler.admin.CrawlJob
 */
public class CrawlJobHandler implements CrawlStatusListener {
    private static final Logger logger =
        Logger.getLogger(CrawlJobHandler.class.getName());

    /**
     * Name of the system property whose value overrides the default profile
     * used.
     */
    public static final String DEFAULT_PROFILE_NAME
        = "heritrix.default.profile";

    /**
     * Default profile name.
     */
    public static final String DEFAULT_PROFILE = "default";

    /**
     * Name of the profiles directory.
     */
    public static final String PROFILES_DIR_NAME = "profiles";

    public static final String ORDER_FILE_NAME = "order.xml";

    /**
     * Job currently being crawled.
     */
    private CrawlJob currentJob = null;

    /**
     * A new job that is being created/configured. Not yet ready for crawling.
     */
    private CrawlJob newJob = null;

    /**
     * Thread to start the next job in the background.
     */
    private Thread startingNextJob = null;

    /**
     * A list of pending CrawlJobs.
     */
    private TreeSet<CrawlJob> pendingCrawlJobs;

    /**
     * A list of completed CrawlJobs.
     */
    private TreeSet<CrawlJob> completedCrawlJobs;

    /**
     * A list of profile CrawlJobs.
     */
    private TreeSet<CrawlJob> profileJobs;

    // The UIDs of profiles should NOT be timestamps. A descriptive name is
    // ideal.
    private String defaultProfile = null;

    /**
     * If true the crawler is 'running'. That is, the next pending job will
     * start crawling as soon as the current job (if any) is completed.
     */
    private boolean running = false;

    /**
     * String to indicate recovery should be based on the recovery log, not
     * on checkpointing.
     */
    public static final String RECOVER_LOG = "recover";

    /**
     * Jobs directory.
     */
    private final File jobsDir;

    /**
     * Constructor.
     * @param jobsDir Jobs directory.
     */
    public CrawlJobHandler(final File jobsDir) {
        this(jobsDir, true, true);
    }

    /**
     * Constructor allowing for optional loading of profiles and jobs.
     * @param jobsDir Jobs directory.
     * @param loadJobs If true then any applicable jobs will be loaded.
     * @param loadProfiles If true then any applicable profiles will be loaded.
     */
    public CrawlJobHandler(final File jobsDir,
            final boolean loadJobs, final boolean loadProfiles) {
        this.jobsDir = jobsDir;
        // Make a comparator for CrawlJobs: order by priority, then by UID.
        Comparator<CrawlJob> comp = new Comparator<CrawlJob>() {
            public int compare(CrawlJob job1, CrawlJob job2) {
                if (job1.getJobPriority() < job2.getJobPriority()) {
                    return -1;
                } else if (job1.getJobPriority() > job2.getJobPriority()) {
                    return 1;
                } else {
                    // Same priority, use UID (which should be a timestamp).
                    // Lower UID (string compare) means earlier time.
                    return job1.getUID().compareTo(job2.getUID());
                }
            }
        };
        this.pendingCrawlJobs = new TreeSet<CrawlJob>(comp);
        this.completedCrawlJobs = new TreeSet<CrawlJob>(comp);
        // Profiles always have the same priority so they will be sorted by
        // name.
        this.profileJobs = new TreeSet<CrawlJob>(comp);
        if (loadProfiles) {
            loadProfiles();
        }
        if (loadJobs) {
            loadJobs();
        }
    }

    /**
     * Find the state.job file in the job directory.
     * @param jobDir Directory to look in.
     * @return Full path to 'state.job' file or null if none found.
     */
    protected File getStateJobFile(final File jobDir) {
        // Need to find the job file ('state.job').
        File[] jobFiles = jobDir.listFiles(new FilenameFilter() {
            public boolean accept(File dir, String name) {
                return name.toLowerCase().endsWith(".job") &&
                    (new File(dir, name)).canRead();
            }
        });
        return (jobFiles.length == 1)? jobFiles[0]: null;
    }

    /**
     * Loads any available jobs in the jobs directory.
     * <p>
     * Available jobs are any directory containing a file called
     * <code>state.job</code>. The file must contain valid job information.
     */
    private void loadJobs() {
        this.jobsDir.mkdirs();
        File[] jobs = this.jobsDir.listFiles();
        for (int i = 0; i < jobs.length; i++) {
            if (jobs[i].isDirectory()) {
                File jobFile = getStateJobFile(jobs[i]);
                if (jobFile != null) {
                    loadJob(jobFile);
                }
            }
        }
    }

    /**
     * Loads a job given a specific job file. The loaded job will be placed in
     * the list of completed jobs or the pending queue depending on its status.
     * Running jobs will have their status set to 'finished abnormally' and be
     * put into the completed list.
     * @param job The job file of the job to load.
     */
    protected void loadJob(final File job) {
        CrawlJob cjob = null;
        try {
            // Load the CrawlJob
            cjob = new CrawlJob(job, new CrawlJobErrorHandler());
        } catch (InvalidJobFileException e) {
            logger.log(Level.INFO,
                    "Invalid job file for " + job.getAbsolutePath(), e);
            return;
        } catch (IOException e) {
            logger.log(Level.INFO, "IOException for " + job.getName() +
                    ", " + job.getAbsolutePath(), e);
            return;
        }

        // TODO: Move test into CrawlJob.
        // Check job status and place it accordingly.
        if (cjob.getStatus().equals(CrawlJob.STATUS_RUNNING)
                || cjob.getStatus().equals(CrawlJob.STATUS_PAUSED)
                || cjob.getStatus().equals(CrawlJob.STATUS_CHECKPOINTING)
                || cjob.getStatus().equals(CrawlJob.STATUS_WAITING_FOR_PAUSE)) {
            // Was a running job.
            cjob.setStatus(CrawlJob.STATUS_FINISHED_ABNORMAL);
            this.completedCrawlJobs.add(cjob);
        } else if (cjob.getStatus().equals(CrawlJob.STATUS_PENDING)) {
            // Was a pending job.
            this.pendingCrawlJobs.add(cjob);
        } else if (cjob.getStatus().equals(CrawlJob.STATUS_CREATED)
                || cjob.getStatus().equals(CrawlJob.STATUS_DELETED)) {
            // Ignore for now. TODO: Add to 'recycle bin'.
        } else {
            // Must have been completed.
            this.completedCrawlJobs.add(cjob);
        }
    }

    /**
     * Looks in the conf dir for a profiles dir.
     * @return The directory where profiles are stored, or null if none is
     * available.
     * @throws IOException
     */
    private File getProfilesDirectory() throws IOException {
        URL webappProfilePath = Heritrix.class.getResource("/" +
            PROFILES_DIR_NAME);
        if (webappProfilePath != null) {
            try {
                return new File(new URI(webappProfilePath.toString()));
            } catch (java.lang.IllegalArgumentException e) {
                // e.g. "profiles" within a jar file.
                // Try Heritrix.getConfdir() in this case.
            } catch (java.net.URISyntaxException e) {
                e.printStackTrace();
            }
        }
        return (Heritrix.getConfdir(false) == null)? null:
            new File(Heritrix.getConfdir().getAbsolutePath(),
                PROFILES_DIR_NAME);
    }

    /**
     * Loads the default profile and all other profiles found on disk.
     */
    private void loadProfiles() {
        boolean loadedDefault = false;
        File profileDir = null;
        try {
            profileDir = getProfilesDirectory();
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (profileDir != null) {
            File[] ps = profileDir.listFiles();
            if (ps != null && ps.length > 0) {
                for (int i = 0; i < ps.length; i++) {
                    File f = ps[i];
                    if (f.isDirectory()) {
                        // Each directory in the profiles directory should
                        // contain the file order.xml.
                        File profile = new File(f, ORDER_FILE_NAME);
                        if (profile.canRead()) {
                            boolean b = loadProfile(profile);
                            if (b) {
                                loadedDefault = b;
                            }
                        }
                    }
                }
            }
        }
        // Now add in the default profile.  It's on the CLASSPATH and needs
        // special handling.  Don't add if a default is already present.
        String parent = File.separator + PROFILES_DIR_NAME + File.separator;
        if (!loadedDefault) {
            loadProfile(new File(parent + DEFAULT_PROFILE, ORDER_FILE_NAME));
        }
        // Look to see if a default profile system property has been
        // supplied. If so, use it instead.
        // TODO: Try and read default profile from some permanent storage.
        defaultProfile = DEFAULT_PROFILE;
    }

    /**
     * Load one profile.
     * @param profile Profile to load.
     * @return True if the loaded profile was the default profile.
     */
    protected boolean loadProfile(File profile) {
        boolean loadedDefault = false;
        // Ok, got the order file for this profile.
        try {
            // The directory name denotes the profile's UID and name.
            XMLSettingsHandler newSettingsHandler =
                new XMLSettingsHandler(profile);
            CrawlJobErrorHandler cjseh =
                new CrawlJobErrorHandler(Level.SEVERE);
            newSettingsHandler.
                setErrorReportingLevel(cjseh.getLevel());
            newSettingsHandler.initialize();
            addProfile(new CrawlJob(profile.getParentFile().getName(),
                newSettingsHandler, cjseh));
            loadedDefault = profile.getParentFile().getName().
                equals(DEFAULT_PROFILE);
        } catch (InvalidAttributeValueException e) {
            System.err.println("Failed to load profile '" +
                    profile.getParentFile().getName() +
                    "'. InvalidAttributeValueException.");
        }
        return loadedDefault;
    }

    /**
     * Add a new profile.
     * @param profile The new profile.
     */
    public synchronized void addProfile(CrawlJob profile) {
        profileJobs.add(profile);
    }

    public synchronized void deleteProfile(CrawlJob cj) throws IOException {
        File d = getProfilesDirectory();
        File p = new File(d, cj.getJobName());
        if (!p.exists()) {
            throw new IOException("No profile named " + cj.getJobName() +
                " at " + d.getAbsolutePath());
        }
        FileUtils.deleteDir(p);
        this.profileJobs.remove(cj);
    }

    /**
     * Returns a List of all known profiles.
     * @return A List of all known profiles.
     */
    public synchronized List<CrawlJob> getProfiles() {
        ArrayList<CrawlJob> tmp = new ArrayList<CrawlJob>(profileJobs.size());
        tmp.addAll(profileJobs);
        return tmp;
    }

    /**
     * Submit a job to the handler. The job will be scheduled for crawling. At
     * present it will not take the job's priority into consideration.
     *
     * @param job A new job for the handler.
     * @return The CrawlJob that was added, or null.
     */
    public CrawlJob addJob(CrawlJob job) {
        if (job.isProfile()) {
            return null;     // Can't crawl profiles.
        }
        job.setStatus(CrawlJob.STATUS_PENDING);
        if (job.isNew()) {
            // We're adding the new job to the pending queue.
            this.newJob = null;
            job.setNew(false);
        }
        this.pendingCrawlJobs.add(job);
        if (!isCrawling() && isRunning()) {
            // Start crawling.
            startNextJob();
        }
        return job;
    }

    /**
     * Returns the default profile. If no default profile has been set it will
     * return the first profile that was set/loaded and still exists. If no
     * profiles exist it will return null.
     * @return the default profile.
     */
    public synchronized CrawlJob getDefaultProfile() {
        if (defaultProfile != null) {
            for (CrawlJob item: profileJobs) {
                if (item.getJobName().equals(defaultProfile)) {
                    // Found it.
                    return item;
                }
            }
        }
        if (profileJobs.size() > 0) {
            return profileJobs.first();
        }
        return null;
    }

    /**
     * Set the default profile.
     * @param profile The new default profile. The following must apply to it:
     *                profile.isProfile() should return true and
     *                this.getProfiles() should contain it.
     */
    public void setDefaultProfile(CrawlJob profile) {
        defaultProfile = profile.getJobName();
        // TODO: Make changes to default profile durable across restarts.
    }

    /**
     * A List of all pending jobs.
     *
     * @return A List of all pending jobs.
     * No promises are made about the order of the list.
     */
    public List<CrawlJob> getPendingJobs() {
        ArrayList<CrawlJob> tmp
         = new ArrayList<CrawlJob>(pendingCrawlJobs.size());
        tmp.addAll(pendingCrawlJobs);
        return tmp;
    }

    /**
     * @return The job currently being crawled.
     */
    public CrawlJob getCurrentJob() {
        return currentJob;
    }

    /**
     * @return A List of all finished jobs.
     */
    public List<CrawlJob> getCompletedJobs() {
        ArrayList<CrawlJob> tmp
         = new ArrayList<CrawlJob>(completedCrawlJobs.size());
        tmp.addAll(completedCrawlJobs);
        return tmp;
    }

    /**
     * Return a job with the given UID.
     * Doesn't matter if it's pending, currently running, has finished running,
     * is new, or is a profile.
     *
     * @param jobUID The unique ID of the job.
     * @return The job with the UID, or null if no such job is found.
     */
    public CrawlJob getJob(String jobUID) {
        if (jobUID == null) {
            return null; // UID can't be null.
        }
        // First check the currently running job.
        if (currentJob != null && currentJob.getUID().equals(jobUID)) {
            return currentJob;
        } else if (newJob != null && newJob.getUID().equals(jobUID)) {
            // Then check the 'new job'.
            return newJob;
        } else {
            // Then check pending jobs.
            for (CrawlJob cj: pendingCrawlJobs) {
                if (cj.getUID().equals(jobUID)) {
                    return cj;
                }
            }

            // Next check completed jobs.
            for (CrawlJob cj: completedCrawlJobs) {
                if (cj.getUID().equals(jobUID)) {
                    return cj;
                }
            }

            // And finally check the profiles.
            for (CrawlJob cj: getProfiles()) {
                if (cj.getUID().equals(jobUID)) {
                    return cj;
                }
            }
        }
        return null; // Nothing found, return null.
    }

    /**
     * @return True if we terminated a current job (false if there was no job
     * to terminate).
     */
    public boolean terminateCurrentJob() {
        if (this.currentJob == null) {
            return false;
        }
        // requestCrawlStop will cause crawlEnding to be invoked.
        // It will handle the clean up.
        this.currentJob.stopCrawling();
        synchronized (this) {
            try {
                // Take a few moments so that the controller can change
                // states before the UI updates. The CrawlEnding event
                // will wake us if it occurs sooner than this.
                wait(3000);
            } catch (InterruptedException e) {
                // Ignore.
            }
        }
        return true;
    }

    /**
     * The specified job will be removed from the pending queue or aborted if
     * currently running.  It will be placed in the list of completed jobs with
     * appropriate status info. If the job is already in the completed list or
     * no job with the given UID is found, no action will be taken.
     *
     * @param jobUID The UID (unique ID) of the job that is to be deleted.
     */
    public void deleteJob(String jobUID) {
        // First check to see if we are deleting the current job.
        if (currentJob != null && jobUID.equals(currentJob.getUID())) {
            terminateCurrentJob();
            return; // We're not going to find another job with the same UID.
        }

        // Ok, it isn't the current job, let's check the pending jobs.
        for (Iterator<CrawlJob> it = pendingCrawlJobs.iterator();
                it.hasNext();) {
            CrawlJob cj = it.next();
            if (cj.getUID().equals(jobUID)) {
                // Found the one to delete.
                cj.setStatus(CrawlJob.STATUS_DELETED);
                it.remove();
                return; // We're not going to find another job with the same UID.
            }
        }

        // And finally the completed jobs.
        for (Iterator<CrawlJob> it = completedCrawlJobs.iterator();
                it.hasNext();) {
            CrawlJob cj = it.next();
            if (cj.getUID().equals(jobUID)) {
                // Found the one to delete.
                cj.setStatus(CrawlJob.STATUS_DELETED);
                it.remove();
                return; // No other job will have the same UID.
            }
        }
    }

    /**
     * Cause the current job to pause. If no current job is crawling this
     * method will have no effect.
     */
    public void pauseJob() {
        if (this.currentJob != null) {
            this.currentJob.pause();
        }
    }

    /**
     * Cause the current job to resume crawling if it was paused. Will have no
     * effect if the current job was not paused or if there is no current job.
     * If the current job is still waiting to pause, this will not take effect
     * until the job has actually paused, at which time it will immediately
     * resume crawling.
     */
    public void resumeJob() {
        if (this.currentJob != null) {
            this.currentJob.resume();
        }
    }

    /**
     * Cause the current job to write a checkpoint to disk. Currently
     * requires the job to already be paused.
     * @throws IllegalStateException Thrown if the crawl is not paused.
     */
    public void checkpointJob() throws IllegalStateException {
        if (this.currentJob != null) {
            this.currentJob.checkpoint();
        }
    }

    /**
     * Returns a unique job ID.
     * <p>
     * No two calls to this method (on the same instance of this class) can
     * ever return the same value. <br>
     * Currently implemented to return a timestamp. That is subject to change
     * though.
     *
     * @return A unique job ID.
     *
     * @see ArchiveUtils#TIMESTAMP17
     */
    public String getNextJobUID() {
        return ArchiveUtils.get17DigitDate();
    }

    /**
     * Creates a new job. The new job will be returned and also registered as
     * the handler's 'new job'. The new job will be based on the settings
     * provided but created in a new location on disk.
     *
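     * <p>A sketch of the two recovery modes (the job names and the checkpoint
     * name here are hypothetical, for illustration only):
     * <pre>{@code
     * // Recover from the old job's recover.gz log:
     * CrawlJob j1 = handler.newJob(oldJob, CrawlJobHandler.RECOVER_LOG,
     *     "retry-crawl", "Recovery via recover.gz", seeds, priority);
     * // Or recover from a named checkpoint instead:
     * CrawlJob j2 = handler.newJob(oldJob, "checkpoint-001",
     *     "retry-crawl", "Recovery via checkpoint", seeds, priority);
     * }</pre>
     *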
     * @param baseOn
     *            A CrawlJob (with a valid settingshandler) to use as the
     *            template for the new job.
     * @param recovery Whether to preinitialize the new job as a recovery of
     * the <code>baseOn</code> job.  The String holds RECOVER_LOG if we are to
     * do the recovery based off the recover.gz log -- see RecoveryJournal in
     * the frontier package -- or it holds the name of
     * the checkpoint we're to use recovering.
     * @param name
     *            The name of the new job.
     * @param description
     *            Description of the job.
     * @param seeds
     *            The contents of the new settings' seed file.
     * @param priority
     *            The priority of the new job.
     *
     * @return The new crawl job.
     * @throws FatalConfigurationException If a problem occurs creating the
     *             settings.
     */
    public CrawlJob newJob(CrawlJob baseOn, String recovery, String name,
            String description, String seeds, int priority)
    throws FatalConfigurationException {
        // See what the recover story is.
        File recover = null;
        try {
            if (recovery != null && recovery.length() > 0
                    && recovery.equals(RECOVER_LOG)) {
                // Then we're to do a recovery based off the RecoveryJournal
                // recover.gz log.
                File dir = baseOn.getSettingsHandler().getOrder()
                    .getSettingsDir(CrawlOrder.ATTR_LOGS_PATH);
                // Add name of recover file.  We're hardcoding it as
                // 'recover.gz'.
                recover = new File(dir, FrontierJournal.LOGNAME_RECOVER);
            } else if (recovery != null && recovery.length() > 0) {
                // Must be the name of a checkpoint to use.
                recover = new File(baseOn.getSettingsHandler().
                    getOrder().getSettingsDir(CrawlOrder.ATTR_CHECKPOINTS_PATH),
                        recovery);
            }
        } catch (AttributeNotFoundException e1) {
            throw new FatalConfigurationException(
                "AttributeNotFoundException occurred while setting up" +
                    " new job/profile " + name + "\n" + e1.getMessage());
        }

        CrawlJob cj = createNewJob(baseOn.getSettingsHandler().getOrderFile(),
            name, description, seeds, priority);

        updateRecoveryPaths(recover, cj.getSettingsHandler(), name);

        return cj;
    }

    /**
     * Creates a new job. The new job will be returned and also registered as
     * the handler's 'new job'. The new job will be based on the settings
     * provided but created in a new location on disk.
     * @param orderFile Order file to use as the template for the new job.
     * @param name The name of the new job.
     * @param description Description of the job.
     * @param seeds The contents of the new settings' seed file.
     *
     * @return The new crawl job.
     * @throws FatalConfigurationException If a problem occurs creating the
     *             settings.
     */
    public CrawlJob newJob(final File orderFile, final String name,
        final String description, final String seeds)
    throws FatalConfigurationException {
        return createNewJob(orderFile, name, description, seeds,
            CrawlJob.PRIORITY_AVERAGE);
    }

    protected void checkDirectory(File dir)
    throws FatalConfigurationException {
        if (dir == null) {
            return;
        }
        // Use OR here: canRead() alone is false both for a nonexistent and an
        // unreadable directory, matching the error message below.
        if (!dir.exists() || !dir.canRead()) {
            throw new FatalConfigurationException(dir.getAbsolutePath() +
                " does not exist or is unreadable");
        }
    }

    protected CrawlJob createNewJob(final File orderFile, final String name,
            final String description, final String seeds, final int priority)
    throws FatalConfigurationException {
        if (newJob != null) {
            // There already is a new job. Discard it.
            discardNewJob();
        }
        String UID = getNextJobUID();
        File jobDir = new File(this.jobsDir, name + "-" + UID);
        CrawlJobErrorHandler errorHandler = new CrawlJobErrorHandler();
        XMLSettingsHandler handler =
            createSettingsHandler(orderFile, name, description,
                seeds, jobDir, errorHandler, "order.xml", "seeds.txt");
        this.newJob = new CrawlJob(UID, name, handler, errorHandler, priority,
                jobDir);
        return this.newJob;
    }

    /**
     * Creates a new profile. The new profile will be returned and also
     * registered as the handler's 'new job'. The new profile will be based on
     * the settings provided but created in a new location on disk.
     *
     * @param baseOn
     *            A CrawlJob (with a valid settingshandler) to use as the
     *            template for the new profile.
     * @param name
     *            The name of the new profile.
     * @param description
     *            Description of the new profile.
     * @param seeds
     *            The contents of the new profile's seed file.
     * @return The new profile.
     * @throws FatalConfigurationException
     * @throws IOException
     */
    public CrawlJob newProfile(CrawlJob baseOn, String name, String description,
            String seeds)
    throws FatalConfigurationException, IOException {
        File profileDir = new File(getProfilesDirectory().getAbsoluteFile(),
            name);
        CrawlJobErrorHandler cjseh = new CrawlJobErrorHandler(Level.SEVERE);
        CrawlJob newProfile = new CrawlJob(name,
            createSettingsHandler(baseOn.getSettingsHandler().getOrderFile(),
                name, description, seeds, profileDir, cjseh, "order.xml",
                "seeds.txt"), cjseh);
        addProfile(newProfile);
        return newProfile;
    }

    /**
     * Creates a new settings handler based on an existing job. Basically all
     * the settings files for the 'based on' job will be copied to the
     * specified directory.
     *
     * @param orderFile Order file to base the new order file on.  Cannot be
     *            null.
     * @param name Name for the new settings.
     * @param description Description of the new settings.
     * @param seeds The contents of the new settings' seed file.
     * @param newSettingsDir
     * @param errorHandler
     * @param filename Name of the new order file.
     * @param seedfile Name of the new seeds file.
     *
     * @return The new settings handler.
     * @throws FatalConfigurationException
     *             If there are problems reading the 'based on' configuration,
     *             or writing the new configuration or its seed file.
     */
    protected XMLSettingsHandler createSettingsHandler(
        final File orderFile, final String name, final String description,
        final String seeds, final File newSettingsDir,
        final CrawlJobErrorHandler errorHandler,
        final String filename, final String seedfile)
    throws FatalConfigurationException {
        XMLSettingsHandler newHandler = null;
        try {
            newHandler = new XMLSettingsHandler(orderFile);
            if (errorHandler != null) {
                newHandler.registerValueErrorHandler(errorHandler);
            }
            newHandler.setErrorReportingLevel(errorHandler.getLevel());
            newHandler.initialize();
        } catch (InvalidAttributeValueException e2) {
            throw new FatalConfigurationException(
                "InvalidAttributeValueException occurred while creating" +
                " new settings handler for new job/profile\n" +
                e2.getMessage());
        }

        // Make sure the directory exists.
        newSettingsDir.mkdirs();

        try {
            // Set the seed file.
            ((ComplexType)newHandler.getOrder().getAttribute("scope"))
                .setAttribute(new Attribute("seedsfile", seedfile));
        } catch (AttributeNotFoundException e1) {
            throw new FatalConfigurationException(
                    "AttributeNotFoundException occurred while setting up" +
                    " new job/profile\n" + e1.getMessage());
        } catch (InvalidAttributeValueException e1) {
            throw new FatalConfigurationException(
                    "InvalidAttributeValueException occurred while setting" +
                    " up new job/profile\n" + e1.getMessage());
        } catch (MBeanException e1) {
            throw new FatalConfigurationException(
                    "MBeanException occurred while setting up new" +
                    " job/profile\n" + e1.getMessage());
        } catch (ReflectionException e1) {
            throw new FatalConfigurationException(
                    "ReflectionException occurred while setting up" +
                    " new job/profile\n" + e1.getMessage());
        }

        File newFile = new File(newSettingsDir.getAbsolutePath(), filename);

        try {
            newHandler.copySettings(newFile, (String)newHandler.getOrder()
                .getAttribute(CrawlOrder.ATTR_SETTINGS_DIRECTORY));
        } catch (IOException e3) {
            // Print stack trace to help debug issue where cannot create
            // new job from an old one that has overrides.
            e3.printStackTrace();
            throw new FatalConfigurationException(
                    "IOException occurred while writing new settings files" +
                    " for new job/profile\n" + e3.getMessage());
        } catch (AttributeNotFoundException e) {
            throw new FatalConfigurationException(
                    "AttributeNotFoundException occurred while writing new" +
                    " settings files for new job/profile\n" + e.getMessage());
        } catch (MBeanException e) {
            throw new FatalConfigurationException(
                    "MBeanException occurred while writing new settings files" +
                    " for new job/profile\n" + e.getMessage());
        } catch (ReflectionException e) {
            throw new FatalConfigurationException(
                    "ReflectionException occurred while writing new settings" +
                    " files for new job/profile\n" + e.getMessage());
        }
        CrawlerSettings orderfile = newHandler.getSettingsObject(null);

        orderfile.setName(name);
        orderfile.setDescription(description);

        if (seeds != null) {
            BufferedWriter writer = null;
            try {
                writer = new BufferedWriter(new FileWriter(newHandler
                    .getPathRelativeToWorkingDirectory(seedfile)));
                try {
                    writer.write(seeds);
                } finally {
                    writer.close();
                }
            } catch (IOException e) {
                throw new FatalConfigurationException(
                    "IOException occurred while writing seed file for new"
                        + " job/profile\n" + e.getMessage());
            }
        }
        return newHandler;
    }

    /**
     * @param recover
     *            Source to use recovering. Can be the full path to a recovery
     *            log or the full path to a checkpoint src dir.
     * @param sh
     *            Settings Handler to update.
     * @param jobName
     *            Name of this job.
     * @throws FatalConfigurationException
     */
    protected void updateRecoveryPaths(final File recover,
            final SettingsHandler sh, final String jobName)
    throws FatalConfigurationException {
        if (recover == null) {
            return;
        }
        checkDirectory(recover);
        try {
            // Set 'recover-path' to be the old job's recovery log path.
            updateRecoveryPaths(recover, sh);
        } catch (AttributeNotFoundException e1) {
            throw new FatalConfigurationException(
                    "AttributeNotFoundException occurred while setting up"
                            + " new job/profile " + jobName + "\n"
                            + e1.getMessage());
        } catch (InvalidAttributeValueException e1) {
            throw new FatalConfigurationException(
                    "InvalidAttributeValueException occurred while setting up"
                            + " new job/profile " + jobName + "\n"
                            + e1.getMessage());
        } catch (MBeanException e1) {
            throw new FatalConfigurationException(
                    "MBeanException occurred while setting up"
                            + " new job/profile " + jobName + "\n"
                            + e1.getMessage());
        } catch (ReflectionException e1) {
            throw new FatalConfigurationException(
                    "ReflectionException occurred while setting up"
                            + " new job/profile " + jobName + "\n"
                            + e1.getMessage());
        } catch (IOException e) {
            throw new FatalConfigurationException(
                    "IOException occurred while setting up new job/profile "
                            + jobName + "\n" + e.getMessage());
        }
    }
    /**
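     * Points the new job's 'recover-path' at the recovery source, then
     * ensures the new job's 'logs' and 'state' directories do not overlap
     * with the old job's by appending "-R" to their paths while they are
     * nonempty (e.g. a hypothetical <code>crawls/foo/logs</code> would
     * become <code>crawls/foo/logs-R</code>, then
     * <code>crawls/foo/logs-R-R</code>, and so on).
     *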
     * @param recover
     *            Source to use recovering. Can be the full path to a recovery
     *            log or the full path to a checkpoint src dir.
     * @param newHandler
     * @throws ReflectionException
     * @throws MBeanException
     * @throws InvalidAttributeValueException
     * @throws AttributeNotFoundException
     * @throws IOException
     */
    private void updateRecoveryPaths(final File recover,
        SettingsHandler newHandler)
    throws AttributeNotFoundException, InvalidAttributeValueException,
    MBeanException, ReflectionException, IOException {
        if (recover == null || !recover.exists()) {
            throw new IOException("Recovery src does not exist: " + recover);
        }
        newHandler.getOrder().setAttribute(
            new Attribute(CrawlOrder.ATTR_RECOVER_PATH,
                recover.getAbsolutePath()));

        // Now, ensure that 'logs' and 'state' don't overlap with the
        // previous job's files (ok for 'arcs' and 'scratch' to overlap).
        File newLogsDisk = null;
        final String RECOVERY_SUFFIX = "-R";
        while (true) {
            try {
                newLogsDisk = newHandler.getOrder().
                    getSettingsDir(CrawlOrder.ATTR_LOGS_PATH);
            } catch (AttributeNotFoundException e) {
                logger.log(Level.SEVERE, "Failed to get logs directory", e);
            }
            if (newLogsDisk.list().length > 0) {
                // 'new' directory is nonempty; rename with trailing '-R'.
                String logsPath = (String) newHandler.getOrder().
                    getAttribute(CrawlOrder.ATTR_LOGS_PATH);
                if (logsPath.endsWith("/")) {
                    logsPath = logsPath.substring(0, logsPath.length() - 1);
                }
                newHandler.getOrder().setAttribute(
                    new Attribute(CrawlOrder.ATTR_LOGS_PATH,
                        logsPath + RECOVERY_SUFFIX));
            } else {
                // Directory is suitably empty; exit loop.
                break;
            }
        }
        File newStateDisk = null;
        while (true) {
            try {
                newStateDisk = newHandler.getOrder().getSettingsDir(
                        CrawlOrder.ATTR_STATE_PATH);
            } catch (AttributeNotFoundException e) {
                logger.log(Level.SEVERE, "Failed to get state directory", e);
            }
            if (newStateDisk.list().length > 0) {
                // 'new' directory is nonempty; rename with trailing '-R'.
                String statePath = (String) newHandler.getOrder().
                    getAttribute(CrawlOrder.ATTR_STATE_PATH);
                if (statePath.endsWith("/")) {
                    statePath = statePath.substring(0, statePath.length() - 1);
                }
                newHandler.getOrder().setAttribute(
                    new Attribute(CrawlOrder.ATTR_STATE_PATH,
                        statePath + RECOVERY_SUFFIX));
            } else {
                // Directory is suitably empty; exit loop.
                break;
            }
        }
    }

    /**
     * Discard the handler's 'new job'. This will remove any files/directories
     * written to disk.
     */
    public void discardNewJob() {
        FileUtils.deleteDir(new File(newJob.getSettingsDirectory()));
    }

    /**
     * Get the handler's 'new job'.
     * @return the handler's 'new job'
     */
    public CrawlJob getNewJob() {
        return newJob;
    }

    /**
     * Is the crawler accepting crawl jobs to run?
     * @return True if the next available CrawlJob will be crawled. False
     *         otherwise.
     */
    public boolean isRunning() {
        return running;
    }

    /**
     * Is a crawl job being crawled?
     * @return True if a job is actually being crawled (even if it is paused).
     *         False if no job is being crawled.
     */
    public boolean isCrawling() {
        return this.currentJob != null;
    }

    /**
     * Allow jobs to be crawled.
     */
    public void startCrawler() {
        running = true;
        if (pendingCrawlJobs.size() > 0 && !isCrawling()) {
            // Ok, can just start the next job.
            startNextJob();
        }
    }

    /**
     * Stop future jobs from being crawled.
     *
     * This action will not affect the current job.
     */
    public void stopCrawler() {
        running = false;
    }

    /**
     * Start the next crawl job.
     *
     * If a job is already running this method will do nothing.
     */
    protected final void startNextJob() {
        synchronized (this) {
            if (startingNextJob != null) {
                try {
                    startingNextJob.join();
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    return;
                }
            }
            startingNextJob = new Thread(new Runnable() {
                public void run() {
                    startNextJobInternal();
                }
            }, "StartNextJob");
            startingNextJob.start();
        }
    }

    protected void startNextJobInternal() {
        if (pendingCrawlJobs.size() == 0 || isCrawling()) {
            // No job ready or already crawling.
            return;
        }
        this.currentJob = pendingCrawlJobs.first();
        assert pendingCrawlJobs.contains(currentJob) :
            "pendingCrawlJobs is in an illegal state";
        pendingCrawlJobs.remove(currentJob);
        try {
            this.currentJob.setupForCrawlStart();
            // This is ugly but needed so I can clear the currentJob
            // reference in the crawlEnding and update the list of completed
            // jobs.  Also, crawlEnded can start up the next job.
            this.currentJob.getController().addCrawlStatusListener(this);
            // Now, actually start.
            this.currentJob.getController().requestCrawlStart();
        } catch (InitializationException e) {
            loadJob(getStateJobFile(this.currentJob.getDirectory()));
            this.currentJob = null;
            startNextJobInternal(); // Load the next job if there is one.
        }
    }

    /**
     * Forward a 'kick' update to the current job, if any.
     */
    public void kickUpdate() {
        if (this.currentJob != null) {
            this.currentJob.kickUpdate();
        }
    }

    /**
     * Loads options from a file. Typically these are a list of available
     * modules that can be plugged into some part of the configuration.
     * For example: Processors, Frontiers, Filters, etc. Leading and trailing
     * spaces are trimmed from each line.
     *
     * <p>Options are loaded from the CLASSPATH.
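     * <p>A sketch of the expected usage (the file name below is hypothetical;
     * note that any line containing '#' anywhere is skipped, so comment lines
     * are ignored):
     * <pre>{@code
     * // Reads every "modules/Processor.options" resource on the CLASSPATH
     * // and returns the non-comment, non-blank lines.
     * ArrayList<String> options = CrawlJobHandler.loadOptions("Processor.options");
     * }</pre>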
     * @param file the name of the option file (without path!)
     * @return The option file with each option line as a separate entry in the
     *         ArrayList.
     * @throws IOException when there is trouble reading the file.
     */
    public static ArrayList<String> loadOptions(String file)
    throws IOException {
        ArrayList<String> ret = new ArrayList<String>();
        Enumeration<URL> resources =
            CrawlJob.class.getClassLoader().getResources("modules/" + file);

        boolean noFileFound = true;
        while (resources.hasMoreElements()) {
            InputStream is = resources.nextElement().openStream();
            noFileFound = false;

            String line = null;
            BufferedReader bf =
                new BufferedReader(new InputStreamReader(is), 8192);
            try {
                while ((line = bf.readLine()) != null) {
                    line = line.trim();
                    if (line.indexOf('#') < 0 && line.length() > 0) {
                        // Looks like a valid line.
                        ret.add(line);
                    }
                }
            } finally {
                bf.close();
            }
        }

        if (noFileFound) {
            throw new IOException("Failed to get " + file + " from the" +
                " CLASSPATH");
        }

        return ret;
    }

    /**
     * Returns a URIFrontierMarker for the current, paused, job. If there is no
     * current job or it is not paused, null will be returned.
     *
     * @param regexpr
     *            A regular expression that each URI must match in order to be
     *            considered 'within' the marker.
     * @param inCacheOnly
     *            Limit marker scope to 'cached' URIs.
     * @return a URIFrontierMarker for the current job.
     * @see #getPendingURIsList(FrontierMarker, int, boolean)
     * @see org.archive.crawler.framework.Frontier#getInitialMarker(String,
     *      boolean)
     * @see org.archive.crawler.framework.FrontierMarker
     */
    public FrontierMarker getInitialMarker(String regexpr,
            boolean inCacheOnly) {
        return (this.currentJob != null)?
                this.currentJob.getInitialMarker(regexpr, inCacheOnly): null;
    }

    /**
     * Returns the frontier's URI list based on the provided marker. This
     * method will return null if there is no current job or if the current
     * job is not paused. Only when there is a paused current job will this
     * method return a URI list.
     *
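     * <p>A sketch of fetching a batch of pending URIs with a marker (the
     * regular expression and batch size are hypothetical, for illustration
     * only; the job must be paused):
     * <pre>{@code
     * FrontierMarker m = handler.getInitialMarker(".*", false);
     * if (m != null) {
     *     // Fetch up to 100 matching URIs from the paused job's frontier.
     *     ArrayList batch = handler.getPendingURIsList(m, 100, false);
     * }
     * }</pre>
     *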
     * @param marker
     *            URIFrontier marker
     * @param numberOfMatches
     *            maximum number of matches to return
     * @param verbose
     *            should detailed info be provided on each URI?
     * @return the frontier's URI list based on the provided marker
     * @throws InvalidFrontierMarkerException
     *             When the marker is inconsistent with the current state of
     *             the frontier.
     * @see #getInitialMarker(String, boolean)
     * @see org.archive.crawler.framework.FrontierMarker
     */
    public ArrayList getPendingURIsList(FrontierMarker marker,
            int numberOfMatches, boolean verbose)
    throws InvalidFrontierMarkerException {
        return (this.currentJob != null)?
           this.currentJob.getPendingURIsList(marker, numberOfMatches, verbose):
           null;
    }

    /**
     * Delete any URIs from the frontier of the current (paused) job that match
     * the specified regular expression. If the current job is not paused (or
     * there is no current job) nothing will be done.
     * @param regexpr Regular expression to delete URIs by.
     * @return the number of URIs deleted
     */
    public long deleteURIsFromPending(String regexpr) {
        return deleteURIsFromPending(regexpr, null);
    }

    /**
     * Delete any URIs from the frontier of the current (paused) job that match
     * the specified regular expression. If the current job is not paused (or
     * there is no current job) nothing will be done.
     * @param uriPattern Regular expression to delete URIs by.
     * @param queuePattern Regular expression of target queues (or null for
     * all).
     * @return the number of URIs deleted
     */
    public long deleteURIsFromPending(String uriPattern, String queuePattern) {
        return (this.currentJob != null)?
                this.currentJob.deleteURIsFromPending(uriPattern, queuePattern): 0;
    }

    public String importUris(String file, String style, String force) {
        return importUris(file, style, "true".equals(force));
    }

    /**
     * @param fileOrUrl Name of file w/ seeds.
     * @param style What style of seeds -- crawl log (<code>crawlLog</code>
     * style) or recovery journal (<code>recoveryJournal</code> style), or
     * seeds file style (pass <code>default</code> style).
     * @param forceRevisit Should we revisit even if seen before?
     * @return A display string that has a count of all added.
     */
    public String importUris(final String fileOrUrl, final String style,
            final boolean forceRevisit) {
        return (this.currentJob != null)?
            this.currentJob.importUris(fileOrUrl, style, forceRevisit): null;
    }

    protected int importUris(InputStream is, String style,
            boolean forceRevisit) {
        return (this.currentJob != null)?
                this.currentJob.importUris(is, style, forceRevisit): 0;
    }

    /**
     * Schedule a URI.
     * @param uri URI to schedule.
     * @param forceFetch Should it be force-fetched?
     * @param isSeed True if seed.
     * @throws URIException
     */
    public void importUri(final String uri, final boolean forceFetch,
            final boolean isSeed)
    throws URIException {
        importUri(uri, forceFetch, isSeed, true);
    }

    /**
     * Schedule a URI.
     * @param str String that can be: 1. a UURI, 2. a snippet of a
     * crawl.log line, or 3. a snippet from a recover log.  See
     * {@link #importUris(InputStream, String, boolean)} for how it subparses
     * the lines from crawl.log and recover.log.
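     * <p>For example (a hypothetical URI, for illustration only):
     * <pre>{@code
     * // Schedule a single seed into the current job's frontier and flush.
     * handler.importUri("http://example.com/", false, true, true);
     * }</pre>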
     * @param forceFetch Should it be force-fetched?
     * @param isSeed True if seed.
     * @param isFlush If true, flush the frontier IF it implements
     * flushing.
     * @throws URIException
     */
    public void importUri(final String str, final boolean forceFetch,
            final boolean isSeed, final boolean isFlush)
    throws URIException {
        if (this.currentJob != null) {
            this.currentJob.importUri(str, forceFetch, isSeed, isFlush);
        }
    }

    /**
     * If it's a HostQueuesFrontier, it needs to be flushed for the queued
     * URIs.
     */
    protected void doFlush() {
        if (this.currentJob != null) {
            this.currentJob.flush();
        }
    }

    public void stop() {
        if (isCrawling()) {
            deleteJob(getCurrentJob().getUID());
        }
    }

    public void requestCrawlStop() {
        if (this.currentJob != null) {
            this.currentJob.stopCrawling();
        }
    }

    /**
     * Ensure the order file with the new name/description is written.
     * See '[ 1066573 ] sometimes job based-on other job uses older job name'.
     * @param newJob Newly created job.
     * @param metaname Metaname for the new job.
     * @param description Description for the new job.
     * @return <code>newJob</code>
     */
    public static CrawlJob ensureNewJobWritten(CrawlJob newJob, String metaname,
            String description) {
        XMLSettingsHandler settingsHandler = newJob.getSettingsHandler();
        CrawlerSettings orderfile = settingsHandler.getSettingsObject(null);
        orderfile.setName(metaname);
        orderfile.setDescription(description);
        settingsHandler.writeSettingsObject(orderfile);
        return newJob;
    }

    public void crawlStarted(String message) {
        // TODO Auto-generated method stub
    }

    public void crawlEnding(String sExitMessage) {
        loadJob(getStateJobFile(this.currentJob.getDirectory()));
        currentJob = null;
        synchronized (this) {
            // If the GUI terminated the job then it is waiting for this event.
            notifyAll();
        }
    }

    public void crawlEnded(String sExitMessage) {
        if (this.running) {
            startNextJob();
        }
    }

    public void crawlPausing(String statusMessage) {
        // TODO Auto-generated method stub
    }

    public void crawlPaused(String statusMessage) {
        // TODO Auto-generated method stub
    }

    public void crawlResuming(String statusMessage) {
        // TODO Auto-generated method stub
    }

    public void crawlCheckpoint(File checkpointDir) throws Exception {
        // TODO Auto-generated method stub
    }
}