View Javadoc

1   /* Heritrix
2    *
3    * $Id: Heritrix.java 5514 2007-10-12 01:40:57Z gojomo $
4    *
5    * Created on May 15, 2003
6    *
7    * Copyright (C) 2003 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  package org.archive.crawler;
26  
27  import java.io.File;
28  import java.io.FileInputStream;
29  import java.io.FileNotFoundException;
30  import java.io.FileOutputStream;
31  import java.io.IOException;
32  import java.io.InputStream;
33  import java.io.PrintStream;
34  import java.io.PrintWriter;
35  import java.net.HttpURLConnection;
36  import java.net.InetAddress;
37  import java.net.URL;
38  import java.net.URLConnection;
39  import java.net.UnknownHostException;
40  import java.util.ArrayList;
41  import java.util.Arrays;
42  import java.util.Collection;
43  import java.util.Collections;
44  import java.util.Enumeration;
45  import java.util.Hashtable;
46  import java.util.Iterator;
47  import java.util.List;
48  import java.util.Map;
49  import java.util.Properties;
50  import java.util.StringTokenizer;
51  import java.util.TimeZone;
52  import java.util.Vector;
53  import java.util.logging.Level;
54  import java.util.logging.LogManager;
55  import java.util.logging.Logger;
56  
57  import javax.management.Attribute;
58  import javax.management.AttributeList;
59  import javax.management.AttributeNotFoundException;
60  import javax.management.DynamicMBean;
61  import javax.management.InstanceAlreadyExistsException;
62  import javax.management.InstanceNotFoundException;
63  import javax.management.InvalidAttributeValueException;
64  import javax.management.MBeanInfo;
65  import javax.management.MBeanNotificationInfo;
66  import javax.management.MBeanOperationInfo;
67  import javax.management.MBeanRegistration;
68  import javax.management.MBeanRegistrationException;
69  import javax.management.MBeanServer;
70  import javax.management.MBeanServerFactory;
71  import javax.management.MalformedObjectNameException;
72  import javax.management.NotCompliantMBeanException;
73  import javax.management.ObjectName;
74  import javax.management.ReflectionException;
75  import javax.management.RuntimeOperationsException;
76  import javax.management.openmbean.CompositeData;
77  import javax.management.openmbean.CompositeDataSupport;
78  import javax.management.openmbean.CompositeType;
79  import javax.management.openmbean.OpenDataException;
80  import javax.management.openmbean.OpenMBeanAttributeInfoSupport;
81  import javax.management.openmbean.OpenMBeanConstructorInfoSupport;
82  import javax.management.openmbean.OpenMBeanInfoSupport;
83  import javax.management.openmbean.OpenMBeanOperationInfoSupport;
84  import javax.management.openmbean.OpenMBeanParameterInfo;
85  import javax.management.openmbean.OpenMBeanParameterInfoSupport;
86  import javax.management.openmbean.OpenType;
87  import javax.management.openmbean.SimpleType;
88  import javax.management.openmbean.TabularData;
89  import javax.management.openmbean.TabularDataSupport;
90  import javax.management.openmbean.TabularType;
91  import javax.naming.CompoundName;
92  import javax.naming.Context;
93  import javax.naming.NameNotFoundException;
94  import javax.naming.NamingException;
95  import javax.naming.NoInitialContextException;
96  
97  import org.apache.commons.cli.Option;
98  import org.archive.crawler.admin.CrawlJob;
99  import org.archive.crawler.admin.CrawlJobErrorHandler;
100 import org.archive.crawler.admin.CrawlJobHandler;
101 import org.archive.crawler.datamodel.CredentialStore;
102 import org.archive.crawler.datamodel.credential.Credential;
103 import org.archive.crawler.event.CrawlStatusListener;
104 import org.archive.crawler.framework.AlertManager;
105 import org.archive.crawler.framework.CrawlController;
106 import org.archive.crawler.framework.exceptions.FatalConfigurationException;
107 import org.archive.crawler.framework.exceptions.InitializationException;
108 import org.archive.crawler.selftest.SelfTestCrawlJobHandler;
109 import org.archive.crawler.settings.XMLSettingsHandler;
110 import org.archive.io.SinkHandler;
111 import org.archive.io.SinkHandlerLogRecord;
112 import org.archive.net.UURI;
113 import org.archive.util.FileUtils;
114 import org.archive.util.IoUtils;
115 import org.archive.util.JmxUtils;
116 import org.archive.util.JndiUtils;
117 import org.archive.util.PropertyUtils;
118 import org.archive.util.TextUtils;
119 
120 import sun.net.www.protocol.file.FileURLConnection;
121 
122 
123 /***
124  * Main class for Heritrix crawler.
125  *
126  * Heritrix is usually launched by a shell script that backgrounds heritrix
127  * and redirects all stdout and stderr emitted by heritrix to a log file.  So
128  * that startup messages emitted subsequent to the redirection of stdout and
129  * stderr show on the console, this class prints usage or startup output
130  * such as where the web UI can be found, etc., to a STARTLOG that the shell
131  * script is waiting on.  As soon as the shell script sees output in this file,
132  * it prints its content and breaks out of its wait.
133  * See ${HERITRIX_HOME}/bin/heritrix.
134  * 
135  * <p>Heritrix can also be embedded or launched by webapp initialization or
136  * by JMX bootstrapping.  So far I count 4 methods of instantiation:
137  * <ol>
138  * <li>From this class's main -- the method usually used;</li>
139  * <li>From the Heritrix UI (The local-instances.jsp) page;</li>
140  * <li>A creation by a JMX agent at the behest of a remote JMX client; and</li>
141  * <li>A container such as tomcat or jboss.</li>
142  * </ol>
143  *
144  * @author gojomo
145  * @author Kristinn Sigurdsson
146  * @author Stack
147  */
148 public class Heritrix implements DynamicMBean, MBeanRegistration {
149     /***
150      * Heritrix logging instance.
151      */
152     private static final Logger logger =
153         Logger.getLogger(Heritrix.class.getName());
154     
155     private static final File TMPDIR =
156         new File(System.getProperty("java.io.tmpdir", "/tmp"));
157 
158     /***
159      * Name of the heritrix properties file.
160      */
161     private static final String PROPERTIES = "heritrix.properties";
162 
163     /***
164      * Name of the key to use specifying alternate heritrix properties on
165      * command line.
166      */
167     private static final String PROPERTIES_KEY = PROPERTIES;
168     
169     /***
170      * Prefix used on our properties we'll add to the System.properties list.
171      */
172     private static final String HERITRIX_PROPERTIES_PREFIX = "heritrix.";
173 
174     /***
175      * Prefix used on other properties we'll add to the System.properties 
176      * list (after stripping this prefix). 
177      */
178     private static final String SYSTEM_PREFIX = "system.";
179 
180     /***
181      * Instance of web server if one was started.
182      */
183     private static SimpleHttpServer httpServer = null;
184 
185     /***
186      * CrawlJob handler. Manages multiple crawl jobs at runtime.
187      */
188     private CrawlJobHandler jobHandler = null;
189 
190     /***
191      * Heritrix start log file.
192      *
193      * This file contains standard out produced by this main class for startup
194      * only.  Used by heritrix shell script.  Name here MUST match that in the
195      * <code>bin/heritrix</code> shell script.  This is a DEPENDENCY the shell
196      * wrapper has on this here java heritrix.
197      */
198     private static final String STARTLOG = "heritrix_dmesg.log";
199 
200     /***
201      * Default encoding.
202      * 
203      * Used for content when fetching if none specified.
204      */
205 	public static final String DEFAULT_ENCODING = "ISO-8859-1";
206 
207     /***
208      * Heritrix stderr/stdout log file.
209      *
210      * This file should have nothing in it except messages over which we have
211      * no control (JVM stacktrace, 3rd-party lib emissions).  The wrapper
212      * startup script directs stderr/stdout here. This is an INTERDEPENDENCY
213      * this program has with the wrapper shell script.  Shell can actually
214      * pass us an alternate to use for this file.
215      */
216     private static String DEFAULT_HERITRIX_OUT = "heritrix_out.log";
217 
218     /***
219      * Where to write this classes startup output.
220      * 
221      * This out should only be used if Heritrix is being run from the
222      * command-line.
223      */
224     private static PrintWriter out = null;
225 
226     /***
227      * The org.archive package
228      */
229     private static final String ARCHIVE_PACKAGE = "org.archive.";
230 
231     /***
232      * The crawler package.
233      */
234 	private static final String CRAWLER_PACKAGE = Heritrix.class.getName().
235         substring(0, Heritrix.class.getName().lastIndexOf('.'));
236     
237     /***
238      * The root context for a webapp.
239      */
240     private static final String ROOT_CONTEXT = "/";
241 
242     /***
243      * Set to true if application is started from command line.
244      */
245     private static boolean commandLine = false;
246     
247     /***
248      * True if container initialization has been run.
249      */
250     private static boolean containerInitialized = false;
251     
252     /***
253      * True if properties have been loaded.
254      */
255     private static boolean propertiesLoaded = false;
256     
257     private static final String JAR_SUFFIX = ".jar";
258     
259     private AlertManager alertManager;
260 
261     /***
262      * The context of the GUI webapp.  Default is root.
263      */
264     private static String adminContext = ROOT_CONTEXT;
265     
266     /***
267      * True if we're to put up a GUI.
268      * Cmdline processing can override.
269      */
270     private static boolean gui =
271         !PropertyUtils.getBooleanProperty("heritrix.cmdline.nowui");
272     
273     /***
274      * Port to put the GUI up on.
275      * Cmdline processing can override.
276      */
277     private static int guiPort = SimpleHttpServer.DEFAULT_PORT;
278 
279     
280     /***
281      * A collection containing only localhost.  Used as default value
282      * for guiHosts, and passed to SimpleHttpServer when doing selftest.
283      */
284     final private static Collection<String> LOCALHOST_ONLY =
285      Collections.unmodifiableList(Arrays.asList(new String[] { "127.0.0.1" }));
286 
287     
288     /***
289      * Hosts to bind the GUI webserver to.
290      * By default, only contains localhost.
291      * Set to an empty collection to indicate that all available network
292      * interfaces should be used for the webserver.
293      */
294     private static Collection<String> guiHosts = LOCALHOST_ONLY;
295     
296     
297     /***
298      * Web UI server, realm, context name.
299      */
300     private static String ADMIN = "admin";
301     
302     // OpenMBean support.
303     /***
304      * The MBean server we're registered with (May be null).
305      */
306     private MBeanServer mbeanServer = null;
307     
308     /***
309      * MBean name we were registered as.
310      */
311     private ObjectName mbeanName = null;
312     
313     /***
314      * Keep reference to all instances of Heritrix.
315      * Used by the UI to figure which of the local Heritrice it should
316      * be going against and to figure what to shutdown on the way out (If
317      * there was always a JMX Agent, we wouldn't need to keep this list.  We
318      * could always ask the JMX Agent for all instances. UPDATE: True we could
319      * always ask the JMX Agent but we might keep around this local reference
320      * because it will allow faster, less awkward -- think of marshalling the args
321      * for JMX invoke operation -- access to local Heritrix instances.  A new
322      * usage for this instances Map is in CrawlJob#preRegister to find the hosting
323      * Heritrix instance).
324      */
325     private static Map<String,Heritrix> instances
326      = new Hashtable<String,Heritrix>();
327     
328     private OpenMBeanInfoSupport openMBeanInfo;
329     private final static String STATUS_ATTR = "Status";
330     private final static String VERSION_ATTR = "Version";
331     private final static String ISRUNNING_ATTR = "IsRunning";
332     private final static String ISCRAWLING_ATTR = "IsCrawling";
333     private final static String ALERTCOUNT_ATTR = "AlertCount";
334     private final static String NEWALERTCOUNT_ATTR = "NewAlertCount";
335     private final static String CURRENTJOB_ATTR = "CurrentJob";
336     private final static List ATTRIBUTE_LIST;
337     static {
338         ATTRIBUTE_LIST = Arrays.asList(new String [] {STATUS_ATTR,
339             VERSION_ATTR, ISRUNNING_ATTR, ISCRAWLING_ATTR,
340             ALERTCOUNT_ATTR, NEWALERTCOUNT_ATTR, CURRENTJOB_ATTR});
341     }
342     
343     private final static String START_OPER = "start";
344     private final static String STOP_OPER = "stop";
345     private final static String DESTROY_OPER = "destroy";
346     private final static String INTERRUPT_OPER = "interrupt";
347     private final static String START_CRAWLING_OPER = "startCrawling";
348     private final static String STOP_CRAWLING_OPER = "stopCrawling";
349     private final static String ADD_CRAWL_JOB_OPER = "addJob";
350     private final static String TERMINATE_CRAWL_JOB_OPER =
351         "terminateCurrentJob";
352     private final static String DELETE_CRAWL_JOB_OPER = "deleteJob";
353     private final static String ALERT_OPER = "alert";
354     private final static String ADD_CRAWL_JOB_BASEDON_OPER = "addJobBasedon";
355     private final static String PENDING_JOBS_OPER = "pendingJobs";
356     private final static String COMPLETED_JOBS_OPER = "completedJobs";
357     private final static String CRAWLEND_REPORT_OPER = "crawlendReport";
358     private final static String SHUTDOWN_OPER = "shutdown";
359     private final static String LOG_OPER = "log";
360     private final static String REBIND_JNDI_OPER = "rebindJNDI";
361     private final static List OPERATION_LIST;
362     static {
363         OPERATION_LIST = Arrays.asList(new String [] {START_OPER, STOP_OPER,
364             INTERRUPT_OPER, START_CRAWLING_OPER, STOP_CRAWLING_OPER,
365             ADD_CRAWL_JOB_OPER, ADD_CRAWL_JOB_BASEDON_OPER,
366             DELETE_CRAWL_JOB_OPER, ALERT_OPER, PENDING_JOBS_OPER,
367             COMPLETED_JOBS_OPER, CRAWLEND_REPORT_OPER, SHUTDOWN_OPER,
368             LOG_OPER, DESTROY_OPER, TERMINATE_CRAWL_JOB_OPER,
369             REBIND_JNDI_OPER});
370     }
371     private CompositeType jobCompositeType = null;
372     private TabularType jobsTabularType = null;
373     private static final String [] JOB_KEYS =
374         new String [] {"uid", "name", "status"};
375 
376     private static String adminUsername;
377 
378     private static String adminPassword;
379     
380     /***
381      * Constructor.
382      * Does not register the created instance with JMX.  Assumed this
383      * constructor is used by such as JMX agent creating an instance of
384      * Heritrix at the commmand of a remote client (In this case Heritrix will
385      * be registered by the invoking agent).
386      * @throws IOException
387      */
388     public Heritrix() throws IOException {
389         this(null, false);
390     }
391     
392     public Heritrix(final boolean jmxregister) throws IOException {
393         this(null, jmxregister);
394     }
395     
396     /***
397      * Constructor.
398      * @param name If null, we bring up the default Heritrix instance.
399      * @param jmxregister True if we are to register this instance with JMX
400      * agent.
401      * @throws IOException
402      */
403     public Heritrix(final String name, final boolean jmxregister)
404     throws IOException {
405         this(name, jmxregister, new CrawlJobHandler(getJobsdir()));
406     }
407     
408     /***
409      * Constructor.
410      * @param name If null, we bring up the default Heritrix instance.
411      * @param jmxregister True if we are to register this instance with JMX
412      * agent.
413      * @param cjh CrawlJobHandler to use.
414      * @throws IOException
415      */
416     public Heritrix(final String name, final boolean jmxregister,
417             final CrawlJobHandler cjh)
418     throws IOException {
419         super();
420         containerInitialization();
421         this.jobHandler = cjh;
422         this.openMBeanInfo = buildMBeanInfo();
423         // Set up the alerting system.  SinkHandler is also a global so will
424         // catch alerts for all running Heritrix instances.  Will need to
425         // address (Add name of instance that threw the alert to SinkRecord?).
426         final SinkHandler sinkHandler = SinkHandler.getInstance();
427         if (sinkHandler == null) {
428             throw new NullPointerException("SinkHandler not found.");
429         }
430         // Adapt the alerting system to use SinkHandler.
431         this.alertManager = new AlertManager() {
432             public void add(SinkHandlerLogRecord record) {
433                 sinkHandler.publish(record);
434             }
435 
436             public Vector getAll() {
437                 return sinkHandler.getAll();
438             }
439 
440             public Vector getNewAll() {
441                 return sinkHandler.getAllUnread();
442             }
443 
444             public SinkHandlerLogRecord get(String alertID) {
445                 return sinkHandler.get(Long.parseLong(alertID));
446             }
447             
448             public int getCount() {
449                 return sinkHandler.getCount();
450             }
451 
452             public int getNewCount() {
453                 return sinkHandler.getUnreadCount();
454             }
455 
456             public void remove(String alertID) {
457                 sinkHandler.remove(Long.parseLong(alertID));
458             }
459 
460             public void read(String alertID) {
461                 sinkHandler.read(Long.parseLong(alertID));
462             }
463         };
464         
465         try {
466             Heritrix.registerHeritrix(this, name, jmxregister);
467         } catch (InstanceAlreadyExistsException e) {
468             throw new RuntimeException(e);
469         } catch (MBeanRegistrationException e) {
470             throw new RuntimeException(e);
471         } catch (NotCompliantMBeanException e) {
472             throw new RuntimeException(e);
473         } catch (MalformedObjectNameException e) {
474             throw new RuntimeException(e);
475         }
476     }
477     
478     /***
479      * Run setup tasks for this 'container'. Idempotent.
480      * 
481      * @throws IOException
482      */
483     protected static void containerInitialization() throws IOException {
484         if (Heritrix.containerInitialized) {
485             return;
486         }
487         Heritrix.containerInitialized = true;
488         // Load up the properties.  This invocation adds heritrix properties
489         // to system properties so all available via System.getProperty.
490         // Note, loadProperties and patchLogging have global effects.  May be an
491         // issue if we're running inside a container such as tomcat or jboss.
492         Heritrix.loadProperties();
493         Heritrix.patchLogging();
494         Heritrix.configureTrustStore();
495         // Will run on SIGTERM but not on SIGKILL, unfortunately.
496         // Otherwise, ensures we cleanup after ourselves (Deregister from
497         // JMX and JNDI).
498         Runtime.getRuntime().addShutdownHook(
499             Heritrix.getShutdownThread(false, 0, "Heritrix shutdown hook"));
500         // Register this heritrix 'container' though we may be inside another
501         // tomcat or jboss container.
502         try {
503             registerContainerJndi();
504         } catch (Exception e) {
505             logger.log(Level.WARNING, "Failed jndi container registration.", e);
506         }
507     }
508     
509     /***
510      * Do inverse of construction. Used by anyone who does a 'new Heritrix' when
511      * they want to cleanup the instance.
512      * Of note, there may be Heritrix threads still hanging around after the
513      * call to destroy completes.  They'll eventually go down after they've
514      * finished their cleanup routines.  In particular, if you are watching
515      * Heritrix via JMX, you can see the Heritrix instance JMX bean unregister
516      * ahead of the CrawlJob JMX bean that its hosting.
517      */
518     public void destroy() {
519         stop();
520         try {
521             Heritrix.unregisterHeritrix(this);
522         } catch (InstanceNotFoundException e) {
523             e.printStackTrace();
524         } catch (MBeanRegistrationException e) {
525             e.printStackTrace();
526         } catch (NullPointerException e) {
527             e.printStackTrace();
528         }
529         this.jobHandler = null;
530         this.openMBeanInfo = null;
531     }
532     
533     /***
534      * Launch program.
535      * Optionally will launch a web server to host UI.  Will also register
536      * Heritrix MBean with first found JMX Agent (Usually the 1.5.0 JVM
537      * Agent).
538      * 
539      * @param args Command line arguments.
540      * @throws Exception
541      */
542     public static void main(String[] args)
543     throws Exception {
544         Heritrix.commandLine = true;
545         
546         // Set timezone here.  Would be problematic doing it if we're running
547         // inside in a container.
548         TimeZone.setDefault(TimeZone.getTimeZone("GMT"));
549         
550         File startLog = new File(getHeritrixHome(), STARTLOG);
551         Heritrix.out = new PrintWriter(isDevelopment()? 
552             System.out: new PrintStream(new FileOutputStream(startLog)));
553         
554         try {
555             containerInitialization();
556             String status = doCmdLineArgs(args);
557             if (status != null) {
558                 Heritrix.out.println(status);
559             }
560         }
561 
562         catch(Exception e) {
563             // Show any exceptions in STARTLOG.
564             e.printStackTrace(Heritrix.out);
565             throw e;
566         }
567 
568         finally {
569             // If not development, close the file that signals the wrapper
570             // script that we've started.  Otherwise, just flush it; if in
571             // development, the output is probably a console.
572             if (!isDevelopment()) {
573                 if (Heritrix.out != null) {
574                     Heritrix.out.close();
575                 }
576                 System.out.println("Heritrix version: " +
577                         Heritrix.getVersion());
578             } else {
579                 if (Heritrix.out != null) {
580                     Heritrix.out.flush();
581                 }
582             }
583         }
584     }
585     
586     protected static String doCmdLineArgs(final String [] args)
587     throws Exception {
588         // Get defaults for commandline arguments from the properties file.
589         String tmpStr = PropertyUtils.
590             getPropertyOrNull("heritrix.context");
591         if (tmpStr != null)  {
592             Heritrix.adminContext = tmpStr;
593         }
594         tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.port");
595         if (tmpStr != null) {
596             Heritrix.guiPort = Integer.parseInt(tmpStr);
597         }
598         tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.admin");
599         String adminLoginPassword = (tmpStr == null)? "": tmpStr;
600         String crawlOrderFile =
601             PropertyUtils.getPropertyOrNull("heritrix.cmdline.order");
602         tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.run");
603         boolean runMode =
604             PropertyUtils.getBooleanProperty("heritrix.cmdline.run");
605         boolean selfTest = false;
606         String selfTestName = null;
607         CommandLineParser clp = new CommandLineParser(args, Heritrix.out,
608             Heritrix.getVersion());
609         List arguments = clp.getCommandLineArguments();
610         Option [] options = clp.getCommandLineOptions();
611 
612         // Check passed argument.  Only one argument, the ORDER_FILE is allowed.
613         // If one argument, make sure exists and xml suffix.
614         if (arguments.size() > 1) {
615             clp.usage(1);
616         } else if (arguments.size() == 1) {
617             crawlOrderFile = (String)arguments.get(0);
618             if (!(new File(crawlOrderFile).exists())) {
619                 clp.usage("ORDER.XML <" + crawlOrderFile +
620                     "> specified does not exist.", 1);
621             }
622             // Must end with '.xml'
623             if (crawlOrderFile.length() > 4 &&
624                     !crawlOrderFile.substring(crawlOrderFile.length() - 4).
625                         equalsIgnoreCase(".xml")) {
626                 clp.usage("ORDER.XML <" + crawlOrderFile +
627                     "> does not have required '.xml' suffix.", 1);
628             }
629         }
630 
631         // Now look at options passed.
632         for (int i = 0; i < options.length; i++) {
633             switch(options[i].getId()) {
634                 case 'h':
635                     clp.usage();
636                     break;
637 
638                 case 'a':
639                     adminLoginPassword = options[i].getValue();
640                     break;
641 
642                 case 'n':
643                     if (crawlOrderFile == null) {
644                         clp.usage("You must specify an ORDER_FILE with" +
645                             " '--nowui' option.", 1);
646                     }
647                     Heritrix.gui = false;
648                     break;
649                 
650                 case 'b':
651                     Heritrix.guiHosts = parseHosts(options[i].getValue());
652                     break;
653 
654                 case 'p':
655                     try {
656                         Heritrix.guiPort =
657                             Integer.parseInt(options[i].getValue());
658                     } catch (NumberFormatException e) {
659                         clp.usage("Failed parse of port number: " +
660                             options[i].getValue(), 1);
661                     }
662                     if (Heritrix.guiPort <= 0) {
663                         clp.usage("Nonsensical port number: " +
664                             options[i].getValue(), 1);
665                     }
666                     break;
667 
668                 case 'r':
669                     runMode = true;
670                     break;
671 
672                 case 's':
673                     selfTestName = options[i].getValue();
674                     selfTest = true;
675                     break;
676 
677                 default:
678                     assert false: options[i].getId();
679             }
680         }
681 
682         // Ok, we should now have everything to launch the program.
683         String status = null;
684         if (selfTest) {
685             // If more than just '--selftest' and '--port' passed, then
686             // there is confusion on what is being asked of us.  Print usage
687             // rather than proceed.
688             for (int i = 0; i < options.length; i++) {
689                 if (options[i].getId() != 'p' && options[i].getId() != 's') {
690                     clp.usage(1);
691                 }
692             }
693 
694             if (arguments.size() > 0) {
695                 // No arguments accepted by selftest.
696                 clp.usage(1);
697             }
698             status = selftest(selfTestName, Heritrix.guiPort);
699         } else {
700 			if (!isValidLoginPasswordString(adminLoginPassword)) {
701 				clp.usage("Invalid admin login:password value, or none "
702 						+ "specified. ", 1);
703 			}
704 			
705 			if (!Heritrix.gui) {
706 				if (options.length > 1) {
707 					// If more than just '--nowui' passed, then there is
708 					// confusion on what is being asked of us. Print usage
709 					// rather than proceed.
710 					clp.usage(1);
711 				}
712 				Heritrix h = new Heritrix(true);
713 				status = h.doOneCrawl(crawlOrderFile);
714 			} else {
715 				status = startEmbeddedWebserver(
716                         Heritrix.guiHosts, Heritrix.guiPort,
717 						adminLoginPassword);
718 				Heritrix h = new Heritrix(true);
719 
720 				String tmp = h.launch(crawlOrderFile, runMode);
721 				if (tmp != null) {
722 					status += ('\n' + tmp);
723 				}
724 			}
725 		}
726         return status;
727     }
728     
729     /***
730 	 * @return The file we dump stdout and stderr into.
731 	 */
732     public static String getHeritrixOut() {
733         String tmp = System.getProperty("heritrix.out");
734         if (tmp == null || tmp.length() == 0) {
735             tmp = Heritrix.DEFAULT_HERITRIX_OUT;
736         }
737         return tmp;
738     }
739 
740     /***
741      * Exploit <code>-Dheritrix.home</code> if available to us.
742      * Is current working dir if no heritrix.home property supplied.
743      * @return Heritrix home directory.
744      * @throws IOException
745      */
746     protected static File getHeritrixHome()
747     throws IOException {
748         File heritrixHome = null;
749         String home = System.getProperty("heritrix.home");
750         if (home != null && home.length() > 0) {
751             heritrixHome = new File(home);
752             if (!heritrixHome.exists()) {
753                 throw new IOException("HERITRIX_HOME <" + home +
754                     "> does not exist.");
755             }
756         } else {
757             heritrixHome = new File(new File("").getAbsolutePath());
758         }
759         return heritrixHome;
760     }
761     
762     /***
763      * @return The directory into which we put jobs.  If the system property
764      * 'heritrix.jobsdir' is set, we will use its value in place of the default
765      * 'jobs' directory in the current working directory.
766      * @throws IOException
767      */
768     public static File getJobsdir() throws IOException {
769         Heritrix.loadProperties(); // if called in constructor
770         String jobsdirStr = System.getProperty("heritrix.jobsdir", "jobs");
771         File jobsdir = new File(jobsdirStr);
772         return (jobsdir.isAbsolute())?
773             jobsdir:
774             new File(getHeritrixHome(), jobsdirStr);
775     }
776     
777     /***
778      * Get and check for existence of expected subdir.
779      *
780      * If development flag set, then look for dir under src dir.
781      *
782      * @param subdirName Dir to look for.
783      * @return The extant subdir.  Otherwise null if we're running
784      * in a webapp context where there is no conf directory available.
785      * @throws IOException if unable to find expected subdir.
786      */
787     protected static File getSubDir(String subdirName)
788     throws IOException {
789         return getSubDir(subdirName, true);
790     }
791     
792     /***
793      * Get and optionally check for existence of subdir.
794      *
795      * If development flag set, then look for dir under src dir.
796      *
797      * @param subdirName Dir to look for.
798      * @param fail True if we are to fail if directory does not
799      * exist; false if we are to return false if the directory does not exist.
800      * @return The extant subdir.  Otherwise null if we're running
801      * in a webapp context where there is no subdir directory available.
802      * @throws IOException if unable to find expected subdir.
803      */
804     protected static File getSubDir(String subdirName, boolean fail)
805     throws IOException {
806         String path = isDevelopment()?
807             "src" + File.separator + subdirName:
808             subdirName;
809         File dir = new File(getHeritrixHome(), path);
810         if (!dir.exists()) {
811             if (fail) {
812                 throw new IOException("Cannot find subdir: " + subdirName);
813             }
814             dir = null;
815         }
816         return dir;
817     }
818     
819     /***
820      * Test string is valid login/password string.
821      *
822      * A valid login/password string has the login and password compounded
823      * w/ a ':' delimiter.
824      *
825      * @param str String to test.
826      * @return True if valid password/login string.
827      */
828     protected static boolean isValidLoginPasswordString(String str) {
829         boolean isValid = false;
830         StringTokenizer tokenizer = new StringTokenizer(str,  ":");
831         if (tokenizer.countTokens() == 2) {
832             String login = ((String)tokenizer.nextElement()).trim();
833             String password = ((String)tokenizer.nextElement()).trim();
834             if (login.length() > 0 && password.length() > 0) {
835                 isValid = true;
836             }
837         }
838         return isValid;
839     }
840 
841     protected static boolean isDevelopment() {
842         return System.getProperty("heritrix.development") != null;
843     }
844 
845     /***
846      * Load the heritrix.properties file.
847      * 
848      * Adds any property that starts with
849      * <code>HERITRIX_PROPERTIES_PREFIX</code>
850      * or <code>ARCHIVE_PACKAGE</code>
851      * into system properties (except logging '.level' directives).
852      * @return Loaded properties.
853      * @throws IOException
854      */
855     protected static Properties loadProperties()
856     throws IOException {
857         if (Heritrix.propertiesLoaded) {
858             return System.getProperties();
859         }
860         Heritrix.propertiesLoaded = true;
861             
862         Properties properties = new Properties();
863         properties.load(getPropertiesInputStream());
864         
865         // Any property that begins with ARCHIVE_PACKAGE, make it
866         // into a system property. While iterating, check to see if anything
867         // defined on command-line, and if so, it overrules whats in
868         // heritrix.properties.
869         for (Enumeration e = properties.keys(); e.hasMoreElements();) {
870             String key = ((String)e.nextElement()).trim();
871         	if (key.startsWith(ARCHIVE_PACKAGE) ||
872                     key.startsWith(HERITRIX_PROPERTIES_PREFIX)) {
873                 // Don't add the heritrix.properties entries that are
874                 // changing the logging level of particular classes.
875                 String value = properties.getProperty(key).trim();
876                 if (key.indexOf(".level") < 0) {
877                     copyToSystemProperty(key, value);
878                 }
879             }  else if (key.startsWith(SYSTEM_PREFIX)) {
880                 String value = properties.getProperty(key).trim();
881                 copyToSystemProperty(key.substring(SYSTEM_PREFIX.length()), value); 
882             }
883         }
884         return properties;
885     }
886 
887     /***
888      * Copy the given key-value into System properties, as long as there
889      * is no existing value. 
890      * @param key property key 
891      * @param value property value
892      */
893     protected static void copyToSystemProperty(String key, String value) {
894         if (System.getProperty(key) == null ||
895             System.getProperty(key).length() == 0) {
896             System.setProperty(key, value);
897         }
898     }
899 
900     protected static InputStream getPropertiesInputStream()
901     throws IOException {
902         File file = null;
903         // Look to see if properties have been passed on the cmd-line.
904         String alternateProperties = System.getProperty(PROPERTIES_KEY);
905         if (alternateProperties != null && alternateProperties.length() > 0) {
906             file = new File(alternateProperties);
907         }
908         // Get properties from conf directory if one available.
909         if ((file == null || !file.exists()) && getConfdir(false) != null) {
910             file = new File(getConfdir(), PROPERTIES);
911             if (!file.exists()) {
912                 // If no properties file in the conf dir, set file back to
913                 // null so we go looking for heritrix.properties on classpath.
914                 file = null;
915             }
916         }
917         // If not on the command-line, there is no conf dir. Then get the
918         // properties from the CLASSPATH (Classpath file separator is always
919         // '/', whatever the platform.
920         InputStream is = (file != null)?
921             new FileInputStream(file):
922             Heritrix.class.getResourceAsStream("/" + PROPERTIES_KEY);
923         if (is == null) {
924             throw new IOException("Failed to load properties file from" +
925                 " filesystem or from classpath.");
926         }
927         return is;
928     }
929 
930     /***
931      * If the user hasn't altered the default logging parameters, tighten them
932      * up somewhat: some of our libraries are way too verbose at the INFO or
933      * WARNING levels.
934      * 
935      * This might be a problem running inside in someone else's
936      * container.  Container's seem to prefer commons logging so we
937      * ain't messing them doing the below.
938      *
939      * @throws IOException
940      * @throws SecurityException
941      */
942     protected static void patchLogging()
943     throws SecurityException, IOException {
944         if (System.getProperty("java.util.logging.config.class") != null) {
945             return;
946         }
947 
948         if (System.getProperty("java.util.logging.config.file") != null) {
949             return;
950         }
951 
952         // No user-set logging properties established; use defaults
953         // from distribution-packaged 'heritrix.properties'.
954         LogManager.getLogManager().
955             readConfiguration(getPropertiesInputStream());
956     }
957 
958     /***
959      * Configure our trust store.
960      *
961      * If system property is defined, then use it for our truststore.  Otherwise
962      * use the heritrix truststore under conf directory if it exists.
963      * 
964      * <p>If we're not launched from the command-line, we will not be able
965      * to find our truststore.  The truststore is nor normally used so rare
966      * should this be a problem (In case where we don't use find our trust
967      * store, we'll use the 'default' -- either the JVMs or the containers).
968      */
969     protected static void configureTrustStore() {
970         // Below must be defined in jsse somewhere but can' find it.
971         final String TRUSTSTORE_KEY = "javax.net.ssl.trustStore";
972         String value = System.getProperty(TRUSTSTORE_KEY);
973         File confdir = null;
974         try {
975             confdir = getConfdir(false);
976         } catch (IOException e) {
977             logger.log(Level.WARNING, "Failed to get confdir.", e);
978         }
979         if ((value == null || value.length() <= 0) && confdir != null) {
980             // Use the heritrix store if it exists on disk.
981             File heritrixStore = new File(confdir, "heritrix.cacerts");
982             if(heritrixStore.exists()) {
983                 value = heritrixStore.getAbsolutePath();
984             }
985         }
986 
987         if (value != null && value.length() > 0) {
988             System.setProperty(TRUSTSTORE_KEY, value);
989         }
990     }
991 
992     /***
993      * Run the selftest
994      *
995      * @param oneSelfTestName Name of a test if we are to run one only rather
996      * than the default running all tests.
997      * @param port Port number to use for web UI.
998      *
999      * @exception Exception
1000      * @return Status of how selftest startup went.
1001      */
1002     protected static String selftest(final String oneSelfTestName,
1003             final int port)
1004         throws Exception {
1005         // Put up the webserver w/ the root and selftest webapps only.
1006         final String SELFTEST = "selftest";
1007         Heritrix.httpServer = new SimpleHttpServer(SELFTEST,
1008             Heritrix.adminContext, LOCALHOST_ONLY, port, true);
1009         // Set up digest auth for a section of the server so selftest can run
1010         // auth tests.  Looks like can only set one login realm going by the
1011         // web.xml dtd.  Otherwise, would be nice to selftest basic and digest.
1012         // Have login, password and role all be SELFTEST.  Must match what is
1013         // in the selftest order.xml file.
1014         Heritrix.httpServer.setAuthentication(SELFTEST, Heritrix.adminContext,
1015             SELFTEST, SELFTEST, SELFTEST);
1016         Heritrix.httpServer.startServer();
1017         // Get the order file from the CLASSPATH unless we're running in dev
1018         // environment.
1019         File selftestDir = (isDevelopment())?
1020             new File(getConfdir(), SELFTEST):
1021             new File(File.separator + SELFTEST);
1022         File crawlOrderFile = new File(selftestDir, "order.xml");
1023         // Create a job based off the selftest order file.  Then use this as
1024         // a template to pass jobHandler.newJob().  Doing this gets our
1025         // selftest output to show under the jobs directory.
1026         // Pass as a seed a pointer to the webserver we just put up.
1027         final String ROOTURI = "127.0.0.1:" + Integer.toString(port);
1028         String selfTestUrl = "http://" + ROOTURI + '/';
1029         if (oneSelfTestName != null && oneSelfTestName.length() > 0) {
1030             selfTestUrl += (oneSelfTestName + '/');
1031         }
1032         CrawlJobHandler cjh = new SelfTestCrawlJobHandler(getJobsdir(),
1033                 oneSelfTestName, selfTestUrl);
1034         Heritrix h = new Heritrix("Selftest", true, cjh);
1035         CrawlJob job = createCrawlJob(cjh, crawlOrderFile, "Template");
1036         job = h.getJobHandler().newJob(job, null, SELFTEST,
1037             "Integration self test", selfTestUrl, CrawlJob.PRIORITY_AVERAGE);
1038         h.getJobHandler().addJob(job);
1039         // Before we start, need to change some items in the settings file.
1040         CredentialStore cs = (CredentialStore)job.getSettingsHandler().
1041             getOrder().getAttribute(CredentialStore.ATTR_NAME);
1042         for (Iterator i = cs.iterator(null); i.hasNext();) {
1043             ((Credential)i.next()).setCredentialDomain(null, ROOTURI);
1044         }
1045         h.getJobHandler().startCrawler();
1046         StringBuffer buffer = new StringBuffer();
1047         buffer.append("Heritrix " + Heritrix.getVersion() +
1048                 " selftest started.");
1049         buffer.append("\nSelftest first crawls " + selfTestUrl +
1050             " and then runs an analysis.");
1051         buffer.append("\nResult of analysis printed to " +
1052             getHeritrixOut() + " when done.");
1053         buffer.append("\nSelftest job directory for logs and arcs:\n" +
1054             job.getDirectory().getAbsolutePath());
1055         return buffer.toString();
1056     }
1057 
1058     /***
1059      * Launch the crawler without a web UI and run the passed crawl only.
1060      * 
1061      * Specialized version of {@link #launch()}.
1062      *
1063      * @param crawlOrderFile The crawl order to crawl.
1064      * @throws InitializationException
1065      * @throws InvalidAttributeValueException
1066      * @return Status string.
1067      */
1068     protected String doOneCrawl(String crawlOrderFile)
1069     throws InitializationException, InvalidAttributeValueException {
1070         return doOneCrawl(crawlOrderFile, null);
1071     }
1072     
1073     /***
1074      * Launch the crawler without a web UI and run passed crawl only.
1075      * 
1076      * Specialized version of {@link #launch()}.
1077      *
1078      * @param crawlOrderFile The crawl order to crawl.
1079      * @param listener Register this crawl status listener before starting
1080      * crawl (You can use this listener to notice end-of-crawl).
1081      * @throws InitializationException
1082      * @throws InvalidAttributeValueException
1083      * @return Status string.
1084      */
1085     protected String doOneCrawl(String crawlOrderFile,
1086         CrawlStatusListener listener)
1087     throws InitializationException, InvalidAttributeValueException {
1088         XMLSettingsHandler handler =
1089             new XMLSettingsHandler(new File(crawlOrderFile));
1090         handler.initialize();
1091         CrawlController controller = new CrawlController();
1092         controller.initialize(handler);
1093         if (listener != null) {
1094             controller.addCrawlStatusListener(listener);
1095         }
1096         controller.requestCrawlStart();
1097         return "Crawl started using " + crawlOrderFile + ".";
1098     }
1099     
1100     /***
1101      * Launch the crawler for a web UI.
1102      *
1103      * Crawler hangs around waiting on jobs.
1104      *
1105      * @exception Exception
1106      * @return A status string describing how the launch went.
1107      * @throws Exception
1108      */
1109     public String launch() throws Exception {
1110         return launch(null, false);
1111     }
1112 
1113     /***
1114      * Launch the crawler for a web UI.
1115      *
1116      * Crawler hangs around waiting on jobs.
1117      * 
1118      * @param crawlOrderFile File to crawl.  May be null.
1119      * @param runMode Whether crawler should be set to run mode.
1120      *
1121      * @exception Exception
1122      * @return A status string describing how the launch went.
1123      */
1124     public String launch(String crawlOrderFile, boolean runMode)
1125     throws Exception {
1126         String status = null;
1127         if (crawlOrderFile != null) {
1128             addCrawlJob(crawlOrderFile, "Autolaunched", "", "");
1129             if(runMode) {
1130                 this.jobHandler.startCrawler();
1131                 status = "Job being crawled: " + crawlOrderFile;
1132             } else {
1133                 status = "Crawl job ready and pending: " + crawlOrderFile;
1134             }
1135         } else if(runMode) {
1136             // The use case is that jobs are to be run on a schedule and that
1137             // if the crawler is in run mode, then the scheduled job will be
1138             // run at appropriate time.  Otherwise, not.
1139             this.jobHandler.startCrawler();
1140             status = "Crawler set to run mode.";
1141         }
1142         return status;
1143     }
1144     
1145     /***
1146      * Start up the embedded Jetty webserver instance.
1147      * This is done when we're run from the command-line.
1148      * @param port Port number to use for web UI.
1149      * @param adminLoginPassword Compound of login and password.
1150      * @throws Exception
1151      * @return Status on webserver startup.
1152      * @deprecated  Use startEmbeddedWebserver(hosts, port, adminLoginPassword)
1153      */
1154     protected static String startEmbeddedWebserver(final int port,
1155         final boolean lho, final String adminLoginPassword)
1156     throws Exception {
1157         ArrayList<String> hosts = new ArrayList<String>();
1158         if (lho) {
1159             hosts.add("127.0.0.1");
1160         }
1161         return startEmbeddedWebserver(hosts, port, adminLoginPassword);
1162     }
1163 
1164     
1165     /***
1166      * Parses a list of host names.
1167      * 
1168      * <p>If the given string is <code>/</code>, then an empty
1169      * collection is returned.  This indicates that all available network
1170      * interfaces should be used.
1171      * 
1172      * <p>Otherwise, the string must contain a comma-separated list of 
1173      * IP addresses or host names.  The parsed list is then returned.
1174      * 
1175      * @param hosts  the string to parse
1176      * @return  the parsed collection of hosts 
1177      */
1178     private static Collection<String> parseHosts(String hosts) {
1179         hosts = hosts.trim();
1180         if (hosts.equals("/")) {
1181             return new ArrayList<String>(1);
1182         }
1183         String[] hostArray = hosts.split(",");
1184         for (int i = 0; i < hostArray.length; i++) {
1185             hostArray[i] = hostArray[i].trim();
1186         }
1187         return Arrays.asList(hostArray);
1188     }
1189     
1190     /***
1191      * Start up the embedded Jetty webserver instance.
1192      * This is done when we're run from the command-line.
1193      * 
1194      * @param hosts  a list of IP addresses or hostnames to bind to, or an
1195      *               empty collection to bind to all available network 
1196      *               interfaces
1197      * @param port Port number to use for web UI.
1198      * @param adminLoginPassword Compound of login and password.
1199      * @throws Exception
1200      * @return Status on webserver startup.
1201      */
1202     protected static String startEmbeddedWebserver(Collection<String> hosts, 
1203         int port, String adminLoginPassword) 
1204     throws Exception {
1205         adminUsername = adminLoginPassword.
1206             substring(0, adminLoginPassword.indexOf(":"));
1207         adminPassword = adminLoginPassword.
1208             substring(adminLoginPassword.indexOf(":") + 1);
1209         Heritrix.httpServer = new SimpleHttpServer("admin",
1210             Heritrix.adminContext, hosts, port, false);
1211         
1212         final String DOTWAR = ".war";
1213         final String SELFTEST = "selftest";
1214         
1215         // Look for additional WAR files beyond 'selftest' and 'admin'.
1216         File[] wars = getWarsdir().listFiles();
1217         for(int i = 0; i < wars.length; i++) {
1218             if(wars[i].isFile()) {
1219                 final String warName = wars[i].getName();
1220                 final String warNameNC = warName.toLowerCase();
1221                 if(warNameNC.endsWith(DOTWAR) &&
1222                         !warNameNC.equals(ADMIN + DOTWAR) &&
1223                         !warNameNC.equals(SELFTEST + DOTWAR)) {
1224                     int dot = warName.indexOf('.');
1225                     Heritrix.httpServer.addWebapp(warName.substring(0, dot),
1226                             null, true);
1227                 }
1228             }
1229         }
1230         
1231         // Name of passed 'realm' must match what is in configured in web.xml.
1232         // We'll use ROLE for 'realm' and 'role'.
1233         final String ROLE = ADMIN;
1234         Heritrix.httpServer.setAuthentication(ROLE, Heritrix.adminContext,
1235             adminUsername, adminPassword, ROLE);
1236         Heritrix.httpServer.startServer();
1237         StringBuffer buffer = new StringBuffer();
1238         buffer.append("Heritrix " + Heritrix.getVersion() + " is running.");
1239         for (String host: httpServer.getHosts()) {
1240             buffer.append("\nWeb console is at: http://");
1241             buffer.append(host).append(':').append(port);
1242         }
1243         buffer.append("\nWeb console login and password: " +
1244             adminUsername + "/" + adminPassword);
1245         return buffer.toString();
1246     }
1247     
1248     /***
1249      * Replace existing administrator login info with new info.
1250      * 
1251      * @param newUsername new administrator login username
1252      * @param newPassword new administrator login password
1253      */
1254     public static void resetAuthentication(String newUsername,
1255             String newPassword) {
1256         Heritrix.httpServer.resetAuthentication(ADMIN, adminUsername,
1257                 newUsername, newPassword);
1258         adminUsername = newUsername;
1259         adminPassword = newPassword; 
1260         logger.info("administrative login changed to "
1261                 +newUsername+":"+newPassword);
1262     }
1263 
1264     protected static CrawlJob createCrawlJob(CrawlJobHandler handler,
1265             File crawlOrderFile, String name)
1266     throws InvalidAttributeValueException {
1267         XMLSettingsHandler settings = new XMLSettingsHandler(crawlOrderFile);
1268         settings.initialize();
1269         return new CrawlJob(handler.getNextJobUID(), name, settings,
1270             new CrawlJobErrorHandler(Level.SEVERE),
1271             CrawlJob.PRIORITY_HIGH,
1272             crawlOrderFile.getAbsoluteFile().getParentFile());
1273     }
1274     
1275     /***
1276      * This method is called when we have an order file to hand that we want
1277      * to base a job on.  It leaves the order file in place and just starts up
1278      * a job that uses all the order points to for locations for logs, etc.
1279      * @param orderPathOrUrl Path to an order file or to a seeds file.
1280      * @param name Name to use for this job.
1281      * @param description 
1282      * @param seeds 
1283      * @return A status string.
1284      * @throws IOException 
1285      * @throws FatalConfigurationException 
1286      */
1287     public String addCrawlJob(String orderPathOrUrl, String name,
1288             String description, String seeds)
1289     throws IOException, FatalConfigurationException {
1290         if (!UURI.hasScheme(orderPathOrUrl)) {
1291             // Assume its a file path.
1292             return addCrawlJob(new File(orderPathOrUrl), name, description,
1293                     seeds);
1294         }
1295 
1296         // Otherwise, must be an URL.
1297         URL url = new URL(orderPathOrUrl);
1298 
1299         // Handle http and file only for now (Tried to handle JarUrlConnection
1300         // but too awkward undoing jar stream.  Rather just look for URLs that
1301         // end in '.jar').
1302         String result = null;
1303         URLConnection connection = url.openConnection();
1304         if (connection instanceof HttpURLConnection) {
1305             result = addCrawlJob(url, (HttpURLConnection)connection, name,
1306                 description, seeds);
1307         } else if (connection instanceof FileURLConnection) {
1308             result = addCrawlJob(new File(url.getPath()), name, description,
1309                 seeds);
1310         } else {
1311             throw new UnsupportedOperationException("No support for "
1312                 + connection);
1313         }
1314 
1315         return result;
1316     }
1317     
1318     protected String addCrawlJob(final URL url,
1319             final HttpURLConnection connection,
1320             final String name, final String description, final String seeds)
1321     throws IOException, FatalConfigurationException {
1322         // Look see if its a jar file.  If it is undo it.
1323         boolean isJar = url.getPath() != null &&
1324             url.getPath().toLowerCase().endsWith(JAR_SUFFIX);
1325         // If http url connection, bring down the resource local.
1326         File localFile = File.createTempFile(Heritrix.class.getName(),
1327            isJar? JAR_SUFFIX: null, TMPDIR);
1328         connection.connect();
1329         String result = null;
1330         try {
1331             IoUtils.readFullyToFile(connection.getInputStream(), localFile);
1332             result = addCrawlJob(localFile, name, description, seeds);
1333         } catch (IOException ioe) {
1334             // Cleanup if an Exception.
1335             localFile.delete();
1336             localFile = null;
1337         } finally {
1338              connection.disconnect();
1339              // If its a jar file, then we made a job based on the jar contents.
1340              // Its no longer needed.  Remove it.  If not a jar file, then leave
1341              // the file around because the job depends on it.
1342              if (isJar && localFile != null && localFile.exists()) {
1343                  localFile.delete();
1344              }
1345         }
1346         return result;
1347     }
1348     
1349     protected String addCrawlJob(final File order, final String name,
1350             final String description, final String seeds)
1351     throws FatalConfigurationException, IOException {
1352         CrawlJob addedJob = null;
1353         if (this.jobHandler == null) {
1354             throw new NullPointerException("Heritrix jobhandler is null.");
1355         }
1356         try {
1357             if (order.getName().toLowerCase().endsWith(JAR_SUFFIX)) {
1358                 return addCrawlJobBasedonJar(order, name, description, seeds);
1359             }
1360             addedJob = this.jobHandler.
1361                 addJob(createCrawlJob(this.jobHandler, order, name));
1362         } catch (InvalidAttributeValueException e) {
1363             FatalConfigurationException fce = new FatalConfigurationException(
1364                 "Converted InvalidAttributeValueException on " +
1365                 order.getAbsolutePath() + ": " + e.getMessage());
1366             fce.setStackTrace(e.getStackTrace());
1367         }
1368         return addedJob != null? addedJob.getUID(): null;
1369     }
1370     
1371     /***
1372      * Undo jar file and use as basis for a new job.
1373      * @param jarFile Pointer to file that holds jar.
1374      * @param name Name to use for new job.
1375      * @param description 
1376      * @param seeds 
1377      * @return Message.
1378      * @throws IOException
1379      * @throws FatalConfigurationException
1380      */
1381     protected String addCrawlJobBasedonJar(final File jarFile,
1382             final String name, final String description, final String seeds)
1383     throws IOException, FatalConfigurationException {
1384         if (jarFile == null || !jarFile.exists()) {
1385             throw new FileNotFoundException(jarFile.getAbsolutePath());
1386         }
1387         // Create a directory with a tmp name.  Do it by first creating file,
1388         // removing it, then creating the directory. There is a hole during
1389         // which the OS may put a file of same exact name in our way but
1390         // unlikely.
1391         File dir = File.createTempFile(Heritrix.class.getName(), ".expandedjar",
1392             TMPDIR);
1393         dir.delete();
1394         dir.mkdir();
1395         try {
1396             org.archive.crawler.util.IoUtils.unzip(jarFile, dir);
1397             // Expect to find an order file at least.
1398             File orderFile = new File(dir, "order.xml");
1399             if (!orderFile.exists()) {
1400                 throw new IOException("Missing order: " +
1401                     orderFile.getAbsolutePath());
1402             }
1403             CrawlJob job =
1404                 createCrawlJobBasedOn(orderFile, name, description, seeds);
1405             // Copy into place any seeds and settings directories before we
1406             // add job to Heritrix to crawl.
1407             File seedsFile = new File(dir, "seeds.txt");
1408             if (seedsFile.exists()) {
1409                 FileUtils.copyFiles(seedsFile, new File(job.getDirectory(),
1410                     seedsFile.getName()));
1411             }
1412             File settingsDir = new File(dir, "settings");
1413             if (settingsDir.exists()) {
1414                 FileUtils.copyFiles(settingsDir, job.getDirectory());
1415             }
1416             addCrawlJob(job);
1417             return job.getUID();
1418          } finally {
1419              // After job has been added, no more need of expanded content.
1420              // (Let the caller be responsible for cleanup of jar. Sometimes
1421              // its should be deleted -- when its a local copy of a jar pulled
1422              // across the net -- wherease other times, if its a jar passed
1423              // in w/ a 'file' scheme, it shouldn't be deleted.
1424              org.archive.util.FileUtils.deleteDir(dir);
1425          }
1426     }
1427     
1428     public String addCrawlJobBasedOn(String jobUidOrProfile,
1429             String name, String description, String seeds) {
1430         try {
1431             CrawlJob cj = getJobHandler().getJob(jobUidOrProfile);
1432             if (cj == null) {
1433                 throw new InvalidAttributeValueException(jobUidOrProfile +
1434                     " is not a job UID or profile name (Job UIDs are " +
1435                     " usually the 14 digit date portion of job name).");
1436             }
1437             CrawlJob job = addCrawlJobBasedOn(
1438                 cj.getSettingsHandler().getOrderFile(), name, description,
1439                     seeds);
1440             return job.getUID();
1441         } catch (Exception e) {
1442             e.printStackTrace();
1443             return "Exception on " + jobUidOrProfile + ": " + e.getMessage();
1444         } 
1445     }
1446     
1447     protected CrawlJob addCrawlJobBasedOn(final File orderFile,
1448         final String name, final String description, final String seeds)
1449     throws FatalConfigurationException {
1450         return addCrawlJob(createCrawlJobBasedOn(orderFile, name, description,
1451                 seeds));
1452     }
1453     
1454     protected CrawlJob createCrawlJobBasedOn(final File orderFile,
1455             final String name, final String description, final String seeds)
1456     throws FatalConfigurationException {
1457         CrawlJob job = getJobHandler().newJob(orderFile, name, description,
1458                 seeds);
1459         return CrawlJobHandler.ensureNewJobWritten(job, name, description);
1460     }
1461     
1462     protected CrawlJob addCrawlJob(final CrawlJob job) {
1463         return getJobHandler().addJob(job);
1464     }
1465     
1466     public void startCrawling() {
1467         if (getJobHandler() == null) {
1468             throw new NullPointerException("Heritrix jobhandler is null.");
1469         }
1470         getJobHandler().startCrawler();
1471     }
1472 
1473     public void stopCrawling() {
1474         if (getJobHandler() == null) {
1475             throw new NullPointerException("Heritrix jobhandler is null.");
1476         }
1477         getJobHandler().stopCrawler();
1478     }
1479     
1480     /***
1481      * Get the heritrix version.
1482      *
1483      * @return The heritrix version.  May be null.
1484      */
1485     public static String getVersion() {
1486         return System.getProperty("heritrix.version");
1487     }
1488 
1489     /***
1490      * Get the job handler
1491      *
1492      * @return The CrawlJobHandler being used.
1493      */
1494     public CrawlJobHandler getJobHandler() {
1495         return this.jobHandler;
1496     }
1497 
1498     /***
1499      * Get the configuration directory.
1500      * @return The conf directory under HERITRIX_HOME or null if none can
1501      * be found.
1502      * @throws IOException
1503      */
1504     public static File getConfdir()
1505     throws IOException {
1506         return getConfdir(true);
1507     }
1508 
1509     /***
1510      * Get the configuration directory.
1511      * @param fail Throw IOE if can't find directory if true, else just
1512      * return null.
1513      * @return The conf directory under HERITRIX_HOME or null (or an IOE) if
1514      * can't be found.
1515      * @throws IOException
1516      */
1517     public static File getConfdir(final boolean fail)
1518     throws IOException {
1519         final String key = "heritrix.conf";
1520         // Look to see if heritrix.conf property passed on the cmd-line.
1521         String tmp = System.getProperty(key);
1522         // if not fall back to default $HERITIX_HOME/conf
1523         if (tmp == null || tmp.length() == 0) {
1524             return getSubDir("conf", fail);
1525         }
1526         File dir = new File(tmp);
1527         if (!dir.exists()) {
1528             if (fail) {
1529                 throw new IOException("Cannot find conf dir: " + tmp);
1530             } else {
1531                 logger.log(Level.WARNING, "Specified " + key +
1532                     " dir does not exist.  Falling back on default");
1533             }
1534             dir = getSubDir("conf", fail);
1535         }
1536         return dir;
1537     }
1538 
1539     /***
1540      * @return Returns the httpServer. May be null if one was not started.
1541      */
1542     public static SimpleHttpServer getHttpServer() {
1543         return Heritrix.httpServer;
1544     }
1545 
1546     /***
1547      * @throws IOException
1548      * @return Returns the directory under which reside the WAR files
1549      * we're to load into the servlet container.
1550      */
1551     public static File getWarsdir()
1552     throws IOException {
1553         return getSubDir("webapps");
1554     }
1555 
1556     /***
1557      * Prepares for program shutdown. This method does its best to prepare the
1558      * program so that it can exit normally. It will kill the httpServer and
1559      * terminate any running job.<br>
1560      * It is advisable to wait a few (~1000) millisec after calling this method
1561      * and before calling performHeritrixShutDown() to allow as many threads as
1562      * possible to finish what they are doing.
1563      */
1564     public static void prepareHeritrixShutDown() {
1565         // Stop and destroy all running Heritrix instances.
1566         // Get array of the key set to avoid CCEs for case where call to
1567         // destroy does a remove of an instance from Heritrix.instances.
1568         final Object [] keys = Heritrix.instances.keySet().toArray();
1569         for (int i = 0; i < keys.length; i++) {
1570             ((Heritrix)Heritrix.instances.get(keys[i])).destroy();
1571         }
1572         
1573         try {
1574             deregisterJndi(getJndiContainerName());
1575         } catch (NameNotFoundException e) {
1576             // We were probably unbound already. Ignore.
1577             logger.log(Level.WARNING, "deregistration of jndi", e);
1578         } catch (Exception e) {
1579             e.printStackTrace();
1580         }
1581         
1582         if(Heritrix.httpServer != null) {
1583             // Shut down the web access.
1584             try {
1585                 Heritrix.httpServer.stopServer();
1586             } catch (InterruptedException e) {
1587                 // Generally this can be ignored, but we'll print a stack trace
1588                 // just in case.
1589                 e.printStackTrace();
1590             } finally {
1591                 Heritrix.httpServer = null;
1592             }
1593         }
1594     }
1595 
1596     /***
1597      * Exit program. Recommended that prepareHeritrixShutDown() be invoked
1598      * prior to this method.
1599      */
1600     public static void performHeritrixShutDown() {
1601         performHeritrixShutDown(0);
1602     }
1603 
1604     /***
1605      * Exit program. Recommended that prepareHeritrixShutDown() be invoked
1606      * prior to this method.
1607      *
1608      * @param exitCode Code to pass System.exit.
1609      *
1610      */
1611     public static void performHeritrixShutDown(int exitCode) {
1612         System.exit(exitCode);
1613     }
1614 
1615     /***
1616      * Shutdown all running heritrix instances and the JVM.
1617      * Assumes stop has already been called.
1618 	 * @param exitCode Exit code to pass system exit.
1619 	 */
1620 	public static void shutdown(final int exitCode) {
1621         getShutdownThread(true, exitCode, "Heritrix shutdown").start();
1622 	}
1623     
1624     protected static Thread getShutdownThread(final boolean sysexit,
1625             final int exitCode, final String name) {
1626         Thread t = new Thread(name) {
1627             public void run() {
1628                 Heritrix.prepareHeritrixShutDown();
1629                 if (sysexit) {
1630                     Heritrix.performHeritrixShutDown(exitCode);
1631                 }
1632             }
1633         };
1634         t.setDaemon(true);
1635         return t;
1636     }
1637     
1638     public static void shutdown() {
1639         shutdown(0);
1640     }
1641     
1642     /***
1643      * Register Heritrix with JNDI, JMX, and with the static hashtable of all
1644      * Heritrix instances known to this JVM.
1645      * 
1646      * If launched from cmdline, register Heritrix MBean if an agent to register
1647      * ourselves with. Usually this method will only have effect if we're
1648      * running in a 1.5.0 JDK and command line options such as
1649      * '-Dcom.sun.management.jmxremote.port=8082
1650      * -Dcom.sun.management.jmxremote.authenticate=false
1651      * -Dcom.sun.management.jmxremote.ssl=false' are supplied.
1652      * See <a href="http://java.sun.com/j2se/1.5.0/docs/guide/management/agent.html">Monitoring
1653      * and Management Using JMX</a>
1654      * for more on the command line options and how to connect to the
1655      * Heritrix bean using the JDK 1.5.0 jconsole tool.  We register currently
1656      * with first server we find (TODO: Make configurable).
1657      * 
1658      * <p>If we register successfully with a JMX agent, then part of the
1659      * registration will include our registering ourselves with JNDI.
1660      * 
1661      * <p>Finally, add the heritrix instance to the hashtable of all the
1662      * Heritrix instances floating in the current VM.  This latter registration
1663      * happens whether or not there is a JMX agent to register with.  This is
1664      * a list we keep out of convenience so it's easy iterating over
1665      * all instances calling stop when main application is going down.
1666      * 
1667      * @param h Instance of heritrix to register.
1668      * @param name Name to use for this Heritrix instance.
1669      * @param jmxregister True if we are to register this instance with JMX.
1670      * @throws NullPointerException
1671      * @throws MalformedObjectNameException
1672      * @throws NotCompliantMBeanException 
1673      * @throws MBeanRegistrationException 
1674      * @throws InstanceAlreadyExistsException 
1675      */
1676     protected static void registerHeritrix(final Heritrix h,
1677             final String name, final boolean jmxregister)
1678     throws MalformedObjectNameException, InstanceAlreadyExistsException,
1679     MBeanRegistrationException, NotCompliantMBeanException {
1680         MBeanServer server = getMBeanServer();
1681         if (server != null) {
1682             // Are we to manage the jmx registration?  Or is it being done for
1683             // us by an external process: e.g. This instance was created by
1684             // MBeanAgent.
1685             if (jmxregister) {
1686                 ObjectName objName = (name == null || name.length() <= 0)?
1687                     getJmxObjectName(): getJmxObjectName(name);
1688                 registerMBean(server, h, objName);
1689             }
1690         } else {
1691             // JMX ain't available. Put this instance into the list of Heritrix
1692             // instances so findable by the UI (Normally this is done in the
1693             // JMX postRegister routine below).  When no JMX, can only have
1694             // one instance of Heritrix so no need to do the deregisteration.
1695             Heritrix.instances.put(h.getNoJmxName(), h);
1696         }
1697     }
1698     
1699     protected static void unregisterHeritrix(final Heritrix h)
1700     throws InstanceNotFoundException, MBeanRegistrationException,
1701             NullPointerException {
1702         MBeanServer server = getMBeanServer();
1703         if (server != null) {
1704             server.unregisterMBean(h.mbeanName);
1705         } else {
1706             // JMX ain't available. Remove from list of Heritrix instances.
1707             // Usually this is done by the JMX postDeregister below.
1708             Heritrix.instances.remove(h.getNoJmxName());
1709         }
1710     }
1711     
1712     /***
1713      * Get MBeanServer.
1714      * Currently uses first MBeanServer found.  This will definitely not be what's
1715      * always wanted. TODO: Make which server settable. Also, if none, put up
1716      * our own MBeanServer.
1717      * @return An MBeanServer to register with or null.
1718      */
1719     public static MBeanServer getMBeanServer() {
1720         MBeanServer result = null;
1721         List servers = MBeanServerFactory.findMBeanServer(null);
1722         if (servers == null) {
1723             return result;
1724         }
1725         for (Iterator i = servers.iterator(); i.hasNext();) {
1726             MBeanServer server = (MBeanServer)i.next();
1727             if (server == null) {
1728                 continue;
1729             }
1730             result = server;
1731             break;
1732         }
1733         return result;
1734     }
1735     
1736     public static MBeanServer registerMBean(final Object objToRegister,
1737             final String name, final String type)
1738     throws InstanceAlreadyExistsException, MBeanRegistrationException,
1739     NotCompliantMBeanException {
1740         MBeanServer server = getMBeanServer();
1741         if (server != null) {
1742             server = registerMBean(server, objToRegister, name, type);
1743         }
1744         return server;
1745     }
1746     
1747     public static MBeanServer registerMBean(final MBeanServer server,
1748             final Object objToRegister, final String name, final String type)
1749     throws InstanceAlreadyExistsException, MBeanRegistrationException,
1750     NotCompliantMBeanException {
1751         try {
1752             Hashtable<String,String> ht = new Hashtable<String,String>();
1753             ht.put(JmxUtils.NAME, name);
1754             ht.put(JmxUtils.TYPE, type);
1755             registerMBean(server, objToRegister,
1756                 new ObjectName(CRAWLER_PACKAGE, ht));
1757         } catch (MalformedObjectNameException e) {
1758             e.printStackTrace();
1759         }
1760         return server;
1761     }
1762         
1763     public static MBeanServer registerMBean(final MBeanServer server,
1764                 final Object objToRegister, final ObjectName objName)
1765     throws InstanceAlreadyExistsException, MBeanRegistrationException,
1766     NotCompliantMBeanException {
1767         server.registerMBean(objToRegister, objName);
1768         return server;
1769     }
1770     
1771     public static void unregisterMBean(final MBeanServer server,
1772             final String name, final String type) {
1773         if (server == null) {
1774             return;
1775         }
1776         try {
1777             unregisterMBean(server, getJmxObjectName(name, type));
1778         } catch (MalformedObjectNameException e) {
1779             e.printStackTrace();
1780         }
1781     }
1782             
1783     public static void unregisterMBean(final MBeanServer server,
1784             final ObjectName name) {
1785         try {
1786             server.unregisterMBean(name);
1787             logger.info("Unregistered bean " + name.getCanonicalName());
1788         } catch (InstanceNotFoundException e) {
1789             e.printStackTrace();
1790         } catch (MBeanRegistrationException e) {
1791             e.printStackTrace();
1792         } catch (NullPointerException e) {
1793             e.printStackTrace();
1794         }
1795     }
1796     
1797     /***
1798      * @return Name to use when no JMX agent available.
1799      */
1800     protected String getNoJmxName() {
1801         return this.getClass().getName();
1802     }
1803     
1804     public static ObjectName getJmxObjectName()
1805     throws MalformedObjectNameException, NullPointerException {
1806         return getJmxObjectName("Heritrix", JmxUtils.SERVICE);
1807     }
1808     
1809     public static ObjectName getJmxObjectName(final String name)
1810     throws MalformedObjectNameException, NullPointerException {
1811         return getJmxObjectName(name, JmxUtils.SERVICE);
1812     }
1813     
1814     public static ObjectName getJmxObjectName(final String name,
1815             final String type)
1816     throws MalformedObjectNameException, NullPointerException {
1817         Hashtable<String,String> ht = new Hashtable<String,String>();
1818         ht.put(JmxUtils.NAME, name);
1819         ht.put(JmxUtils.TYPE, type);
1820         return new ObjectName(CRAWLER_PACKAGE, ht);
1821     }
1822     
1823     /***
1824      * @return Returns true if Heritrix was launched from the command line.
1825      * (When launched from command line, we do stuff like put up a web server
1826      * to manage our web interface and we register ourselves with the first
1827      * available jmx agent).
1828      */
1829     public static boolean isCommandLine() {
1830         return Heritrix.commandLine;
1831     }
1832     
1833     /***
1834      * @return True if heritrix has been started.
1835      */
1836     public boolean isStarted() {
1837         return this.jobHandler != null;
1838     }
1839     
1840     public String getStatus() {
1841         StringBuffer buffer = new StringBuffer();
1842         if (this.getJobHandler() != null) {
1843             buffer.append("isRunning=");
1844             buffer.append(this.getJobHandler().isRunning());
1845             buffer.append(" isCrawling=");
1846             buffer.append(this.getJobHandler().isCrawling());
1847             buffer.append(" alertCount=");
1848             buffer.append(getAlertsCount());
1849             buffer.append(" newAlertCount=");
1850             buffer.append(getNewAlertsCount());
1851             if (this.getJobHandler().isCrawling()) {
1852                 buffer.append(" currentJob=");
1853                 buffer.append(this.getJobHandler().getCurrentJob().
1854                     getJmxJobName());
1855             }
1856         }
1857         return buffer.toString();
1858     }
1859     
1860     // Alert methods.
1861     public int getAlertsCount() {
1862         return this.alertManager.getCount();
1863     }
1864     
1865     public int getNewAlertsCount() {
1866         return this.alertManager.getNewCount();
1867     }
1868     
1869     public Vector getAlerts() {
1870         return this.alertManager.getAll();
1871     }
1872     
1873     public Vector getNewAlerts() {
1874         return this.alertManager.getNewAll();
1875     }
1876     
1877     public SinkHandlerLogRecord getAlert(final String id) {
1878         return this.alertManager.get(id);
1879     }
1880     
1881     public void readAlert(final String id) {
1882         this.alertManager.read(id);
1883     }
1884     
1885     public void removeAlert(final String id) {
1886         this.alertManager.remove(id);
1887     }
1888     
1889     /***
1890      * Start Heritrix.
1891      * 
1892      * Used by JMX and webapp initialization for starting Heritrix.
1893      * Not by the cmdline launched Heritrix. Idempotent.
1894      * If start is called by JMX, then new instance of Heritrix is automatically
1895      * registered w/ JMX Agent.  If started by webapp, need to register the new
1896      * Heritrix instance.
1897      */
1898     public void start() {
1899         // Don't start if we've been launched from the command line.
1900         // Don't start if already started.
1901         if (!Heritrix.isCommandLine() && !isStarted()) {
1902             try {
1903                 logger.info(launch());
1904             } catch (Exception e) {
1905                 e.printStackTrace();
1906             }
1907         }
1908     }
1909     
1910     /***
1911      * Stop Heritrix.
1912      * 
1913      * Used by JMX and webapp initialization for stopping Heritrix.
1914      */
1915     public void stop() {
1916         if (this.jobHandler != null) {
1917             this.jobHandler.stop();
1918         }
1919     }
1920 
1921     public String interrupt(String threadName) {
1922         String result = "Thread " + threadName + " not found";
1923         ThreadGroup group = Thread.currentThread().getThreadGroup();
1924         if (group == null) {
1925             return result;
1926         }
1927         // Back up to the root threadgroup before starting
1928         // to iterate over threads.
1929         ThreadGroup parent = null;
1930         while((parent = group.getParent()) != null) {
1931             group = parent;
1932         }
1933         // Do an array that is twice the size of active
1934         // thread count.  That should be big enough.
1935         final int max = group.activeCount() * 2;
1936         Thread [] threads = new Thread[max];
1937         int threadCount = group.enumerate(threads, true);
1938         if (threadCount >= max) {
1939             logger.info("Some threads not found...array too small: " +
1940                 max);
1941         }
1942         for (int j = 0; j < threadCount; j++) {
1943             if (threads[j].getName().equals(threadName)) {
1944                 threads[j].interrupt();
1945                 result = "Interrupt sent to " + threadName;
1946                 break;
1947             }
1948         }
1949         return result;
1950     }
1951 
1952     // OpenMBean implementation.
1953     
1954     /***
1955      * Build up the MBean info for Heritrix main.
1956      * @return Return created mbean info instance.
1957      */
    protected OpenMBeanInfoSupport buildMBeanInfo() {
        // The attribute and operation arrays are sized from ATTRIBUTE_LIST
        // and OPERATION_LIST but filled below at hard-coded indexes (0-6 and
        // 0-16).  NOTE(review): this presumably requires those lists to hold
        // exactly 7 and 17 entries -- confirm when adding or removing one.
        OpenMBeanAttributeInfoSupport[] attributes =
            new OpenMBeanAttributeInfoSupport[Heritrix.ATTRIBUTE_LIST.size()];
        OpenMBeanConstructorInfoSupport[] constructors =
            new OpenMBeanConstructorInfoSupport[1];
        OpenMBeanOperationInfoSupport[] operations =
            new OpenMBeanOperationInfoSupport[Heritrix.OPERATION_LIST.size()];
        // No notifications are declared.
        MBeanNotificationInfo[] notifications =
            new MBeanNotificationInfo[0];

        // Attributes.  The three trailing booleans are isReadable,
        // isWritable and isIs: every attribute is declared read-only.
        attributes[0] =
            new OpenMBeanAttributeInfoSupport(Heritrix.STATUS_ATTR,
                "Short basic status message", SimpleType.STRING, true,
                false, false);
        // Version.
        attributes[1] =
            new OpenMBeanAttributeInfoSupport(Heritrix.VERSION_ATTR,
                "Heritrix version", SimpleType.STRING, true, false, false);
        // Is running.
        attributes[2] =
            new OpenMBeanAttributeInfoSupport(Heritrix.ISRUNNING_ATTR,
                "Whether the crawler is running", SimpleType.BOOLEAN, true,
                false, false);
        // Is crawling.
        attributes[3] =
            new OpenMBeanAttributeInfoSupport(Heritrix.ISCRAWLING_ATTR,
                "Whether the crawler is crawling", SimpleType.BOOLEAN, true,
                false, false);
        // Alert count.
        attributes[4] =
            new OpenMBeanAttributeInfoSupport(Heritrix.ALERTCOUNT_ATTR,
                "The number of alerts", SimpleType.INTEGER, true, false, false);
        // New alert count.
        attributes[5] =
            new OpenMBeanAttributeInfoSupport(Heritrix.NEWALERTCOUNT_ATTR,
                "The number of new alerts", SimpleType.INTEGER, true, false,
                false);
        // Current job.
        attributes[6] =
            new OpenMBeanAttributeInfoSupport(Heritrix.CURRENTJOB_ATTR,
                "The name of the job currently being crawled", 
                SimpleType.STRING, true, false, false);

        // Constructors.  A single no-argument constructor is advertised.
        constructors[0] = new OpenMBeanConstructorInfoSupport(
            "HeritrixOpenMBean", "Constructs Heritrix OpenMBean instance ",
            new OpenMBeanParameterInfoSupport[0]);

        // Operations.  Each entry gives name, description, parameters (null
        // for none), return type and impact.
        operations[0] = new OpenMBeanOperationInfoSupport(
            Heritrix.START_OPER, "Start Heritrix instance", null,
                SimpleType.VOID, MBeanOperationInfo.ACTION);
        
        operations[1] = new OpenMBeanOperationInfoSupport(
            Heritrix.STOP_OPER, "Stop Heritrix instance", null,
                SimpleType.VOID, MBeanOperationInfo.ACTION);
        
        // Interrupt: takes a thread name, returns a message string.
        OpenMBeanParameterInfo[] args = new OpenMBeanParameterInfoSupport[1];
        args[0] = new OpenMBeanParameterInfoSupport("threadName",
            "Name of thread to send interrupt", SimpleType.STRING);
        operations[2] = new OpenMBeanOperationInfoSupport(
            Heritrix.INTERRUPT_OPER, "Send thread an interrupt " +
                "(Used debugging)", args, SimpleType.STRING,
                MBeanOperationInfo.ACTION_INFO);
        
        operations[3] = new OpenMBeanOperationInfoSupport(
            Heritrix.START_CRAWLING_OPER, "Set Heritrix instance " +
                "into crawling mode", null, SimpleType.VOID,
                MBeanOperationInfo.ACTION);
        
        operations[4] = new OpenMBeanOperationInfoSupport(
            Heritrix.STOP_CRAWLING_OPER, "Unset Heritrix instance " +
                " crawling mode", null, SimpleType.VOID,
                MBeanOperationInfo.ACTION);
        
        // Add crawl job: order path/URL, name, description and seeds.
        // A fresh args array is allocated for each operation so that the
        // arrays already handed to earlier operations are not modified.
        args = new OpenMBeanParameterInfoSupport[4];
        args[0] = new OpenMBeanParameterInfoSupport("pathOrURL",
            "Path/URL to order or jar of order+seed",
            SimpleType.STRING);
        args[1] = new OpenMBeanParameterInfoSupport("name",
            "Basename for new job", SimpleType.STRING);
        args[2] = new OpenMBeanParameterInfoSupport("description",
            "Description to save with new job", SimpleType.STRING);
        args[3] = new OpenMBeanParameterInfoSupport("seeds",
            "Initial seed(s)", SimpleType.STRING);
        operations[5] = new OpenMBeanOperationInfoSupport(
            Heritrix.ADD_CRAWL_JOB_OPER, "Add new crawl job", args,
                SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);
        
        // Add crawl job based on an existing job UID or profile name.
        args = new OpenMBeanParameterInfoSupport[4];
        args[0] = new OpenMBeanParameterInfoSupport("uidOrName",
            "Job UID or profile name", SimpleType.STRING);
        args[1] = new OpenMBeanParameterInfoSupport("name",
            "Basename for new job", SimpleType.STRING);
        args[2] = new OpenMBeanParameterInfoSupport("description",
            "Description to save with new job", SimpleType.STRING);
        args[3] = new OpenMBeanParameterInfoSupport("seeds",
            "Initial seed(s)", SimpleType.STRING);
        operations[6] = new OpenMBeanOperationInfoSupport(
            Heritrix.ADD_CRAWL_JOB_BASEDON_OPER,
            "Add a new crawl job based on passed Job UID or profile",
            args, SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);
        
        // Delete crawl job: takes the job UID.
        args = new OpenMBeanParameterInfoSupport[1];
        args[0] = new OpenMBeanParameterInfoSupport("UID",
            "Job UID", SimpleType.STRING);
        operations[7] = new OpenMBeanOperationInfoSupport(DELETE_CRAWL_JOB_OPER,
            "Delete/stop this crawl job", args, SimpleType.VOID,
            MBeanOperationInfo.ACTION);
        
        // Alert: takes an index, returns a string.
        args = new OpenMBeanParameterInfoSupport[1];
        args[0] = new OpenMBeanParameterInfoSupport("index",
            "Zero-based index into array of alerts", SimpleType.INTEGER);
        operations[8] = new OpenMBeanOperationInfoSupport(
            Heritrix.ALERT_OPER, "Return alert at passed index", args,
                SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);
        
        // Side effect: sets the jobCompositeType and jobsTabularType fields.
        // A job row has JOB_KEYS items, all strings; the table is indexed
        // by "uid".
        try {
            this.jobCompositeType = new CompositeType("job",
                    "Job attributes", JOB_KEYS,
                    new String [] {"Job unique ID", "Job name", "Job status"},
                    new OpenType [] {SimpleType.STRING, SimpleType.STRING,
                        SimpleType.STRING});
            this.jobsTabularType = new TabularType("jobs", "List of jobs",
                    this.jobCompositeType, new String [] {"uid"});
        } catch (OpenDataException e) {
            // This should never happen.
            throw new RuntimeException(e);
        }
        // Pending and completed job listings both return the tabular type
        // built just above.
        operations[9] = new OpenMBeanOperationInfoSupport(
            Heritrix.PENDING_JOBS_OPER,
                "List of pending jobs (or null if none)", null,
                this.jobsTabularType, MBeanOperationInfo.INFO);
        operations[10] = new OpenMBeanOperationInfoSupport(
                Heritrix.COMPLETED_JOBS_OPER,
                    "List of completed jobs (or null if none)", null,
                    this.jobsTabularType, MBeanOperationInfo.INFO);
        
        // Crawl-end report: job UID plus report name.
        args = new OpenMBeanParameterInfoSupport[2];
        args[0] = new OpenMBeanParameterInfoSupport("uid",
            "Job unique ID", SimpleType.STRING);
        args[1] = new OpenMBeanParameterInfoSupport("name",
                "Report name (e.g. crawl-report, etc.)",
                SimpleType.STRING);
        operations[11] = new OpenMBeanOperationInfoSupport(
            Heritrix.CRAWLEND_REPORT_OPER, "Return crawl-end report", args,
                SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);
        
        operations[12] = new OpenMBeanOperationInfoSupport(
            Heritrix.SHUTDOWN_OPER, "Shutdown container", null,
                SimpleType.VOID, MBeanOperationInfo.ACTION);
        
        // Log: level name plus message.
        args = new OpenMBeanParameterInfoSupport[2];
        args[0] = new OpenMBeanParameterInfoSupport("level",
            "Log level: e.g. SEVERE, WARNING, etc.", SimpleType.STRING);
        args[1] = new OpenMBeanParameterInfoSupport("message",
            "Log message", SimpleType.STRING);
        operations[13] = new OpenMBeanOperationInfoSupport(Heritrix.LOG_OPER,
            "Add a log message", args, SimpleType.VOID,
            MBeanOperationInfo.ACTION);
        
        operations[14] = new OpenMBeanOperationInfoSupport(
            Heritrix.DESTROY_OPER, "Destroy Heritrix instance", null,
                SimpleType.VOID, MBeanOperationInfo.ACTION);
        
        // Terminate current crawl job: no parameters, boolean result.
        operations[15] = new OpenMBeanOperationInfoSupport(
            Heritrix.TERMINATE_CRAWL_JOB_OPER,
            "Returns false if no current job", null, SimpleType.BOOLEAN,
            MBeanOperationInfo.ACTION);
        
        operations[16] = new OpenMBeanOperationInfoSupport(
            Heritrix.REBIND_JNDI_OPER,
            "Rebinds this Heritrix with JNDI.", null,
            SimpleType.VOID, MBeanOperationInfo.ACTION);

        // Build the info object.  The runtime class name is used as the
        // MBean class name.
        return new OpenMBeanInfoSupport(this.getClass().getName(),
            "Heritrix Main OpenMBean", attributes, constructors, operations,
            notifications);
    }
2139     
2140     public Object getAttribute(String attribute_name)
2141     throws AttributeNotFoundException {
2142         if (attribute_name == null) {
2143             throw new RuntimeOperationsException(
2144                  new IllegalArgumentException("Attribute name cannot be null"),
2145                  "Cannot call getAttribute with null attribute name");
2146         }
2147         if (!Heritrix.ATTRIBUTE_LIST.contains(attribute_name)) {
2148             throw new AttributeNotFoundException("Attribute " +
2149                  attribute_name + " is unimplemented.");
2150         }
2151         // The pattern in the below is to match an attribute and when found
2152         // do a return out of if clause.  Doing it this way, I can fall
2153         // on to the AttributeNotFoundException for case where we've an
2154         // attribute but no handler.
2155         if (attribute_name.equals(STATUS_ATTR)) {
2156             return getStatus();
2157         }
2158         if (attribute_name.equals(VERSION_ATTR)) {
2159             return getVersion();
2160         }
2161 
2162         if (attribute_name.equals(ISRUNNING_ATTR)) {
2163             return new Boolean(this.getJobHandler().isRunning());
2164         }
2165         if (attribute_name.equals(ISCRAWLING_ATTR)) {
2166             return new Boolean(this.getJobHandler().isCrawling());
2167         }
2168         if (attribute_name.equals(ALERTCOUNT_ATTR)) {
2169             return new Integer(getAlertsCount());
2170         }
2171         if (attribute_name.equals(NEWALERTCOUNT_ATTR)) {
2172             return new Integer(getNewAlertsCount());
2173         }
2174         if (attribute_name.equals(CURRENTJOB_ATTR)) {
2175             if (this.getJobHandler().isCrawling()) {
2176                 return this.getJobHandler().getCurrentJob().getJmxJobName();
2177             }
2178             return null;
2179         }
2180         throw new AttributeNotFoundException("Attribute " +
2181             attribute_name + " not found.");
2182     }
2183 
2184     public void setAttribute(Attribute attribute)
2185     throws AttributeNotFoundException {
2186         throw new AttributeNotFoundException("No attribute can be set in " +
2187             "this MBean");
2188     }
2189 
2190     public AttributeList getAttributes(String [] attributeNames) {
2191         if (attributeNames == null) {
2192             throw new RuntimeOperationsException(
2193                 new IllegalArgumentException("attributeNames[] cannot be " +
2194                 "null"), "Cannot call getAttributes with null attribute " +
2195                 "names");
2196         }
2197         AttributeList resultList = new AttributeList();
2198         if (attributeNames.length == 0) {
2199             return resultList;
2200         }
2201         for (int i = 0; i < attributeNames.length; i++) {
2202             try {
2203                 Object value = getAttribute(attributeNames[i]);
2204                 resultList.add(new Attribute(attributeNames[i], value));
2205             } catch (Exception e) {
2206                 e.printStackTrace();
2207             }
2208         }
2209         return(resultList);
2210     }
2211 
2212     public AttributeList setAttributes(AttributeList attributes) {
2213         return new AttributeList(); // always empty
2214     }
2215 
2216     public Object invoke(final String operationName, final Object[] params,
2217         final String[] signature)
2218     throws ReflectionException {
2219         if (operationName == null) {
2220             throw new RuntimeOperationsException(
2221                 new IllegalArgumentException("Operation name cannot be null"),
2222                 "Cannot call invoke with null operation name");
2223         }
2224         // INFO logging of JMX invokes: [#HER-907]
2225         if (logger.isLoggable(Level.INFO)) {
2226             String paramsString = "";
2227             for (Object o : params) {
2228                 paramsString.concat("[" + o.toString() + "]");
2229             }
2230             logger.info("JMX invoke: " + operationName + " [" + paramsString
2231                     + "]");
2232         } 
2233         // The pattern in the below is to match an operation and when found
2234         // do a return out of if clause.  Doing it this way, I can fall
2235         // on to the MethodNotFoundException for case where we've an
2236         // attribute but no handler.
2237         if (operationName.equals(START_OPER)) {
2238             JmxUtils.checkParamsCount(START_OPER, params, 0);
2239             start();
2240             return null;
2241         }
2242         if (operationName.equals(STOP_OPER)) {
2243             JmxUtils.checkParamsCount(STOP_OPER, params, 0);
2244             stop();
2245             return null;
2246         }
2247         if (operationName.equals(DESTROY_OPER)) {
2248             JmxUtils.checkParamsCount(DESTROY_OPER, params, 0);
2249             destroy();
2250             return null;
2251         }
2252         if (operationName.equals(TERMINATE_CRAWL_JOB_OPER)) {
2253             JmxUtils.checkParamsCount(TERMINATE_CRAWL_JOB_OPER, params, 0);
2254             return new Boolean(this.jobHandler.terminateCurrentJob());
2255         }
2256         if (operationName.equals(REBIND_JNDI_OPER)) {
2257             JmxUtils.checkParamsCount(REBIND_JNDI_OPER, params, 0);
2258             try {
2259 				registerContainerJndi();
2260 			} catch (MalformedObjectNameException e) {
2261 				throw new RuntimeOperationsException(new RuntimeException(e));
2262 			} catch (UnknownHostException e) {
2263 				throw new RuntimeOperationsException(new RuntimeException(e));
2264 			} catch (NamingException e) {
2265 				throw new RuntimeOperationsException(new RuntimeException(e));
2266 			}
2267             return null;
2268         }
2269         if (operationName.equals(SHUTDOWN_OPER)) {
2270             JmxUtils.checkParamsCount(SHUTDOWN_OPER, params, 0);
2271             Heritrix.shutdown();
2272             return null;
2273         }
2274         if (operationName.equals(LOG_OPER)) {
2275             JmxUtils.checkParamsCount(LOG_OPER, params, 2);
2276             logger.log(Level.parse((String)params[0]), (String)params[1]);
2277             return null;
2278         }
2279         if (operationName.equals(INTERRUPT_OPER)) {
2280             JmxUtils.checkParamsCount(INTERRUPT_OPER, params, 1);
2281             return interrupt((String)params[0]);
2282         }       
2283         if (operationName.equals(START_CRAWLING_OPER)) {
2284             JmxUtils.checkParamsCount(START_CRAWLING_OPER, params, 0);
2285             startCrawling();
2286             return null;
2287         }
2288         if (operationName.equals(STOP_CRAWLING_OPER)) {
2289             JmxUtils.checkParamsCount(STOP_CRAWLING_OPER, params, 0);
2290             stopCrawling();
2291             return null;
2292         }
2293         if (operationName.equals(ADD_CRAWL_JOB_OPER)) {
2294             JmxUtils.checkParamsCount(ADD_CRAWL_JOB_OPER, params, 4);
2295             try {
2296                 return addCrawlJob((String)params[0], (String)params[1],
2297                     checkForEmptyPlaceHolder((String)params[2]),
2298                     checkForEmptyPlaceHolder((String)params[3]));
2299             } catch (IOException e) {
2300                 throw new RuntimeOperationsException(new RuntimeException(e));
2301             } catch (FatalConfigurationException e) {
2302                 throw new RuntimeOperationsException(new RuntimeException(e));
2303             }
2304         }
2305         if (operationName.equals(DELETE_CRAWL_JOB_OPER)) {
2306             JmxUtils.checkParamsCount(DELETE_CRAWL_JOB_OPER, params, 1);
2307             this.jobHandler.deleteJob((String)params[0]);
2308             return null;
2309         }
2310         
2311         if (operationName.equals(ADD_CRAWL_JOB_BASEDON_OPER)) {
2312             JmxUtils.checkParamsCount(ADD_CRAWL_JOB_BASEDON_OPER, params, 4);
2313             return addCrawlJobBasedOn((String)params[0], (String)params[1],
2314                     checkForEmptyPlaceHolder((String)params[2]),
2315                     checkForEmptyPlaceHolder((String)params[3]));
2316         }       
2317         if (operationName.equals(ALERT_OPER)) {
2318             JmxUtils.checkParamsCount(ALERT_OPER, params, 1);
2319             SinkHandlerLogRecord slr = null;
2320             if (this.alertManager.getCount() > 0) {
2321                 // This is creating a vector of all alerts just so I can then
2322                 // use passed index into resultant vector -- needs to be
2323                 // improved.
2324                 slr = (SinkHandlerLogRecord)this.alertManager.getAll().
2325                     get(((Integer)params[0]).intValue());
2326             }
2327             return (slr != null)? slr.toString(): null;
2328         }
2329         
2330         if (operationName.equals(PENDING_JOBS_OPER)) {
2331                 JmxUtils.checkParamsCount(PENDING_JOBS_OPER, params, 0);
2332             try {
2333                 return makeJobsTabularData(getJobHandler().getPendingJobs());
2334             } catch (OpenDataException e) {
2335                 throw new RuntimeOperationsException(new RuntimeException(e));
2336             }
2337         }
2338         
2339         if (operationName.equals(COMPLETED_JOBS_OPER)) {
2340                 JmxUtils.checkParamsCount(COMPLETED_JOBS_OPER, params, 0);
2341             try {
2342                 return makeJobsTabularData(getJobHandler().getCompletedJobs());
2343             } catch (OpenDataException e) {
2344                 throw new RuntimeOperationsException(new RuntimeException(e));
2345             }
2346         }
2347         
2348         if (operationName.equals(CRAWLEND_REPORT_OPER)) {
2349             JmxUtils.checkParamsCount(CRAWLEND_REPORT_OPER, params, 2);
2350             try {
2351                 return getCrawlendReport((String)params[0], (String) params[1]);
2352             } catch (IOException e) {
2353                 throw new RuntimeOperationsException(new RuntimeException(e));
2354             }
2355         }
2356         
2357         throw new ReflectionException(
2358             new NoSuchMethodException(operationName),
2359                 "Cannot find the operation " + operationName);
2360     }
2361     
2362     /***
2363      * Return named crawl end report for job with passed uid.
2364      * Crawler makes reports when its finished its crawl.  Use this method
2365      * to get a String version of one of these files.
2366      * @param jobUid The unique ID for the job whose reports you want to see
2367      * (Must be a completed job).
2368      * @param reportName Name of report minus '.txt' (e.g. crawl-report).
2369      * @return String version of the on-disk report.
2370      * @throws IOException 
2371      */
2372     protected String getCrawlendReport(String jobUid, String reportName)
2373     throws IOException {
2374         CrawlJob job = getJobHandler().getJob(jobUid);
2375         if (job == null) {
2376             throw new IOException("No such job: " + jobUid);
2377         }
2378         File report = new File(job.getDirectory(), reportName + ".txt");
2379         if (!report.exists()) {
2380             throw new FileNotFoundException(report.getAbsolutePath());
2381         }
2382         return FileUtils.readFileAsString(report);
2383     }
2384     
2385     protected TabularData makeJobsTabularData(List jobs)
2386     throws OpenDataException {
2387         if (jobs == null || jobs.size() == 0) {
2388             return null;
2389         }
2390         TabularData td = new TabularDataSupport(this.jobsTabularType);
2391         for (Iterator i = jobs.iterator(); i.hasNext();) {
2392             CrawlJob job = (CrawlJob)i.next();
2393             CompositeData cd = new CompositeDataSupport(this.jobCompositeType,
2394                 JOB_KEYS,
2395                 new String [] {job.getUID(), job.getJobName(), job.getStatus()});
2396             td.put(cd);
2397         }
2398         return td;
2399     }
2400     
2401     /***
2402      * If passed str has placeholder for the empty string, return the empty
2403      * string else return orginal.
2404      * Dumb jmx clients can't pass empty string so they'll pass a representation
2405      * of empty string such as ' ' or '-'.  Convert such strings to empty
2406      * string.
2407      * @param str String to check.
2408      * @return Original <code>str</code> or empty string if <code>str</code>
2409      * contains a placeholder for the empty-string (e.g. '-', or ' ').
2410      */
2411     protected String checkForEmptyPlaceHolder(String str) {
2412         return TextUtils.matches("-| +", str)? "": str;
2413     }
2414 
2415     public MBeanInfo getMBeanInfo() {
2416         return this.openMBeanInfo;
2417     }
2418     
2419     /***
2420      * @return Name this instance registered in JMX (Only available after JMX
2421      * registration).
2422      */
2423     public ObjectName getMBeanName() {
2424         return this.mbeanName;
2425     }
2426 
2427     public ObjectName preRegister(MBeanServer server, ObjectName name)
2428     throws Exception {
2429         this.mbeanServer = server;
2430         @SuppressWarnings("unchecked")
2431         Hashtable<String,String> ht = name.getKeyPropertyList();
2432         if (!ht.containsKey(JmxUtils.NAME)) {
2433             throw new IllegalArgumentException("Name property required" +
2434                 name.getCanonicalName());
2435         }
2436         if (!ht.containsKey(JmxUtils.TYPE)) {
2437             ht.put(JmxUtils.TYPE, JmxUtils.SERVICE);
2438             name = new ObjectName(name.getDomain(), ht);
2439         }
2440         this.mbeanName = addGuiPort(addVitals(name));
2441         Heritrix.instances.put(this.mbeanName.
2442             getCanonicalKeyPropertyListString(), this);
2443         return this.mbeanName;
2444     }
2445     
2446     /***
2447      * Add vital stats to passed in ObjectName.
2448      * @param name ObjectName to add to.
2449      * @return name with host, guiport, and jmxport added.
2450      * @throws UnknownHostException
2451      * @throws MalformedObjectNameException
2452      * @throws NullPointerException
2453      */
2454     protected static ObjectName addVitals(ObjectName name)
2455     throws UnknownHostException, MalformedObjectNameException,
2456     NullPointerException {
2457         @SuppressWarnings("unchecked")
2458         Hashtable<String,String> ht = name.getKeyPropertyList();
2459         if (!ht.containsKey(JmxUtils.HOST)) {
2460             ht.put(JmxUtils.HOST, InetAddress.getLocalHost().getHostName());
2461             name = new ObjectName(name.getDomain(), ht);
2462         }
2463         if (!ht.containsKey(JmxUtils.JMX_PORT)) {
2464             // Add jdk jmx-port. This will be present if we've attached
2465             // ourselves to the jdk jmx agent.  Otherwise, we've been
2466             // deployed in a j2ee container with its own jmx agent.  In
2467             // this case we won't know how to get jmx port.
2468             String p = System.getProperty("com.sun.management.jmxremote.port");
2469             if (p != null && p.length() > 0) {
2470                 ht.put(JmxUtils.JMX_PORT, p);
2471                 name = new ObjectName(name.getDomain(), ht);
2472             }
2473         }
2474         return name;
2475     }
2476     
2477     protected static ObjectName addGuiPort(ObjectName name)
2478     throws MalformedObjectNameException, NullPointerException {
2479         @SuppressWarnings("unchecked")
2480         Hashtable<String,String> ht = name.getKeyPropertyList();
2481         if (!ht.containsKey(JmxUtils.GUI_PORT)) {
2482             // Add gui port if this instance was started with a gui.
2483             if (Heritrix.gui) {
2484                 ht.put(JmxUtils.GUI_PORT, Integer.toString(Heritrix.guiPort));
2485                 name = new ObjectName(name.getDomain(), ht);
2486             }
2487         }
2488         return name;
2489     }
2490 
2491     public void postRegister(Boolean registrationDone) {
2492         if (logger.isLoggable(Level.INFO)) {
2493             logger.info(
2494                 JmxUtils.getLogRegistrationMsg(this.mbeanName.getCanonicalName(),
2495                 this.mbeanServer, registrationDone.booleanValue()));
2496         }
2497         try {
2498             registerJndi(this.mbeanName);
2499         } catch (Exception e) {
2500             logger.log(Level.SEVERE, "Failed jndi registration", e);
2501         }
2502     }
2503 
2504     public void preDeregister() throws Exception {
2505         deregisterJndi(this.mbeanName);
2506     }
2507 
2508     public void postDeregister() {
2509         Heritrix.instances.
2510             remove(this.mbeanName.getCanonicalKeyPropertyListString());
2511         if (logger.isLoggable(Level.INFO)) {
2512             logger.info(JmxUtils.getLogUnregistrationMsg(
2513                     this.mbeanName.getCanonicalName(), this.mbeanServer));
2514         }
2515     }
2516     
2517     protected static void registerContainerJndi()
2518     throws MalformedObjectNameException, NullPointerException,
2519     		UnknownHostException, NamingException {
2520     	registerJndi(getJndiContainerName());
2521     }
2522 
2523     protected static void registerJndi(final ObjectName name)
2524     throws NullPointerException, NamingException {
2525     	Context c = getJndiContext();
2526     	if (c == null) {
2527     		return;
2528     	}
2529         CompoundName key = JndiUtils.bindObjectName(c, name);
2530         if (logger.isLoggable(Level.FINE)) {
2531             logger.fine("Bound '"  + key + "' to '" + JndiUtils.
2532                getCompoundName(c.getNameInNamespace()).toString()
2533                + "' jndi context");
2534         }
2535     }
2536     
2537     protected static void deregisterJndi(final ObjectName name)
2538     throws NullPointerException, NamingException {
2539     	Context c = getJndiContext();
2540     	if (c == null) {
2541     		return;
2542     	}
2543         CompoundName key = JndiUtils.unbindObjectName(c, name);
2544         if (logger.isLoggable(Level.FINE)) {
2545             logger.fine("Unbound '" + key + "' from '" +
2546                 JndiUtils.getCompoundName(c.getNameInNamespace()).toString() +
2547                 	"' jndi context");
2548         }
2549     }
2550     
2551     /***
2552      * @return Jndi context for the crawler or null if none found.
2553      * @throws NamingException 
2554      */
2555     protected static Context getJndiContext() throws NamingException {
2556     	Context c = null;
2557     	try {
2558     		c = JndiUtils.getSubContext(CRAWLER_PACKAGE);
2559     	} catch (NoInitialContextException e) {
2560     		logger.fine("No JNDI Context: " + e.toString());
2561     	}
2562     	return c;
2563     }
2564     
2565     /***
2566      * @return Jndi container name -- the name to use for the 'container' that
2567      * can host zero or more heritrix instances (Return a JMX ObjectName.  We
2568      * use ObjectName because then we're sync'd with JMX naming and ObjectName
2569      * has nice parsing).
2570      * @throws NullPointerException 
2571      * @throws MalformedObjectNameException 
2572      * @throws UnknownHostException 
2573      */
2574     protected static ObjectName getJndiContainerName()
2575     throws MalformedObjectNameException, NullPointerException,
2576     UnknownHostException {
2577         ObjectName objName = new ObjectName(CRAWLER_PACKAGE, "type",
2578             "container");
2579         return addVitals(objName);
2580     }
2581     
2582     /***
2583      * @return Return all registered instances of Heritrix (Rare are there 
2584      * more than one).
2585      */
2586     public static Map getInstances() {
2587         return Heritrix.instances;
2588     }
2589     
2590     /***
2591      * @return True if only one instance of Heritrix.
2592      */
2593     public static boolean isSingleInstance() {
2594         return Heritrix.instances != null && Heritrix.instances.size() == 1;
2595     }
2596     
2597     /***
2598      * @return Returns single instance or null if no instance or multiple.
2599      */
2600     public static Heritrix getSingleInstance() {
2601         return !isSingleInstance()?
2602             null:
2603             (Heritrix)Heritrix.instances.
2604                 get(Heritrix.instances.keySet().iterator().next());
2605     }
2606 }