View Javadoc

1   /*
2    * Heritrix
3    *
4    * $Id: JobConfigureUtils.java 4401 2006-07-31 19:39:04Z gojomo $
5    *
6    * Created on Aug 30, 2004
7    *
8    * Copyright (C) 2003 Internet Archive.
9    *
10   * This file is part of the Heritrix web crawler (crawler.archive.org).
11   *
12   * Heritrix is free software; you can redistribute it and/or modify
13   * it under the terms of the GNU Lesser Public License as published by
14   * the Free Software Foundation; either version 2.1 of the License, or
15   * any later version.
16   *
17   * Heritrix is distributed in the hope that it will be useful,
18   * but WITHOUT ANY WARRANTY; without even the implied warranty of
19   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20   * GNU Lesser Public License for more details.
21   *
22   * You should have received a copy of the GNU Lesser Public License
23   * along with Heritrix; if not, write to the Free Software
24   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25   */
26  package org.archive.crawler.admin.ui;
27  
28  import java.io.BufferedReader;
29  import java.io.BufferedWriter;
30  import java.io.File;
31  import java.io.FileInputStream;
32  import java.io.FileWriter;
33  import java.io.IOException;
34  import java.io.InputStream;
35  import java.io.InputStreamReader;
36  import java.io.Reader;
37  import java.io.StringReader;
38  import java.io.Writer;
39  import java.lang.reflect.Constructor;
40  import java.lang.reflect.InvocationTargetException;
41  import java.util.logging.Level;
42  import java.util.logging.Logger;
43  
44  import javax.management.Attribute;
45  import javax.management.AttributeNotFoundException;
46  import javax.management.InvalidAttributeValueException;
47  import javax.management.MBeanAttributeInfo;
48  import javax.management.MBeanException;
49  import javax.management.ReflectionException;
50  import javax.servlet.http.HttpServletRequest;
51  import javax.servlet.http.HttpServletResponse;
52  
53  import org.archive.crawler.admin.CrawlJob;
54  import org.archive.crawler.admin.CrawlJobHandler;
55  import org.archive.crawler.settings.ComplexType;
56  import org.archive.crawler.settings.CrawlerSettings;
57  import org.archive.crawler.settings.ListType;
58  import org.archive.crawler.settings.MapType;
59  import org.archive.crawler.settings.ModuleAttributeInfo;
60  import org.archive.crawler.settings.ModuleType;
61  import org.archive.crawler.settings.SettingsHandler;
62  import org.archive.crawler.settings.XMLSettingsHandler;
63  import org.archive.crawler.settings.refinements.Refinement;
64  import org.archive.util.IoUtils;
65  
66  /***
67   * Utility methods used configuring jobs in the admin UI.
68   * 
69   * Methods are mostly called by the admin UI jsp.
70   * 
71   * @author stack
72   * @version $Date: 2006-07-31 19:39:04 +0000 (Mon, 31 Jul 2006) $, $Revision: 4401 $
73   */
74  public class JobConfigureUtils {
75      private static Logger logger = Logger.getLogger(JobConfigureUtils.class
76              .getName());
77      public static final String ACTION = "action";
78      public static final String SUBACTION = "subaction";
79      public static final String FILTERS = "filters";
80      private static final String MAP = "map";
81      private static final String FILTER = "filter";
82      private static final Object ADD = "add";
83      private static final Object MOVEUP = "moveup";
84      private static final Object MOVEDOWN = "movedown";
85      private static final Object REMOVE = "remove";
86      private static final Object GOTO = "goto";
87      private static final Object DONE = "done";
88      private static final Object CONTINUE = "continue"; // keep editting
89  
90      /***
91       * Check passed crawljob CrawlJob setting. Call this method at start of
92       * page.
93       * 
94       * @param job
95       *            Current CrawlJobHandler.
96       * @param request
97       *            Http request.
98       * @param response
99       *            Http response.
100      * @return Crawljob.
101      */
102     protected static CrawlJob getAndCheckJob(CrawlJob job,
103             HttpServletRequest request, HttpServletResponse response) {
104         return job;
105     }
106 
107     /***
108      * This methods updates a ComplexType with information passed to it by a
109      * HttpServletRequest. It assumes that for every 'simple' type there is a
110      * corresponding parameter in the request. A recursive call will be made for
111      * any nested ComplexTypes. For each attribute it will check if the relevant
112      * override is set (name.override parameter equals 'true'). If so the
113      * attribute setting on the specified domain level (settings) will be
114      * rewritten. If it is not we well ensure that it isn't being overridden.
115      * 
116      * @param mbean
117      *            The ComplexType to update
118      * @param settings
119      *            CrawlerSettings for the domain to override setting for. null
120      *            denotes the global settings.
121      * @param request
122      *            The HttpServletRequest to use to update the ComplexType
123      * @param expert
124      *            if true expert settings will be updated, otherwise they will
125      *            be ignored.
126      */
127     public static void writeNewOrderFile(ComplexType mbean,
128             CrawlerSettings settings, HttpServletRequest request, boolean expert) {
129         // If mbean is transient or a hidden expert setting.
130         if (mbean.isTransient() || (mbean.isExpertSetting() && expert == false)) {
131             return;
132         }
133 
134         MBeanAttributeInfo a[] = mbean.getMBeanInfo(settings).getAttributes();
135         for (int n = 0; n < a.length; n++) {
136             checkAttribute((ModuleAttributeInfo) a[n], mbean, settings,
137                     request, expert);
138         }
139     }
140 
141     /***
142      * Process passed attribute. Check if needs to be written and if so, write
143      * it.
144      * 
145      * @param att
146      *            Attribute to process.
147      * @param mbean
148      *            The ComplexType to update
149      * @param settings
150      *            CrawlerSettings for the domain to override setting for. null
151      *            denotes the global settings.
152      * @param request
153      *            The HttpServletRequest to use to update the ComplexType
154      * @param expert
155      *            if true expert settings will be updated, otherwise they will
156      *            be ignored.
157      */
158     protected static void checkAttribute(ModuleAttributeInfo att,
159             ComplexType mbean, CrawlerSettings settings,
160             HttpServletRequest request, boolean expert) {
161         // The attributes of the current attribute.
162         Object currentAttribute = null;
163         try {
164             currentAttribute = mbean.getAttribute(settings, att.getName());
165         } catch (Exception e) {
166             logger.severe("Failed getting " + mbean.getAbsoluteName()
167                     + " attribute " + att.getName() + ": " + e.getMessage());
168             return;
169         }
170 
171         if (logger.isLoggable(Level.FINE)) {
172             logger.fine("MBEAN: " + mbean.getAbsoluteName() + " "
173                     + att.getName() + " TRANSIENT " + att.isTransient() + " "
174                     + att.isExpertSetting() + " " + expert);
175         }
176 
177         if (att.isTransient() == false
178                 && (att.isExpertSetting() == false || expert)) {
179             if (currentAttribute instanceof ComplexType) {
180                 writeNewOrderFile((ComplexType) currentAttribute, settings,
181                         request, expert);
182             } else {
183                 String attName = att.getName();
184                 // Have a 'setting'. Let's see if we need to update it (if
185                 // settings == null update all, otherwise only if override
186                 // is set.
187                 String attAbsoluteName = mbean.getAbsoluteName() + "/"
188                         + attName;
189                 boolean override = (request.getParameter(attAbsoluteName
190                         + ".override") != null)
191                         && (request.getParameter(attAbsoluteName + ".override")
192                                 .equals("true"));
193                 if (settings == null || override) {
194                     if (currentAttribute instanceof ListType) {
195                         try {
196                             ListType list = (ListType)currentAttribute;
197                             Class cls = list.getClass();
198                             Constructor constructor = cls.getConstructor(String.class, String.class);
199                             list = (ListType) constructor.newInstance(list.getName(), list.getDescription());
200                             String[] elems = request
201                                     .getParameterValues(attAbsoluteName);
202                             for (int i = 0; elems != null && i < elems.length; i++) {
203                                 list.add(elems[i]);
204                             }
205                             writeAttribute(attName, attAbsoluteName, mbean,
206                                     settings, list);
207                         } catch (Exception e) {
208                             e.printStackTrace();
209                             logger.severe("Setting new list values on "
210                                     + attAbsoluteName + ": " + e.getMessage());
211                             return;
212                         }
213                     } else {
214                         writeAttribute(attName, attAbsoluteName, mbean,
215                                 settings, request.getParameter(attAbsoluteName));
216                     }
217 
218                 } else if (settings != null && override == false) {
219                     // Is not being overridden. Need to remove possible
220                     // previous overrides.
221                     try {
222                         mbean.unsetAttribute(settings, attName);
223                     } catch (Exception e) {
224                         e.printStackTrace();
225                         logger.severe("Unsetting attribute on "
226                                 + attAbsoluteName + ": " + e.getMessage());
227                         return;
228                     }
229                 }
230             }
231         }
232     }
233 
234     /***
235      * Write out attribute.
236      * 
237      * @param attName
238      *            Attribute short name.
239      * @param attAbsoluteName
240      *            Attribute full name.
241      * @param mbean
242      *            The ComplexType to update
243      * @param settings
244      *            CrawlerSettings for the domain to override setting for. null
245      *            denotes the global settings.
246      * @param value
247      *            Value to set into the attribute.
248      */
249     protected static void writeAttribute(String attName,
250             String attAbsoluteName, ComplexType mbean,
251             CrawlerSettings settings, Object value) {
252         try {
253             if (logger.isLoggable(Level.FINE)) {
254                 logger.fine("MBEAN SET: " + attAbsoluteName + " " + value);
255             }
256             mbean.setAttribute(settings, new Attribute(attName, value));
257         } catch (Exception e) {
258             e.printStackTrace();
259             logger.severe("Setting attribute value " + value + " on "
260                     + attAbsoluteName + ": " + e.getMessage());
261             return;
262         }
263     }
264 
265     /***
266      * Check passed job is not null and not readonly.
267      * @param job Job to check.
268      * @param response Http response.
269      * @param redirectBasePath Full path for where to go next if an error.
270      * @param currDomain May be null.
271      * E.g. "/admin/jobs/per/overview.jsp".
272      * @return A job else we've redirected if no job or readonly.
273      * @throws IOException
274      */
275     public static CrawlJob checkCrawlJob(CrawlJob job,
276         HttpServletResponse response, String redirectBasePath,
277         String currDomain)
278     throws IOException {
279         if (job == null) {
280             // Didn't find any job with the given UID or no UID given.
281             response.sendRedirect(redirectBasePath +
282                 "?message=No job selected");
283         } else if (job.isReadOnly()) {
284             // Can't edit this job.
285             response.sendRedirect(redirectBasePath +
286                 "?job=" + job.getUID() +
287                 ((currDomain != null && currDomain.length() > 0)?
288                     "&currDomain=" + currDomain: "") +
289                 "&message=Can't edit a read only job");
290         }
291         return job;
292     }
293 
294     /***
295      * Handle job action.
296      * @param handler CrawlJobHandler to operate on.
297      * @param request Http request.
298      * @param response Http response.
299      * @param redirectBasePath Full path for where to go next if an error.
300      * E.g. "/admin/jobs/per/overview.jsp".
301      * @param currDomain Current domain.  Pass null for global domain.
302      * @param reference 
303      * @return The crawljob configured.
304      * @throws IOException
305      * @throws AttributeNotFoundException
306      * @throws InvocationTargetException
307      * @throws InvalidAttributeValueException
308      */
309     public static CrawlJob handleJobAction(CrawlJobHandler handler,
310             HttpServletRequest request, HttpServletResponse response,
311             String redirectBasePath, String currDomain, String reference)
312     throws IOException, AttributeNotFoundException, InvocationTargetException,
313         InvalidAttributeValueException {
314 
315         // Load the job to manipulate
316         CrawlJob theJob =
317             checkCrawlJob(handler.getJob(request.getParameter("job")),
318                 response, redirectBasePath, currDomain);
319 
320         XMLSettingsHandler settingsHandler = theJob.getSettingsHandler();
321         // If currDomain is null, then we're at top-level.
322         CrawlerSettings settings = settingsHandler
323             .getSettingsObject(currDomain);
324         
325         if(reference != null) {
326             // refinement
327             Refinement refinement = settings.getRefinement(reference);
328             settings = refinement.getSettings();
329         }
330 
331         // See if we need to take any action
332         if (request.getParameter(ACTION) != null) {
333             // Need to take some action.
334             String action = request.getParameter(ACTION);
335             String subaction = request.getParameter(SUBACTION);
336             if (action.equals(FILTERS)) {
337                 // Doing something with the filters.
338                 String map = request.getParameter(MAP);
339                 if (map != null && map.length() > 0) {
340                     String filter = request.getParameter(FILTER);
341                     MapType filterMap = (MapType) settingsHandler
342                         .getComplexTypeByAbsoluteName(settings, map);
343                     if (subaction.equals(ADD)) {
344                         // Add filter
345                         String className = request.getParameter(map + ".class");
346                         String typeName = request.getParameter(map + ".name");
347                         if (typeName != null && typeName.length() > 0 &&
348                                 className != null && className.length() > 0) {
349                             ModuleType tmp = SettingsHandler
350                                 .instantiateModuleTypeFromClassName(
351                                     typeName, className);
352                             filterMap.addElement(settings, tmp);
353                         }
354                     } else if (subaction.equals(MOVEUP)) {
355                         // Move a filter down in a map
356                         if (filter != null && filter.length() > 0) {
357                             filterMap.moveElementUp(settings, filter);
358                         }
359                     } else if (subaction.equals(MOVEDOWN)) {
360                         // Move a filter up in a map
361                         if (filter != null && filter.length() > 0) {
362                             filterMap.moveElementDown(settings, filter);
363                         }
364                     } else if (subaction.equals(REMOVE)) {
365                         // Remove a filter from a map
366                         if (filter != null && filter.length() > 0) {
367                             filterMap.removeElement(settings, filter);
368                         }
369                     }
370                 }
371                 // Finally save the changes to disk
372                 settingsHandler.writeSettingsObject(settings);
373             } else if (action.equals(DONE)) {
374                 // Ok, done editing.
375                 if(subaction.equals(CONTINUE)) {
376                     // was editting an override/refinement, simply continue
377                     if (theJob.isRunning()) {
378                         handler.kickUpdate(); //Just to make sure.
379                     }
380                     String overParam = ((currDomain != null && currDomain
381                             .length() > 0) ? "&currDomain=" + currDomain : "");
382                     String refParam = 
383                         ((reference != null && reference.length() > 0) 
384                                 ? "&reference=" + reference
385                                 : "");
386                     String messageParam = (refParam.length() > 0) 
387                          ? "&message=Refinement changes saved"
388                          : "&message=Override changes saved";
389                     response.sendRedirect(redirectBasePath +
390                         "?job=" + theJob.getUID() +
391                          overParam +
392                          refParam + 
393                          messageParam);
394                 } else {
395                     // on main, truly 'done'
396                     if (theJob.isNew()) {
397                         handler.addJob(theJob);
398                         response.sendRedirect(redirectBasePath
399                                 + "?message=Job created");
400                     } else {
401                         if (theJob.isRunning()) {
402                             handler.kickUpdate();
403                         }
404                         if (theJob.isProfile()) {
405                             response.sendRedirect(redirectBasePath
406                                     + "?message=Profile modified");
407                         } else {
408                             response.sendRedirect(redirectBasePath
409                                     + "?message=Job modified");
410                         }
411                     }
412                 }
413             } else if (action.equals(GOTO)) {
414                 // Goto another page of the job/profile settings
415                 String overParam = ((currDomain != null && currDomain
416                         .length() > 0) ? "&currDomain=" + currDomain : "");
417                 String refParam = 
418                     ((reference != null && reference.length() > 0) 
419                             ? "&reference=" + reference
420                             : "");
421                 response.sendRedirect(request.getParameter(SUBACTION) +
422                     overParam + refParam);
423             }
424         }
425         return theJob;
426     }
427     
428     /***
429      * Print complete seeds list on passed in PrintWriter.
430      * @param hndlr Current handler.
431      * @param payload What to write out.
432      * @throws AttributeNotFoundException
433      * @throws MBeanException
434      * @throws ReflectionException
435      * @throws IOException
436      * @throws IOException
437      */
438     public static void printOutSeeds(SettingsHandler hndlr, String payload)
439     throws AttributeNotFoundException, MBeanException, ReflectionException,
440     IOException {
441         File seedfile = getSeedFile(hndlr);
442         writeReader(new StringReader(payload),
443             new BufferedWriter(new FileWriter(seedfile)));
444     }
445     
446     /***
447      * Print complete seeds list on passed in PrintWriter.
448      * @param hndlr Current handler.
449      * @param out Writer to write out all seeds to.
450      * @throws ReflectionException
451      * @throws MBeanException
452      * @throws AttributeNotFoundException
453      * @throws IOException
454      */
455     public static void printOutSeeds(SettingsHandler hndlr, Writer out)
456     throws AttributeNotFoundException, MBeanException, ReflectionException,
457             IOException {
458         // getSeedStream looks for seeds on disk and on classpath.
459         InputStream is = getSeedStream(hndlr);
460         writeReader(new BufferedReader(new InputStreamReader(is)), out);
461     }
462     
463     /***
464      * Test whether seeds file is of a size that's reasonable
465      * to edit in an HTML textarea. 
466      * @param h current settingsHandler
467      * @return true if seeds size is manageable, false otherwise
468      * @throws AttributeNotFoundException 
469      * @throws MBeanException 
470      * @throws ReflectionException 
471      * 
472      */
473     public static boolean seedsEdittableSize(SettingsHandler h)
474             throws AttributeNotFoundException, MBeanException,
475             ReflectionException {
476         return getSeedFile(h).length() <= (32 * 1024); // 32K
477     }
478     /***
479      * @param hndlr Settings handler.
480      * @return Seeds file.
481      * @throws ReflectionException
482      * @throws MBeanException
483      * @throws AttributeNotFoundException
484      */
485     protected static File getSeedFile(SettingsHandler hndlr)
486     throws AttributeNotFoundException, MBeanException, ReflectionException {
487         String seedsFileStr = (String)((ComplexType)hndlr.getOrder().
488             getAttribute("scope")).getAttribute("seedsfile");
489         return hndlr.getPathRelativeToWorkingDirectory(seedsFileStr);
490     }
491     
492     /***
493      * Return seeds as a stream.
494      * This method will work for case where seeds are on disk or on classpath.
495      * @param hndlr SettingsHandler.  Used to find seeds.txt file.
496      * @return InputStream on current seeds file.
497      * @throws IOException
498      * @throws ReflectionException
499      * @throws MBeanException
500      * @throws AttributeNotFoundException
501      */
502     protected static InputStream getSeedStream(SettingsHandler hndlr)
503     throws IOException, AttributeNotFoundException, MBeanException,
504             ReflectionException {
505         InputStream is = null;
506         File seedFile = getSeedFile(hndlr);
507         if (!seedFile.exists()) {
508             // Is the file on the CLASSPATH?
509             is = SettingsHandler.class.
510                 getResourceAsStream(IoUtils.getClasspathPath(seedFile));
511         } else if(seedFile.canRead()) {
512             is = new FileInputStream(seedFile);
513         }
514         if (is == null) {
515             throw new IOException(seedFile + " does not" +
516             " exist -- neither on disk nor on CLASSPATH -- or is not" +
517             " readable.");
518         }
519         return is;
520     }
521     
522     /***
523      * Print complete seeds list on passed in PrintWriter.
524      * @param reader File to read seeds from.
525      * @param out Writer to write out all seeds to.
526      * @throws IOException
527      */
528     protected static void writeReader(Reader reader, Writer out)
529     throws IOException {
530         final int bufferSize = 1024 * 4;
531         char [] buffer = new char[bufferSize];
532         int read = -1;
533         while ((read = reader.read(buffer, 0, bufferSize)) != -1) {
534             out.write(buffer, 0, read);
535         }
536         out.flush();
537     }
538 }