1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 package org.archive.crawler.admin.ui;
27
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.io.Writer;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.net.URLEncoder;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.management.Attribute;
import javax.management.AttributeNotFoundException;
import javax.management.InvalidAttributeValueException;
import javax.management.MBeanAttributeInfo;
import javax.management.MBeanException;
import javax.management.ReflectionException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.archive.crawler.admin.CrawlJob;
import org.archive.crawler.admin.CrawlJobHandler;
import org.archive.crawler.settings.ComplexType;
import org.archive.crawler.settings.CrawlerSettings;
import org.archive.crawler.settings.ListType;
import org.archive.crawler.settings.MapType;
import org.archive.crawler.settings.ModuleAttributeInfo;
import org.archive.crawler.settings.ModuleType;
import org.archive.crawler.settings.SettingsHandler;
import org.archive.crawler.settings.XMLSettingsHandler;
import org.archive.crawler.settings.refinements.Refinement;
import org.archive.util.IoUtils;
65
66 /***
 * Utility methods used when configuring jobs in the admin UI.
68 *
69 * Methods are mostly called by the admin UI jsp.
70 *
71 * @author stack
72 * @version $Date: 2006-07-31 19:39:04 +0000 (Mon, 31 Jul 2006) $, $Revision: 4401 $
73 */
74 public class JobConfigureUtils {
75 private static Logger logger = Logger.getLogger(JobConfigureUtils.class
76 .getName());
77 public static final String ACTION = "action";
78 public static final String SUBACTION = "subaction";
79 public static final String FILTERS = "filters";
80 private static final String MAP = "map";
81 private static final String FILTER = "filter";
82 private static final Object ADD = "add";
83 private static final Object MOVEUP = "moveup";
84 private static final Object MOVEDOWN = "movedown";
85 private static final Object REMOVE = "remove";
86 private static final Object GOTO = "goto";
87 private static final Object DONE = "done";
88 private static final Object CONTINUE = "continue";
89
90 /***
91 * Check passed crawljob CrawlJob setting. Call this method at start of
92 * page.
93 *
94 * @param job
95 * Current CrawlJobHandler.
96 * @param request
97 * Http request.
98 * @param response
99 * Http response.
100 * @return Crawljob.
101 */
102 protected static CrawlJob getAndCheckJob(CrawlJob job,
103 HttpServletRequest request, HttpServletResponse response) {
104 return job;
105 }
106
107 /***
108 * This methods updates a ComplexType with information passed to it by a
109 * HttpServletRequest. It assumes that for every 'simple' type there is a
110 * corresponding parameter in the request. A recursive call will be made for
111 * any nested ComplexTypes. For each attribute it will check if the relevant
112 * override is set (name.override parameter equals 'true'). If so the
113 * attribute setting on the specified domain level (settings) will be
114 * rewritten. If it is not we well ensure that it isn't being overridden.
115 *
116 * @param mbean
117 * The ComplexType to update
118 * @param settings
119 * CrawlerSettings for the domain to override setting for. null
120 * denotes the global settings.
121 * @param request
122 * The HttpServletRequest to use to update the ComplexType
123 * @param expert
124 * if true expert settings will be updated, otherwise they will
125 * be ignored.
126 */
127 public static void writeNewOrderFile(ComplexType mbean,
128 CrawlerSettings settings, HttpServletRequest request, boolean expert) {
129
130 if (mbean.isTransient() || (mbean.isExpertSetting() && expert == false)) {
131 return;
132 }
133
134 MBeanAttributeInfo a[] = mbean.getMBeanInfo(settings).getAttributes();
135 for (int n = 0; n < a.length; n++) {
136 checkAttribute((ModuleAttributeInfo) a[n], mbean, settings,
137 request, expert);
138 }
139 }
140
141 /***
142 * Process passed attribute. Check if needs to be written and if so, write
143 * it.
144 *
145 * @param att
146 * Attribute to process.
147 * @param mbean
148 * The ComplexType to update
149 * @param settings
150 * CrawlerSettings for the domain to override setting for. null
151 * denotes the global settings.
152 * @param request
153 * The HttpServletRequest to use to update the ComplexType
154 * @param expert
155 * if true expert settings will be updated, otherwise they will
156 * be ignored.
157 */
158 protected static void checkAttribute(ModuleAttributeInfo att,
159 ComplexType mbean, CrawlerSettings settings,
160 HttpServletRequest request, boolean expert) {
161
162 Object currentAttribute = null;
163 try {
164 currentAttribute = mbean.getAttribute(settings, att.getName());
165 } catch (Exception e) {
166 logger.severe("Failed getting " + mbean.getAbsoluteName()
167 + " attribute " + att.getName() + ": " + e.getMessage());
168 return;
169 }
170
171 if (logger.isLoggable(Level.FINE)) {
172 logger.fine("MBEAN: " + mbean.getAbsoluteName() + " "
173 + att.getName() + " TRANSIENT " + att.isTransient() + " "
174 + att.isExpertSetting() + " " + expert);
175 }
176
177 if (att.isTransient() == false
178 && (att.isExpertSetting() == false || expert)) {
179 if (currentAttribute instanceof ComplexType) {
180 writeNewOrderFile((ComplexType) currentAttribute, settings,
181 request, expert);
182 } else {
183 String attName = att.getName();
184
185
186
187 String attAbsoluteName = mbean.getAbsoluteName() + "/"
188 + attName;
189 boolean override = (request.getParameter(attAbsoluteName
190 + ".override") != null)
191 && (request.getParameter(attAbsoluteName + ".override")
192 .equals("true"));
193 if (settings == null || override) {
194 if (currentAttribute instanceof ListType) {
195 try {
196 ListType list = (ListType)currentAttribute;
197 Class cls = list.getClass();
198 Constructor constructor = cls.getConstructor(String.class, String.class);
199 list = (ListType) constructor.newInstance(list.getName(), list.getDescription());
200 String[] elems = request
201 .getParameterValues(attAbsoluteName);
202 for (int i = 0; elems != null && i < elems.length; i++) {
203 list.add(elems[i]);
204 }
205 writeAttribute(attName, attAbsoluteName, mbean,
206 settings, list);
207 } catch (Exception e) {
208 e.printStackTrace();
209 logger.severe("Setting new list values on "
210 + attAbsoluteName + ": " + e.getMessage());
211 return;
212 }
213 } else {
214 writeAttribute(attName, attAbsoluteName, mbean,
215 settings, request.getParameter(attAbsoluteName));
216 }
217
218 } else if (settings != null && override == false) {
219
220
221 try {
222 mbean.unsetAttribute(settings, attName);
223 } catch (Exception e) {
224 e.printStackTrace();
225 logger.severe("Unsetting attribute on "
226 + attAbsoluteName + ": " + e.getMessage());
227 return;
228 }
229 }
230 }
231 }
232 }
233
234 /***
235 * Write out attribute.
236 *
237 * @param attName
238 * Attribute short name.
239 * @param attAbsoluteName
240 * Attribute full name.
241 * @param mbean
242 * The ComplexType to update
243 * @param settings
244 * CrawlerSettings for the domain to override setting for. null
245 * denotes the global settings.
246 * @param value
247 * Value to set into the attribute.
248 */
249 protected static void writeAttribute(String attName,
250 String attAbsoluteName, ComplexType mbean,
251 CrawlerSettings settings, Object value) {
252 try {
253 if (logger.isLoggable(Level.FINE)) {
254 logger.fine("MBEAN SET: " + attAbsoluteName + " " + value);
255 }
256 mbean.setAttribute(settings, new Attribute(attName, value));
257 } catch (Exception e) {
258 e.printStackTrace();
259 logger.severe("Setting attribute value " + value + " on "
260 + attAbsoluteName + ": " + e.getMessage());
261 return;
262 }
263 }
264
265 /***
266 * Check passed job is not null and not readonly.
267 * @param job Job to check.
268 * @param response Http response.
269 * @param redirectBasePath Full path for where to go next if an error.
270 * @param currDomain May be null.
271 * E.g. "/admin/jobs/per/overview.jsp".
272 * @return A job else we've redirected if no job or readonly.
273 * @throws IOException
274 */
275 public static CrawlJob checkCrawlJob(CrawlJob job,
276 HttpServletResponse response, String redirectBasePath,
277 String currDomain)
278 throws IOException {
279 if (job == null) {
280
281 response.sendRedirect(redirectBasePath +
282 "?message=No job selected");
283 } else if (job.isReadOnly()) {
284
285 response.sendRedirect(redirectBasePath +
286 "?job=" + job.getUID() +
287 ((currDomain != null && currDomain.length() > 0)?
288 "&currDomain=" + currDomain: "") +
289 "&message=Can't edit a read only job");
290 }
291 return job;
292 }
293
294 /***
295 * Handle job action.
296 * @param handler CrawlJobHandler to operate on.
297 * @param request Http request.
298 * @param response Http response.
299 * @param redirectBasePath Full path for where to go next if an error.
300 * E.g. "/admin/jobs/per/overview.jsp".
301 * @param currDomain Current domain. Pass null for global domain.
302 * @param reference
303 * @return The crawljob configured.
304 * @throws IOException
305 * @throws AttributeNotFoundException
306 * @throws InvocationTargetException
307 * @throws InvalidAttributeValueException
308 */
309 public static CrawlJob handleJobAction(CrawlJobHandler handler,
310 HttpServletRequest request, HttpServletResponse response,
311 String redirectBasePath, String currDomain, String reference)
312 throws IOException, AttributeNotFoundException, InvocationTargetException,
313 InvalidAttributeValueException {
314
315
316 CrawlJob theJob =
317 checkCrawlJob(handler.getJob(request.getParameter("job")),
318 response, redirectBasePath, currDomain);
319
320 XMLSettingsHandler settingsHandler = theJob.getSettingsHandler();
321
322 CrawlerSettings settings = settingsHandler
323 .getSettingsObject(currDomain);
324
325 if(reference != null) {
326
327 Refinement refinement = settings.getRefinement(reference);
328 settings = refinement.getSettings();
329 }
330
331
332 if (request.getParameter(ACTION) != null) {
333
334 String action = request.getParameter(ACTION);
335 String subaction = request.getParameter(SUBACTION);
336 if (action.equals(FILTERS)) {
337
338 String map = request.getParameter(MAP);
339 if (map != null && map.length() > 0) {
340 String filter = request.getParameter(FILTER);
341 MapType filterMap = (MapType) settingsHandler
342 .getComplexTypeByAbsoluteName(settings, map);
343 if (subaction.equals(ADD)) {
344
345 String className = request.getParameter(map + ".class");
346 String typeName = request.getParameter(map + ".name");
347 if (typeName != null && typeName.length() > 0 &&
348 className != null && className.length() > 0) {
349 ModuleType tmp = SettingsHandler
350 .instantiateModuleTypeFromClassName(
351 typeName, className);
352 filterMap.addElement(settings, tmp);
353 }
354 } else if (subaction.equals(MOVEUP)) {
355
356 if (filter != null && filter.length() > 0) {
357 filterMap.moveElementUp(settings, filter);
358 }
359 } else if (subaction.equals(MOVEDOWN)) {
360
361 if (filter != null && filter.length() > 0) {
362 filterMap.moveElementDown(settings, filter);
363 }
364 } else if (subaction.equals(REMOVE)) {
365
366 if (filter != null && filter.length() > 0) {
367 filterMap.removeElement(settings, filter);
368 }
369 }
370 }
371
372 settingsHandler.writeSettingsObject(settings);
373 } else if (action.equals(DONE)) {
374
375 if(subaction.equals(CONTINUE)) {
376
377 if (theJob.isRunning()) {
378 handler.kickUpdate();
379 }
380 String overParam = ((currDomain != null && currDomain
381 .length() > 0) ? "&currDomain=" + currDomain : "");
382 String refParam =
383 ((reference != null && reference.length() > 0)
384 ? "&reference=" + reference
385 : "");
386 String messageParam = (refParam.length() > 0)
387 ? "&message=Refinement changes saved"
388 : "&message=Override changes saved";
389 response.sendRedirect(redirectBasePath +
390 "?job=" + theJob.getUID() +
391 overParam +
392 refParam +
393 messageParam);
394 } else {
395
396 if (theJob.isNew()) {
397 handler.addJob(theJob);
398 response.sendRedirect(redirectBasePath
399 + "?message=Job created");
400 } else {
401 if (theJob.isRunning()) {
402 handler.kickUpdate();
403 }
404 if (theJob.isProfile()) {
405 response.sendRedirect(redirectBasePath
406 + "?message=Profile modified");
407 } else {
408 response.sendRedirect(redirectBasePath
409 + "?message=Job modified");
410 }
411 }
412 }
413 } else if (action.equals(GOTO)) {
414
415 String overParam = ((currDomain != null && currDomain
416 .length() > 0) ? "&currDomain=" + currDomain : "");
417 String refParam =
418 ((reference != null && reference.length() > 0)
419 ? "&reference=" + reference
420 : "");
421 response.sendRedirect(request.getParameter(SUBACTION) +
422 overParam + refParam);
423 }
424 }
425 return theJob;
426 }
427
428 /***
429 * Print complete seeds list on passed in PrintWriter.
430 * @param hndlr Current handler.
431 * @param payload What to write out.
432 * @throws AttributeNotFoundException
433 * @throws MBeanException
434 * @throws ReflectionException
435 * @throws IOException
436 * @throws IOException
437 */
438 public static void printOutSeeds(SettingsHandler hndlr, String payload)
439 throws AttributeNotFoundException, MBeanException, ReflectionException,
440 IOException {
441 File seedfile = getSeedFile(hndlr);
442 writeReader(new StringReader(payload),
443 new BufferedWriter(new FileWriter(seedfile)));
444 }
445
446 /***
447 * Print complete seeds list on passed in PrintWriter.
448 * @param hndlr Current handler.
449 * @param out Writer to write out all seeds to.
450 * @throws ReflectionException
451 * @throws MBeanException
452 * @throws AttributeNotFoundException
453 * @throws IOException
454 */
455 public static void printOutSeeds(SettingsHandler hndlr, Writer out)
456 throws AttributeNotFoundException, MBeanException, ReflectionException,
457 IOException {
458
459 InputStream is = getSeedStream(hndlr);
460 writeReader(new BufferedReader(new InputStreamReader(is)), out);
461 }
462
463 /***
464 * Test whether seeds file is of a size that's reasonable
465 * to edit in an HTML textarea.
466 * @param h current settingsHandler
467 * @return true if seeds size is manageable, false otherwise
468 * @throws AttributeNotFoundException
469 * @throws MBeanException
470 * @throws ReflectionException
471 *
472 */
473 public static boolean seedsEdittableSize(SettingsHandler h)
474 throws AttributeNotFoundException, MBeanException,
475 ReflectionException {
476 return getSeedFile(h).length() <= (32 * 1024);
477 }
478 /***
479 * @param hndlr Settings handler.
480 * @return Seeds file.
481 * @throws ReflectionException
482 * @throws MBeanException
483 * @throws AttributeNotFoundException
484 */
485 protected static File getSeedFile(SettingsHandler hndlr)
486 throws AttributeNotFoundException, MBeanException, ReflectionException {
487 String seedsFileStr = (String)((ComplexType)hndlr.getOrder().
488 getAttribute("scope")).getAttribute("seedsfile");
489 return hndlr.getPathRelativeToWorkingDirectory(seedsFileStr);
490 }
491
492 /***
493 * Return seeds as a stream.
494 * This method will work for case where seeds are on disk or on classpath.
495 * @param hndlr SettingsHandler. Used to find seeds.txt file.
496 * @return InputStream on current seeds file.
497 * @throws IOException
498 * @throws ReflectionException
499 * @throws MBeanException
500 * @throws AttributeNotFoundException
501 */
502 protected static InputStream getSeedStream(SettingsHandler hndlr)
503 throws IOException, AttributeNotFoundException, MBeanException,
504 ReflectionException {
505 InputStream is = null;
506 File seedFile = getSeedFile(hndlr);
507 if (!seedFile.exists()) {
508
509 is = SettingsHandler.class.
510 getResourceAsStream(IoUtils.getClasspathPath(seedFile));
511 } else if(seedFile.canRead()) {
512 is = new FileInputStream(seedFile);
513 }
514 if (is == null) {
515 throw new IOException(seedFile + " does not" +
516 " exist -- neither on disk nor on CLASSPATH -- or is not" +
517 " readable.");
518 }
519 return is;
520 }
521
522 /***
523 * Print complete seeds list on passed in PrintWriter.
524 * @param reader File to read seeds from.
525 * @param out Writer to write out all seeds to.
526 * @throws IOException
527 */
528 protected static void writeReader(Reader reader, Writer out)
529 throws IOException {
530 final int bufferSize = 1024 * 4;
531 char [] buffer = new char[bufferSize];
532 int read = -1;
533 while ((read = reader.read(buffer, 0, bufferSize)) != -1) {
534 out.write(buffer, 0, read);
535 }
536 out.flush();
537 }
538 }