1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.archive.crawler.framework;
24
25 import java.util.logging.Level;
26 import java.util.logging.Logger;
27
28 import javax.management.AttributeNotFoundException;
29
30 import org.archive.crawler.datamodel.CandidateURI;
31 import org.archive.crawler.settings.SimpleType;
32 import org.archive.crawler.settings.Type;
33 import org.archive.crawler.util.LogUtils;
34
35 /***
36 * Base class for Scopers.
37 * Scopers test CandidateURIs against a scope.
38 * Scopers allow logging of rejected CandidateURIs.
39 * @author stack
40 * @version $Date: 2006-09-25 23:59:43 +0000 (Mon, 25 Sep 2006) $, $Revision: 4664 $
41 */
42 public abstract class Scoper extends Processor {
43 private static Logger LOGGER =
44 Logger.getLogger(Scoper.class.getName());
45
46 /***
47 * Protected so avaiilable to subclasses.
48 */
49 protected static final String ATTR_OVERRIDE_LOGGER_ENABLED =
50 "override-logger";
51
52 /***
53 * Constructor.
54 * @param name
55 * @param description
56 */
57 public Scoper(String name, String description) {
58 super(name, description);
59 Type t = addElementToDefinition(
60 new SimpleType(ATTR_OVERRIDE_LOGGER_ENABLED,
61 "If enabled, override default logger for this class (Default " +
62 "logger writes the console). Override " +
63 "logger will instead send all logging to a file named for this " +
64 "class in the job log directory. Set the logging level and " +
65 "other " +
66 "characteristics of the override logger such as rotation size, " +
67 "suffix pattern, etc. in heritrix.properties. This attribute " +
68 "is only checked once, on startup of a job.",
69 new Boolean(false)));
70 t.setExpertSetting(true);
71 }
72
73 protected void initialTasks() {
74 super.initialTasks();
75 if (!isOverrideLogger(null)) {
76 return;
77 }
78
79
80 LogUtils.createFileLogger(getController().getLogsDir(),
81 this.getClass().getName(),
82 Logger.getLogger(this.getClass().getName()));
83 }
84
85 /***
86 * @param context Context to use looking up attribute.
87 * @return True if we are to override default logger (default logs
88 * to console) with a logger that writes all loggings to a file
89 * named for this class.
90 */
91 protected boolean isOverrideLogger(Object context) {
92 boolean result = true;
93 try {
94 Boolean b = (Boolean)getAttribute(context,
95 ATTR_OVERRIDE_LOGGER_ENABLED);
96 if (b != null) {
97 result = b.booleanValue();
98 }
99 } catch (AttributeNotFoundException e) {
100 LOGGER.warning("Failed get of 'enabled' attribute.");
101 }
102
103 return result;
104 }
105
106 /***
107 * Schedule the given {@link CandidateURI CandidateURI} with the Frontier.
108 * @param caUri The CandidateURI to be scheduled.
109 * @return true if CandidateURI was accepted by crawl scope, false
110 * otherwise.
111 */
112 protected boolean isInScope(CandidateURI caUri) {
113 boolean result = false;
114 if (getController().getScope().accepts(caUri)) {
115 result = true;
116 if (LOGGER.isLoggable(Level.FINER)) {
117 LOGGER.finer("Accepted: " + caUri);
118 }
119 } else {
120 outOfScope(caUri);
121 }
122 return result;
123 }
124
125 /***
126 * Called when a CandidateUri is ruled out of scope.
127 * Override if you don't want logs as coming from this class.
128 * @param caUri CandidateURI that is out of scope.
129 */
130 protected void outOfScope(CandidateURI caUri) {
131 if (!LOGGER.isLoggable(Level.INFO)) {
132 return;
133 }
134 LOGGER.info(caUri.getUURI().toString());
135 }
136 }