1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.archive.crawler.processor;
24
25 import java.io.File;
26 import java.io.IOException;
27 import java.util.Collections;
28 import java.util.HashMap;
29 import java.util.Map;
30 import java.util.logging.Level;
31 import java.util.logging.Logger;
32
33 import org.archive.crawler.datamodel.CrawlURI;
34 import org.archive.crawler.datamodel.FetchStatusCodes;
35 import org.archive.crawler.framework.Processor;
36 import org.archive.crawler.settings.SimpleType;
37 import org.archive.crawler.settings.Type;
38
39 import bsh.EvalError;
40 import bsh.Interpreter;
41
42 /***
43 * A processor which runs a BeanShell script on the CrawlURI.
44 *
45 * Script source may be provided via a file
46 * local to the crawler.
47 * Script source should define
48 * a method with one argument, 'run(curi)'. Each processed CrawlURI is
49 * passed to this script method.
50 *
51 * Other variables available to the script include 'self' (this
52 * BeanShellProcessor instance) and 'controller' (the crawl's
53 * CrawlController instance).
54 *
55 * @author gojomo
56 * @version $Date: 2006-09-25 20:19:54 +0000 (Mon, 25 Sep 2006) $, $Revision: 4654 $
57 */
58 public class BeanShellProcessor extends Processor implements FetchStatusCodes {
59
60 private static final long serialVersionUID = 6926589944337050754L;
61
62 private static final Logger logger =
63 Logger.getLogger(BeanShellProcessor.class.getName());
64
65 /*** setting for script file */
66 public final static String ATTR_SCRIPT_FILE = "script-file";
67
68 /*** whether each thread should have its own script runner (true), or
69 * they should share a single script runner with synchronized access */
70 public final static String ATTR_ISOLATE_THREADS = "isolate-threads";
71
72 protected ThreadLocal<Interpreter> threadInterpreter;
73 protected Interpreter sharedInterpreter;
74 public Map<Object,Object> sharedMap = Collections.synchronizedMap(
75 new HashMap<Object,Object>());
76
77 /***
78 * Constructor.
79 * @param name Name of this processor.
80 */
81 public BeanShellProcessor(String name) {
82 super(name, "BeanShellProcessor. Runs the BeanShell script source " +
83 "(supplied directly or via a file path) against the " +
84 "current URI. Source should define a script method " +
85 "'process(curi)' which will be passed the current CrawlURI. " +
86 "The script may also access this BeanShellProcessor via" +
87 "the 'self' variable and the CrawlController via the " +
88 "'controller' variable.");
89 Type t = addElementToDefinition(new SimpleType(ATTR_SCRIPT_FILE,
90 "BeanShell script file", ""));
91 t.setOverrideable(false);
92 t = addElementToDefinition(new SimpleType(ATTR_ISOLATE_THREADS,
93 "Whether each ToeThread should get its own independent " +
94 "script context, or they should share synchronized access " +
95 "to one context. Default is true, meaning each threads " +
96 "gets its own isolated context.", true));
97 t.setOverrideable(false);
98
99 }
100
101 protected synchronized void innerProcess(CrawlURI curi) {
102
103
104 Interpreter interpreter = getInterpreter();
105 synchronized(interpreter) {
106
107
108 try {
109 interpreter.set("curi",curi);
110 interpreter.eval("process(curi)");
111 } catch (EvalError e) {
112
113 e.printStackTrace();
114 }
115 }
116 }
117
118 /***
119 * Get the proper Interpreter instance -- either shared or local
120 * to this thread.
121 * @return Interpreter to use
122 */
123 protected Interpreter getInterpreter() {
124 if(sharedInterpreter!=null) {
125 return sharedInterpreter;
126 }
127 Interpreter interpreter = threadInterpreter.get();
128 if(interpreter==null) {
129 interpreter = newInterpreter();
130 threadInterpreter.set(interpreter);
131 }
132 return interpreter;
133 }
134
135 /***
136 * Create a new Interpreter instance, preloaded with any supplied
137 * source code or source file and the variables 'self' (this
138 * BeanShellProcessor) and 'controller' (the CrawlController).
139 *
140 * @return the new Interpreter instance
141 */
142 protected Interpreter newInterpreter() {
143 Interpreter interpreter = new Interpreter();
144 try {
145 interpreter.set("self", this);
146 interpreter.set("controller", getController());
147
148 String filePath = (String) getUncheckedAttribute(null, ATTR_SCRIPT_FILE);
149 if(filePath.length()>0) {
150 try {
151 File file = getSettingsHandler().getPathRelativeToWorkingDirectory(filePath);
152 interpreter.source(file.getPath());
153 } catch (IOException e) {
154 logger.log(Level.SEVERE,"unable to read script file",e);
155 }
156 }
157 } catch (EvalError e) {
158
159 e.printStackTrace();
160 }
161
162 return interpreter;
163 }
164
165 protected void initialTasks() {
166 super.initialTasks();
167 kickUpdate();
168 }
169
170 /***
171 * Setup (or reset) Intepreter variables, as appropraite based on
172 * thread-isolation setting.
173 */
174 public void kickUpdate() {
175
176
177 if((Boolean)getUncheckedAttribute(null,ATTR_ISOLATE_THREADS)) {
178 sharedInterpreter = null;
179 threadInterpreter = new ThreadLocal<Interpreter>();
180 } else {
181 sharedInterpreter = newInterpreter();
182 threadInterpreter = null;
183 }
184 }
185 }