1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.util;
26
27 import java.io.BufferedInputStream;
28 import java.io.File;
29 import java.io.IOException;
30 import java.io.InputStream;
31 import java.io.OutputStream;
32 import java.util.logging.Level;
33 import java.util.logging.Logger;
34
35 import org.archive.io.RecordingInputStream;
36 import org.archive.io.RecordingOutputStream;
37 import org.archive.io.ReplayCharSequence;
38 import org.archive.io.ReplayInputStream;
39
40
41 /***
42 * Pairs together a RecordingInputStream and RecordingOutputStream
43 * to capture exactly a single HTTP transaction.
44 *
45 * Initially only supports HTTP/1.0 (one request, one response per stream)
46 *
47 * Call {@link #markContentBegin()} to demarc the transition between HTTP
48 * header and body.
49 *
50 * @author gojomo
51 */
52 public class HttpRecorder {
53 protected static Logger logger =
54 Logger.getLogger("org.archive.util.HttpRecorder");
55
56 private static final int DEFAULT_OUTPUT_BUFFER_SIZE = 4096;
57 private static final int DEFAULT_INPUT_BUFFER_SIZE = 65536;
58
59 private RecordingInputStream ris = null;
60 private RecordingOutputStream ros = null;
61
62 /***
63 * Backing file basename.
64 *
65 * Keep it around so can clean up backing files left on disk.
66 */
67 private String backingFileBasename = null;
68
69 /***
70 * Backing file output stream suffix.
71 */
72 private static final String RECORDING_OUTPUT_STREAM_SUFFIX = ".ros";
73
74 /***
75 * Backing file input stream suffix.
76 */
77 private static final String RECORDING_INPUT_STREAM_SUFFIX = ".ris";
78
79 /***
80 * Response character encoding.
81 */
82 private String characterEncoding = null;
83
84 /***
85 * Constructor with limited access.
86 * Used internally for case where we're wrapping an already
87 * downloaded stream with a HttpRecorder.
88 */
89 protected HttpRecorder() {
90 super();
91 }
92
93 /***
94 * Create an HttpRecorder.
95 *
96 * @param tempDir Directory into which we drop backing files for
97 * recorded input and output.
98 * @param backingFilenameBase Backing filename base to which we'll append
99 * suffices <code>ris</code> for recorded input stream and
100 * <code>ros</code> for recorded output stream.
101 * @param outBufferSize Size of output buffer to use.
102 * @param inBufferSize Size of input buffer to use.
103 */
104 public HttpRecorder(File tempDir, String backingFilenameBase,
105 int outBufferSize, int inBufferSize) {
106 super();
107 tempDir.mkdirs();
108 this.backingFileBasename =
109 (new File(tempDir.getPath(), backingFilenameBase))
110 .getAbsolutePath();
111 this.ris = new RecordingInputStream(inBufferSize,
112 this.backingFileBasename + RECORDING_INPUT_STREAM_SUFFIX);
113 this.ros = new RecordingOutputStream(outBufferSize,
114 this.backingFileBasename + RECORDING_OUTPUT_STREAM_SUFFIX);
115 }
116
117 /***
118 * Create an HttpRecorder.
119 *
120 * @param tempDir
121 * Directory into which we drop backing files for recorded input
122 * and output.
123 * @param backingFilenameBase
124 * Backing filename base to which we'll append suffices
125 * <code>ris</code> for recorded input stream and
126 * <code>ros</code> for recorded output stream.
127 */
128 public HttpRecorder(File tempDir, String backingFilenameBase) {
129 this(tempDir, backingFilenameBase, DEFAULT_INPUT_BUFFER_SIZE,
130 DEFAULT_OUTPUT_BUFFER_SIZE);
131 }
132
133 /***
134 * Wrap the provided stream with the internal RecordingInputStream
135 *
136 * open() throws an exception if RecordingInputStream is already open.
137 *
138 * @param is InputStream to wrap.
139 *
140 * @return The input stream wrapper which itself is an input stream.
141 * Pass this in place of the passed stream so input can be recorded.
142 *
143 * @throws IOException
144 */
145 public InputStream inputWrap(InputStream is)
146 throws IOException {
147 logger.fine(Thread.currentThread().getName() + " wrapping input");
148 this.ris.open(is);
149 return this.ris;
150 }
151
152 /***
153 * Wrap the provided stream with the internal RecordingOutputStream
154 *
155 * open() throws an exception if RecordingOutputStream is already open.
156 *
157 * @param os The output stream to wrap.
158 *
159 * @return The output stream wrapper which is itself an output stream.
160 * Pass this in place of the passed stream so output can be recorded.
161 *
162 * @throws IOException
163 */
164 public OutputStream outputWrap(OutputStream os)
165 throws IOException {
166 this.ros.open(os);
167 return this.ros;
168 }
169
170 /***
171 * Close all streams.
172 */
173 public void close() {
174 logger.fine(Thread.currentThread().getName() + " closing");
175 try {
176 this.ris.close();
177 } catch (IOException e) {
178
179
180 DevUtils.logger.log(Level.SEVERE, "close() ris" +
181 DevUtils.extraInfo(), e);
182 }
183 try {
184 this.ros.close();
185 } catch (IOException e) {
186 DevUtils.logger.log(Level.SEVERE, "close() ros" +
187 DevUtils.extraInfo(), e);
188 }
189 }
190
191 /***
192 * Return the internal RecordingInputStream
193 *
194 * @return A RIS.
195 */
196 public RecordingInputStream getRecordedInput() {
197 return this.ris;
198 }
199
200 /***
201 * @return The RecordingOutputStream.
202 */
203 public RecordingOutputStream getRecordedOutput() {
204 return this.ros;
205 }
206
207 /***
208 * Mark current position as the point where the HTTP headers end.
209 */
210 public void markContentBegin() {
211 this.ris.markContentBegin();
212 }
213
214 public long getResponseContentLength() {
215 return this.ris.getResponseContentLength();
216 }
217
218 /***
219 * Close both input and output recorders.
220 *
221 * Recorders are the output streams to which we are recording.
222 * {@link #close()} closes the stream that is being recorded and the
223 * recorder. This method explicitly closes the recorder only.
224 */
225 public void closeRecorders() {
226 try {
227 this.ris.closeRecorder();
228 this.ros.closeRecorder();
229 } catch (IOException e) {
230 DevUtils.warnHandle(e, "Convert to runtime exception?");
231 }
232 }
233
234 /***
235 * Cleanup backing files.
236 *
237 * Call when completely done w/ recorder. Removes any backing files that
238 * may have been dropped.
239 */
240 public void cleanup() {
241 this.close();
242 this.delete(this.backingFileBasename + RECORDING_OUTPUT_STREAM_SUFFIX);
243 this.delete(this.backingFileBasename + RECORDING_INPUT_STREAM_SUFFIX);
244 }
245
246 /***
247 * Delete file if exists.
248 *
249 * @param name Filename to delete.
250 */
251 private void delete(String name) {
252 File f = new File(name);
253 if (f.exists()) {
254 f.delete();
255 }
256 }
257
258 /***
259 * Get the current threads' HttpRecorder.
260 *
261 * @return This threads' HttpRecorder. Returns null if can't find a
262 * HttpRecorder in current instance.
263 */
264 public static HttpRecorder getHttpRecorder() {
265 HttpRecorder recorder = null;
266 Thread thread = Thread.currentThread();
267 if (thread instanceof HttpRecorderMarker) {
268 recorder = ((HttpRecorderMarker)thread).getHttpRecorder();
269 }
270 return recorder;
271 }
272
273 /***
274 * @param characterEncoding Character encoding of recording.
275 */
276 public void setCharacterEncoding(String characterEncoding) {
277 this.characterEncoding = characterEncoding;
278 }
279
280 /***
281 * @return Returns the characterEncoding.
282 */
283 public String getCharacterEncoding() {
284 return this.characterEncoding;
285 }
286
287 /***
288 * @return A ReplayCharSequence. Call close on the RCS when done w/ it.
289 * Will return indeterminate results if the underlying recording streams
290 * have not been closed first.
291 * @throws IOException
292 * @throws IOException
293 */
294 public ReplayCharSequence getReplayCharSequence() throws IOException {
295 return getRecordedInput().
296 getReplayCharSequence(this.characterEncoding);
297 }
298
299 /***
300 * @return A replay input stream.
301 * @throws IOException
302 */
303 public ReplayInputStream getReplayInputStream() throws IOException {
304 return getRecordedInput().getReplayInputStream();
305 }
306
307 /***
308 * Record the input stream for later playback by an extractor, etc.
309 * This is convenience method used to setup an artificial HttpRecorder
310 * scenario used in unit tests, etc.
311 * @param dir Directory to write backing file to.
312 * @param basename of what we're recording.
313 * @param in Stream to read.
314 * @param encoding Stream encoding.
315 * @throws IOException
316 * @return An {@link org.archive.util.HttpRecorder}.
317 */
318 public static HttpRecorder wrapInputStreamWithHttpRecord(File dir,
319 String basename, InputStream in, String encoding)
320 throws IOException {
321 HttpRecorder rec = new HttpRecorder(dir, basename);
322 if (encoding != null && encoding.length() > 0) {
323 rec.setCharacterEncoding(encoding);
324 }
325
326
327 InputStream is = rec.inputWrap(new BufferedInputStream(in));
328 final int BUFFER_SIZE = 1024 * 4;
329 byte [] buffer = new byte[BUFFER_SIZE];
330 while(true) {
331
332 int x = is.read(buffer);
333 if (x == -1) {
334 break;
335 }
336 }
337 is.close();
338 return rec;
339 }
340 }