1   /* HTTPRecorder
2    *
3    * $Id: HttpRecorder.java 4498 2006-08-15 04:39:00Z gojomo $
4    *
5    * Created on Sep 22, 2003
6    *
7    * Copyright (C) 2003 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  package org.archive.util;
26  
27  import java.io.BufferedInputStream;
28  import java.io.File;
29  import java.io.IOException;
30  import java.io.InputStream;
31  import java.io.OutputStream;
32  import java.util.logging.Level;
33  import java.util.logging.Logger;
34  
35  import org.archive.io.RecordingInputStream;
36  import org.archive.io.RecordingOutputStream;
37  import org.archive.io.ReplayCharSequence;
38  import org.archive.io.ReplayInputStream;
39  
40  
41  /***
42   * Pairs together a RecordingInputStream and RecordingOutputStream
43   * to capture exactly a single HTTP transaction.
44   *
45   * Initially only supports HTTP/1.0 (one request, one response per stream)
46   *
47   * Call {@link #markContentBegin()} to demarc the transition between HTTP
48   * header and body.
49   *
50   * @author gojomo
51   */
52  public class HttpRecorder {
53      protected static Logger logger =
54          Logger.getLogger("org.archive.util.HttpRecorder");
55  
56      private static final int DEFAULT_OUTPUT_BUFFER_SIZE = 4096;
57      private static final int DEFAULT_INPUT_BUFFER_SIZE = 65536;
58  
59      private RecordingInputStream ris = null;
60      private RecordingOutputStream ros = null;
61  
62      /***
63       * Backing file basename.
64       *
65       * Keep it around so can clean up backing files left on disk.
66       */
67      private String backingFileBasename = null;
68  
69      /***
70       * Backing file output stream suffix.
71       */
72      private static final String RECORDING_OUTPUT_STREAM_SUFFIX = ".ros";
73  
74     /***
75      * Backing file input stream suffix.
76      */
77      private static final String RECORDING_INPUT_STREAM_SUFFIX = ".ris";
78  
79      /***
80       * Response character encoding.
81       */
82      private String characterEncoding = null;
83  
84      /***
85       * Constructor with limited access.
86       * Used internally for case where we're wrapping an already
87       * downloaded stream with a HttpRecorder.
88       */
89      protected HttpRecorder() {
90          super();
91      }
92      
93      /***
94       * Create an HttpRecorder.
95       *
96       * @param tempDir Directory into which we drop backing files for
97       * recorded input and output.
98       * @param backingFilenameBase Backing filename base to which we'll append
99       * suffices <code>ris</code> for recorded input stream and
100      * <code>ros</code> for recorded output stream.
101      * @param outBufferSize Size of output buffer to use.
102      * @param inBufferSize Size of input buffer to use.
103      */
104     public HttpRecorder(File tempDir, String backingFilenameBase, 
105             int outBufferSize, int inBufferSize) {
106         super();
107         tempDir.mkdirs();
108         this.backingFileBasename =
109             (new File(tempDir.getPath(), backingFilenameBase))
110                 .getAbsolutePath();
111         this.ris = new RecordingInputStream(inBufferSize,
112             this.backingFileBasename + RECORDING_INPUT_STREAM_SUFFIX);
113         this.ros = new RecordingOutputStream(outBufferSize,
114             this.backingFileBasename + RECORDING_OUTPUT_STREAM_SUFFIX);
115     }
116 
117     /***
118      * Create an HttpRecorder.
119      * 
120      * @param tempDir
121      *            Directory into which we drop backing files for recorded input
122      *            and output.
123      * @param backingFilenameBase
124      *            Backing filename base to which we'll append suffices
125      *            <code>ris</code> for recorded input stream and
126      *            <code>ros</code> for recorded output stream.
127      */
128     public HttpRecorder(File tempDir, String backingFilenameBase) {
129         this(tempDir, backingFilenameBase, DEFAULT_INPUT_BUFFER_SIZE,
130                 DEFAULT_OUTPUT_BUFFER_SIZE);
131     }
132 
133     /***
134      * Wrap the provided stream with the internal RecordingInputStream
135      *
136      * open() throws an exception if RecordingInputStream is already open.
137      *
138      * @param is InputStream to wrap.
139      *
140      * @return The input stream wrapper which itself is an input stream.
141      * Pass this in place of the passed stream so input can be recorded.
142      *
143      * @throws IOException
144      */
145     public InputStream inputWrap(InputStream is) 
146     throws IOException {
147         logger.fine(Thread.currentThread().getName() + " wrapping input");
148         this.ris.open(is);
149         return this.ris;
150     }
151 
152     /***
153      * Wrap the provided stream with the internal RecordingOutputStream
154      *
155      * open() throws an exception if RecordingOutputStream is already open.
156      * 
157      * @param os The output stream to wrap.
158      *
159      * @return The output stream wrapper which is itself an output stream.
160      * Pass this in place of the passed stream so output can be recorded.
161      *
162      * @throws IOException
163      */
164     public OutputStream outputWrap(OutputStream os) 
165     throws IOException {
166         this.ros.open(os);
167         return this.ros;
168     }
169 
170     /***
171      * Close all streams.
172      */
173     public void close() {
174         logger.fine(Thread.currentThread().getName() + " closing");
175         try {
176             this.ris.close();
177         } catch (IOException e) {
178             // TODO: Can we not let the exception out of here and report it
179             // higher up in the caller?
180             DevUtils.logger.log(Level.SEVERE, "close() ris" +
181                 DevUtils.extraInfo(), e);
182         }
183         try {
184             this.ros.close();
185         } catch (IOException e) {
186             DevUtils.logger.log(Level.SEVERE, "close() ros" +
187                 DevUtils.extraInfo(), e);
188         }
189     }
190 
191     /***
192      * Return the internal RecordingInputStream
193      *
194      * @return A RIS.
195      */
196     public RecordingInputStream getRecordedInput() {
197         return this.ris;
198     }
199 
200     /***
201      * @return The RecordingOutputStream.
202      */
203     public RecordingOutputStream getRecordedOutput() {
204         return this.ros;
205     }
206 
207     /***
208      * Mark current position as the point where the HTTP headers end.
209      */
210     public void markContentBegin() {
211         this.ris.markContentBegin();
212     }
213 
214     public long getResponseContentLength() {
215         return this.ris.getResponseContentLength();
216     }
217 
218     /***
219      * Close both input and output recorders.
220      *
221      * Recorders are the output streams to which we are recording.
222      * {@link #close()} closes the stream that is being recorded and the
223      * recorder. This method explicitly closes the recorder only.
224      */
225     public void closeRecorders() {
226         try {
227             this.ris.closeRecorder();
228             this.ros.closeRecorder();
229         } catch (IOException e) {
230             DevUtils.warnHandle(e, "Convert to runtime exception?");
231         }
232     }
233 
234     /***
235      * Cleanup backing files.
236      *
237      * Call when completely done w/ recorder.  Removes any backing files that
238      * may have been dropped.
239      */
240     public void cleanup() {
241         this.close();
242         this.delete(this.backingFileBasename + RECORDING_OUTPUT_STREAM_SUFFIX);
243         this.delete(this.backingFileBasename + RECORDING_INPUT_STREAM_SUFFIX);
244     }
245 
246     /***
247      * Delete file if exists.
248      *
249      * @param name Filename to delete.
250      */
251     private void delete(String name) {
252         File f = new File(name);
253         if (f.exists()) {
254             f.delete();
255         }
256     }
257 
258     /***
259      * Get the current threads' HttpRecorder.
260      *
261      * @return This threads' HttpRecorder.  Returns null if can't find a
262      * HttpRecorder in current instance.
263      */
264     public static HttpRecorder getHttpRecorder() {
265         HttpRecorder recorder = null;
266         Thread thread = Thread.currentThread();
267         if (thread instanceof HttpRecorderMarker) {
268             recorder = ((HttpRecorderMarker)thread).getHttpRecorder();
269         }
270         return recorder;
271     }
272 
273     /***
274      * @param characterEncoding Character encoding of recording.
275      */
276     public void setCharacterEncoding(String characterEncoding) {
277         this.characterEncoding = characterEncoding;
278     }
279 
280     /***
281      * @return Returns the characterEncoding.
282      */
283     public String getCharacterEncoding() {
284         return this.characterEncoding;
285     }
286 
287     /***
288      * @return A ReplayCharSequence.  Call close on the RCS when done w/ it.
289      * Will return indeterminate results if the underlying recording streams
290      * have not been closed first.
291      * @throws IOException
292      * @throws IOException
293      */
294     public ReplayCharSequence getReplayCharSequence() throws IOException {
295         return getRecordedInput().
296             getReplayCharSequence(this.characterEncoding);
297     }
298     
299     /***
300      * @return A replay input stream.
301      * @throws IOException
302      */
303     public ReplayInputStream getReplayInputStream() throws IOException {
304         return getRecordedInput().getReplayInputStream();
305     }
306     
307     /***
308      * Record the input stream for later playback by an extractor, etc.
309      * This is convenience method used to setup an artificial HttpRecorder
310      * scenario used in unit tests, etc.
311      * @param dir Directory to write backing file to.
312      * @param basename of what we're recording.
313      * @param in Stream to read.
314      * @param encoding Stream encoding.
315      * @throws IOException
316      * @return An {@link org.archive.util.HttpRecorder}.
317      */
318     public static HttpRecorder wrapInputStreamWithHttpRecord(File dir,
319         String basename, InputStream in, String encoding)
320     throws IOException {
321         HttpRecorder rec = new HttpRecorder(dir, basename);
322         if (encoding != null && encoding.length() > 0) {
323             rec.setCharacterEncoding(encoding);
324         }
325         // Do not use FastBufferedInputStream here.  It does not
326         // support mark.
327         InputStream is = rec.inputWrap(new BufferedInputStream(in));
328         final int BUFFER_SIZE = 1024 * 4;
329         byte [] buffer = new byte[BUFFER_SIZE];
330         while(true) {
331             // Just read it all down.
332             int x = is.read(buffer);
333             if (x == -1) {
334                 break;
335             }
336         }
337         is.close();
338         return rec;
339     }
340 }