1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.io;
26
27 import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
28
29 import java.io.FileOutputStream;
30 import java.io.IOException;
31 import java.io.OutputStream;
32 import java.security.MessageDigest;
33 import java.security.NoSuchAlgorithmException;
34 import java.util.logging.Level;
35 import java.util.logging.Logger;
36
37 import org.archive.util.IoUtils;
38
39
40 /***
41 * An output stream that records all writes to wrapped output
42 * stream.
43 *
44 * A RecordingOutputStream can be wrapped around any other
45 * OutputStream to record all bytes written to it. You can
46 * then request a ReplayInputStream to read those bytes.
47 *
48 * <p>The RecordingOutputStream uses an in-memory buffer and
49 * backing disk file to allow it to record streams of
50 * arbitrary length limited only by available disk space.
51 *
52 * <p>As long as the stream recorded is smaller than the
53 * in-memory buffer, no disk access will occur.
54 *
55 * <p>Recorded content can be recovered as a ReplayInputStream
56 * (via getReplayInputStream() or, for only the content after
57 * the content-begin-mark is set, getContentReplayInputStream() )
58 * or as a ReplayCharSequence (via getReplayCharSequence()).
59 *
60 * <p>This class is also used as a straight output stream
61 * by {@link RecordingInputStream} to which it records all reads.
62 * {@link RecordingInputStream} is exploiting the file backed buffer
63 * facility of this class passing <code>null</code> for the stream
64 * to wrap. TODO: Make a FileBackedOutputStream class that is
65 * subclassed by RecordingInputStream.
66 *
67 * @author gojomo
68 *
69 */
70 public class RecordingOutputStream extends OutputStream {
71 protected static Logger logger =
72 Logger.getLogger(RecordingOutputStream.class.getName());
73
74 /***
75 * Size of recording.
76 *
77 * Later passed to ReplayInputStream on creation. It uses it to know when
78 * EOS.
79 */
80 private long size = 0;
81
82 private String backingFilename;
83 private OutputStream diskStream = null;
84
85 /***
86 * Buffer we write recordings to.
87 *
88 * We write all recordings here first till its full. Thereafter we
89 * write the backing file.
90 */
91 private byte[] buffer;
92
93 /*** current virtual position in the recording */
94 private long position;
95
96 /*** flag to disable recording */
97 private boolean recording;
98
99 /***
100 * Reusable buffer for FastBufferedOutputStream
101 */
102 protected byte[] bufStreamBuf =
103 new byte [ FastBufferedOutputStream.DEFAULT_BUFFER_SIZE ];
104
105 /***
106 * True if we're to digest content.
107 */
108 private boolean shouldDigest = false;
109
110 /***
111 * Digest instance.
112 */
113 private MessageDigest digest = null;
114
115 /***
116 * Define for SHA1 alogarithm.
117 */
118 private static final String SHA1 = "SHA1";
119
120 /***
121 * Maximum amount of header material to accept without the content
122 * body beginning -- if more, throw a RecorderTooMuchHeaderException.
123 * TODO: make configurable? make smaller?
124 */
125 protected static final long MAX_HEADER_MATERIAL = 1024*1024;
126
127
128 /*** maximum length of material to record before throwing exception */
129 protected long maxLength = Long.MAX_VALUE;
130 /*** maximum time to record before throwing exception */
131 protected long timeoutMs = Long.MAX_VALUE;
132 /*** maximum rate to record (adds delays to hit target rate) */
133 protected long maxRateBytesPerMs = Long.MAX_VALUE;
134 /*** time recording begins for timeout, rate calculations */
135 protected long startTime = Long.MAX_VALUE;
136
137 /***
138 * When recording HTTP, where the content-body starts.
139 */
140 private long contentBeginMark;
141
142 /***
143 * Stream to record.
144 */
145 private OutputStream out = null;
146
147
148 /*** furthest position reached before any reset()s */
149 private long maxPosition = 0;
150 /*** remembered position to reset() to */
151 private long markPosition = 0;
152
153 /***
154 * Create a new RecordingOutputStream.
155 *
156 * @param bufferSize Buffer size to use.
157 * @param backingFilename Name of backing file to use.
158 */
159 public RecordingOutputStream(int bufferSize, String backingFilename) {
160 this.buffer = new byte[bufferSize];
161 this.backingFilename = backingFilename;
162 recording = true;
163 }
164
165 /***
166 * Wrap the given stream, both recording and passing along any data written
167 * to this RecordingOutputStream.
168 *
169 * @throws IOException If failed creation of backing file.
170 */
171 public void open() throws IOException {
172 this.open(null);
173 }
174
175 /***
176 * Wrap the given stream, both recording and passing along any data written
177 * to this RecordingOutputStream.
178 *
179 * @param wrappedStream Stream to wrap. May be null for case where we
180 * want to write to a file backed stream only.
181 *
182 * @throws IOException If failed creation of backing file.
183 */
184 public void open(OutputStream wrappedStream) throws IOException {
185 if(isOpen()) {
186
187
188 throw new IOException("ROS already open for "
189 +Thread.currentThread().getName());
190 }
191 this.out = wrappedStream;
192 this.position = 0;
193 this.markPosition = 0;
194 this.maxPosition = 0;
195 this.size = 0;
196 this.contentBeginMark = -1;
197
198 this.recording = true;
199
200 this.shouldDigest = false;
201 if (this.diskStream != null) {
202 closeDiskStream();
203 }
204 if (this.diskStream == null) {
205
206 FileOutputStream fis = new FileOutputStream(this.backingFilename);
207
208 this.diskStream = new RecyclingFastBufferedOutputStream(fis, bufStreamBuf);
209 }
210 startTime = System.currentTimeMillis();
211 }
212
213 public void write(int b) throws IOException {
214 if(position<maxPosition) {
215
216 position++;
217 return;
218 }
219 if(recording) {
220 record(b);
221 }
222 if (this.out != null) {
223 this.out.write(b);
224 }
225 checkLimits();
226 }
227
228 public void write(byte[] b, int off, int len) throws IOException {
229 if(position < maxPosition) {
230 if(position+len<=maxPosition) {
231
232 position += len;
233 return;
234 }
235
236 long consumeRange = maxPosition - position;
237 position += consumeRange;
238 off += consumeRange;
239 len -= consumeRange;
240 }
241 if(recording) {
242 record(b, off, len);
243 }
244 if (this.out != null) {
245 this.out.write(b, off, len);
246 }
247 checkLimits();
248 }
249
250 /***
251 * Check any enforced limits.
252 */
253 protected void checkLimits() throws RecorderIOException {
254
255 if (contentBeginMark<0) {
256
257 if(position>MAX_HEADER_MATERIAL) {
258 throw new RecorderTooMuchHeaderException();
259 }
260 }
261
262 if(position>maxLength) {
263 throw new RecorderLengthExceededException();
264 }
265
266 long duration = System.currentTimeMillis() - startTime;
267 duration = Math.max(duration,1);
268 if(duration>timeoutMs) {
269 throw new RecorderTimeoutException();
270 }
271
272 if(position/duration > maxRateBytesPerMs) {
273 long desiredDuration = position / maxRateBytesPerMs;
274 try {
275 Thread.sleep(desiredDuration-duration);
276 } catch (InterruptedException e) {
277 logger.log(Level.WARNING,
278 "bandwidth throttling sleep interrupted", e);
279 }
280 }
281 }
282
283 /***
284 * Record the given byte for later recovery
285 *
286 * @param b Int to record.
287 *
288 * @exception IOException Failed write to backing file.
289 */
290 private void record(int b) throws IOException {
291 if (this.shouldDigest) {
292 this.digest.update((byte)b);
293 }
294 if (this.position >= this.buffer.length) {
295
296
297 assert this.diskStream != null: "Diskstream is null";
298 this.diskStream.write(b);
299 } else {
300 this.buffer[(int) this.position] = (byte) b;
301 }
302 this.position++;
303 }
304
305 /***
306 * Record the given byte-array range for recovery later
307 *
308 * @param b Buffer to record.
309 * @param off Offset into buffer at which to start recording.
310 * @param len Length of buffer to record.
311 *
312 * @exception IOException Failed write to backing file.
313 */
314 private void record(byte[] b, int off, int len) throws IOException {
315 if(this.shouldDigest) {
316 assert this.digest != null: "Digest is null.";
317 this.digest.update(b, off, len);
318 }
319 tailRecord(b, off, len);
320 }
321
322 /***
323 * Record without digesting.
324 *
325 * @param b Buffer to record.
326 * @param off Offset into buffer at which to start recording.
327 * @param len Length of buffer to record.
328 *
329 * @exception IOException Failed write to backing file.
330 */
331 private void tailRecord(byte[] b, int off, int len) throws IOException {
332 if(this.position >= this.buffer.length){
333
334
335 if (this.diskStream == null) {
336 throw new IOException("diskstream is null");
337 }
338 this.diskStream.write(b, off, len);
339 this.position += len;
340 } else {
341 assert this.buffer != null: "Buffer is null";
342 int toCopy = (int)Math.min(this.buffer.length - this.position, len);
343 assert b != null: "Passed buffer is null";
344 System.arraycopy(b, off, this.buffer, (int)this.position, toCopy);
345 this.position += toCopy;
346
347 if (toCopy < len) {
348 tailRecord(b, off + toCopy, len - toCopy);
349 }
350 }
351 }
352
353 public void close() throws IOException {
354 if(contentBeginMark<0) {
355
356
357 contentBeginMark = 0;
358 }
359 if (this.out != null) {
360 this.out.close();
361 this.out = null;
362 }
363 closeRecorder();
364 }
365
366 protected synchronized void closeDiskStream()
367 throws IOException {
368 if (this.diskStream != null) {
369 this.diskStream.close();
370 this.diskStream = null;
371 }
372 }
373
374 public void closeRecorder() throws IOException {
375 recording = false;
376 closeDiskStream();
377
378
379 if (this.size == 0) {
380 this.size = this.position;
381 }
382 }
383
384
385
386
387 public void flush() throws IOException {
388 if (this.out != null) {
389 this.out.flush();
390 }
391 if (this.diskStream != null) {
392 this.diskStream.flush();
393 }
394 }
395
396 public ReplayInputStream getReplayInputStream() throws IOException {
397 return getReplayInputStream(0);
398 }
399
400 public ReplayInputStream getReplayInputStream(long skip) throws IOException {
401
402
403
404 assert this.out == null: "Stream is still open.";
405 ReplayInputStream replay = new ReplayInputStream(this.buffer,
406 this.size, this.contentBeginMark, this.backingFilename);
407 replay.skip(skip);
408 return replay;
409 }
410
411 /***
412 * Return a replay stream, cued up to begining of content
413 *
414 * @throws IOException
415 * @return An RIS.
416 */
417 public ReplayInputStream getContentReplayInputStream() throws IOException {
418 return getReplayInputStream(this.contentBeginMark);
419 }
420
421 public long getSize() {
422 return this.size;
423 }
424
425 /***
426 * Remember the current position as the start of the "response
427 * body". Useful when recording HTTP traffic as a way to start
428 * replays after the headers.
429 */
430 public void markContentBegin() {
431 this.contentBeginMark = this.position;
432 startDigest();
433 }
434
435 /***
436 * Return stored content-begin-mark (which is also end-of-headers)
437 */
438 public long getContentBegin() {
439 return this.contentBeginMark;
440 }
441
442 /***
443 * Starts digesting recorded data, if a MessageDigest has been
444 * set.
445 */
446 public void startDigest() {
447 if (this.digest != null) {
448 this.digest.reset();
449 this.shouldDigest = true;
450 }
451 }
452
453 /***
454 * Convenience method for setting SHA1 digest.
455 * @see #setDigest(String)
456 */
457 public void setSha1Digest() {
458 setDigest(SHA1);
459 }
460
461
462 /***
463 * Sets a digest function which may be applied to recorded data.
464 * The difference between calling this method and {@link #setDigest(MessageDigest)}
465 * is that this method tries to reuse MethodDigest instance if already allocated
466 * and of appropriate algorithm.
467 * @param algorithm Message digest algorithm to use.
468 * @see #setDigest(MessageDigest)
469 */
470 public void setDigest(String algorithm) {
471 try {
472
473 if (this.digest == null ||
474 !this.digest.getAlgorithm().equals(algorithm)) {
475 setDigest(MessageDigest.getInstance(algorithm));
476 }
477 } catch (NoSuchAlgorithmException e) {
478 e.printStackTrace();
479 }
480 }
481
482 /***
483 * Sets a digest function which may be applied to recorded data.
484 *
485 * As usually only a subset of the recorded data should
486 * be fed to the digest, you must also call startDigest()
487 * to begin digesting.
488 *
489 * @param md Message digest function to use.
490 */
491 public void setDigest(MessageDigest md) {
492 this.digest = md;
493 }
494
495 /***
496 * Return the digest value for any recorded, digested data. Call
497 * only after all data has been recorded; otherwise, the running
498 * digest state is ruined.
499 *
500 * @return the digest final value
501 */
502 public byte[] getDigestValue() {
503 if(this.digest == null) {
504 return null;
505 }
506 return this.digest.digest();
507 }
508
509 public ReplayCharSequence getReplayCharSequence() throws IOException {
510 return getReplayCharSequence(null);
511 }
512
513 public ReplayCharSequence getReplayCharSequence(String characterEncoding)
514 throws IOException {
515 return getReplayCharSequence(characterEncoding, this.contentBeginMark);
516 }
517
518 /***
519 * @param characterEncoding Encoding of recorded stream.
520 * @return A ReplayCharSequence Will return null if an IOException. Call
521 * close on returned RCS when done.
522 * @throws IOException
523 */
524 public ReplayCharSequence getReplayCharSequence(String characterEncoding,
525 long startOffset) throws IOException {
526
527 float maxBytesPerChar = IoUtils.encodingMaxBytesPerChar(characterEncoding);
528 if(maxBytesPerChar<=1) {
529
530
531 return new ByteReplayCharSequence(
532 this.buffer,
533 this.size,
534 startOffset,
535 this.backingFilename);
536 } else {
537
538 if(this.size <= this.buffer.length) {
539
540 return new MultiByteReplayCharSequence(
541 this.buffer,
542 this.size,
543 startOffset,
544 characterEncoding);
545
546 } else {
547
548 ReplayInputStream ris = getReplayInputStream(startOffset);
549 ReplayCharSequence rcs = new MultiByteReplayCharSequence(
550 ris,
551 this.backingFilename,
552 characterEncoding);
553 ris.close();
554 return rcs;
555 }
556
557 }
558
559 }
560
561 public long getResponseContentLength() {
562 return this.size - this.contentBeginMark;
563 }
564
565 /***
566 * @return True if this ROS is open.
567 */
568 public boolean isOpen() {
569 return this.out != null;
570 }
571
572 /***
573 * When used alongside a mark-supporting RecordingInputStream, remember
574 * a position reachable by a future reset().
575 */
576 public void mark() {
577
578 this.markPosition = position;
579 }
580
581 /***
582 * When used alongside a mark-supporting RecordingInputStream, reset
583 * the position to that saved by previous mark(). Until the position
584 * again reached "new" material, none of the bytes pushed to this
585 * stream will be digested or recorded.
586 */
587 public void reset() {
588
589 maxPosition = Math.max(maxPosition, position);
590
591 position = markPosition;
592 }
593
594 /***
595 * Set limits on length, time, and rate to enforce.
596 *
597 * @param length
598 * @param milliseconds
599 * @param rateKBps
600 */
601 public void setLimits(long length, long milliseconds, long rateKBps) {
602 maxLength = (length>0) ? length : Long.MAX_VALUE;
603 timeoutMs = (milliseconds>0) ? milliseconds : Long.MAX_VALUE;
604 maxRateBytesPerMs = (rateKBps>0) ? rateKBps*1024/1000 : Long.MAX_VALUE;
605 }
606
607 /***
608 * Reset limits to effectively-unlimited defaults
609 */
610 public void resetLimits() {
611 maxLength = Long.MAX_VALUE;
612 timeoutMs = Long.MAX_VALUE;
613 maxRateBytesPerMs = Long.MAX_VALUE;
614 }
615
616 /***
617 * Return number of bytes that could be recorded without hitting
618 * length limit
619 *
620 * @return long byte count
621 */
622 public long getRemainingLength() {
623 return maxLength - position;
624 }
625 }