1   /* ReplayCharSequenceTest
2    *
3    * Created on Dec 26, 2006
4    *
5    * Copyright (C) 2006 Internet Archive.
6    *
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    *
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   *
14   * Heritrix is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.io;
24  
25  import java.io.File;
26  import java.io.IOException;
27  import java.util.Date;
28  import java.util.logging.Logger;
29  
30  import org.archive.util.FileUtils;
31  import org.archive.util.TmpDirTestCase;
32  
33  /***
34   * Test ReplayCharSequences.
35   *
36   * @author stack, gojomo
37   * @version $Revision: 5099 $, $Date: 2007-04-27 02:14:08 +0000 (Fri, 27 Apr 2007) $
38   */
39  public class ReplayCharSequenceTest extends TmpDirTestCase
40  {
41      /***
42       * Logger.
43       */
44      private static Logger logger =
45          Logger.getLogger("org.archive.io.ReplayCharSequenceFactoryTest");
46  
47  
48      private static final int SEQUENCE_LENGTH = 127;
49      private static final int MULTIPLIER = 3;
50      private static final int BUFFER_SIZE = SEQUENCE_LENGTH * MULTIPLIER;
51      private static final int INCREMENT = 1;
52  
53      /***
54       * Buffer of regular content.
55       */
56      private byte [] regularBuffer = null;
57  
58      /*
59       * @see TestCase#setUp()
60       */
61      protected void setUp() throws Exception
62      {
63          super.setUp();
64          this.regularBuffer =
65              fillBufferWithRegularContent(new byte [BUFFER_SIZE]);
66      }
67      
68      public void testShiftjis() throws IOException {
69  
70          // Here's the bytes for the JIS encoding of the Japanese form of Nihongo
71          byte[] bytes_nihongo = {
72              (byte) 0x1B, (byte) 0x24, (byte) 0x42, (byte) 0x46,
73              (byte) 0x7C, (byte) 0x4B, (byte) 0x5C, (byte) 0x38,
74              (byte) 0x6C, (byte) 0x1B, (byte) 0x28, (byte) 0x42,
75              (byte) 0x1B, (byte) 0x28, (byte) 0x42 };
76          final String ENCODING = "SJIS";
77          // Here is nihongo converted to JVM encoding.
78          String nihongo = new String(bytes_nihongo, ENCODING);
79  
80          RecordingOutputStream ros = writeTestStream(
81                  bytes_nihongo,MULTIPLIER,
82                  "testShiftjis",MULTIPLIER);
83          // TODO: check for existence of overflow file?
84          ReplayCharSequence rcs = ros.getReplayCharSequence(ENCODING);
85              
86          // Now check that start of the rcs comes back in as nihongo string.
87          String rcsStr = rcs.subSequence(0, nihongo.length()).toString();
88          assertTrue("Nihongo " + nihongo + " does not equal converted string" +
89                  " from rcs " + rcsStr,
90              nihongo.equals(rcsStr));
91          // And assert next string is also properly nihongo.
92          if (rcs.length() >= (nihongo.length() * 2)) {
93              rcsStr = rcs.subSequence(nihongo.length(),
94                  nihongo.length() + nihongo.length()).toString();
95              assertTrue("Nihongo " + nihongo + " does not equal converted " +
96                  " string from rcs (2nd time)" + rcsStr,
97                  nihongo.equals(rcsStr));
98          }
99      }
100 
101     public void testGetReplayCharSequenceByteZeroOffset() throws IOException {
102 
103         RecordingOutputStream ros = writeTestStream(
104                 regularBuffer,MULTIPLIER,
105                 "testGetReplayCharSequenceByteZeroOffset",MULTIPLIER);
106         ReplayCharSequence rcs = ros.getReplayCharSequence();
107 
108         for (int i = 0; i < MULTIPLIER; i++) {
109             accessingCharacters(rcs);
110         }
111     }
112 
113     public void testGetReplayCharSequenceByteOffset() throws IOException {
114 
115         RecordingOutputStream ros = writeTestStream(
116                 regularBuffer,MULTIPLIER,
117                 "testGetReplayCharSequenceByteOffset",MULTIPLIER);
118         ReplayCharSequence rcs = ros.getReplayCharSequence(null,SEQUENCE_LENGTH);
119 
120         for (int i = 0; i < MULTIPLIER; i++) {
121             accessingCharacters(rcs);
122         }
123     }
124 
125     public void testGetReplayCharSequenceMultiByteZeroOffset()
126         throws IOException {
127 
128         RecordingOutputStream ros = writeTestStream(
129                 regularBuffer,MULTIPLIER,
130                 "testGetReplayCharSequenceMultiByteZeroOffset",MULTIPLIER);
131         ReplayCharSequence rcs = ros.getReplayCharSequence("UTF-8");
132 
133         for (int i = 0; i < MULTIPLIER; i++) {
134             accessingCharacters(rcs);
135         }
136     }
137 
138     public void testGetReplayCharSequenceMultiByteOffset() throws IOException {
139 
140         RecordingOutputStream ros = writeTestStream(
141                 regularBuffer,MULTIPLIER,
142                 "testGetReplayCharSequenceMultiByteOffset",MULTIPLIER);
143         ReplayCharSequence rcs = ros.getReplayCharSequence("UTF-8", SEQUENCE_LENGTH);
144 
145         try {
146             for (int i = 0; i < MULTIPLIER; i++) {
147                 accessingCharacters(rcs);
148             }
149         } finally {
150             rcs.close();
151         }
152     }
153     
154     public void testReplayCharSequenceByteToString() throws IOException {
155         String fileContent = "Some file content";
156         byte [] buffer = fileContent.getBytes();
157         RecordingOutputStream ros = writeTestStream(
158                 buffer,1,
159                 "testReplayCharSequenceByteToString.txt",0);
160         ReplayCharSequence rcs = ros.getReplayCharSequence();
161         String result = rcs.toString();
162         assertEquals("Strings don't match",result,fileContent);
163     }
164     
165     public void testReplayCharSequenceByteToStringOverflow() throws IOException {
166         String fileContent = "Some file content. ";
167         byte [] buffer = fileContent.getBytes();
168         RecordingOutputStream ros = writeTestStream(
169                 buffer,1,
170                 "testReplayCharSequenceByteToString.txt",1);
171         String expectedContent = fileContent+fileContent;
172         ReplayCharSequence rcs = ros.getReplayCharSequence();
173         String result = rcs.toString();
174         assertEquals("Strings don't match", expectedContent, result);
175     }
176     
177     public void testReplayCharSequenceByteToStringMulti() throws IOException {
178         String fileContent = "Some file content";
179         byte [] buffer = fileContent.getBytes("UTF-8");
180         final int MULTIPLICAND = 10;
181         StringBuilder sb =
182             new StringBuilder(MULTIPLICAND * fileContent.length());
183         for (int i = 0; i < MULTIPLICAND; i++) {
184             sb.append(fileContent);
185         }
186         String expectedResult = sb.toString();
187         RecordingOutputStream ros = writeTestStream(
188                 buffer,1,
189                 "testReplayCharSequenceByteToStringMulti.txt",MULTIPLICAND-1);
190         for (int i = 0; i < 3; i++) {
191             ReplayCharSequence rcs = ros.getReplayCharSequence("UTF-8");
192             String result = rcs.toString();
193             assertEquals("Strings don't match", result, expectedResult);
194             rcs.close();
195         }
196     }
197     
198     /***
199      * Accessing characters test.
200      *
201      * Checks that characters in the rcs are in sequence.
202      *
203      * @param rcs The ReplayCharSequence to try out.
204      */
205     private void accessingCharacters(CharSequence rcs) {
206         long timestamp = (new Date()).getTime();
207         int seeks = 0;
208         for (int i = (INCREMENT * 2); (i + INCREMENT) < rcs.length();
209                 i += INCREMENT) {
210             checkCharacter(rcs, i);
211             seeks++;
212             for (int j = i - INCREMENT; j < i; j++) {
213                 checkCharacter(rcs, j);
214                 seeks++;
215             }
216         }
217         // Note that printing out below breaks cruisecontrols drawing
218         // of the xml unit test results because it outputs disallowed
219         // xml characters.
220         logger.fine(rcs + " seeks count " + seeks + " in " +
221             ((new Date().getTime()) - timestamp) + " milliseconds.");
222     }
223 
224     /***
225      * Check the character read.
226      *
227      * Throws assertion if not expected result.
228      *
229      * @param rcs ReplayCharSequence to read from.
230      * @param i Character offset.
231      */
232     private void checkCharacter(CharSequence rcs, int i) {
233         int c = rcs.charAt(i);
234         assertTrue("Character " + Integer.toString(c) + " at offset " + i +
235             " unexpected.", (c % SEQUENCE_LENGTH) == (i % SEQUENCE_LENGTH));
236     }
237 
238     /***
239      * @param baseName
240      * @return RecordingOutputStream
241      * @throws IOException
242      */
243     private RecordingOutputStream writeTestStream(byte[] content, 
244             int memReps, String baseName, int fileReps) throws IOException {
245         String backingFilename = FileUtils.maybeRelative(getTmpDir(),baseName).getAbsolutePath();
246         RecordingOutputStream ros = new RecordingOutputStream(
247                 content.length * memReps,
248                 backingFilename);
249         ros.open();
250         for(int i = 0; i < (memReps+fileReps); i++) {
251             // fill buffer (repeat MULTIPLIER times) and 
252             // overflow to disk (also MULTIPLIER times)
253             ros.write(content);
254         }
255         ros.close();
256         return ros; 
257     }
258 
259 
260     /***
261      * Fill a buffer w/ regular progression of single-byte 
262      * (and <= 127) characters.
263      * @param buffer Buffer to fill.
264      * @return The buffer we filled.
265      */
266     private byte [] fillBufferWithRegularContent(byte [] buffer) {
267         int index = 0;
268         for (int i = 0; i < buffer.length; i++) {
269             buffer[i] = (byte) (index & 0x00ff);
270             index++;
271             if (index >= SEQUENCE_LENGTH) {
272                 // Reset the index.
273                 index = 0;
274             }
275         }
276         return buffer;
277     }
278 
279     public void testCheckParameters()
280     {
281         // TODO.
282     }
283 }