1   /* BenchmarkBlooms
2   *
3   * $Id: BenchmarkBlooms.java 3868 2005-10-05 19:58:56Z gojomo $
4   *
5   * Created on Jun 30, 2005
6   *
7   * Copyright (C) 2005 Internet Archive
8   *
9   * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24  */
25  package org.archive.util;
26  
27  /***
28   * Simple benchmarking of different BloomFilter
29   * implementations.
30   * 
31   * Take care when interpreting results; the effect of GC,
32   * dynamic compilation, and any other activity on test 
33   * machine may affect relative time tallies in unpredictable
34   * ways.
35   * 
36   * @author Gordon Mohr
37   */
38  public class BenchmarkBlooms {
39  
40  	public static void main(String[] args) {
41  		(new BenchmarkBlooms()).instanceMain(args);
42  	}
43  	
44  	public void instanceMain(String[] args) {
45  		int reps = 
46  			(args.length > 0) ? Integer.parseInt(args[0]) : 3;
47  		int n_expected = 
48  			(args.length > 1) ? Integer.parseInt(args[1]) : 10000000;
49  		int d_hashes = 
50  			(args.length > 2) ? Integer.parseInt(args[2]) : 22;
51  		int adds = 
52  		    	(args.length > 3) ? Integer.parseInt(args[3]) : 5000000;
53  	    String prefix = 
54  	    	(args.length > 4) ? args[4] : "http://www.archive.org/";
55  	    
56  	    System.out.println(
57  	    		"reps="+reps+" n_expected="+n_expected+
58  				" d_hashes="+d_hashes+" adds="+adds+" prefix="+prefix);
59  	    
60  	    BloomFilter bloom64;
61  	    BloomFilter bloom32;
62          BloomFilter bloom32split;
63  	    BloomFilter bloom32p2;
64          BloomFilter bloom32p2split;
65  		for (int r=0;r<reps;r++) {
66  			bloom32 = new BloomFilter32bit(n_expected,d_hashes);
67  			testBloom(bloom32,adds,prefix);
68  			bloom32=null;
69              bloom32split = new BloomFilter32bitSplit(n_expected,d_hashes);
70              testBloom(bloom32split,adds,prefix);
71              bloom32split=null;
72  			bloom64 = new BloomFilter64bit(n_expected,d_hashes);
73  			testBloom(bloom64,adds,prefix);
74  			bloom64=null;
75  			bloom32p2 = new BloomFilter32bp2(n_expected,d_hashes);
76  			testBloom(bloom32p2,adds,prefix);
77  			bloom32p2=null;
78              bloom32p2split = new BloomFilter32bp2Split(n_expected,d_hashes);
79              testBloom(bloom32p2split,adds,prefix);
80              bloom32p2split=null;
81  		}
82  	}
83  	
84  	/***
85  	 * @param bloom
86  	 * @param prefix
87  	 * @param adds
88  	 * @param d_hashes
89  	 */
90  	private void testBloom(BloomFilter bloom, int adds, String prefix) {
91  		System.gc();
92  		long startTime = System.currentTimeMillis();
93  		long falsePositives = 0;
94  		for(int i = 0; i<adds; i++) {
95  			if(!bloom.add(prefix+Integer.toString(i))) {
96  				falsePositives++;
97  			}
98  		}
99  		long finishTime = System.currentTimeMillis();
100 		System.out.println(bloom.getClass().getName()+": "
101 				+(finishTime-startTime)+"ms "
102 				+bloom.getSizeBytes()+"bytes "
103 				+falsePositives+"false");
104 	}
105 }