1   /* BloomFilter
2   *
3   * $Id: BloomFilter.java 3655 2005-07-05 19:20:03Z gojomo $
4   *
5   * Created on Jun 30, 2005
6   *
7   * Copyright (C) 2005 Internet Archive; an adaptation of
8   * LGPL work (C) Sebastiano Vigna
9   *
10  * This file is part of the Heritrix web crawler (crawler.archive.org).
11  *
12  * Heritrix is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU Lesser Public License as published by
14  * the Free Software Foundation; either version 2.1 of the License, or
15  * any later version.
16  *
17  * Heritrix is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU Lesser Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser Public License
23  * along with Heritrix; if not, write to the Free Software
24  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25  */
26  
27  package org.archive.util;
28  
/**
 * Common interface for different Bloom filter implementations.
 *
 * <p>A filter records character sequences and later answers membership
 * queries about them. Answers are approximate: see
 * {@link #contains(CharSequence)} for the false-positive caveat that
 * applies to every method here.
 *
 * @author Gordon Mohr
 */
public interface BloomFilter {

	/**
	 * Reports the number of character sequences in the filter.
	 *
	 * @return the number of character sequences in the filter (but see
	 * {@link #contains(CharSequence)}).
	 */
	int size();

	/**
	 * Checks whether the given character sequence is in this filter.
	 *
	 * <p>Note that this method may return true for a character sequence
	 * that has not been added to the filter. This will happen with
	 * probability 2<sup>-<var>d</var></sup>, where <var>d</var> is the
	 * number of hash functions specified at creation time, if the number
	 * of elements in the filter is less than <var>n</var>, the number of
	 * expected elements specified at creation time.
	 *
	 * @param s a character sequence.
	 * @return true if the sequence is in the filter (or if a sequence with
	 * the same hash sequence is in the filter).
	 */
	boolean contains(CharSequence s);

	/**
	 * Adds a character sequence to the filter.
	 *
	 * @param s a character sequence.
	 * @return true if the character sequence was not in the filter (but
	 * see {@link #contains(CharSequence)}).
	 */
	boolean add(CharSequence s);

	/**
	 * Reports the amount of memory consumed by the bloom bitfield.
	 *
	 * @return memory used by the bloom bitfield, in bytes.
	 */
	long getSizeBytes();
}