I have created a set of hash functions for my implementation of Bloom filter. More about them is in my blog post. Here is GitHub repository for the project with the most recent code. Bellow is the complete code, including an additional class for testing purposes (as it is on 3 June 2018).
package bloomfilter; /** * This class provides a set of hashing functions for <code>String</code>. * @author Maciej Modzelewski <https://maciejmodzelewski.com/contact/> * @version 0.1 (14 April 2018) */ public class StringHashFunctions { // DO NOT change this prime number // if you want to keep compatibility // with previously done hashes private static final int PRIME_NUMBER = 99989; /** * Returns a hash value of the specified <code>String</code> element. * @param s the element whose hash is to be generated * @param size the maximum size of the hash * @return the hash of the <code>String</code> element */ public static int indexValueHash(String s, int size) { int sum = 0; char[] sChar = s.toCharArray(); for (int i = 0; i < sChar.length; i++) { if (sChar.length == 1) { sum = sChar[i] ^ 1; } else { sum += (~i << sChar[i]) ^ size ^ 17; } } if (sum < 0) { sum >>>= 1; } return sum % size; } /** * Returns a hash value of the specified <code>String</code> element. * @param s the element whose hash is to be generated * @param size the maximum size of the hash * @return the hash of the <code>String</code> element */ public static int crossHash(String s, int size) { int sum = 0; char[] sChar = s.toCharArray(); for (int i = 0; i < sChar.length; i++) { if (i == 0 || i == sChar.length - 1 && sChar.length != 2) { sum += sChar[i] * ~3; } else { sum += (sChar[i] << sChar[sChar.length - i]) ^ ~size; } } if (sum < 0) { sum >>>= 1; } return sum % size; } /** * Returns a hash value of the specified <code>String</code> element. * @param s the element whose hash is to be generated * @param size the maximum size of the hash * @return the hash of the <code>String</code> element */ public static int primeHash(String s, int size) { int sum = 0; char[] sChar = s.toCharArray(); for (int i = 0; i < sChar.length; i++) { sum ^= (sChar[i] * PRIME_NUMBER << i); } if (sum < 0) { sum >>>= 1; } return sum % size; } /** * Returns a hash value of the specified <code>String</code> element. * @param s the element whose hash is to be generated * @param size the maximum size of the hash * @return the hash of the <code>String</code> element */ public static int simpleHash(String s, int size) { int hash = s.hashCode(); if (hash < 0) { hash >>>= 1; } return hash % size; } }
HashTester class
package bloomfilter; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.Reader; import java.io.Writer; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; /** * * @author Maciej Modzelewski <https://maciejmodzelewski.com/contact/> */ class HashTester { private static final int INPUT_BUFFER_SIZE = 10000000; private BufferedReader reader; private BufferedWriter writer; public static void main(String[] args) { BufferedReader inputData; BufferedWriter outputData; HashTester tester = null; String className = "bloomfilter.StringHashFunctions"; String[] methods = new String[]{"simpleHash", "crossHash", "indexValueHash", "primeHash"}; int size = 728570000; // add paths here String inputPath = ""; String outputPath = ""; boolean append = false; try { inputData = new BufferedReader(new FileReader(inputPath)); outputData = new BufferedWriter(new FileWriter(outputPath, append)); tester = new HashTester(inputData, outputData); tester.hashStrings(className, methods, size); // add paths here tester.setInput(new FileReader("")); tester.setOutput(new FileWriter("",false)); tester.testHashFunctions(className, methods, size); tester.close(); } catch (IOException | ClassNotFoundException | NoSuchMethodException | IllegalAccessException | IllegalArgumentException | InvocationTargetException ex) { System.err.println(ex.toString()); } finally { try { tester.close(); } catch (IOException ex) { System.err.println(ex.toString()); } } } HashTester(BufferedReader input, BufferedWriter output) { reader = input; writer = output; } private void hashStrings(String className, String[] hashFunctions, int size) throws IOException, ClassNotFoundException, NoSuchMethodException, IllegalAccessException, IllegalArgumentException, InvocationTargetException { String line; int[] hashes; String header = ""; for (String hashFunction : hashFunctions) { header += "\"" + hashFunction + "\","; } writer.write(header.substring(0, header.length()-1)); writer.newLine(); reader.mark(HashTester.INPUT_BUFFER_SIZE); line = reader.readLine(); while (line != null) { hashes = HashTester.hashString(className, hashFunctions, size, line); for (int i = 0; i < hashes.length; i++) { writer.write(String.valueOf(hashes[i])); if (i < hashes.length - 1) { writer.write(','); } } writer.newLine(); line = reader.readLine(); } writer.flush(); reader.reset(); } private void testHashFunctions(String className, String[] hashFunctions, int size) throws IOException, ClassNotFoundException, NoSuchMethodException, IllegalAccessException, IllegalArgumentException, InvocationTargetException { List<Set<Integer>> uniqueHashes; int[] hashes; String line; uniqueHashes = new ArrayList<>(); for (String hashFunction : hashFunctions) { uniqueHashes.add(new HashSet<>()); } reader.mark(HashTester.INPUT_BUFFER_SIZE); while ((line = reader.readLine()) != null) { hashes = HashTester.hashString(className, hashFunctions, size, line); for (int i = 0; i < hashes.length; i++) { uniqueHashes.get(i).add(hashes[i]); } } for (int i = 0; i < uniqueHashes.size(); i++) { writer.write(String.valueOf(uniqueHashes.get(i).size())); if (i < uniqueHashes.size() - 1) { writer.write(','); } } writer.newLine(); writer.flush(); reader.reset(); } private void close() throws IOException { reader.close(); writer.close(); } private void setInput(Reader input) { reader = new BufferedReader(input); } private void setInput(BufferedReader input) { reader = input; } private void setOutput(Writer output) { writer = new BufferedWriter(output); } private void setOutput(BufferedWriter output) { writer = output; } private static int[] hashString(String className, String[] hashFunctions, int size, String s) throws ClassNotFoundException, NoSuchMethodException, IllegalAccessException, IllegalArgumentException, InvocationTargetException { Class<?> strHashFunc = Class.forName(className); Class<?>[] methodParams = {String.class, int.class}; Method hashFunc; int[] hashes; hashes = new int[hashFunctions.length]; for (int i = 0; i < hashFunctions.length; i++) { hashFunc = strHashFunc.getDeclaredMethod(hashFunctions[i], methodParams); hashes[i] = (int) hashFunc.invoke(hashFunctions[i], s, size); } return hashes; } }
Here is a ZIP containing files with the source code, HashTester.java and StringHashFunctions.java: HashFunctions