String hash functions

I have created a set of hash functions for my implementation of Bloom filter. More about them is in my blog post. Here is GitHub repository for the project with the most recent code. Bellow is the complete code, including an additional class for testing purposes (as it is on 3 June 2018).

package bloomfilter;

/**
 * This class provides a set of hashing functions for <code>String</code>.
 * @author Maciej Modzelewski <https://maciejmodzelewski.com/contact/>
 * @version 0.1 (14 April 2018)
 */
public class StringHashFunctions {
    // DO NOT change this prime number
    // if you want to keep compatibility
    // with previously done hashes
    private static final int PRIME_NUMBER = 99989;
    
    /**
     * Returns a hash value of the specified <code>String</code> element.
     * @param s the element whose hash is to be generated
     * @param size the maximum size of the hash
     * @return the hash of the <code>String</code> element
     */
    public static int indexValueHash(String s, int size) {
        int sum = 0;
        char[] sChar = s.toCharArray();
        
        for (int i = 0; i < sChar.length; i++) {
            if (sChar.length == 1) {
                sum = sChar[i] ^ 1;
            } else {
                sum += (~i << sChar[i]) ^ size ^ 17;
            }
        }
        
        if (sum < 0) {
            sum >>>= 1;
        }
        
        return sum % size;
    }
    
    /**
     * Returns a hash value of the specified <code>String</code> element.
     * @param s the element whose hash is to be generated
     * @param size the maximum size of the hash
     * @return the hash of the <code>String</code> element
     */
    public static int crossHash(String s, int size) {
        int sum = 0;
        char[] sChar = s.toCharArray();
        
        for (int i = 0; i < sChar.length; i++) {
            if (i == 0 || i == sChar.length - 1 && sChar.length != 2) {
                sum += sChar[i] * ~3;
            } else {
                sum += (sChar[i] << sChar[sChar.length - i]) ^ ~size;
            } 
        }
        
        if (sum < 0) {
            sum >>>= 1;
        }
        
        return sum % size;
    }
    
    /**
     * Returns a hash value of the specified <code>String</code> element.
     * @param s the element whose hash is to be generated
     * @param size the maximum size of the hash
     * @return the hash of the <code>String</code> element 
     */
    public static int primeHash(String s, int size) {
        int sum = 0;
        char[] sChar = s.toCharArray();
        
        for (int i = 0; i < sChar.length; i++) {
            sum ^= (sChar[i] * PRIME_NUMBER << i);
        }
        
        if (sum < 0) {
            sum >>>= 1;
        }
        
        return sum % size;
    }
    
    /**
     * Returns a hash value of the specified <code>String</code> element.
     * @param s the element whose hash is to be generated
     * @param size the maximum size of the hash
     * @return the hash of the <code>String</code> element 
     */
    public static int simpleHash(String s, int size) {
        int hash = s.hashCode();
        
        if (hash < 0) {
            hash >>>= 1;
        }
        
        return hash % size;
    }
}

HashTester class

package bloomfilter;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 *
 * @author Maciej Modzelewski <https://maciejmodzelewski.com/contact/>
 */
class HashTester {
    private static final int INPUT_BUFFER_SIZE = 10000000;
    private BufferedReader reader;
    private BufferedWriter writer;
    
    public static void main(String[] args) {
        BufferedReader inputData;
        BufferedWriter outputData;
        HashTester tester = null;
        String className = "bloomfilter.StringHashFunctions";
        String[] methods = new String[]{"simpleHash", "crossHash", 
            "indexValueHash", "primeHash"};
        int size = 728570000;
        // add paths here
        String inputPath = "";
        String outputPath = "";
        boolean append = false;
        try {
            inputData = new BufferedReader(new FileReader(inputPath));
            outputData = new BufferedWriter(new FileWriter(outputPath, append));
            
            tester = new HashTester(inputData, outputData);
            tester.hashStrings(className, methods, size);
            // add paths here
            tester.setInput(new FileReader(""));
            tester.setOutput(new FileWriter("",false));
            tester.testHashFunctions(className, methods, size);
            tester.close();
        } catch (IOException | ClassNotFoundException | NoSuchMethodException | 
                IllegalAccessException | IllegalArgumentException | 
                InvocationTargetException ex) {
            System.err.println(ex.toString());
        } finally {
            try
            {
                tester.close();
            }
            catch (IOException ex)
            {
                System.err.println(ex.toString());
            }
        }
    }
    
    HashTester(BufferedReader input, BufferedWriter output) {
        reader = input;
        writer = output;
    }
    
    private void hashStrings(String className, String[] hashFunctions, int size) 
            throws IOException, ClassNotFoundException, NoSuchMethodException, 
            IllegalAccessException, IllegalArgumentException, 
            InvocationTargetException {
        String line;
        int[] hashes;
        String header = "";
        for (String hashFunction : hashFunctions) {
            header += "\"" + hashFunction + "\",";
        }
        writer.write(header.substring(0, header.length()-1));
        writer.newLine();
        reader.mark(HashTester.INPUT_BUFFER_SIZE);
        line = reader.readLine();
        while (line != null) {
            hashes = HashTester.hashString(className, hashFunctions, size, line);
            for (int i = 0; i < hashes.length; i++) {
                writer.write(String.valueOf(hashes[i]));
                if (i < hashes.length - 1) {
                    writer.write(',');
                }
            }
            writer.newLine();
            line = reader.readLine();
        }
        writer.flush();
        reader.reset();
    }
    
    private void testHashFunctions(String className, String[] hashFunctions, 
            int size) throws IOException, ClassNotFoundException, 
            NoSuchMethodException, IllegalAccessException, 
            IllegalArgumentException, InvocationTargetException {
        List<Set<Integer>> uniqueHashes;
        int[] hashes;
        String line;
        uniqueHashes = new ArrayList<>();
        for (String hashFunction : hashFunctions) {
            uniqueHashes.add(new HashSet<>());
        }
        reader.mark(HashTester.INPUT_BUFFER_SIZE);
        while ((line = reader.readLine()) != null) {
            hashes = HashTester.hashString(className, hashFunctions, size, line);
            for (int i = 0; i < hashes.length; i++) {
                uniqueHashes.get(i).add(hashes[i]);
            }
        }
        for (int i = 0; i < uniqueHashes.size(); i++) {
            writer.write(String.valueOf(uniqueHashes.get(i).size()));
            if (i < uniqueHashes.size() - 1) {
                writer.write(',');
            }
        }
        writer.newLine();
        writer.flush();
        reader.reset();
    }
    
    private void close() throws IOException {
        reader.close();
        writer.close();
    }
    
    private void setInput(Reader input) {
        reader = new BufferedReader(input);
    }
    
    private void setInput(BufferedReader input) {
        reader = input;
    }
    
    private void setOutput(Writer output) {
        writer = new BufferedWriter(output);
    }
    
    private void setOutput(BufferedWriter output) {
        writer = output;
    }
    
    private static int[] hashString(String className, String[] hashFunctions, 
            int size, String s) throws ClassNotFoundException, 
            NoSuchMethodException, IllegalAccessException, 
            IllegalArgumentException, InvocationTargetException {
        Class<?> strHashFunc = Class.forName(className);
        Class<?>[] methodParams = {String.class, int.class};
        Method hashFunc;
        int[] hashes;
        hashes = new int[hashFunctions.length];
        for (int i = 0; i < hashFunctions.length; i++) {
            hashFunc = strHashFunc.getDeclaredMethod(hashFunctions[i], methodParams);
            hashes[i] = (int) hashFunc.invoke(hashFunctions[i], s, size);
        }
        return hashes;
    }
}

Here is a ZIP containing files with the source code, HashTester.java and StringHashFunctions.java: HashFunctions

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

This site uses Akismet to reduce spam. Learn how your comment data is processed.