Skip to main content

Hash-based Search

An example of a hash-based search, although using dictionaries.  The code is designed to manage lists of Person objects, with unique Id's as the key value, with a rudimentary hash function to assign to various lists.

Salient Characteristic(s)
  • Useful for managing large unsorted lists
  • Divides the items into smaller hash-based buckets (divide and conquer)
  • Generally excellent performance
  • Sensitive to the choice of hash function and data structures chosen
Note
  • The Id key is not rigorous enough to guarantee uniqueness, and a better key might need to be selected, e.g., SSN, to prevent the same person being entered twice.
Code

using System;
using System.Collections.Generic;

namespace Algorithms
{
    //Example object to add and find in Hash Search example
    public class Person
    {
        private int _Id;
        public int Id 
        {
            get { return _Id; }
            set { _Id = value; } 
        }
        
        private string _LastName;
        public string LastName 
        {
            get { return _LastName; }
            set { _LastName = value; } 
        }
        
        private string _FirstName;
        public string FirstName 
        {
            get { return _FirstName; }
            set { _FirstName = value; } 
        }
        
        private DateTime _DateOfBirth;
        public DateTime DateOfBirth 
        {
            get { return _DateOfBirth; }
            set { _DateOfBirth = value; } 
        }
    }

    public class HashSearch
    {
        Dictionary<int, Dictionary<int, Person>> _HashList = new Dictionary<int, Dictionary<int, Person>>();
        int _BucketCount = 10;
        public HashSearch()
        {
            //create example list of persons
            IList<Person> arrayToSort = CreatePersonArray();
            
            //create bucket using hash, x/3
            CreateHashedArray(arrayToSort);
        }

        //code to create artificail list for example
        public IList<Person> CreatePersonArray()
        {
            IList<Person> arrayToSort = new List<Person>();
            int[] idNumbers = { 11, 7, 22, 2, 33, 3, 17, 44, 4, 55, 5, 66, 6, 1, 77 };
            foreach (int id in idNumbers)
            {
                Person tempPerson = new Person();
                tempPerson.Id = id;
                arrayToSort.Add(tempPerson);
            }
            return arrayToSort;
        }

        /// <summary>
        /// Helper for this example: Loads predefined array into internal lists
        /// </summary>
        /// <param name="arrayToSort"></param>
        private void CreateHashedArray(IList<Person> arrayToSort)
        {
            foreach (Person item in arrayToSort)
            {
                Add(item);
            }
        }

        /// <summary>
        /// Given a person Id, return person or null
        /// </summary>
        /// <param name="num"></param>
        /// <returns></returns>
        public Person Find(int num)
        {
            Person itemFound = null;
            int hash = CommonMethods.hashFunction(num, _BucketCount);
            Dictionary<int, Person> tempList;
            if (_HashList.TryGetValue(hash, out tempList))
            {
                foreach (KeyValuePair<int, Person> item in tempList)
                {
                    if (item.Key == num)
                    {
                        itemFound = item.Value;
                    }
                }
            } 
            return itemFound;
        }

        /// <summary>
        /// Given an Id, attempts to add, if item already exist, returns an error
        /// </summary>
        /// <param name="item"></param>
        public void Add(Person item)
        {
            int hash = CommonMethods.hashFunction(item.Id, _BucketCount);
            Dictionary<int, Person> tempList;
            if (!_HashList.TryGetValue(hash, out tempList))
            {
                tempList = new Dictionary<int, Person>();
                _HashList.Add(hash, tempList);
            }
            _HashList[hash].Add(item.Id, item);
        }       
    }
}

Helper Code

        public static int hashFunction(int num, int buckets)
        {
            return num / buckets;
        }

Example Usage

using System;

namespace Algorithms
{
    class Program
    {
        static void Main(string[] args)
        {
            HashSearch list = new HashSearch();

            //Verification, code to find items, and if not found, add
            Person find;
            for (int counter = 0; counter < 50; counter++)
            {
                find = null;
                find = list.Find(counter);

                if (find != null)
                {
                    Console.WriteLine(String.Format("Found ID: {0}", find.Id));
                }
                else
                {
                    Person newItem = new Person();
                    newItem.Id = counter;
                    list.Add(newItem);
                    Console.WriteLine(String.Format("Added ID: {0}", counter));
                }
            }

            //Error example: Adding an existing Person item
            try
            {
                Person errorItem = new Person();
                errorItem.Id = 11;
                list.Add(errorItem);
            }
            catch (Exception ex)
            {
                Console.WriteLine(String.Format("Error: {0}", ex.Message));
            }
        }
    }
}

Comments

Popular posts from this blog

Review - TFS/VSTS - Great Product, Ideal for Small Development Shops

This is a report a short review I provided for G2 regarding TFS : What do you like best? If you use Visual Studio for development, TFS, or its online equivalent VSTS, you can have a fairly seamless end-to-end integration. Out of the box, it provides code management, testing, work hierarchy in agile formats, automated build, and deployment. What do you dislike? Branching and merging can be a bit painful, in that it needs to be planned, and is not natively part of the process. Code review also needs to be planned and only recently has it become part of the process. Recommendations to others considering the product My only concern regarding TFS and VSTS is that Microsoft itself recommends using Git. What business problems are you solving with the product? What benefits have you realized? In my current role, I've joined a shop that has application development as secondary to their role of desktop OS and app deployment/maintenance, so their code management practi...

Complexity: A Guided Tour

Complexity: A Guided Tour by Melanie Mitchell My rating: 4 of 5 stars I enjoy reading in systems and complexity, and this was a nice addition to my shelf, with a slightly different take than other books. I found a few areas in the first half a bit tedious, overly long, repetitive, and not illuminating, but generally, it's a great overview of seminal work and very thought-provoking. The first half overlaps but nicely differs from other books I've read, covering things like chaos and information processing, and the latter half of the book I found more engaging, focused on models, computation, network science, and scaling. As mentioned, although I found the first half a bit of a slog at times, the second half was very engaging. View all my reviews

Do Algorithms Make You a Better Developer?

Responding to a question on HashNode, Developers who practise algorithms are better at software development than people who just do development. Is it true? , I wrote the following: My feeling is that algorithms help make one a better programmer, but that is likely true of many coding concepts. I did not have algorithms as an undergraduate, so my knowledge is acquired through reading and practice, but after reading and applying Algorithm's in a Nutshell, I felt the quality of my work improved. That said, my development work increased more after understanding Design Patterns, or after consuming books on database design.  Since many types of knowledge improve developing and architecting abilities, one has to consider how it helps and to what degree. Algorithms are coding-in-the-small, often narrowly focused solutions, but which can have a great impact at scale. For many applications, a focus on algorithms would be overkill as data sets and requirements do not require it. In this ...