Wednesday, August 1, 2012

Hash-based Search

An example of a hash-based search, although using dictionaries.  The code is designed to manage lists of Person objects, with unique Id's as the key value, with a rudimentary hash function to assign to various lists.

Salient Characteristic(s)
  • Useful for managing large unsorted lists
  • Divides the items into smaller hash-based buckets (divide and conquer)
  • Generally excellent performance
  • Sensitive to the choice of hash function and data structures chosen
Note
  • The Id key is not rigorous enough to guarantee uniqueness, and a better key might need to be selected, e.g., SSN, to prevent the same person being entered twice.
Code

using System;
using System.Collections.Generic;

namespace Algorithms
{
    //Example object to add and find in Hash Search example
    public class Person
    {
        private int _Id;
        public int Id 
        {
            get { return _Id; }
            set { _Id = value; } 
        }
        
        private string _LastName;
        public string LastName 
        {
            get { return _LastName; }
            set { _LastName = value; } 
        }
        
        private string _FirstName;
        public string FirstName 
        {
            get { return _FirstName; }
            set { _FirstName = value; } 
        }
        
        private DateTime _DateOfBirth;
        public DateTime DateOfBirth 
        {
            get { return _DateOfBirth; }
            set { _DateOfBirth = value; } 
        }
    }

    public class HashSearch
    {
        Dictionary<int, Dictionary<int, Person>> _HashList = new Dictionary<int, Dictionary<int, Person>>();
        int _BucketCount = 10;
        public HashSearch()
        {
            //create example list of persons
            IList<Person> arrayToSort = CreatePersonArray();
            
            //create bucket using hash, x/3
            CreateHashedArray(arrayToSort);
        }

        //code to create artificail list for example
        public IList<Person> CreatePersonArray()
        {
            IList<Person> arrayToSort = new List<Person>();
            int[] idNumbers = { 11, 7, 22, 2, 33, 3, 17, 44, 4, 55, 5, 66, 6, 1, 77 };
            foreach (int id in idNumbers)
            {
                Person tempPerson = new Person();
                tempPerson.Id = id;
                arrayToSort.Add(tempPerson);
            }
            return arrayToSort;
        }

        /// <summary>
        /// Helper for this example: Loads predefined array into internal lists
        /// </summary>
        /// <param name="arrayToSort"></param>
        private void CreateHashedArray(IList<Person> arrayToSort)
        {
            foreach (Person item in arrayToSort)
            {
                Add(item);
            }
        }

        /// <summary>
        /// Given a person Id, return person or null
        /// </summary>
        /// <param name="num"></param>
        /// <returns></returns>
        public Person Find(int num)
        {
            Person itemFound = null;
            int hash = CommonMethods.hashFunction(num, _BucketCount);
            Dictionary<int, Person> tempList;
            if (_HashList.TryGetValue(hash, out tempList))
            {
                foreach (KeyValuePair<int, Person> item in tempList)
                {
                    if (item.Key == num)
                    {
                        itemFound = item.Value;
                    }
                }
            } 
            return itemFound;
        }

        /// <summary>
        /// Given an Id, attempts to add, if item already exist, returns an error
        /// </summary>
        /// <param name="item"></param>
        public void Add(Person item)
        {
            int hash = CommonMethods.hashFunction(item.Id, _BucketCount);
            Dictionary<int, Person> tempList;
            if (!_HashList.TryGetValue(hash, out tempList))
            {
                tempList = new Dictionary<int, Person>();
                _HashList.Add(hash, tempList);
            }
            _HashList[hash].Add(item.Id, item);
        }       
    }
}

Helper Code

        public static int hashFunction(int num, int buckets)
        {
            return num / buckets;
        }

Example Usage

using System;

namespace Algorithms
{
    class Program
    {
        static void Main(string[] args)
        {
            HashSearch list = new HashSearch();

            //Verification, code to find items, and if not found, add
            Person find;
            for (int counter = 0; counter < 50; counter++)
            {
                find = null;
                find = list.Find(counter);

                if (find != null)
                {
                    Console.WriteLine(String.Format("Found ID: {0}", find.Id));
                }
                else
                {
                    Person newItem = new Person();
                    newItem.Id = counter;
                    list.Add(newItem);
                    Console.WriteLine(String.Format("Added ID: {0}", counter));
                }
            }

            //Error example: Adding an existing Person item
            try
            {
                Person errorItem = new Person();
                errorItem.Id = 11;
                list.Add(errorItem);
            }
            catch (Exception ex)
            {
                Console.WriteLine(String.Format("Error: {0}", ex.Message));
            }
        }
    }
}

Friday, July 13, 2012

Binary Search

This is the basic divide-and-conquer algorithm, requiring a sorted array. This repeatedly halves the remaining positions to search, until the value is found, or it determines that the value cannot be found. As mentioned, it requires a presorted array, or if unsorted, requires it is sorted. The benefits accrue in instances where the array is sorted once, but searched many times.

On a hypothetical array of 100 items, a linear search has an average performances of O(n/2), or a search of half the positions, 50, to find the item. By comparison, the average performance of Binary Search is O(log n), which for 100 items translates into 4.6, a radically reduced number of iterations, but with greater requirements for logic and memory.

Done in both C#, as an iterative while loop, and F#, as a recursion.

C#

using System;

namespace Algorithms
{
    class BinarySearch
    {
        public BinarySearch()
        {
        }

        public int Search(int num, int[] arrayToSearch)
        {
            int lowPosition = 0;
            int highPosition = arrayToSearch.Length - 1;           
            while (highPosition >= lowPosition)
            {
                //correct calculation for the midpoint
                int position = lowPosition + ((highPosition - lowPosition) / 2);
            
                if (arrayToSearch[position] < num)
                {
                    lowPosition = position + 1;
                }
                else if (arrayToSearch[position] > num)
                {
                    highPosition = position - 1;
                }
                else
                {
                    return position;
                }
            }
            //value not found
            return -1;
        }
    }
}


F#

//a recursive binary search
let rec BinarySearch (arr:array) (num:int) (min:int) (max:int) =    
    if max < min then
        -1 //System.Int32.MinValue
    else
        let position = min + ((max-min)/2)
        if arr.[position] < num then
            // key is in upper subset
            BinarySearch arr num (position+1) max
        else if arr.[position] > num then
            // key is in lower subset
            BinarySearch arr num min (position-1)
        else
            position

//sample sorted array
let baseArray = Array.append [|1..55|] [|99..999|]

//value of max in recursion, used for searches
let maxIndex = ((Array.length baseArray)-1)

//sample searches, including edge cases and returns for value not found
let resultFound1 = BinarySearch baseArray 2 0 maxIndex
let resultFound2 = BinarySearch baseArray 22 0 maxIndex
let resultFound3 = BinarySearch baseArray 222 0 maxIndex
let resultFound4 = BinarySearch baseArray 75 0 maxIndex
let resultFound5 = BinarySearch baseArray 1 0 maxIndex
let resultFound6 = BinarySearch baseArray 0 0 maxIndex
let resultFound7 = BinarySearch baseArray -1 0 maxIndex
let resultFound8 = BinarySearch baseArray 1000 0 maxIndex

Wednesday, July 11, 2012

Linear Search

Because Linear Search is so simple, I thought of doing something a bit different, so this post includes code in F# as well as C#.

F#

//linear search, but with error handling for 'value not found'
let LinearSearch num arr = 
    try
        arr |> List.findIndex (fun x -> x = num)
    with
        | :? System.Collections.Generic.KeyNotFoundException -> -1

//create array to use
let baseList = [3; 1; 7; 2; 9; 4; 1; 12; 25; 10; 11; 19; 22]

//two examples, one that works, and one that returns an error of -1
let resultFound = LinearSearch 12 baseList
let resultMissing = LinearSearch 23 baseList

C#

Code

using System;

namespace Algorithms
{
    class LinearSearch
    {
        public LinearSearch()
        {
        }
        
        public int Search(int num, int[] arrayToSearch)
        {
            //creates array
            //int[] arrayToSort = { 11, 7, 22, 2, 33, 3, 17, 44, 4, 55, 5, 66, 6, 1, 77 };

            int posFound = -1;
            for(int pos = 0; pos<arrayToSearch.Length; pos++)
            { 
                if (arrayToSearch[pos] == num)
                {
                    posFound = pos;
                    break;
                }
            }
            return posFound;
        }
    }
}

Usage

This returns position 9:

            LinearSearch search = new LinearSearch();
            int[] arr = { 11, 7, 22, 2, 33, 3, 17, 44, 4, 55, 5, 66, 6, 1, 77 };
            int result = search.Search(55, arr);




Bucket Sort

This following uses the divide-and-conquer method of resolution, by dividing the problem into smaller arrays, sorting each smaller array, then reinserting the subarrays back into the original array.  This is not a pure solution, in that it uses the .NET List, rather than a linked list struct.

using System;
using System.Collections;
using System.Collections.Generic;

namespace Algorithms
{
    class BucketSort
    {
        public int[] Sort()
        {   
            //creates arbitrary array
            int[] arrayToSort = { 11, 7, 22, 2, 33, 3, 17, 44, 4, 55, 5, 66, 6, 1, 77 };
            //create bucket using hash, x/10
            List<int>[] bucketList = CreateHashedArray(arrayToSort, 10);
            //sort the bucket list back into the original array
            ReorderList(bucketList, arrayToSort);
            return arrayToSort;
        }

        private int hashFunction(int num, int buckets)
        {
            return num / buckets;
        }

        private List<int>[] CreateHashedArray(int[] arrayToSort, int buckets)
        {
            List<int>[] bucketList;

            //get min and max of array
            int min = arrayToSort[0];
            int max = arrayToSort[0];
            foreach (int num in arrayToSort)
            {
                if (num < min) min = num;
                if (num > max) max = num;
            }

            //create linkedlist of buckets
            int bucketSize = (max - min) / buckets + 1;
            bucketList = new List<int>[bucketSize];
            foreach (int num in arrayToSort)
            {
                int hash = hashFunction(num, buckets);
                if (bucketList[hash] == null)
                {
                    bucketList[hash] = new List<int>();
                }
                bucketList[hash].Add(num);
            }
            return bucketList;
        }

        int reorderPosition = 0;
        private int[] ReorderList(List<int>[] buckets, int[] originalArray)
        {
            foreach (List<int> lst in buckets)
            {
                if (lst != null && lst.Count > 0)
                {
                    List<int> innerList = InsertionSort.Sort(lst);

                    for (int innerCounter = 0; innerCounter < lst.Count; innerCounter++)
                    {
                        originalArray[reorderPosition] = innerList[innerCounter];
                        reorderPosition++;
                    }
                }
            }
            return originalArray;
        }
    }
}

Wednesday, July 4, 2012

Counting Sort

I did not spend a significant amount of time removing potential issues with this code, but then, neither do most examples of the Counting Sort; there are more interesting algorithms ahead.  The most obvious flaws in this particular implementation are as follows:
  • It cannot handle negative integers
  • It is designed only for integers. 
using System;

namespace Algorithms
{
    class CountingSort
    {
        public int[] Sort()
        {
            //creates array
            int[] arrayToSort = { 2, 1, 2, 1, 3, 0, 3, 4, 4, 3, 0, 2, 1, 3, 5, 1, 3 };

            //create array for bucket
            int min = 0; 
            int max = 0;
            for (int counter = 0; counter < arrayToSort.Length; counter++)
            {
                if (arrayToSort[counter] < min)
                {
                    min = arrayToSort[counter];
                }

                if (arrayToSort[counter] > max)
                {
                    max = arrayToSort[counter];
                }
            }
            int[] arrayBucket = new int[max - min + 1];

            //count items into buckets
            for (int counter = 0; counter < arrayToSort.Length; counter++)
            {
                arrayBucket[arrayToSort[counter]]++;   
            }

            //sort buckets back into array
            //count items into buckets
            int lastPosition = 0;
            for (int counter = 0; counter < arrayBucket.Length; counter++)
            {
                for (int innerCounter = 0; innerCounter < arrayBucket[counter]; innerCounter++)
                {
                    arrayToSort[lastPosition++] = counter;
                }
            }
            return arrayToSort;
        }
    }
}

Tuesday, July 3, 2012

Heap Sort

Heap Sort is interesting because the use of the 'heapify' method that creates a binary tree as a flat array. First, a binary tree, also known as a heap, is created, and then the same function is used to sort the elements. In a heap, the first node in the array at zero (0) is the top node of the binary tree. The next two (2) items are the two (2) subnodes of the top node, and so on. For each node at a position (positionIndex), its 2 subnodes are in the following positions:

  • int left = 2 * positionIndex + 1;
  • int right = 2 *  positionIndex + 2;

The CommonMethods.Swap() function is a reusable class, since swapping values by position is a common action during these example sorts.

Class

using System;

namespace Algorithms
{
    class HeapSort
    {
        public int[]  Sort()
        {
            //creates array
            int[] arrayToSort = { 11, 1, 22, 2, 33, 3, 44, 4, 55, 5, 66, 6, 7, 77, 88 };
           
            //creates a heap
            BuildHeap(arrayToSort);

            //sorts a heap
            //takes the largest element @ first position and moves to end
            //reuses the same heapify function, but in reverse, to sort the array
            int size = arrayToSort.Length;
            for (int counter = size - 1; counter > 0; counter--)
            {
                CommonMethods.Swap(arrayToSort, 0, counter);
                HeapifyRecursive(arrayToSort, 0, counter);
            }
            return arrayToSort;
        }

        //a heap, in this case, is a binary tree
        //the largest element is in the first position
        //the next two positions are the 2 subelements of this node, and so on
        //it looks at each element, then reporders them so it maintains the heap structure
        public int[] BuildHeap(int[] arrayToSort)
        {
            int size = arrayToSort.Length;
            int start = (arrayToSort.Length / 2) - 1;
          
            for (int counter = start; counter >= 0; counter--)
            {
                HeapifyRecursive(arrayToSort, counter, size);
            }
            return arrayToSort;
        }

        public void HeapifyRecursive(int[] arrayToSort, int idx, int max)
        {
            int left = 2 * idx + 1;
            int right = 2 * idx + 2;
            int largest;
            if (left < max && arrayToSort[left] > arrayToSort[idx])
            {
                largest = left;
            }
            else
            {
                largest = idx;
            }

            if (right < max && arrayToSort[right] > arrayToSort[largest])
            {
                largest = right;
            }

            if (largest != idx)
            {
                CommonMethods.Swap(arrayToSort, idx, largest);
                HeapifyRecursive(arrayToSort, largest, max);
            }
        }
    }
}

Swap - A Resuable Method

During sorts, swapping elements is a common function, and this static class can be used instead of writing redundant code.  In this case, for the example code on this site, the function only swaps integer values in an integer-based array, but the idea could easily be overloaded for numerous array-like structures. 

Usage

The order of the numbers to swap is immaterial.  Simply pass it the zero-based positions of the items the code needs to swap.

CommonMethods.Swap(arrayToSort, firstNumber, second number);

Class

using System;

namespace Algorithms
{
    static class CommonMethods
    {
        public static int[] Swap(int[] arrayToUse, int itemOneIndex, int itemTwoIndex)
        {
            int tempValueHolder = arrayToUse[itemOneIndex];
            arrayToUse[itemOneIndex] = arrayToUse[itemTwoIndex];
            arrayToUse[itemTwoIndex] = tempValueHolder;
            return arrayToUse;
        }
    }
}