Subscribe to The Madman Scribblings        RSS Feed
-----

Developing a Generic Radix Sort

Icon Leave Comment
The .net framework has a sorting method (I think either QuickSort or MergeSort) that can be made to work for any type.
So want to see if it was possible to create a general RadixSort that could work for any type.

This is what I've come up with.

The Interface: IRadixSort(Of T)
Public Interface IRadixSort(Of T)
 Function Orderer() As IEnumerator(Of Integer)
 Function RadixFN() As Func(Of T, Integer, Integer)
End Interface


The Classes: Radixer & Radix(Of T)
Option Strict On
Public Class Radixer
 Public Shared Function CreateNew(Of T)(ByVal eo As System.Collections.Generic.IEnumerator(Of Integer), _
                                  ByVal wRadix As Func(Of T, Integer, Integer)) As IRadixSort(Of T)
  Return New Radixer(Of T) With {.m = eo, .mwRadix = wRadix}
 End Function
End Class

Public Class Radixer(Of T)
 Implements IRadixSort(Of T)
 Protected Friend m As System.Collections.Generic.IEnumerator(Of Integer)
 Protected Friend mwRadix As Func(Of T, Integer, Integer)

 Public Function Orderer() As System.Collections.Generic.IEnumerator(Of Integer) Implements IRadixSort(Of T).Orderer
  Return m
 End Function

 Public Function RadixFN() As System.Func(Of T, Integer, Integer) Implements IRadixSort(Of T).RadixFN
  Return mwRadix
 End Function
End Class


Module: RadixSort
Option Strict On
Public Module RadixSort

 Private Structure RadixChainLinks
  Dim First As Integer
  Dim Last As Integer
 End Structure

 Private Structure DataChainLink
  Dim DataIndex As Integer
  Dim NextIndex As Integer
 End Structure

 Private Const EndOfChain As Integer = -1
 Private NoChains As New RadixChainLinks With {.First = EndOfChain, .Last = EndOfChain}

 Private Sub ResetChains(ByRef ptrc As RadixChainLinks())
  ptrc = Enumerable.Repeat(NoChains, 256).ToArray
 End Sub



This subroutine is the main work horse of the Radix Sort, it based on the BirstSort Algorithm.
Extra Info;-Explaination Tutorial, Snippet: BIRST String Snippet: BIRST Sort (Improved)

 <System.Runtime.CompilerServices.Extension()> _
 Public Sub RadixSort(Of T)(ByVal DataElements As T(), ByVal Radixer As IRadixSort(Of T))
  ' Define an array to store the data chain links '
  Dim DataIndexPtrs(DataElements.Count) As DataChainLink
  ' Define an array to store the RadixChainLinks '
  Dim RadixIndex(255) As RadixChainLinks
  ' Define an array for data positions, with initial positions '
  ' (This allows move the actual only twice.) '
  Dim DataPositions() As Integer = Enumerable.Range(0, DataElements.Count).ToArray
  ' Define variable that regularly, to save time on creation and disposal  '
  Dim TmpDCL As DataChainLink
  Dim RadixIndexValue As Integer
  Dim ThisDataIndex As Integer
  Dim CurrIndexPtr As Integer
  ' Define a Lamda Function to test for e chain (Aids Readibility of Code) '
  Dim DoesRadixHaveChainAttached = New Func(Of Integer, Boolean)(Function(iii) RadixIndex(iii).First > EndOfChain)
  ' Get the Orderer (Which decides order of the Radix) '
  Dim RadixOrderer = Radixer.Orderer

  ' While we still have a next Radix, the process for that radix '
  While RadixOrderer.MoveNext
   ' Set RadixIndex Chains to Initial Values '
   ResetChains(RadixIndex)
   ' Loop through the Data Elements '
   For i As Integer = 0 To DataElements.Count - 1
    ' Get Index Pointer to the Element position. '
    ThisDataIndex = DataPositions(i)
    ' Calculate its Radix Index '
    RadixIndexValue = Radixer.RadixFN(DataElements(ThisDataIndex), RadixOrderer.Current)
    ' Set the Data Index Ptrs for the current i to point to Current DataIndex '
    DataIndexPtrs(i).DataIndex = ThisDataIndex
    DataIndexPtrs(i).NextIndex = EndOfChain
    ' Does the Radix have a chain attached to it? '
    If DoesRadixHaveChainAttached(RadixIndexValue) Then
     ' Yes, Radix value has node chain attached to it so add a link to it by change NextIndex of DataIndex pointed to
     ' via the Last at this Radix Index, so it points to this one '
     DataIndexPtrs(RadixIndex(RadixIndexValue).Last).NextIndex = i
    Else
     ' No Chain so attach a chain to this radix value '
     RadixIndex(RadixIndexValue).First = i
    End If
    ' Change the Last at this Radix so it points to this DataIndex '
    RadixIndex(RadixIndexValue).Last = i
    ' Loop over all the data elements '
   Next i
   ' ------- Untangling the Chains ---------------------------- '
   ' After to Looping over all the Data Element for this Radix '
   ' We need to Redorder the DataIndexs '
   ' To do this we simple follow the each of the Radix Chains for 0 - 255 '
   ThisDataIndex = -1
   For j As Integer = 0 To 255 Step 1
    ' Check to see if radix value has a node chain attached to it '
    If DoesRadixHaveChainAttached(j) Then
     CurrIndexPtr = RadixIndex(j).First
     ' Continue to Walk through the Chain until we reach the End '
     While CurrIndexPtr > EndOfChain
      ' Increment the DataIndex '
      ThisDataIndex += 1
      ' Get the DataIndexPts
      TmpDCL = DataIndexPtrs(CurrIndexPtr)
      ' Update them
      DataPositions(ThisDataIndex) = TmpDCL.DataIndex
      CurrIndexPtr = TmpDCL.NextIndex
     End While
    End If
    ' Move on to next Radix Chain'
   Next j
   ' Repeat for next Radix '
  End While
  ' ------- Final Ordering of the Data ---------------------------- '
  ' At this stage the DataIndexPtr array points to the Correct ordering of the data '
  ' So now all wel have to do is put Data Elements into that correct order '
  Dim TempData(DataElements.Count - 1) As T
  For i As Integer = 0 To DataElements.Count - 1
   ' Set the TempList(i) to the word list at the indicate list position '
   TempData(i) = DataElements(DataPositions(i))
  Next i
  ' Copy from TempData into the actual Collection '
  Array.Copy(TempData, DataElements, DataElements.Count)
 End Sub

End Module



Extension Methods:
Option Strict On
Namespace Extensions

 Public Module Helpers
  <System.Runtime.CompilerServices.Extension()> _
  Public Function MaxStringLength(ByVal c As IEnumerable(Of String)) As Integer
   Return (From wwww As String In c Select wwww.Length).Max
  End Function
 End Module

 Public Module Radixers
  <System.Runtime.CompilerServices.Extension()> _
  Public Function GetRadixer(ByVal TheseStrings As ICollection(Of String)) As Radixer(Of String)
   Return CType(Radixer.CreateNew(Of String)(Enumerable.Range(0, TheseStrings.MaxStringLength).Reverse.GetEnumerator, _
  New Func(Of String, Integer, Integer)( _
   Function(Elem As String, cr As Integer) If(cr >= Elem.Length, 0, Asc(Elem(cr))))), Global.RadixSort.Radixer(Of String))
  End Function

  ' Radixer for UInt32 '
  <System.Runtime.CompilerServices.Extension()> _
 Public Function GetRadixer(ByVal TheseNumbers As ICollection(Of UInt32)) As Radixer(Of UInt32)
   Return CType(Radixer.CreateNew(Of UInt32)(Enumerable.Range(0, 4).GetEnumerator, _
  New Func(Of UInt32, Integer, Integer)( _
   Function(Elem As UInt32, cr As Integer) _
    CType((Elem And (255 << (8 * cr))) >> (8 * cr), Integer))), Global.RadixSort.Radixer(Of UInt32))
  End Function

  ' Radixer for Int16'
  <System.Runtime.CompilerServices.Extension()> _
 Public Function GetRadixer(ByVal TheseNumbers As ICollection(Of Int16)) As Radixer(Of Int16)
   Return CType(Radixer.CreateNew(Of Int16)(Enumerable.Range(0, 2).GetEnumerator, _
  New Func(Of Int16, Integer, Integer)( _
   Function(Elem As Int16, cr As Integer) _
    CType(If(cr < 1, (Elem And (255 << (8 * cr))) >> (8 * cr), 128 + ((Elem And (255 << (8 * cr))) >> (8 * cr))), Integer))), Global.RadixSort.Radixer(Of Int16))
  End Function

  ' Radixer for Int32'
  <System.Runtime.CompilerServices.Extension()> _
 Public Function GetRadixer(ByVal TheseNumbers As ICollection(Of Int32)) As Radixer(Of Int32)
   Return CType(Radixer.CreateNew(Of Int32)(Enumerable.Range(0, 4).GetEnumerator, _
  New Func(Of Int32, Integer, Integer)( _
   Function(Elem As Int32, cr As Integer) _
    CType(If(cr < 3, (Elem And (255 << (8 * cr))) >> (8 * cr), 128 + ((Elem And (255 << (8 * cr))) >> (8 * cr))), Integer))), Global.RadixSort.Radixer(Of Int32))
  End Function

  ' Radixer for Int64'
  <System.Runtime.CompilerServices.Extension()> _
 Public Function GetRadixer(ByVal TheseNumbers As ICollection(Of Int64)) As Radixer(Of Int64)
   Return CType(Radixer.CreateNew(Of Int64)(Enumerable.Range(0, 8).GetEnumerator, _
  New Func(Of Int64, Integer, Integer)( _
   Function(Elem As Int64, cr As Integer) _
    CType(If(cr < 7, (Elem And (255 << (8 * cr))) >> (8 * cr), 128 + ((Elem And (255 << (8 * cr))) >> (8 * cr))), Integer))), Global.RadixSort.Radixer(Of Int64))
  End Function
 End Module

End Namespace



Testing the Generic RadixSort.

mports RadixSort
Imports RadixSort.Extensions
Module Test_Module

 Dim rnd As New Random
 Sub Main()
  Dim y As New IO.StreamWriter("Test.txt", False)
  Dim rn0() As Int32 = (From ee In Enumerable.Range(0, 200) Select rnd.Next(-1000, 1000)).ToArray
  Dim rn1(rn0.Count - 1) As Int32

  Array.Copy(rn0, rn1, rn0.Count)

  Dim rs = rn1.GetRadixer
  Dim sw As New Diagnostics.Stopwatch
  sw.Start()
  Array.Sort(rn0)
  sw.Stop()
  y.WriteLine("{0}  {1}ms", "Array.Sort", sw.ElapsedMilliseconds)
  sw.Reset()
  sw.Start()
  rn1.RadixSort(rs)
  sw.Stop()
  y.WriteLine("{0}  {1}ms", "RadixSort", sw.ElapsedMilliseconds)
  y.WriteLine("Strings")

  Dim Lengths() As Integer = {16, 32, 64, 128, 256, 512, 1024, 2048}
  Dim Counts() As Integer = {16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536}
  Dim a As String() = {"AB", "AAA", "C", "B"}
  Dim rns(a.Count - 1) As String
  Dim RadTime As TimeSpan
  Dim r As Radixer(Of String)
  For Each strLen In Lengths
   For Each strCount In Counts
    Console.WriteLine()
    Console.Write("Building String ( {0} Strings  Lengths: 0 ~ {1} Chars)", strCount, strLen)

    y.WriteLine()
    y.Write("Building String ( {0} Strings  Lengths: 0 ~ {1} Chars)", strCount, strLen)
    Dim q As New List(Of String)
    For i As Integer = 1 To strCount
     q.Add(CreateRandomString(rnd.Next(0, strLen)))
    Next
    y.Write("   Built")

    a = q.ToArray
    Array.Resize(rns, a.Count)
    Array.Copy(a, rns, a.Count)
    sw.Reset()
    sw.Start()
    Array.Sort(rns)
    sw.Stop()
    y.WriteLine()
    y.WriteLine("{0}  {1}ms", "Array.Sort", sw.ElapsedMilliseconds)

    sw.Reset()
    sw.Start()
    r = a.GetRadixer
    sw.Stop()
    RadTime = sw.Elapsed

    sw.Reset()
    sw.Start()
    a.RadixSort(r)
    sw.Stop()
    y.WriteLine("{0} ( Radixer {1}ms + Sorting {2}ms = {3} )", "RadixSort", RadTime.Milliseconds, sw.ElapsedMilliseconds, (RadTime.Milliseconds + sw.ElapsedMilliseconds))

   Next
  Next
  Beep()
  y.Close()
  Console.ReadKey()

 End Sub

 Private Function CreateRandomString(ByVal OfLength As Integer) As String
  Dim str As String = ""
  For i As Integer = 1 To OfLength
   str &= Chr(65 + rnd.Next(0, 2))
  Next
  Return str
 End Function

End Module



Results
Array.Sort  0ms
RadixSort  10ms
Strings
Count	Length		Array.Sort
16	16	0ms	(3ms + Sorting 5ms = 8 )
32	16	0ms	(0ms + Sorting 0ms = 0 )
64	16	0ms	(0ms + Sorting 2ms = 2 )
128	16	0ms	(0ms + Sorting 0ms = 0 )
256	16	0ms	(0ms + Sorting 1ms = 1 )
512	16	1ms	(0ms + Sorting 1ms = 1 )
1024	16	2ms	(0ms + Sorting 3ms = 3 )
2048	16	5ms	(0ms + Sorting 5ms = 5 )
4096	16	13ms	(0ms + Sorting 11ms = 11 )
8192	16	27ms	(0ms + Sorting 23ms = 23 )
16384	16	56ms	(1ms + Sorting 50ms = 51 )
32768	16	124ms	(1ms + Sorting 107ms = 108 )
65536	16	264ms	(2ms + Sorting 199ms = 201 )

16	32	0ms	(0ms + Sorting 0ms = 0 )
32	32	0ms	(0ms + Sorting 0ms = 0 )
64	32	0ms	(0ms + Sorting 0ms = 0 )
128	32	0ms	(0ms + Sorting 1ms = 1 )
256	32	0ms	(0ms + Sorting 1ms = 1 )
512	32	1ms	(0ms + Sorting 3ms = 3 )
1024	32	2ms	(0ms + Sorting 6ms = 6 )
2048	32	6ms	(0ms + Sorting 11ms = 11 )
4096	32	13ms	(0ms + Sorting 26ms = 26 )
8192	32	27ms	(0ms + Sorting 44ms = 44 )
16384	32	70ms	(1ms + Sorting 123ms = 124 )
32768	32	128ms	(1ms + Sorting 194ms = 195 )
65536	32	277ms	(2ms + Sorting 447ms = 449 )

16	64	0ms	(0ms + Sorting 1ms = 1 )
32	64	0ms	(0ms + Sorting 5ms = 5 )
64	64	0ms	(0ms + Sorting 1ms = 1 )
128	64	0ms	(0ms + Sorting 2ms = 2 )
256	64	0ms	(0ms + Sorting 4ms = 4 )
512	64	1ms	(0ms + Sorting 6ms = 6 )
1024	64	2ms	(0ms + Sorting 12ms = 12 )
2048	64	6ms	(0ms + Sorting 24ms = 24 )
4096	64	13ms	(0ms + Sorting 46ms = 46 )
8192	64	29ms	(0ms + Sorting 92ms = 92 )
16384	64	62ms	(0ms + Sorting 193ms = 193 )
32768	64	134ms	(1ms + Sorting 458ms = 459 )
65536	64	298ms	(3ms + Sorting 994ms = 997 )

16	128	0ms	(0ms + Sorting 4ms = 4 )
32	128	0ms	(0ms + Sorting 4ms = 4 )
64	128	0ms	(0ms + Sorting 4ms = 4 )
128	128	0ms	(0ms + Sorting 5ms = 5 )
256	128	0ms	(0ms + Sorting 7ms = 7 )
512	128	1ms	(0ms + Sorting 13ms = 13 )
1024	128	3ms	(0ms + Sorting 26ms = 26 )
2048	128	6ms	(0ms + Sorting 46ms = 46 )
4096	128	14ms	(0ms + Sorting 91ms = 91 )
8192	128	30ms	(0ms + Sorting 181ms = 181 )
16384	128	64ms	(1ms + Sorting 459ms = 460 )
32768	128	145ms	(2ms + Sorting 1027ms = 1029 )
65536	128	313ms	(5ms + Sorting 2155ms = 2160 )

16	256	0ms	(0ms + Sorting 6ms = 6 )
32	256	0ms	(0ms + Sorting 6ms = 6 )
64	256	0ms	(0ms + Sorting 7ms = 7 )
128	256	0ms	(0ms + Sorting 10ms = 10 )
256	256	0ms	(0ms + Sorting 16ms = 16 )
512	256	1ms	(0ms + Sorting 26ms = 26 )
1024	256	3ms	(0ms + Sorting 49ms = 49 )
2048	256	7ms	(0ms + Sorting 94ms = 94 )
4096	256	15ms	(0ms + Sorting 183ms = 183 )
8192	256	32ms	(0ms + Sorting 380ms = 380 )
16384	256	71ms	(1ms + Sorting 1024ms = 1025 )
32768	256	160ms	(3ms + Sorting 2207ms = 2210 )
65536	256	347ms	(7ms + Sorting 4710ms = 4717 )

16	512	0ms	(0ms + Sorting 10ms = 10 )
32	512	0ms	(0ms + Sorting 11ms = 11 )
64	512	0ms	(0ms + Sorting 14ms = 14 )
128	512	0ms	(0ms + Sorting 19ms = 19 )
256	512	0ms	(0ms + Sorting 31ms = 31 )
512	512	1ms	(0ms + Sorting 53ms = 53 )
1024	512	3ms	(0ms + Sorting 98ms = 98 )
2048	512	8ms	(0ms + Sorting 185ms = 185 )
4096	512	17ms	(0ms + Sorting 370ms = 370 )
8192	512	37ms	(0ms + Sorting 797ms = 797 )
16384	512	79ms	(1ms + Sorting 2165ms = 2166 )
32768	512	172ms	(3ms + Sorting 4566ms = 4569 )
65536	512	374ms	(7ms + Sorting 9383ms = 9390 )

16	1024	0ms	(0ms + Sorting 18ms = 18 )
32	1024	0ms	(0ms + Sorting 24ms = 24 )
64	1024	0ms	(0ms + Sorting 29ms = 29 )
128	1024	0ms	(0ms + Sorting 39ms = 39 )
256	1024	1ms	(0ms + Sorting 63ms = 63 )
512	1024	2ms	(0ms + Sorting 106ms = 106 )
1024	1024	4ms	(0ms + Sorting 197ms = 197 )
2048	1024	10ms	(0ms + Sorting 380ms = 380 )
4096	1024	22ms	(0ms + Sorting 757ms = 757 )
8192	1024	49ms	(0ms + Sorting 1719ms = 1719 )
16384	1024	106ms	(1ms + Sorting 4377ms = 4378 )
32768	1024	216ms	(3ms + Sorting 9395ms = 9398 )
65536	1024	447ms	(7ms + Sorting 19249ms = 19256 )

16	2048	0ms	(0ms + Sorting 37ms = 37 )
32	2048	0ms	(0ms + Sorting 47ms = 47 )
64	2048	0ms	(0ms + Sorting 58ms = 58 )
128	2048	0ms	(0ms + Sorting 79ms = 79 )
256	2048	1ms	(0ms + Sorting 124ms = 124 )
512	2048	3ms	(0ms + Sorting 217ms = 217 )
1024	2048	7ms	(0ms + Sorting 397ms = 397 )
2048	2048	14ms	(0ms + Sorting 766ms = 766 )
4096	2048	33ms	(0ms + Sorting 1539ms = 1539 )
8192	2048	67ms	(0ms + Sorting 3371ms = 3371 )
16384	2048	140ms	(2ms + Sorting 8925ms = 8927 )
32768	2048	295ms	(4ms + Sorting 19115ms = 19119 )
65536	2048	621ms	(8ms + Sorting 39697ms = 39705 )




Analysis.
From the result you can see the strong linearity of the Radix Sort. Double the Count and you double the time it takes.
For strings of length 16 my method is quicker than Array.Sort.

Conclusion
The sort method provided by .net library is fastest enough for most datasets. I shows that writing code for sorting algorithms is purely a educational venture.

0 Comments On This Entry

 

Search My Blog

Recent Entries

Recent Comments