Bag(包)
背包:不支持删除元素的集合数据类型。
public interface IBag<TItem> : IEnumerable<TItem>, IDisposable
{
bool IsEmpty { get; }
int Length { get; }
void Add(TItem item);
}
数组实现
public class ResizingArrayBagNet<TItem>:IBag<TItem>
{
private TItem[] _items;
private int _length;
public ResizingArrayBagNet()
{
_items=new TItem[2];
_length = 0;
}
public IEnumerator<TItem> GetEnumerator()
{
if (!IsEmpty)
{
for (var i = 0; i < _length; i++)
{
yield return _items[i];
}
}
}
IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
public void Dispose()
{
throw new NotImplementedException();
}
protected virtual void Dispose(bool disposing)
{
if (disposing)
{
if (!IsEmpty)
{
for (var i = 0; i < _length ; i++)
{
_items[i] = default;
}
}
}
}
public bool IsEmpty => _length == 0;
public int Length => _length;
public void Add(TItem item)
{
if(_length==_items.Length) resize(_length*2);
_items[_length++] = item;
}
private void resize(int capacity)
{
var temitems=new TItem[capacity];
Array.Copy(_items,0,temitems,0,_length);
_items = temitems;
}
}
链表下压栈
public class LinkedBagNet<TItem> : IBag<TItem>
{
private Node _first;
private int _length;
private class Node
{
public TItem Item { set; get; }
public Node Next { set; get; }
}
public LinkedBagNet()
{
_length = 0;
_first = default;
}
public IEnumerator<TItem> GetEnumerator()
{
if (!IsEmpty)
{
var temnode = _first;
while (temnode != default)
{
var item = temnode.Item;
temnode = temnode.Next;
yield return item;
}
}
}
IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
public void Dispose()
{
Dispose(true);
}
protected virtual void Dispose(bool disposing)
{
if (disposing)
{
if (!IsEmpty)
{
var temnode = _first;
while (temnode != default)
{
temnode.Item = default;
temnode = temnode.Next;
}
}
}
}
public bool IsEmpty => _length == 0;
public int Length => _length;
public void Add(TItem item)
{
Node oldeNode = _first;
_first=new Node();
_first.Item = item;
_first.Next = oldeNode;
_length++;
}
}
Stack(栈)
栈:后进先出。
游离:弹出的元素所在的引用必须要置空,不然没办法被垃圾回收器回收。
public interface IStack<TItem> : IEnumerable<TItem>, IDisposable
{
bool IsEmpty { get; }
int Length { get; }
void Push(TItem item);
TItem Pop();
}
栈应用:算术表达式求值:
- 将操作数压入操作数栈
- 将运算符压入运算栈
- 忽略左括号
- 在遇到右括号时,弹出一个运算符,弹出所需要数量的操作数,并将运算符和操作数的运算结果压入操作数栈。
数组实现
public class ResizingArrayStackNet<TItem> : IStack<TItem>
{
private TItem[] _items;
private int _length;
public ResizingArrayStackNet()
{
_items = new TItem[2];
_length = 0;
}
public int Length => _length;
public bool IsEmpty => _length == 0;
private void resize(int capacity)
{
var temitems=new TItem[capacity];
Array.Copy(_items,0,temitems,0,_items.Length);
_items = temitems;
}
public void Push(TItem item)
{
if(_length==_items.Length) resize(2*_length);
_items[_length++] = item;
}
public TItem Pop()
{
if (IsEmpty) return default;
TItem item = _items[--_length] ;
_items[_length] = default;
if(_length>0 && _length==_items.Length/4) resize( _items.Length / 2);
return item;
}
public IEnumerator<TItem> GetEnumerator()
{
if(!IsEmpty)
{
for(var i=0;i<_length;i++)
{
yield return _items[i];
}
}
}
IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
public void Dispose()
{
Dispose(true);
}
protected virtual void Dispose(bool disposing)
{
if (disposing)
{
if (!IsEmpty)
{
for (var i = 0; i < _length; i++)
{
_items[i] = default;
}
}
}
}
}
链表实现下压堆栈
public class LinkedStackNet<TItem> : IStack<TItem>
{
private Node _first;
private class Node
{
public TItem Item { set; get; }
public Node Next { set; get; }
}
public LinkedStackNet()
{
_length = 0;
_first = default;
}
public IEnumerator<TItem> GetEnumerator()
{
if (!IsEmpty)
{
var temnode = _first;
while (temnode != default)
{
var item = temnode.Item;
temnode = temnode.Next;
yield return item;
}
}
}
IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
public void Dispose()
{
Dispose(true);
}
protected virtual void Dispose(bool disposing)
{
if (disposing)
{
if (!IsEmpty)
{
var temnode = _first;
while (temnode != default)
{
temnode.Item = default;
temnode = temnode.Next;
}
}
}
}
private int _length;
public bool IsEmpty => _length == 0;
public int Length => _length ;
public void Push(TItem item)
{
Node oldfirst = _first;
_first=new Node();
_first.Item = item;
_first.Next = oldfirst;
_length++;
}
public TItem Pop()
{
if (IsEmpty) return default;
var item = _first.Item;//不存在游离的问题,这个Node被GC回收
_first = _first.Next;
_length--;
return item;
}
}
Queue(队列)
队列:先进先出(FIFO)
public interface IQueue<TItem> : IEnumerable<TItem>, IDisposable
{
bool IsEmpty { get; }
int Length { get; }
void Enqueue(TItem item);
TItem Dequeue();
}
数组实现
public class ResizingArrayQueueNet<TItem> : IQueue<TItem>
{
public ResizingArrayQueueNet()
{
_first = 0;
_last = 0;
_items=new TItem[2];
}
public IEnumerator<TItem> GetEnumerator()
{
if (!IsEmpty)
{
for (var i = _first; i < _last; i++)
{
yield return _items[i];
}
}
}
IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
public void Dispose()
{
Dispose(true);
}
protected virtual void Dispose(bool disposing)
{
if (disposing)
{
if (!IsEmpty)
{
for (var i = _first; i < _last; i++)
{
_items[i] = default;
}
_items = null;
}
}
}
private TItem[] _items;
private int _first;
private int _last;
public bool IsEmpty => (_last - _first) == 0;
public int Length => _last-_first;
private void resize(int size)
{
var temitems=new TItem[size];
var temlength = Length;
Array.Copy(_items,_first,temitems,0,Length);
_first = 0;
_last = temlength;
_items = temitems;
}
public void Enqueue(TItem item)
{
if(_last==_items.Length) resize(Length*2);
_items[_last++] = item;
}
public TItem Dequeue()
{
if (IsEmpty) return default;
var item = _items[_first++];
if(Length<_items.Length/4) resize(_items.Length/2);
return item;
}
}
链表实现下压队列
public class LinkedQueueNet<TItem> : IQueue<TItem>
{
private Node _first;
private Node _last;
private int _length;
public LinkedQueueNet()
{
_first = null;
_last = null;
_length = 0;
}
private class Node
{
public TItem Item { set; get; }
public Node Next { set; get; }
}
public IEnumerator<TItem> GetEnumerator()
{
if (!IsEmpty)
{
var temnode = _first;
while (temnode != default)
{
var item = temnode.Item;
temnode = temnode.Next;
yield return item;
}
}
}
IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
public void Dispose()
{
Dispose(true);
}
protected virtual void Dispose(bool disposing)
{
if (disposing)
{
if (!IsEmpty)
{
var temfirst = _first;
while (temfirst != default)
{
temfirst.Item = default;
temfirst = temfirst.Next;
}
_length = 0;
}
}
}
public bool IsEmpty => _length == 0;
public int Length => _length;
public void Enqueue(TItem item)
{
var temnode = _last;
_last = new Node();
_last.Item = item;
_last.Next = null;
if (IsEmpty) _first = _last;
else temnode.Next = _last;
_length++;
}
public TItem Dequeue()
{
if (_length > 0)
{
var temitem = _first.Item;
_first = _first.Next;
_length--;
return temitem;
}
return default;
}
}
排序
比较和交换:
public class AssistSort<T> where T : IComparable<T>
{
public static void Shuffle(T[] seq)
{
int n = seq.Length;
var random = new Random(Guid.NewGuid().GetHashCode());
for (int i = 0; i < n; i++)
{
// choose index uniformly in [0, i]
int r = (int)(random.Next(n));
T swap = seq[r];
seq[r] = seq[i];
seq[i] = swap;
}
}
// does v == w ?
public static bool Eq(T v, T w)
{
if (ReferenceEquals(v, w)) return true; // optimization when reference equal
return v.CompareTo(w) == 0;
}
public static void Exch(T[] seq, int value, int min)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (value > seq.Length - 1 || value < 0)
throw new ArgumentException("value is not in seq");
if (min > seq.Length - 1 || min < 0)
throw new ArgumentException("min is not in seq");
T t = seq[value];
seq[value] = seq[min];
seq[min] = t;
}
public static bool Less(T value, T compare)
{
if (value == null)
throw new ArgumentNullException("value is null");
if (compare == null)
throw new ArgumentNullException("compare is null");
return value.CompareTo(compare) < 0;
}
public static void Show(T[] seq)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (!seq.Any())
throw new ArgumentException("seq length equal 0");
Console.WriteLine(string.Join(" ", seq));
}
public static bool IsSorted(T[] seq)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (!seq.Any())
throw new ArgumentException("seq length equal 0");
for (int i = 1; i < seq.Length; i++)
{
if (Less(seq[i], seq[i - 1])) return false;
}
return true;
}
public static bool IsSorted(T[] seq, int lo, int hi)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (seq.Any() || seq.Length < 2)
throw new ArgumentException("seq length is abnormal");
if (lo < 0 || lo > seq.Length - 1 || (lo + 1) == seq.Length)
throw new ArgumentException("lo is abnormal");
if (hi < 0 || hi > seq.Length - 1 || hi < lo)
throw new ArgumentException("hi is abnormal");
for (int i = lo + 1; i <= hi; i++)
if (Less(seq[i], seq[i - 1])) return false;
return true;
}
}
选择排序
运行时间与初始状态无关
数据移动是最少的,只用了N次交换,交换次数和数组的大小是线性关系,
找到剩余元素中最小数与第一个交换
结论A:对于长度为N的数组,选择排序的需要大约N^2/2次比较和N次交换
public class SelectionSort<T> where T : System.IComparable<T>
{
public static void Sort(T[] seq)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (!seq.Any())
return;
var count = seq.Length;
for (int i = 0; i < count; i++)
{
int min = i;
//找出剩余项中最小项的位置
for (int j = i + 1; j < count; j++)
{
//if (seq[j].CompareTo(seq[min]) < 0) min = j;
if (AssistSort<T>.Less(seq[j], seq[min])) min = j;
}
//将最小项与min交换
AssistSort<T>.Exch(seq, i, min); //调用静态方法非常消耗性能
//T t = seq[i];
//seq[i] = seq[min];
//seq[min] = t;
}
}
}
插入排序
像打牌一项,将每一张排插入到已经有序的牌中的适当位置。
运行时间取决于元素的初始顺序。
结论B:对于随机不重复的数组,平均情况下插入排序需要N^2/4次比较和交换。最坏情况下需要N^2/2次交换和交换,最好情况下需要N-1次比较和0次交换。
public class InsertionSort<T> where T : IComparable<T>
{
public static void Sort(T[] seq)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (!seq.Any())
return;
int count = seq.Length;
for (int i = 0; i < count; i++)
{
for (int j = i; j > 0 && AssistSort<T>.Less(seq[j], seq[j - 1]); j--)
{
AssistSort<T>.Exch(seq, j, j - 1);
}
}
// 5 4 3 1 2
// 4 5 3 1 2
// 4 3 5 1 2
// 3 4 5 1 2
// 1 3 4 5 2
// 1 2 3 4 5
//1 2 4 3 5
//1 2
}
public static void Sort(T[] seq, int lo, int hi)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (!seq.Any())
return;
if (hi < lo || lo < 0 || hi < 0 || lo > seq.Length || hi > seq.Length)
throw new ArgumentException("Parameter error");
for (int i = lo + 1; i < hi; i++)
{
for (int j = i; j > lo && AssistSort<T>.Less(seq[j], seq[j - 1]); j--)
{
//交换,每个seq[j]相当于被访问了2次,跟seq[j+1]和seq[j-1]各比较一次
AssistSort<T>.Exch(seq, j, j - 1);
}
}
}
}
插入排序改进减少比较
结论C:插入排序需要的交换操作和数组中倒置的数量相同,需要比较次数大于等于倒置的数量,小于等于倒置的数量加上数组的大小再减一。
改进插入排序,只需要在内循环中将较大的元素都向右移动而不总是交换两个元素。这样访问数组的次数能够减半。
public static class InsertionSortX<T> where T : IComparable<T>
{
public static void Sort(T[] seq)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (!seq.Any())
return;
int n = seq.Length;
// put smallest element in position to serve as sentinel
int exchanges = 0;
for (int i = n - 1; i > 0; i--)
{
if (AssistSort<T>.Less(seq[i], seq[i - 1]))
{
AssistSort<T>.Exch(seq, i, i - 1);
exchanges++;
}
}
if (exchanges == 0) return;
// insertion sort with half-exchanges
for (int i = 2; i < n; i++)
{
T v = seq[i];
int j = i;
while (AssistSort<T>.Less(v, seq[j - 1]))
{
//右移,seq[j]只跟当前比较的值V比较了1次
seq[j] = seq[j - 1];
j--;
}
seq[j] = v;
}
}
}
插入和选择比较
插入排序改进二分快速定位
可以看到插入左边都是已近排号顺序的,可以使用二分法快速确定待插入数的位置。
public static class BinaryInsertionSort<T> where T : IComparable<T>
{
public static void Sort(T[] seq)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (!seq.Any())
return;
int n = seq.Length;
for (int i = 1; i < n; i++)
{
// binary search to determine index j at which to insert a[i]
T v = seq[i];
int lo = 0, hi = i;
while (lo < hi)
{
int mid = lo + (hi - lo) / 2;
if (AssistSort<T>.Less(v, seq[mid])) hi = mid;
else lo = mid + 1;
}
// insetion sort with "half exchanges"
// (insert a[i] at index j and shift a[j], ..., a[i-1] to right)
for (int j = i; j > lo; --j)
seq[j] = seq[j - 1];
seq[lo] = v;
}
}
}
结论D:对于随机排序的无重复主键的数组,插入排序和选择排序的运行时间是平方级别的,两者之比应该是一个较小的常数。
希尔排序
大规模乱序的插入排序很慢,因为它只会交换相邻的元素,如果最小的元素正好在数组的尽头,那么把它移动到正确位置就需要N-1次移动,希尔排序就是为了解决这个问题,交换不相邻的元素以对数组的局部进行排序。
希尔排序的思想:数组中任意间隔为h的元素都是有序的,这样的数组成为h有序数组。一个h有序的数组是h个相互独立的有序数组组成的一个数组。
希尔排序高效的原因是:它权衡了子数组的规模和有序性。
/// <summary>
/// 希尔排序,插入排序的变种
/// 其实就是从h项开始选定,跟之前的h项进行比较交换
/// 然后再缩小h进行重复的插入排序,h缩小的顺序,1 4 13 40.。。。
/// 最后一次一定是1,就是插入排序,只不过这时序列已经大部分顺序,所以进行了少量交换操作
/// </summary>
/// <typeparam name="T"></typeparam>
public class ShellSort<T> where T : IComparable<T>
{
public static void Sort(T[] seq)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (!seq.Any())
return;
int count = seq.Length;
int h = 1;
//求出最大限度间隔,
while (h < count / 3) h = 3 * h + 1;//1 4 13 40 121 364 1093
while (h >= 1)
{//将数组变成h有序
for (int i = h; i < count; i++)
{
//将a[i]插入a[i-h],a[i-2*h],a[i-3*h].....
for (int j = i; j >= h && AssistSort<T>.Less(seq[j], seq[j - h]); j -= h)
{
AssistSort<T>.Exch(seq, j, j - h);
}
}
h = h / 3;
}
//0 1 2 3 4 5 6 h=4
//4:0 5:1 6:2
//h=1
//1:0 2:1 3:2
}
public static void SortPro(T[] seq)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (!seq.Any())
return;
int count = seq.Length;
int h = 1;
//求出最大限度间隔,
while (h < count / 2) h = 2 * h + 1;//1 3 7 15 31
while (h >= 1)
{//将数组变成h有序
for (int i = h; i < count; i++)
{
for (int j = i; j >= h && AssistSort<T>.Less(seq[j], seq[j - h]); j -= h)
{
AssistSort<T>.Exch(seq, j, j - h);
}
}
h = h / 2;
}
}
public static void SortPlus(T[] seq)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (!seq.Any())
return;
int count = seq.Length;
int h = 1;
//求出最大限度间隔,
while (h < count / 4) h = 4 * h + 1;//1 5 21 85
while (h >= 1)
{//将数组变成h有序
for (int i = h; i < count; i++)
{
for (int j = i; j >= h && AssistSort<T>.Less(seq[j], seq[j - h]); j -= h)
{
AssistSort<T>.Exch(seq, j, j - h);
}
}
h = h / 4;
}
}
}
希尔排序对任意排序的数组表现都很好。
希尔排序能够解决一些初级排序算法无能为力的问题,通过提升速度来解决其他方式无法解决的问题是研究算法的设计和性能的主要原因。
希尔排序:当一个h有序的数组按照增幅K排序后,它仍然是h有序的
希尔排序的运行时间不到平方级别,在最后情况下是N^(3/2)
性质E:使用递增序列1,4,13,40,121,364……的希尔排序所需的比较次数不会超出N的若干倍乘以递增序列的长度。
如果需要解决一个排序问题,优先先使用希尔,再使用其他,因为它不需要额外的内存,其他的高效排序不会比希尔排序快2倍。
归并排序自顶而下
归并排序核心思想是:可以先(递归地)将它分成两个数组f分别排序,然后结果归并起来。
性能:能够将任意长度为N的数组排序所需的时间和NlogN成正比。
分而治之
/// <summary>
/// 归并排序,自顶而下,使用了分治的思想,NlgN
/// 核心思想就是两个顺序数列合并成一个数列
/// 将数列以中间index为界分为前后,然后递归分,直到子数组只有2个然后合并。
/// 浪费了大量的空间,每次合并都需一个Auxiliary
/// </summary>
/// <typeparam name="T"></typeparam>
public class MergeSortUB<T> where T : IComparable<T>
{
/// <summary>
/// 将数列low-mid和ming-high合并
/// low-mid必须顺序
/// mid-high必须顺序
/// </summary>
/// <param name="seq">数列</param>
/// <param name="aux">合并需要数组</param>
/// <param name="lo">low所在index</param>
/// <param name="mid">mind所在index</param>
/// <param name="hi">hi所在index</param>
private static void Merge(T[] seq, T[] aux, int lo, int mid, int hi)//aux:auxiliary备用
{
// 合并前两个数列必须是顺序的,私有方法不需要验证
//if(!AssistSort<T>.IsSorted(seq, lo, mid))
// throw new ArgumentException("seq is not sort in low to mid");
//if(!AssistSort<T>. IsSorted(seq, mid
//+1, hi))
// throw new ArgumentException("seq is not sort in mid to high");
// 备份
for (int k = lo; k <= hi; k++)
{
aux[k] = seq[k];
}
// 合并算法,i左半边开始位,j右半边开始位
int i = lo, j = mid + 1;
for (int k = lo; k <= hi; k++)
{
if (i > mid) seq[k] = aux[j++];//左半边用尽,取右半边元素
else if (j > hi) seq[k] = aux[i++];//右半边用尽,取左半边元素
else if (AssistSort<T>.Less(aux[j], aux[i])) seq[k] = aux[j++];//右半边元素小于左边元素,取右半边元素
else seq[k] = aux[i++];//取左半边元素
}
}
private static void Sort(T[] seq, T[] aux, int lo, int hi)
{
if (hi <= lo) return;
int mid = lo + (hi - lo) / 2;
Sort(seq, aux, lo, mid);
Sort(seq, aux, mid + 1, hi);
Merge(seq, aux, lo, mid, hi);
}
public static void Sort(T[] seq)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (!seq.Any())
return;
T[] aux = new T[seq.Length];
Sort(seq, aux, 0, seq.Length - 1);
}
}
通过一棵树来解释归并的性能,每个节点都标识通过Merge()方法并归成的子数组,这棵树正好n层,k表示在K层,K层有2^K个节点,每个节点的长度为2^(n-k),那么并归最多需要2^(n-k)次比较。那么每层需要比较次数2^K*2^(n-k)=2^n,n层总共为n*2^n=NlgN;
命题F:对于长度为N的任意数组,自顶向下的并归需要1/2NlgN至NlgN次比较。
命题G:对于长度为N的任意数组,自顶向下的归并排序最多需要访问数组6NlgN次。
归并统计交换次数
/// <summary>
/// 统计交换次数,自顶而下并归方法
/// </summary>
/// <typeparam name="T"></typeparam>
public class Inversion<T> where T : IComparable<T>
{
public static long Count(T[] seq)
{
T[] b = new T[seq.Length];
T[] aux = new T[seq.Length];
for (int i = 0; i < seq.Length; i++)
b[i] = seq[i];
long inversions = Count(seq, b, aux, 0, seq.Length - 1);
return inversions;
}
private static long Count(T[] a, T[] b, T[] aux, int lo, int hi)
{
long inversions = 0;
if (hi <= lo) return 0;
int mid = lo + (hi - lo) / 2;
inversions += Count(a, b, aux, lo, mid);
inversions += Count(a, b, aux, mid + 1, hi);
inversions += Merge(b, aux, lo, mid, hi);
if (inversions != Brute(a, lo, hi))
throw new Exception("inversion is not right");
return inversions;
}
private static long Merge(T[] a, T[] aux, int lo, int mid, int hi)
{
long inversions = 0;
// copy to aux[]
for (int k = lo; k <= hi; k++)
{
aux[k] = a[k];
}
// merge back to a[]
int i = lo, j = mid + 1;
for (int k = lo; k <= hi; k++)
{
if (i > mid) a[k] = aux[j++];
else if (j > hi) a[k] = aux[i++];
else if (AssistSort<T>.Less(aux[j], aux[i])) { a[k] = aux[j++]; inversions += (mid - i + 1); }
else a[k] = aux[i++];
}
return inversions;
}
private static long Brute(T[] a, int lo, int hi)
{
long inversions = 0;
for (int i = lo; i <= hi; i++)
for (int j = i + 1; j <= hi; j++)
if (AssistSort<T>.Less(a[j], a[i])) inversions++;
return inversions;
}
}
归并排序小数组用插值排序
使用插入排序处理小规模数组(长度小于15),一般可以将归并排序的运行时间减少10%~15%
归并排序改进a[mid]小于等于a[mid+1]
这样使得任意有序的子数组算法运行时间线性
归并排序不复制元素到辅助数组
归并排序自底向上
/// <summary>
/// 并归排序,自底而上,使用了分治的思想,NlgN
/// </summary>
/// <typeparam name="T"></typeparam>
public class MergeSortBU<T> where T : IComparable<T>
{
/// <summary>
/// 将数列low-mid和ming-high合并
/// low-mid必须顺序
/// mid-high必须顺序
/// </summary>
/// <param name="seq">数列</param>
/// <param name="aux">合并需要数组</param>
/// <param name="lo">low所在index</param>
/// <param name="mid">mind所在index</param>
/// <param name="hi">hi所在index</param>
private static void Merge(T[] seq, T[] aux, int lo, int mid, int hi)
{
// copy to aux[]
for (int k = lo; k <= hi; k++)
{
aux[k] = seq[k];
}
// merge back to a[]
int i = lo, j = mid + 1;
for (int k = lo; k <= hi; k++)
{
if (i > mid) seq[k] = aux[j++]; // this copying is unneccessary
else if (j > hi) seq[k] = aux[i++];
else if (AssistSort<T>.Less(aux[j], aux[i])) seq[k] = aux[j++];
else seq[k] = aux[i++];
}
}
public static void Sort(T[] seq)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (!seq.Any())
return;
int n = seq.Length;
T[] aux = new T[n];
for (int len = 1; len < n; len *= 2) // 1 2 4 8 16
{
for (int lo = 0; lo < n - len; lo += len + len)
{
int mid = lo + len - 1;
int hi = Math.Min(lo + len + len - 1, n - 1);
Merge(seq, aux, lo, mid, hi);
}
}
}
}
命题H:对于长度为N的任意数组,自底向上归并排序需要1/2NlgN至NlgN次比较,最多访问数组6NlgN次。
当数组长度为2的幂的时候,自顶向下和自底向上的归并排序所用的比较次数和数组访问次数正好相同。
自底向上的归并排序比较适合链表组织的数据。这种方法只需要重新组织链表的链接就能将链表原地排序
自顶向下:化整为零。
自下向上:循序渐进。
命题I:没有任何基于比较的算法能够保证使用少于lg(N!)~NlgN次比较将长度为N的数组排序。
命题J:归并排序是一种渐进最有的基于比较排序的算法
快速排序
快速排序和归并排序是互补的,快速排序方式当两个子数组都有序时,整个数据就自然有序了。
/// <summary>
/// 快速算法应该是并归算法的一个变种,并归算是找中点,一切为二
/// 快速算法也是分治算法的一种,怎么将序列切分呢,哪个相当于中点的数是这样的,这个数的前面都比这个数小,这个数的后面都比这个数大,
/// 这样不断切分,最后自然切分完自然序列排序好了。
/// 如果有大量重复的数据这个实现有点问题
/// </summary>
/// <typeparam name="T"></typeparam>
public static class QuickSort<T> where T : IComparable<T>
{
public static void Sort(T[] seq)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (!seq.Any())
return;
Sort(seq, 0, seq.Length - 1);
}
private static void Sort(T[] seq, int lo, int hi)
{
if (hi <= lo) return;
int j = Partition(seq, lo, hi);
Sort(seq, lo, j - 1);
Sort(seq, j + 1, hi);
}
/// <summary>
/// 切分
/// a[lo..hi] => a[lo..j-1] = a[j] = a[j+1..hi]
/// </summary>
/// <param name="seq"></param>
/// <param name="lo"></param>
/// <param name="hi"></param>
/// <returns></returns>
private static int Partition(T[] seq, int lo, int hi)
{
int i = lo;
int j = hi + 1;
T v = seq[lo];
//确保index k之前的数都比其小,index k之后的数都比其大
while (true)
{
// 找到第一个比第一项大的数index叫largeindex
while (AssistSort<T>.Less(seq[++i], v))
{
if (i == hi) break;
}
// 找到最后一个比第一项小的数index,litindex
while (AssistSort<T>.Less(v, seq[--j]))
{
if (j == lo) break; // redundant since a[lo] acts as sentinel
}
// 检查第一个和最后一个是否交叉了
if (i >= j) break;
//将第一个项和最后一个项交换,确保比第一项小的数再第一项前面
AssistSort<T>.Exch(seq, i, j);
}
//这个切分点就是第一个数
AssistSort<T>.Exch(seq, lo, j);
// 现在, a[lo .. j-1] <= a[j] <= a[j+1 .. hi]
return j;
}
}
交换部分
可能出现的问题:
- 原地切分:使用辅助数组,很容易实现切分,但切分后的数组复制回去的开销会比较多。
- 别越界:
- 保持随机性:
- 终止循环:
- 处理切分元素值的重复情况
- 终止递归
性能优势:
- 用一个递增的索引将数组元素和一个定值比较。很难有排序算法比这更小的内循环。
- 快速排序的比较次数比较少。
- 快速排序最好的情况是:每次都正好将数组对半分,
命题K:将长度为N的无重复数组排序,快速排序平均需要2NlnN次比较。
命题L:快速排序最多需要约N^2/2次比较,但随机打乱数组能够预防这种情况。每次切分找到的切分值都是最大值或最小值
快速排序该进熵最优
如果有大量重复的元素,将数组切成三份,小于,等于和大于
/// <summary>
/// 该算法主要针对序列中有大量相同的项,将序列分为三部分,小于,等于,大于
/// </summary>
/// <typeparam name="T"></typeparam>
public class QuickSort3WaySort<T> where T : IComparable<T>
{
private static void Sort(T[] seq, int lo, int hi)
{
if (hi <= lo) return;
//切分的lt,gt
int lt = lo, gt = hi;
T v = seq[lo];
int i = lo + 1;
//切分
while (i <= gt)
{
int cmp = seq[i].CompareTo(v);
if (cmp < 0) AssistSort<T>.Exch(seq, lt++, i++);
else if (cmp > 0) AssistSort<T>.Exch(seq, i, gt--);
else i++;
}
//切分完成
// a[lo..lt-1] < v = a[lt..gt] < a[gt+1..hi].
Sort(seq, lo, lt - 1);
Sort(seq, gt + 1, hi);
}
public static void Sort(T[] seq)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (!seq.Any())
return;
//AssistSort<T>.Shuffle(seq);
Sort(seq, 0, seq.Length - 1);
}
}
快速排序改进三取样切分并小数组插入
取子数组的中位数为切分点,取样大小s设为3并用大小s设为3并用大小居中的元素切分的效果最好。
问题:
- 小数组时插入排序比快速排序要快
- 因为递归,快速排序sort()方法在小数组中也会调用自己。
简单操作将
if(hi <= lo) return;
替换成
if(hi <= lo+M) { Insertion.Sort(a,lo,hi); return;}
/// <summary>
/// 快速算法的痛点是在小序列的排序性能并不好,所有该X实现在小序列的时候排序采用插入排序算法
/// </summary>
/// <typeparam name="T"></typeparam>
public class QuickSortX<T> where T : IComparable<T>
{
private static readonly int INSERTION_SORT_CUTOFF = 8;
private static void Sort(T[] seq, int lo, int hi)
{
if (hi <= lo) return;
// cutoff to insertion sort (Insertion.sort() uses half-open intervals)
int n = hi - lo + 1;
if (n <= INSERTION_SORT_CUTOFF)//少量数组插入排序
{
InsertionSort<T>.Sort(seq, lo, hi + 1);
return;
}
int j = Partition(seq, lo, hi);
Sort(seq, lo, j - 1);
Sort(seq, j + 1, hi);
}
// partition the subarray a[lo..hi] so that a[lo..j-1] <= a[j] <= a[j+1..hi]
// and return the index j.
private static int Partition(T[] seq, int lo, int hi)
{
int n = hi - lo + 1;
int m = Median3(seq, lo, lo + n / 2, hi);//三取样点,lo,中点,hi
AssistSort<T>.Exch(seq, m, lo);
int i = lo;
int j = hi + 1;
T v = seq[lo];
// a[lo] is unique largest element
while (AssistSort<T>.Less(seq[++i], v))
{
if (i == hi) { AssistSort<T>.Exch(seq, lo, hi); return hi; }
}
// a[lo] is unique smallest element
while (AssistSort<T>.Less(v, seq[--j]))
{
if (j == lo + 1) return lo;
}
// the main loop
while (i < j)
{
AssistSort<T>.Exch(seq, i, j);
while (AssistSort<T>.Less(seq[++i], v)) ;
while (AssistSort<T>.Less(v, seq[--j])) ;
}
// put partitioning item v at a[j]
AssistSort<T>.Exch(seq, lo, j);
// now, a[lo .. j-1] <= a[j] <= a[j+1 .. hi]
return j;
}
private static int Median3(T[] seq, int i, int j, int k)
{
return (AssistSort<T>.Less(seq[i], seq[j]) ?
(AssistSort<T>.Less(seq[j], seq[k]) ? j : AssistSort<T>.Less(seq[i], seq[k]) ? k : i) :
(AssistSort<T>.Less(seq[k], seq[j]) ? j : AssistSort<T>.Less(seq[k], seq[i]) ? k : i));
}
public static void Sort(T[] seq)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (!seq.Any())
return;
//AssistSort<T>.Shuffle(seq);
Sort(seq, 0, seq.Length - 1);
}
}
命题M:不存在任何基于比较的排序算法能够保证在NH-N次比较之内将N个元素排序,
命题N:三向切分的快速排序需要(2ln2)NH次比较
快速排序中型数组排序
/// <summary>
/// 该算法进一步升级,不仅在小序列排序时实现插入排序,在中型序列实现三分排序。
/// </summary>
/// <typeparam name="T"></typeparam>
public class QuickBentleyMcIlroySort<T> where T : IComparable<T>
{
// cutoff to insertion sort, must be >= 1
private static readonly int INSERTION_SORT_CUTOFF = 8;
// cutoff to median-of-3 partitioning
private static readonly int MEDIAN_OF_3_CUTOFF = 40;
/// <summary>
/// Rearranges the array in ascending order, using the natural order.
/// </summary>
/// <param name="seq"></param>
public static void Sort(T[] seq)
{
if (seq == null)
throw new ArgumentNullException("seq is null");
if (!seq.Any())
return;
Sort(seq, 0, seq.Length - 1);
}
private static void Sort(T[] seq, int lo, int hi)
{
int n = hi - lo + 1;
// cutoff to insertion sort
if (n <= INSERTION_SORT_CUTOFF)
{
InsertionSort(seq, lo, hi);
return;
}
// use median-of-3 as partitioning element
else if (n <= MEDIAN_OF_3_CUTOFF)
{
int m = Median3(seq, lo, lo + n / 2, hi);
AssistSort<T>.Exch(seq, m, lo);
}
// use Tukey ninther as partitioning element
else
{//其实就是扩大切分点的寻找范围,最有希望找到中间点,达到最有的快速排序
int eps = n / 8;
int mid = lo + n / 2;
int m1 = Median3(seq, lo, lo + eps, lo + eps + eps);
int m2 = Median3(seq, mid - eps, mid, mid + eps);
int m3 = Median3(seq, hi - eps - eps, hi - eps, hi);
int ninther = Median3(seq, m1, m2, m3);
AssistSort<T>.Exch(seq, ninther, lo);
}
// Bentley-McIlroy 3-way partitioning
int i = lo, j = hi + 1;
int p = lo, q = hi + 1;
T v = seq[lo];
while (true)
{
while (AssistSort<T>.Less(seq[++i], v))
if (i == hi) break;
while (AssistSort<T>.Less(v, seq[--j]))
if (j == lo) break;
// pointers cross
if (i == j && AssistSort<T>.Eq(seq[i], v))
AssistSort<T>.Exch(seq, ++p, i);
if (i >= j) break;
AssistSort<T>.Exch(seq, i, j);
if (AssistSort<T>.Eq(seq[i], v)) AssistSort<T>.Exch(seq, ++p, i);
if (AssistSort<T>.Eq(seq[j], v)) AssistSort<T>.Exch(seq, --q, j);
}
i = j + 1;
for (int k = lo; k <= p; k++)
AssistSort<T>.Exch(seq, k, j--);
for (int k = hi; k >= q; k--)
AssistSort<T>.Exch(seq, k, i++);
Sort(seq, lo, j);
Sort(seq, i, hi);
}
// sort from a[lo] to a[hi] using insertion sort
private static void InsertionSort(T[] seq, int lo, int hi)
{
for (int i = lo; i <= hi; i++)
for (int j = i; j > lo && AssistSort<T>.Less(seq[j], seq[j - 1]); j--)
AssistSort<T>.Exch(seq, j, j - 1);
}
// return the index of the median element among a[i], a[j], and a[k]
private static int Median3(T[] seq, int i, int j, int k)
{
return (AssistSort<T>.Less(seq[i], seq[j]) ?
(AssistSort<T>.Less(seq[j], seq[k]) ? j : AssistSort<T>.Less(seq[i], seq[k]) ? k : i) :
(AssistSort<T>.Less(seq[k], seq[j]) ? j : AssistSort<T>.Less(seq[k], seq[i]) ? k : i));
}
}