2017-07-04

排序算法总结

排序

有n个记录的序列为{R1，R2……Rn}，其对应关键字为{K1，K2……Kn}，需确定一种排列{P1,P2……Pn}使其关键字满足KP1<KP2<KP3……<KPn等关系。即使得序列成为一个按关键字有序的序列{Rp1，Rp2，……Rpn}，这样的操作就称为排序
用人话说就是从大到小，要么从小到大排列就OK。

排序的稳定性
存在两个关键字相等Ki = Kj(i != j,i < j),如果从小到大排序后 Ri仍在Rj前面，则是稳定排序，反之，则是不稳定排序。
内排序和外排序
内排序就是待排序所有记录放置于内存中，外排序需要在内外存进行多次交换，我们多使用内排序。
影响内排序的性能：时间性能，就是时间复杂度；辅助空间，就是空间复杂度；算法复杂性，是算法本身的复杂性
排序分类
按照算法复杂度排序算法可分为两类：

冒泡排序、简单选择排序、直接插入排序属于插入排序

希尔排序、堆排序、归并排序、快速排序属于改进算法。

数据的产生

为了便于查看排序算法的效果，随机产生随机数，并存储在文件中，使用时读取即可

// 产生长度为length的随机数，并保持到文件
bool generateRandom(const string &  filename, int length)
{
	//生成随机数文件
	ofstream fout(filename);
	double time_start = (double)clock();
	srand(time(0));
	for(int i = 0; i < length; i++)
	{
		int tmp = rand();
		if(i == 0)
			fout << tmp;
		else
			fout << " " << tmp;
	}
	fout << endl;
	fout.close();
	cout << "Done" << endl;
	double time_end = (double)clock();
	printf("IO time: %.2fms\n", time_end-time_start);
	return true;
}

// 打开文件，获取长度为length的随机数
vector<int> parasRandom(const string & filename, int length)
{
	vector<int> a(length);
	ifstream fin(filename);
	double time_start = (double)clock();
	srand(time(0));
	for(int i = 0; i < length; i++)
	{
		fin >> a[i];
	}
	fin.close();
	cout << "Done" << endl;
	double time_end = (double)clock();
	printf("IO time: %.2fms\n", time_end-time_start);
	return a;
}

测试函数

主函数……测试是换排序算法名称就OK了

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <algorithm>
#include <ctime>

using namespace std;
int main()
{
	vector<int> tmp;
	//generateRandom("Random.txt", 10000);
	tmp = parasRandom("Random.txt", 1000);
	for(int i = 0; i < tmp.size(); i++)
		cout << tmp[i] << endl;
	cout << "Result is: \n" << endl;
	mergingSort(tmp,0);
	for(auto it = tmp.begin(); it != tmp.end(); it++)
		cout << *it << endl;
	system("pause");
	return 0;
}

冒泡法排序

复杂度：最好的情况

// 从小到大
// 时间复杂度： O(n*n)
void bubbleSort(vector<int> & a, const int n)
{
	for(int i = 0; i < a.size(); i++)
	{
		for(int j = i+1; j < a.size(); j++)
		{
			if(a[j] < a[i])
				swap(a[i],a[j]);   //STL算法
		}
	}
}

// 时间复杂度 O(n*n)
void bubbleSort1(vector<int> & a, const int n)
{
	for(int i = 0; i < a.size(); i++)
	{
		for(int j = a.size()-1; j > i; j--)
		{
			if(a[j] < a[j-1])
				swap(a[j],a[j-1]);
		}
	}
}

// 时间复杂度 最好的情况(顺序)：O(n-1)  最坏(逆序): O(n*(n-1)/2) 故O(n*n)
void bubbleSort2(vector<int> & a, const int n)
{
	bool flag = true;
	for(int i = 0; i < a.size() && flag; i ++)
	{
		flag = false;  //如果这个一个循环都没有改变位置，说明不再需要改变了。退出大循环
		for(int j = a.size()-1; j > i; j--)
		{
			if(a[j] < a[j-1])
			{
				flag = true;
				swap(a[j],a[j-1]);
			}
		}
	}
	
}

简单选择排序

其实冒泡法排序第一种实质就是简单选择排序，第二种才是正宗的冒泡法。所以这里就不贴代码了。时间复杂度O(n*n).

直接插入排序

原理就是，一次性插入至比自己都大的前面，而不是不断交换

void insertSort(vector<int> & a, const int n)
{
	for(int i = 1; i < a.size(); i++)
	{
		if(a[i] < a[i-1])
		{
			int temp = a[i];
			int j,n = i -1;
			bool zeroFlag = false;
			for(j = i-1; a[j] > temp; j--)  //这里没有设置端点，所以需要特别注意，可以设置哨兵
			{
				n = j;
				a[j+1] = a[j];
				if(j == 0)
				{
					break;
				}
			}
			a[n] = temp;
		}
	}
}

时间复杂度：最好的情况为顺序 O(n) 最坏的情况为逆序O(nn) 故插入排序的时间复杂度为O(nn)

希尔排序

原理：采用跳跃分割策略，相隔某个增量的序列为子序列，把每个子序列从小到大搞定，增量递减为1时就OK了。当然其并不是一种稳定的排序方法

void shellSort(vector<int> & a, const int n)
{
	int i,j;
	int increment = a.size();
	do{
		increment = increment/3;   //初始化一个步长
		for(i = increment; i < a.size(); i++)
		{
			if(a[i] < a[i-increment])
			{

				int temp = a[i];  //这个需要前移				
				for(j = i-increment; j >= 0 && a[j] > temp; j -= increment)
				{
					swap(a[j],a[j+increment]);
				}				
			}
		}
	}while(increment>1);
}

时间复杂度：希尔增量时间复杂度O(n*n),有个叫Hibbard增量时间复杂度为O(n^(3/2)),跟增量序列有关。但是效果要优于O(n^2)的算法。

堆排序

原理：利用二叉树顶堆的性质实现排序，顶堆最大值永远在父结点，所以每次得到剩余待排序数据最大值，再将剩余数据调整成顶堆，循环即可。

void heapSort(vector<int> & a, const int n)
{
	a.insert(a.begin(),a.size());
	// heapSort begin
	int i;
	//把整个待排序数据搞成大顶堆这个部分很重要就基本构建了大概的从大到小的顺序
	for(i = a[0]/2;i>0;i--)        
		heapAdjust(a,i,a[0]);

	for(i = a[0]; i>1; i--)       //循环把最大值放到队尾，再重新搞成大顶堆
	{
		swap(a[1],a[i]);
		heapAdjust(a,1,i-1);
	}
	a.erase(a.begin());
}

//把以m为父节点的长度为n的二叉树调整为大顶堆
void heapAdjust(vector<int> & a, int m, int n)
{
	int temp,j;
	temp = a[m];   //暂存父节点
	for(j = 2*m; j <= n; j *= 2)
	{
		if(j<n && a[j] < a[j+1])  //这里j<n而不是j<=n是  j必是左节点，如果其是二叉树结尾，后面就无法比了
			++j;
		if(temp >= a[j])  //如果temp大于了两个左右孩子就不用再往下比了，因为构建的时候已经是一个父节点比子节点大的顶堆了
			break;
		//a[m] = a[j];
		swap(a[m],a[j]);
		m = j;
	}
}

时间复杂度：构建顶堆的时间O(n),取堆顶记录并重建O(logn),一共取了n-1次堆顶，所以时间复杂度为O(nlogn).不稳定排序方法，适合数据较多的情况。

归并排序

原理：利用二叉树结构的思想，对待排序数据进行两两合并，再合并……
步骤：先把待排序数据递归展开，分成一个个数据，再递归合并，最后成为原数据大小的数组

void mergingSort(vector<int> & a, const int n)
{
	mSort(a,a,0,a.size()-1);
}

void mSort(vector<int> & a,vector<int> & b, int begin, int end)
{
	int m;
	vector<int> temp(a.size());
	if(begin == end)
		b[begin] = a[begin];
	else
	{
		m = (begin+end)/2;
		mSort(a,temp,begin,m);
		mSort(a,temp,m+1,end);
		mMerge(temp,b,begin,m,end);
	}
}

void mMerge(vector<int> & a, vector<int> & b,int begin, int middle, int end)
{
	int j,k,l;
	for(j=middle+1,k=begin; begin<=middle&&j<=end; k++)
	{
		if(a[begin] < a[j])
			b[k] = a[begin++];
		else
			b[k] = a[j++];
	}
	if(begin <= middle)
	{
		for(l=0; l <= middle-begin; l++)
			b[k+l] = a[begin+l];
	}
	if(j<=end)
	{
		for(l=0;l <= end-j; l++)
			b[k+l] = a[j+l];
	}
}

时间复杂度：每一次递归都把整个数据两两合并（不是两个，是两组）需要扫描所有数据，复杂度为O(n),根据二叉树性质，需要递归logn次，故时间复杂度为O(nlogn).

上面是用的递归方式，非递归方式以后有时间再更新。

快速排序

原理：一种比较高级的交换排序的方法，就是每次扫描数据，指定枢轴从而把数据分为两部分，使枢轴前面数据的比枢轴小，后面的比他大，再递归……

void quickSort(vector<int> & a, const int n)
{
	qSort(a,0,a.size());
}

void qSort(vector<int> & a, int low, int high)
{
	int pivot;  //快排中的精华，枢轴 前面都比他小， 后面都比他大
	if(low < high)
	{
		pivot = getPivot(a,low,high);
		qSort(a,low,pivot-1);
		qSort(a,pivot,high);
	}
}

int getPivot(vector<int> & a, int low, int high)
{
	int pivot = a[low];
	while(low < high)
	{
		while(low < high && a[high] >= pivot)
			high--;
		swap(a[low],a[high]);
		while(low < high && a[low] <= pivot)
			low++;
		swap(a[low],a[high]);
	}
	return low;
}

时间复杂度：跟枢轴的位置有关，平均时间复杂度为O(nlogn),由于有跳跃交换，非稳定排序。

快速排序的优化

其实排序算法都是在不断优化的过程中，即使是综合性能比较厉害的快速排序算法也有很大的优化空间：

优化枢轴的选取
前面我们是直接指定第一个为枢轴，显然这是有问题的，所以优化方法是三数取中，或者九数取中。
优化每次枢轴选取中的不必要交换
前面使用交换对性能有一定影响，但是如果把整个调整枢轴位置过程中换成替换，这样会有一定优化
优化小数据量排序方法
前面直接就是用快排的迭代，但是如果数据量小，反而效果不好。故我们设定一个阈值，小的用插入，大的用快排
优化递归
每个递归对性能都是致命的，所以尽量用循环替换

优化后的代码如下：

void quickSort1(vector<int> & a, const int n) //优化枢轴的选择，不必要的交换，小数组排序，递归操作
{
	qSort2(a,0,a.size()-1);
}
int getPivot1(vector<int> & a, int low, int high)
{
	int pivot;
	int m = (low+high)/2;
	if(a[low] > a[high])
		swap(a[low],a[high]);
	if(a[m] > a[high])
		swap(a[m],a[high]);
	if(a[low] > a[m])
		swap(a[low], a[m]);
	pivot = a[m];
	while(low < high)
	{
		while(low < high && a[high] >= pivot)
			high--;
		a[low] = a[high];
		while(low < high && a[low] <= pivot)
			low++;
		a[high] = a[low];
	}
	a[low] = pivot;
	return low;
}
void qSort1(vector<int> & a, int low, int high)
{
	int pivot;
	const int length = 7;
	if(high-low > length)
	{
		pivot = getPivot1(a,low,high);
		qSort1(a,low, pivot-1);
		qSort1(a,pivot+1,high);
	}
	else
	{
		insertSort(a,0);
	}
}

void qSort2(vector<int> & a, int low, int high)
{
	int pivot;
	const int length = 7;
	if(high-low > length)
	{
		while(low < high)
		{
			pivot = getPivot1(a,low,high);
			qSort1(a,low, pivot-1);
			//qSort1(a,pivot+1,high);
			low = pivot + 1;    //以循环替换递归，减少递归提高性能
		}		
	}
	else
	{
		insertSort(a,0);
	}
}

排序算法代码在我的github可以下载，欢迎提PR或者issue，喜欢也可以star、fellow、fork。

Zelang Liang

New Bird , New Beginning