kNN(k-近邻)分类算法的实现
(1) 简介:
(2)算法描述:
(3)
<?php
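/*
*KNN (k-nearest neighbours) method: implementation of the classification algorithm
*/ /*
*Reads the contents of a .txt file into an array; $filename: name of the file
*/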
//--------------------------------------------------------------------
/**
 * Reads a text file into a two-dimensional array of string fields.
 *
 * Every non-empty line of the file becomes one row, and each row is that line
 * split on single space characters. Line endings may be "\r\n", "\n" or "\r".
 * Empty lines (including the one produced by a trailing newline) are dropped
 * so that they do not show up as bogus one-field rows.
 *
 * @param string $filename Path of the file to read.
 * @return array List of rows, each row being a list of strings. An empty
 *               array is returned when the file cannot be read.
 */
function getFileContent($filename)
{
    $content = file_get_contents($filename);
    if ($content === false) {
        // file_get_contents() has already raised a warning; hand back an
        // empty table instead of parsing the boolean as if it were text.
        return [];
    }

    $rows = [];
    foreach (preg_split('/\r\n|\r|\n/', $content) as $line) {
        if ($line === '') {
            continue;
        }
        $rows[] = explode(' ', $line);
    }

    return $rows;
}
//--------------------------------------------------------------------
/*
*Shell sort algorithm
*/
//--------------------------------------------------------------------
/**
 * Shell sort in descending order.
 *
 * Rows are ordered by the value stored at index 1. Only the fields at index 0
 * and index 1 of each row are moved while sorting. The gap starts at half the
 * row count and is halved (integer division) until it drops below 1.
 *
 * @param array $array List of rows of the form [0 => label, 1 => score].
 * @return array The sorted copy of $array.
 */
function shell_sort($array)
{
    for ($gap = (int) (count($array) / 2); $gap >= 1; $gap = (int) ($gap / 2)) {
        for ($pos = $gap; $pos < count($array); $pos++) {
            // Hold the row being inserted while larger-gap neighbours shift.
            $heldLabel = $array[$pos][0];
            $heldScore = $array[$pos][1];

            $slot = $pos;
            while ($slot - $gap >= 0 && $array[$slot - $gap][1] < $heldScore) {
                // A smaller score sits ahead of the held row: push it back.
                $array[$slot][1] = $array[$slot - $gap][1];
                $array[$slot][0] = $array[$slot - $gap][0];
                $slot -= $gap;
            }

            $array[$slot][1] = $heldScore;
            $array[$slot][0] = $heldLabel;
        }
    }

    return $array;
}
//-------------------------------------------------------------------------
/*
*KNN algorithm
*$test: test sample (one row); $train: training samples; $flagsyes: the "yes" label; $flagsno: the "no" label
*/
//--------------------------------------------------------------------
/**
 * Classifies one sample with the k-nearest-neighbour rule.
 *
 * Row 0 of $train is skipped. For every other training row the fields at
 * indexes 1 .. count($test) - 2 are compared with the same fields of $test
 * (index 0 and the last field of $test take no part). The distance is the
 * square root of the number of differing fields, and the similarity is
 * 1 / (1 + distance). Rows are ranked by similarity with shell_sort() and the
 * first k = floor(sqrt(number of ranked rows)) of them vote with their last
 * field as the label.
 *
 * @param array $test     The sample to classify (list of field values).
 * @param array $train    Training table: list of rows, label in the last field.
 * @param mixed $flagsyes Label counted as a "yes" vote.
 * @param mixed $flagsno  Label returned when "yes" has no strict majority.
 * @return mixed $flagsyes when more than half of the k voters carry that
 *               label, otherwise $flagsno (ties go to $flagsno).
 */
function KNN($test,$train,$flagsyes,$flagsno)
{
    $trainCount = count($train);
    $attributeEnd = count($test) - 1; // exclusive bound: last field is not an attribute

    $neighbours = [];
    for ($i = 1; $i < $trainCount; $i++) {
        // Each differing attribute contributes (1 - 0)^2 = 1 to the squared
        // distance, each matching one contributes 0.
        $mismatches = 0;
        for ($j = 1; $j < $attributeEnd; $j++) {
            // Loose comparison is kept so that existing results do not change.
            if (!($test[$j] == $train[$i][$j])) {
                $mismatches++;
            }
        }

        $label = $train[$i][count($train[$i]) - 1];
        $neighbours[] = [$label, 1 / (1 + sqrt($mismatches))];
    }

    $ranked = shell_sort($neighbours);

    // k = sqrt(N); the max() keeps sqrt() away from -1 when $train is empty.
    $k = (int) sqrt(max(0, $trainCount - 1));

    // Must start at 0: it is read below even when no neighbour votes "yes".
    $count_yes = 0;
    for ($i = 0; $i < $k; $i++) {
        if ($ranked[$i][0] == $flagsyes) {
            $count_yes++;
        }
    }
    $count_no = $k - $count_yes;

    return $count_yes > $count_no ? $flagsyes : $flagsno;
}
//--------------------------------------------------------------------
// Main script: classify every data row of test.txt against train.txt,
// write the completed table to result.txt and print it.
//--------------------------------------------------------------------
$train = getFileContent("train.txt");
$test = getFileContent("test.txt");

// Row 0 of the test table is not classified; its width decides which column
// receives the predicted label.
if (isset($test[0])) {
    $classColumn = count($test[0]) - 1;
    for ($i = 1; $i < count($test); $i++) {
        // Labels are passed as strings: the bare words Y / N are undefined
        // constants and raise an Error on PHP 8.
        $test[$i][$classColumn] = KNN($test[$i], $train, 'Y', 'N');
    }
}
/*
*Write the contents of the array to the .txt file
*/
//--------------------------------------------------------------------
$fp = fopen('result.txt', 'wb');
if ($fp !== false) {
    foreach ($test as $row) {
        // Every field is followed by a tab, every row by CRLF.
        foreach ($row as $field) {
            fwrite($fp, $field . "\t");
        }
        fwrite($fp, "\r\n");
    }
    fclose($fp);
}
// When fopen() fails it has already raised a warning; the table is still
// printed below.
//--------------------------------------------------------------------
/*
*Print the result
*/
//--------------------------------------------------------------------
echo "<pre>";
print_r($test);
echo "</pre>";
//--------------------------------------------------------------------
?>