场景其实可以更有意思,不过抽象成模型却是这样的:在一大堆点中,找出异常的点。对应的解决方案如何呢?
交给Excel来弄吧,可视化的模型如下:
散点图,可以看出分布情况,大致可以识别异常的数据:
气泡图,经过聚类,可以把孤独异常的数据挑出来:
其实线性拟合好像不管用;最管用的还是气泡图。但是如何挑出异常的呢?Excel应该可以深入分析,不过想想实现的原理,应该相对还比较简单。
圆状的气泡还是相对有点麻烦,简化为矩形吧。
这个算法属于最原始的聚类算法:相比其它有名的聚类算法,它简单暴力,不求速度,只求开发简单。
算法原理:
- 1. 从原始数据中挑出一个未聚类的点,将该点聚类,直到原始数据中所有的点都聚完;
- 2. 聚每个点的时候,将与它相邻(x、y 两个方向的距离都小于阈值)的未聚类点添加到该组列表上,直到该组的每个点都聚过。
以下是代码:
/**
 * Brute-force "box" clustering of 2-D points, used to single out isolated points.
 *
 * Two points are neighbours when they differ by less than `dx` on the first
 * coordinate AND less than `dy` on the second one. A cluster is the transitive
 * closure of that relation. Clusters with fewer than `minSize` members are
 * reported as outliers ("fly" points).
 *
 * Public fields:
 *   data_src_  the caller's input rows, untouched
 *   data_      working rows [x, y, id, clusteredFlag]
 *   rtn_jl_    clusters; positions in data_ after jl(), point ids after pick()
 *   rtn_fly_   ids of all points belonging to undersized clusters (after pick())
 *
 * @param {Array<Array>} data    rows [x, y] or [x, y, id]; when the id is
 *                               missing the row position is used as the id
 * @param {number} [dx=2.2]      neighbourhood half-width on the first coordinate
 * @param {number} [dy=dx]       neighbourhood half-width on the second coordinate
 * @param {number} [minSize=3]   clusters smaller than this are treated as outliers
 */
function JL(data, dx, dy, minSize)
{
    // `== undefined` is deliberate: it accepts both undefined and null.
    this.dx_ = dx == undefined ? 2.2 : dx;
    this.dy_ = dy == undefined ? this.dx_ : dy;
    this.min_size_ = minSize == undefined ? 3 : minSize;
    // A missing data set is treated as an empty one instead of throwing.
    this.data_src_ = data == undefined ? [] : data;
    this.data_ = [];
    for (var i = 0; i < this.data_src_.length; i++)
    {
        var row = this.data_src_[i];
        this.data_.push([row[0], row[1], row.length < 3 ? i : row[2], 0]);
    }
    // Clusters expressed as positions in data_; kept separately so that pick()
    // and show() never have to guess whether rtn_jl_ holds positions or ids.
    this.idx_jl_ = [];
    // Result: clusters (see the field description above).
    this.rtn_jl_ = [];
    // Result: outlier ids.
    this.rtn_fly_ = [];
}

/**
 * Tells whether two points are neighbours.
 *
 * @param {Array} pt1  point whose first two entries are its coordinates
 * @param {Array} pt2  point whose first two entries are its coordinates
 * @returns {boolean}  true when both coordinate differences are strictly
 *                     below dx_ / dy_
 */
JL.prototype.isNear = function (pt1, pt2)
{
    return Math.abs(pt1[0] - pt2[0]) < this.dx_ && Math.abs(pt1[1] - pt2[1]) < this.dy_;
};

/**
 * Grows one cluster starting from the point at position `index`.
 *
 * The returned list doubles as a FIFO work queue: every member is compared with
 * every not-yet-clustered point, and each newly found neighbour is appended, so
 * the expansion order is breadth-first. All members, including the seed, are
 * flagged as clustered.
 *
 * @param {number} index  position of the seed point in data_
 * @returns {number[]}    positions in data_ of all cluster members, seed first
 */
JL.prototype.depth = function (index)
{
    var group = [index];
    // Flag the seed too; otherwise it stays "unclustered" and a later pass
    // would start a duplicate cluster from it.
    this.data_[index][3] = 1;
    for (var head = 0; head < group.length; head++)
    {
        var current = this.data_[group[head]];
        for (var j = 0; j < this.data_.length; j++)
        {
            var candidate = this.data_[j];
            if (candidate[3] === 0 && this.isNear(candidate, current))
            {
                candidate[3] = 1;
                group.push(j);
            }
        }
    }
    return group;
};

/**
 * Clusters every point. Previous results and flags are cleared first, so the
 * method can be called repeatedly and always yields the same clusters.
 * Afterwards rtn_jl_ holds positions in data_ (call pick() to get ids).
 */
JL.prototype.jl = function ()
{
    var i;
    for (i = 0; i < this.data_.length; i++)
    {
        this.data_[i][3] = 0;
    }
    this.idx_jl_ = [];
    this.rtn_jl_ = [];
    this.rtn_fly_ = [];
    for (i = 0; i < this.data_.length; i++)
    {
        if (this.data_[i][3] === 0)
        {
            var group = this.depth(i);
            this.idx_jl_.push(group);
            this.rtn_jl_.push(group.slice());
        }
    }
};

/**
 * Extracts the outliers: rewrites rtn_jl_ in terms of point ids and collects
 * into rtn_fly_ the ids of every cluster smaller than min_size_.
 * Both results are rebuilt from scratch, so calling pick() twice is harmless.
 */
JL.prototype.pick = function ()
{
    this.rtn_jl_ = [];
    this.rtn_fly_ = [];
    for (var i = 0; i < this.idx_jl_.length; i++)
    {
        var members = this.idx_jl_[i];
        var ids = [];
        var j;
        for (j = 0; j < members.length; j++)
        {
            ids.push(this.data_[members[j]][2]);
        }
        this.rtn_jl_.push(ids);
        if (ids.length < this.min_size_)
        {
            for (j = 0; j < ids.length; j++)
            {
                this.rtn_fly_.push(ids[j]);
            }
        }
    }
};

/**
 * Prints the number of clusters and each cluster; for undersized clusters the
 * original input row of every member is printed as well.
 */
JL.prototype.show = function ()
{
    console.log("this.rtn_jl_.length = " + this.rtn_jl_.length);
    for (var i = 0; i < this.rtn_jl_.length; i++)
    {
        console.log(i + " : " + this.rtn_jl_[i]);
        if (this.rtn_jl_[i].length < this.min_size_)
        {
            // Look rows up by position, not by id: ids supplied by the caller
            // are not valid indexes into data_src_. Fall back to rtn_jl_ only
            // if it was modified from outside and has no position list.
            var members = this.idx_jl_[i] || this.rtn_jl_[i];
            for (var j = 0; j < members.length; j++)
            {
                console.log("\r\t" + this.data_src_[members[j]]);
            }
        }
    }
};

/**
 * Convenience entry point: cluster, then extract outliers.
 * No sorting step is needed because depth() scans all points for every member.
 */
JL.prototype.do = function ()
{
    this.jl();
    this.pick();
};
// CommonJS export: consumers obtain the constructor as require(...).JL.
exports.JL = JL;
以下是测试代码:
// Demo script: cluster a sample set of planar coordinates with the default
// tolerances, then print the clusters produced by the julei module.
var julei = require('./julei.js');

// Sample points, one [x, y] pair per row; no explicit ids are supplied.
var samplePoints = [
[417897.497135,143782.261573],
[417897.190857,143782.048861],
[417896.924219,143781.900988],
[417896.679199,143781.763937],
[417896.192915,143782.631089],
[417895.933481,143782.494021],
[417895.447032,143782.194715],
[417897.756803,143784.155517],
[417897.537008,143784.032893],
[417897.317213,143783.90667],
[417897.097417,143783.780446],
[417896.657823,143783.531596],
[417897.901004,143784.782087],
[417896.434421,143783.405367],
[417896.110127,143783.214232],
[417895.663317,143782.954569],
[417895.432705,143782.821129],
[417895.227317,143782.709315],
[417895.00391,143782.579483],
[417894.611145,143782.355871],
[417897.447001,143784.511627],
[417897.605544,143784.623389],
[417897.227205,143784.389003],
[417896.809233,143784.158177],
[417896.625467,143784.057189],
[417896.373241,143783.912928],
[417896.131821,143783.772279],
[417894.283457,143783.781198],
[417899.91476,143783.091968],
[417899.673353,143782.944129],
[417899.475183,143782.828733],
[417899.338656,143785.615128],
[417898.758239,143782.961243],
[417898.484401,143782.802569],
[417898.142101,143782.607825],
[417897.911499,143782.481593],
[417898.635971,143784.642401],
[417898.437801,143784.541403],
[417900.249551,143781.130199],
[417898.196389,143784.397161],
[417897.944166,143784.249305],
[417901.284153,143785.670988],
[417900.437589,143786.228211],
[417900.696865,143785.339217],
[417900.401417,143785.176929],
[417899.810518,143784.845149],
[417898.329631,143783.922069],
[417900.239421,143786.116417],
[417901.691188,143785.170948],
[417901.269638,143784.907741],
[417900.963383,143784.727443],
[417900.682349,143784.575969],
[417900.426533,143784.420919],
[417899.929313,143784.139629],
[417899.716733,143784.020621],
[417899.446503,143783.876353],
[417899.187081,143783.728495],
[417898.974499,143783.609483],
[417898.707867,143783.458017],
[417902.008177,143784.822029],
[417901.619055,143784.587655],
[417900.992137,143784.230647],
[417900.761545,143784.093623],
[417900.545363,143783.974611],
[417900.958005,143783.955627],
[417899.796246,143789.867581],
[417899.702247,143783.473381],
[417899.48246,143783.354364]
];

// Run the analysis and dump the result to the console.
var clusterer = new julei.JL(samplePoints);
clusterer.do();
clusterer.show();
评论