# inX: input vector to classify
# dataSet: training set, one row per sample
# labels: label vector; labels[i] is the class of row i of dataSet
# k: number of nearest neighbours that take part in the vote
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest training rows.

    Distance is Euclidean: the square root of the summed squared
    per-column differences between ``inX`` and each row of ``dataSet``.

    Args:
        inX: Feature vector to classify; it is tiled to one copy per
            training row, so its length should match the number of
            columns of ``dataSet``.
        dataSet: Training samples; must expose ``.shape`` and support
            element-wise arithmetic (presumably a 2-D NumPy array --
            confirm against callers).
        labels: Sequence indexable by row position, giving each row's label.
        k: How many of the closest rows vote.

    Returns:
        The label with the highest vote count among the ``k`` closest rows.

    Raises:
        IndexError: If ``k`` is larger than the number of rows, or if
            ``k <= 0`` (no votes are cast, so there is no winner to return).
    """
    dataSetSize = dataSet.shape[0]

    # Distance computation: repeat inX once per training row so the
    # subtraction is element-wise against the whole training set.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat ** 2  # ** 2 squares each element
    sqDistances = sqDiffMat.sum(axis=1)  # axis=1 sums along each row
    distances = sqDistances ** 0.5  # ** 0.5 takes the square root
    sortedDistIndicies = distances.argsort()

    # Tally the labels of the k closest rows.
    classCount = {}
    for i in range(k):
        voteILabel = labels[sortedDistIndicies[i]]
        classCount[voteILabel] = classCount.get(voteILabel, 0) + 1

    # Sort labels by vote count, highest first.  dict.items() is used
    # instead of the Python-2-only dict.iteritems() so this runs on both
    # Python 2 and Python 3.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedClassCount[0][0]
然后我们进行测试,重新打开 Python 交互式解释器:
>>> import kNN
>>> group, labels = kNN.createDataSet()
>>> kNN.classify0([0, 0], group, labels, 3)
'B'
>>> kNN.classify0([0.3, 0], group, labels, 3)
'B'
>>> kNN.classify0([0.8, 0.9], group, labels, 3)
'A'
我们看到,一个简单的分类器就这样完成了。接下来,我们把电影数据作为训练样本写入 createDataSet:
def createDataSet(): group = array([ [3, 104], [2, 100], [1, 81], [101, 10], [99, 5], [98, 2] ]) labels = ["love", "love", "love", "action", "action",