# Min-max normalization formula:
#     newValue = (oldValue - min) / (max - min)
def autoNorm(dataSet):
    """Rescale every feature column of ``dataSet`` with min-max normalization.

    Each value is mapped with ``newValue = (oldValue - min) / (max - min)``,
    where ``min`` and ``max`` are taken per column (axis 0).

    Args:
        dataSet: 2-D NumPy array with one row per sample and one column per
            feature. The input array is not modified.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)``:

        * ``normDataSet`` -- new array with the same shape as ``dataSet``
          holding the rescaled values.
        * ``ranges`` -- per-column ``max - min``. A column whose values are
          all identical has its range reported as 1 instead of 0, so that
          callers can divide by ``ranges`` without producing NaN/inf.
        * ``minVals`` -- per-column minimum.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # A constant column has a range of 0; dividing by it would yield NaN.
    # Using 1 instead maps every value of such a column to 0.
    ranges[ranges == 0] = 1
    # Broadcasting applies the per-column vectors to every row, so no
    # explicit row-wise replication of minVals / ranges is needed.
    normDataSet = (dataSet - minVals) / ranges
    return normDataSet, ranges, minVals
def datingClassTest(hoRatio=0.10, filename='datingTestSet.txt'):
    """Run a hold-out evaluation of the classifier and print the results.

    The data is loaded with ``file2matrix`` and normalized with ``autoNorm``.
    The first ``int(m * hoRatio)`` rows are used as test vectors and the
    remaining rows as the training set. Each test vector is classified with
    ``classify0`` (using 3 as its last argument) and printed next to its
    real label; finally the fraction of misclassified test vectors is
    printed.

    Args:
        hoRatio: fraction of the rows held out for testing (default 0.10).
        filename: path of the data file passed to ``file2matrix``
            (default ``'datingTestSet.txt'``).

    Returns:
        None. All results are written to standard output.
    """
    datingDataMat, datingLabels = file2matrix(filename)
    normMat, _ranges, _minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)

    # The training portion is the same for every test vector, so slice once.
    trainMat = normMat[numTestVecs:m, :]
    trainLabels = datingLabels[numTestVecs:m]

    errorCount = 0
    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i, :], trainMat, trainLabels, 3)
        # Parenthesized single-argument print works on Python 2 and 3 alike.
        print("back %d ,real %d" % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1

    # With too few rows there are no test vectors at all; report 0.0 rather
    # than dividing by zero.
    errorRate = errorCount / float(numTestVecs) if numTestVecs else 0.0
    # The "range is" label is kept verbatim; the value is the error rate.
    print("range is %f" % errorRate)
# Dating website test function: classifies one person from values entered interactively
def classifyPerson():
    """Ask for one person's three feature values and print the prediction.

    Reads three numbers from standard input, loads the data with
    ``file2matrix('datingTestSet.txt')``, normalizes it with ``autoNorm``,
    applies the same ``(value - minVals) / ranges`` scaling to the entered
    values, classifies them with ``classify0`` (using 3 as its last
    argument) and prints the matching entry of ``resultList``.

    Returns:
        None. The prediction is written to standard output.

    Raises:
        ValueError: if an entered value cannot be converted with ``float``.
    """
    # raw_input exists only on Python 2; Python 3 calls the same thing input.
    try:
        readLine = raw_input
    except NameError:
        readLine = input

    resultList = ['not at all', 'in small doses', 'in large dose']
    percentTats = float(readLine("per of time spent playing video games?"))
    ffMiles = float(readLine("fre flier miles earned per year?"))
    iceCream = float(readLine("liters of ice cream consumed per year?"))

    datingDataMat, datingLabels = file2matrix('datingTestSet.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)

    # NOTE(review): the vector order (miles, game time, ice cream) differs
    # from the prompt order; presumably it matches the column order of the
    # data file -- confirm against file2matrix.
    inArr = array([ffMiles, percentTats, iceCream])
    classifierResult = classify0((inArr - minVals) / ranges, normMat, datingLabels, 3)

    # The classifier result is used as a 1-based index into resultList.
    # Single print call; output is identical to the former two-part print.
    print("You will probably like this person : %s" % resultList[classifierResult - 1])