1. 生成 SFrame 文件
import turicreate as tc
# Read the two training CSV files into SFrames.
sf_functional = tc.SFrame.read_csv('train/train_FNC.csv')
sf_morphometry = tc.SFrame.read_csv('train/train_SBM.csv')

# Join the two tables on their shared 'Id' column, then drop 'Id' so that
# it is not part of the SFrame that gets saved and clustered.
sf = sf_functional.join(sf_morphometry, on='Id').remove_column('Id')

# Persist the combined table in SFrame format.
sf.save('schizophrenia_clean.sframe')
train_FNC.csv 和 train_SBM.csv 可以在 Kaggle 上下载。
2. 创建 k-means 模型
# Create the k-means model.
from math import sqrt

# Number of clusters is derived from the row count as sqrt(n / 2),
# truncated to an integer. Clamp to at least 1 so that a table with
# fewer than two rows does not request zero clusters.
K = max(1, int(sqrt(sf.num_rows() / 2.0)))
kmeans_model = tc.kmeans.create(sf, num_clusters=K)

# summary() prints the report itself and returns None when called without
# arguments, so it is called directly rather than wrapped in a print
# (which would emit a stray "None" and is a syntax error on Python 3).
kmeans_model.summary()
3. 使用模型
# Display the model's 'cluster_info' table, limiting the output to
# 5 columns, 80 characters per row and 10 characters per column.
kmeans_model['cluster_info'].print_rows(
    num_columns=5,
    max_row_width=80,
    max_column_width=10,
)

# Run prediction with the trained model on the first five rows of sf.
new_clusters = kmeans_model.predict(sf[:5])

# print(...) with a single argument behaves the same on Python 2 and 3,
# unlike the Python 2-only print statement.
print(new_clusters)