机器学习常用代码

使用 sigmoid 函数将数值映射到 (0, 1) 区间,对数据进行归一化处理。

def Sigmoid(X):
    """Return the logistic sigmoid ``1 / (1 + exp(-X))`` of ``X``.

    Args:
        X: A number (or anything convertible to float), or an array-like of
            such values.

    Returns:
        A NumPy float for scalar input, or an ndarray with the same shape as
        ``X`` for array-like input.
    """
    import numpy as np  # local import, like the other helpers in this file

    # np.asarray (instead of float()) lets one code path handle both scalars
    # and whole arrays/columns.
    values = np.asarray(X, dtype=float)
    return 1.0 / (1.0 + np.exp(-values))

数据清洗时,将 DataFrame 中的文本列转化为数值编码。

def Replace(X, columns):
    """Encode the values of one DataFrame column as integer codes, in place.

    Each distinct non-missing value is replaced by its rank among the sorted
    distinct values (0 for the smallest, 1 for the next, ...). Missing values
    are left as missing.

    Args:
        X: DataFrame to modify.
        columns: Name of the single column to encode.

    Returns:
        The same DataFrame object ``X``, with ``X[columns]`` replaced by codes.
    """
    # Build the whole value -> code table first and apply it in one pass.
    # Replacing values one at a time can re-replace codes that were already
    # written when the column holds numbers (e.g. -1 -> 0, then 0 -> 1).
    distinct_values = sorted(X[columns].dropna().unique())
    codes = {value: code for code, value in enumerate(distinct_values)}
    X[columns] = X[columns].map(codes)
    return X

划分测试集与训练集(3:7)

def Data(X, columns):
    """Split a DataFrame into training and test sets (70% train / 30% test).

    Args:
        X: DataFrame holding both the features and the target column.
        columns: Name of the target column; it is removed from the features.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection provides the same train_test_split function.
    from sklearn import model_selection

    Y = X[columns]
    features = X.drop([columns], axis=1)
    # A fixed random_state keeps the split reproducible between runs.
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        features, Y, test_size=0.3, random_state=0)
    return (X_train, X_test, y_train, y_test)

基础的机器学习代码,查看得分

# astype returns a new object instead of converting in place, so the result
# has to be bound back to y_train for the integer cast to take effect.
y_train = y_train.astype('int')

def RF(X_train, X_test, y_train, y_test):
    """Fit a random forest classifier and return its accuracy on the test set.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        The accuracy score of the predictions for ``X_test`` against ``y_test``.
    """
    from sklearn.ensemble import RandomForestClassifier
    # accuracy_score is not imported anywhere else in this file.
    from sklearn.metrics import accuracy_score

    model = RandomForestClassifier(n_estimators=100)
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    return accuracy_score(y_test, predicted)

def LOR(X_train, X_test, y_train, y_test):
    """Fit an L1-penalised logistic regression and return its test accuracy.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        The accuracy score of the predictions for ``X_test`` against ``y_test``.
    """
    from sklearn.linear_model import LogisticRegression
    # accuracy_score is not imported anywhere else in this file.
    from sklearn.metrics import accuracy_score

    # The L1 penalty is not supported by the default 'lbfgs' solver of current
    # scikit-learn, so the solver is named explicitly. liblinear always uses a
    # one-vs-rest scheme, which is why the deprecated multi_class='ovr'
    # argument is no longer passed.
    lor = LogisticRegression(penalty='l1', C=100, solver='liblinear')
    lor.fit(X_train, y_train)
    predicted = lor.predict(X_test)
    return accuracy_score(y_test, predicted)

def Svm(X_train, X_test, y_train, y_test):
    """Fit an RBF-kernel support vector classifier and return its test accuracy.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        The accuracy score of the predictions for ``X_test`` against ``y_test``.
    """
    from sklearn import svm
    # accuracy_score is not imported anywhere else in this file.
    from sklearn.metrics import accuracy_score

    model = svm.SVC(kernel='rbf')
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    return accuracy_score(y_test, predicted)

def LR(X_train, X_test, y_train, y_test):
    """Fit an ordinary least-squares linear regression and score it.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets.

    Returns:
        A tuple ``(score, intercept, coef)`` where ``score`` is the R^2 of the
        predictions for ``X_test`` and the other two items are the fitted
        model's ``intercept_`` and ``coef_``.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import r2_score

    # A distinct local name avoids shadowing this function's own name.
    model = LinearRegression()
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    # accuracy_score is a classification metric and rejects the continuous
    # output of a regressor, so the regression metric R^2 is used instead.
    score = r2_score(y_test, predicted)
    return (score, model.intercept_, model.coef_)

确定自变量和目标变量之间的相关性

def Correlation(df, columns_name):
    """Print the correlation between ``columns_name`` and each non-string column.

    NOTE(review): ``df`` is presumably a PySpark DataFrame, since the code
    relies on ``select``/``take`` and ``stat.corr`` - confirm against callers.

    Args:
        df: DataFrame whose columns are compared with the target column.
        columns_name: Name of the target column.

    Returns:
        None. One line is printed per non-string column.
    """
    import six

    for i in df.columns:
        # Decide from the first row's value whether the column holds text;
        # text columns are skipped.
        first_value = df.select(i).take(1)[0][0]
        if isinstance(first_value, six.string_types):
            continue
        # Use the df argument here; the previous code read an undefined
        # global named house_df.
        print("Correlation to {} for ".format(columns_name), i, df.stat.corr(columns_name, i))
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容