一个完整的 AI 获客系统包含众多功能,如数据挖掘与分析、精准客户定位、市场趋势洞察、客户画像构建、模型动态更新等。以下是一个整合了上述主要功能的简化示例代码,基于 Python 和常见的机器学习库实现。
代码说明
数据预处理:对数据进行缺失值处理和异常值处理,确保数据质量。
精准客户定位:使用随机森林分类器对客户是否为潜在客户进行预测,并评估模型准确率。
市场趋势洞察:对市场指标进行时间序列分解,可视化展示趋势、季节性和残差。
客户画像构建:使用 KMeans 算法对客户进行聚类,构建不同客户群体的画像。
动态更新模型:当有新数据到来时,用新数据对现有模型重新拟合(fit);注意随机森林的 fit 会基于新数据从头训练,而不是在旧模型的基础上做增量学习。
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import joblib
# Data preprocessing
def data_preprocessing(file_path, *, exclude_columns=None):
    """Load a CSV file, impute missing numeric values and drop outlier rows.

    Every numeric column has its missing values replaced by the column mean.
    Afterwards, column by column, rows whose value lies more than three
    standard deviations away from the column mean are removed. The mean and
    standard deviation of each column are computed on the rows that survived
    the filtering of the previous columns.

    Args:
        file_path: Path (or any object accepted by ``pandas.read_csv``) of the
            CSV file to load.
        exclude_columns: Optional iterable of column names that are still
            mean-imputed but skipped by the outlier filter. Useful for binary
            label/flag columns, where a rare class would otherwise be removed
            as an "outlier". Defaults to filtering every numeric column.

    Returns:
        pandas.DataFrame: The cleaned data; the original row index labels of
        the surviving rows are kept.
    """
    data = pd.read_csv(file_path)
    numerical_columns = data.select_dtypes(include=['number']).columns
    skip_outlier_filter = set(exclude_columns or ())

    # Fill missing values with the column mean. The result is assigned back
    # to the frame: ``data[col].fillna(..., inplace=True)`` acts on an
    # intermediate object and does not update ``data`` under pandas
    # Copy-on-Write (and raises a FutureWarning on pandas 2.x).
    for col in numerical_columns:
        data[col] = data[col].fillna(data[col].mean())

    # Drop rows outside mean +/- 3 * std for each numeric column.
    for col in numerical_columns:
        if col in skip_outlier_filter:
            continue
        mean = data[col].mean()
        std = data[col].std()
        if pd.isna(std):
            # Fewer than two usable values: no spread can be estimated, and
            # comparing against NaN bounds would silently drop every row.
            continue
        data = data[(data[col] <= mean + 3 * std) & (data[col] >= mean - 3 * std)]
    return data
# Precise customer targeting
def precise_customer_positioning(data, target_column):
    """Train a random-forest classifier and print its hold-out accuracy.

    The target column is separated from the features, 20% of the rows are
    held out (``random_state=42``), numeric features are standardized and a
    100-tree ``RandomForestClassifier`` is fitted on the training part.

    Args:
        data: DataFrame holding the features and the target column.
        target_column: Name of the column to predict.

    Returns:
        The fitted ``RandomForestClassifier``.

    Note:
        The fitted scaler is not returned, so it is not available to callers
        that later want to transform new rows the same way.
    """
    y = data[target_column]
    # Only numeric and boolean columns are usable as-is by the forest; raw
    # text columns (for example an unparsed date string) would make fit()
    # raise when it tries to convert them to float.
    X = data.drop(target_column, axis=1).select_dtypes(include=['number', 'bool'])
    numerical_columns = X.select_dtypes(include=['number']).columns

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    # Work on explicit copies so the scaled values are never written into a
    # view of the caller's frame.
    X_train = X_train.copy()
    X_test = X_test.copy()

    # Standardize after the split: the scaler statistics come from the
    # training rows only, so nothing about the hold-out set leaks into them.
    if len(numerical_columns) > 0:
        scaler = StandardScaler()
        X_train[numerical_columns] = scaler.fit_transform(X_train[numerical_columns])
        X_test[numerical_columns] = scaler.transform(X_test[numerical_columns])

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"精准客户定位模型准确率: {accuracy}")
    return model
# Market trend insight
def market_trend_insight(data, column, period=12, model='additive'):
    """Decompose a metric into trend/seasonal/residual parts and plot them.

    The ``date`` column of ``data`` is parsed as datetimes and used as the
    time axis of ``column``; the resulting series is decomposed with
    ``seasonal_decompose`` and the four components are shown in one figure.

    Args:
        data: DataFrame with a ``date`` column and the metric column. It is
            not modified.
        column: Name of the metric column to decompose.
        period: Number of observations per seasonal cycle (default 12).
        model: Decomposition type passed to ``seasonal_decompose``
            (default ``'additive'``).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Build the time series on a derived frame instead of rewriting ``date``
    # and calling set_index(inplace=True) on the caller's DataFrame; that
    # removed the ``date`` column and made a second call fail with KeyError.
    series = (
        data.assign(date=pd.to_datetime(data['date']))
        .set_index('date')[column]
        .sort_index()  # the decomposition assumes chronological order
    )
    decomposition = seasonal_decompose(series, model=model, period=period)

    components = (
        (decomposition.observed, 'Observed'),
        (decomposition.trend, 'Trend'),
        (decomposition.seasonal, 'Seasonal'),
        (decomposition.resid, 'Residual'),
    )
    plt.figure(figsize=(12, 8))
    # One stacked panel per component (4 rows x 1 column).
    for position, (values, label) in enumerate(components, start=1):
        plt.subplot(4, 1, position)
        plt.plot(values, label=label)
        plt.legend(loc='best')
    plt.tight_layout()
    plt.show()
# Customer profile construction
def customer_profile_construction(data, n_clusters=3):
    """Cluster customers with KMeans and summarize each cluster.

    All numeric columns of ``data`` are standardized and clustered. The
    cluster label of every row is written to ``data['cluster']`` (the
    caller's frame is modified), and for each cluster the mean of ``age``,
    ``purchase_amount`` and ``purchase_frequency`` is computed and printed.

    Args:
        data: DataFrame that contains at least the ``age``,
            ``purchase_amount`` and ``purchase_frequency`` columns.
        n_clusters: Number of clusters to form (default 3).

    Returns:
        dict: Maps ``'Cluster <label>'`` to a dict with the keys
        ``average_age``, ``average_purchase_amount`` and
        ``average_purchase_frequency``.

    Note:
        Every numeric column takes part in the clustering, including any
        label column that is present in ``data``.
    """
    # A 'cluster' column left over from an earlier call is an output of this
    # function, not a customer attribute, so it must not be fed back in.
    numerical_features = data.select_dtypes(include=['number']).drop(
        columns=['cluster'], errors='ignore'
    )
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(numerical_features)

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(scaled_features)
    data['cluster'] = kmeans.labels_

    profiles = {}
    # Iterate over the labels that actually occur rather than assuming they
    # are exactly 0..k-1.
    for cluster in sorted(data['cluster'].unique()):
        cluster_data = data[data['cluster'] == cluster]
        profiles[f'Cluster {int(cluster)}'] = {
            'average_age': cluster_data['age'].mean(),
            'average_purchase_amount': cluster_data['purchase_amount'].mean(),
            'average_purchase_frequency': cluster_data['purchase_frequency'].mean(),
        }

    print("客户画像:")
    for cluster, profile in profiles.items():
        print(f"{cluster}: {profile}")
    return profiles
# Dynamic model update
def dynamic_model_update(existing_model, new_data, target_column):
    """Re-fit an existing model on newly arrived data.

    The target column is separated from the features, numeric features are
    standardized with a scaler fitted on ``new_data``, and
    ``existing_model.fit`` is called on the result.

    Args:
        existing_model: Estimator exposing ``fit(X, y)``; it is fitted in
            place.
        new_data: DataFrame holding the new features and the target column.
            It is not modified.
        target_column: Name of the column to predict.

    Returns:
        The same ``existing_model`` object after fitting.

    Note:
        This is a re-fit on ``new_data`` only. A scikit-learn estimator's
        ``fit`` discards what was learned before unless the estimator
        supports warm starting and has it enabled.
    """
    new_y = new_data[target_column]
    # Only numeric and boolean columns are usable as-is by the estimator; raw
    # text columns (for example an unparsed date string) would make fit()
    # raise. The copy keeps the scaled values out of the caller's frame.
    new_X = (
        new_data.drop(target_column, axis=1)
        .select_dtypes(include=['number', 'bool'])
        .copy()
    )
    numerical_columns = new_X.select_dtypes(include=['number']).columns
    if len(numerical_columns) > 0:
        scaler = StandardScaler()
        new_X[numerical_columns] = scaler.fit_transform(new_X[numerical_columns])

    existing_model.fit(new_X, new_y)
    return existing_model
if __name__ == "__main__":
    # Pipeline configuration: input files, label column and model file.
    target_column = "is_potential_customer"
    initial_file_path = "initial_customer_data.csv"
    new_file_path = "new_customer_data.csv"
    model_file = "customer_acquisition_model.pkl"

    # Stage 1: clean the initial data set and train the first model.
    cleaned_initial = data_preprocessing(initial_file_path)
    trained_model = precise_customer_positioning(cleaned_initial, target_column)

    # Stage 2: trend decomposition and customer clustering on the same data.
    market_trend_insight(cleaned_initial, 'market_metric')
    customer_profile_construction(cleaned_initial)

    # Stage 3: persist the first model.
    joblib.dump(trained_model, model_file)

    # Stage 4: when new data arrives, clean it, reload the stored model,
    # re-fit it on the new rows and overwrite the stored file.
    cleaned_new = data_preprocessing(new_file_path)
    refreshed_model = dynamic_model_update(
        joblib.load(model_file), cleaned_new, target_column
    )
    joblib.dump(refreshed_model, model_file)