# 数据预处理:解决 ValueError: Cannot convert non-finite values (NA or inf) to integer
import pandas as pd
# Preprocessing script: load the news CSV, keep four columns under short
# English names, fill missing titles, encode the sentiment label as an integer
# code and drop every row whose label could not be encoded. Dropping those rows
# before the integer cast is what avoids
# "ValueError: Cannot convert non-finite values (NA or inf) to integer".

# NOTE: the input path is machine-specific.
df = pd.read_csv(r"D:\Raoling\NLP-Work\NewsData\data_all.csv", encoding='utf-8')

# .copy() makes the column selection an independent frame, so the column
# assignments below are unambiguous and do not raise SettingWithCopyWarning.
df = df[['uuid', '标题', '中文内容', '人工校验情感']].copy()
new_col = ['id', 'title', 'content', 'label']
df.columns = new_col
df.info()

# Missing titles get a placeholder string instead of NaN.
df['title'] = df['title'].fillna('无')

# Map each sentiment label straight to its integer code. Any value that is not
# a key of this dict (including missing values) becomes NaN.
# NOTE(review): read_csv parses empty fields as NaN by default, so the ''
# key presumably never matches and such rows are dropped below -- confirm
# whether code 3 was meant to be kept for unlabeled rows.
df['label'] = df['label'].map({'正面': 0, '中性': 1, '负面': 2, '': 3})

# Show the rows whose label could not be encoded (boolean indexing on NaN).
print(df[df['label'].isnull()])

# Drop the rows with a NaN label, then cast the column to int. The cast is only
# safe once no NaN remains. astype with a column mapping returns a new frame,
# which avoids assigning into the filtered result.
df = df.dropna(subset=['label']).astype({'label': int})