Recommendation Series 4: Movie Recommendation

1 Dataset Download

https://www.kaggle.com/sherinclaudia/movielens
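
The Kaggle dataset contains the three MovieLens-1M files users.dat, movies.dat and ratings.dat, all of which use '::' as the field separator. A minimal sketch to confirm the files are where the code below expects them (the ../input/movielens/ path is the Kaggle-kernel layout assumed throughout; adjust it if you run locally):

import os

DATA_DIR = '../input/movielens/'  # assumed Kaggle input path; change if running elsewhere
for fname in ['users.dat', 'movies.dat', 'ratings.dat']:
    path = os.path.join(DATA_DIR, fname)
    # ML-1M ships as latin-1; print the size and the first '::'-separated record
    with open(path, encoding='latin-1') as f:
        print(fname, os.path.getsize(path), 'bytes |', f.readline().strip())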

2 Import Packages

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import tensorflow as tf
from sklearn.metrics import mean_squared_error, roc_auc_score
import pickle
from sklearn.base import BaseEstimator, TransformerMixin
from time import time
print(os.listdir("../input"))

ROOT = '../input/movielens/'
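
Everything below uses the TensorFlow 1.x graph API (tf.placeholder, tf.Session, tf.contrib), so it will not run unmodified on TensorFlow 2.x. A minimal guard, assuming only the imports above:

assert tf.__version__.startswith('1.'), \
    'This code targets TensorFlow 1.x, found %s' % tf.__version__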

3 Preprocess the Dataset

def load_data(root, savefile):
    users_title = ['UserID', 'Gender', 'Age', 'JobID', 'Zip-code']
    users = pd.read_csv(root+'users.dat', sep='::', header=None, names=users_title, engine = 'python')
    users.drop(columns=['Zip-code'], inplace=True)
    users_ori = users.copy()
    feature_num = 0
    feature_dict = {}
    for col in users.columns:
        unique_val = users[col].unique()
        feature_dict[col] = dict(zip(unique_val,range(feature_num, feature_num+len(unique_val))))
        feature_num += len(unique_val)
        users[col] = users[col].map(feature_dict[col])

    movies_title = ['MovieID', 'Title', 'Genres']
    movies = pd.read_csv(root+'movies.dat', sep='::', header=None, names=movies_title, engine = 'python')
    movies_ori = movies.copy()
    unique_val = movies['MovieID'].unique()
    feature_dict['MovieID'] = dict(zip(unique_val,range(feature_num, feature_num+len(unique_val))))
    feature_num += len(unique_val)
    movies['MovieID'] = movies['MovieID'].map(feature_dict['MovieID'])
    title_set = set()
    genres_set = set()
    for row in movies.itertuples():
        title_set.update(row.Title.split(' ')[:-1])
        genres_set.update(row.Genres.split('|'))
    title_set.add('<PAD>')
    genres_set.add('<PAD>')
    title2int = {x:(i+feature_num) for i,x in enumerate(title_set)}
    feature_num += len(title_set)
    title2map = {x:[title2int[r] for r in x.split(' ')[:-1]] for x in set(movies['Title'])}
    title_max_len = movies['Title'].str.split(' ').apply(len).max()
    for key in title2map.keys():
        l = len(title2map[key])
        title2map[key] = title2map[key] + [title2int['<PAD>']]*(title_max_len-l)
    movies['Title'] = movies['Title'].map(title2map)
    genres2int = {x:(i+feature_num) for i,x in enumerate(genres_set)}
    feature_num += len(genres_set)
    genres2map = {x:[genres2int[r] for r in x.split('|')] for x in set(movies['Genres'])}
    genres_max_len = movies['Genres'].str.split('|').apply(len).max()
    for key in genres2map.keys():
        l = len(genres2map[key])
        genres2map[key] = genres2map[key] + [genres2int['<PAD>']]*(genres_max_len-l)
    movies['Genres'] = movies['Genres'].map(genres2map)
    feature_dict['Title'] = title2map
    feature_dict['Genres'] = genres2map
    
    ratings_title = ['UserID','MovieID', 'Rating', 'timestamps']
    ratings = pd.read_csv(root+'ratings.dat', sep='::', header=None, names=ratings_title, engine = 'python')
    ratings.drop(columns=['timestamps'], inplace=True)
    ratings['UserID'] = ratings['UserID'].map(feature_dict['UserID'])
    ratings['MovieID'] = ratings['MovieID'].map(feature_dict['MovieID'])
    
    pickle.dump((feature_num, feature_dict, title_max_len, genres_max_len, users_ori, movies_ori)
                , open(savefile, 'wb'))
    
    return users, movies, ratings

users, movies, ratings = load_data(ROOT, 'preprocess.p')
data = pd.merge(pd.merge(ratings, users), movies)
feature_num, feature_dict, title_max_len, genres_max_len, users_ori, movies_ori \
    = pickle.load(open('preprocess.p', mode='rb'))
user_columns = users.columns
movie_columns = ['MovieID', 'Genres']
text_columns = ['Title']
user_data = data[user_columns].values
movie_data = []
text_data = []
for row in data.itertuples():
    movie_data.append([row.MovieID]+row.Genres)
    text_data.append(row.Title)
label = data['Rating'].values
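
At this point each user_data row holds the remapped (UserID, Gender, Age, JobID) indices, each movie_data row is [MovieID] followed by the padded genre indices, and each text_data row is the padded list of title-word indices; all of them index into a single embedding table with feature_num rows. A quick sanity check (a sketch; the exact widths depend on the dataset version):

print('samples               :', len(label))
print('user feature width    :', len(user_data[0]))   # UserID, Gender, Age, JobID
print('movie feature width   :', len(movie_data[0]))  # MovieID + padded genre indices
print('title sequence length :', len(text_data[0]))   # padded title-word indices
print('embedding table rows  :', feature_num)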

4 Build the Model

class RecomCNN(BaseEstimator, TransformerMixin):
    def __init__(self, feature_size,
                 user_size,
                 movie_size,
                 text_size=15,
                 filter_size=8,
                 embedding_size=8,
                 user_deep_layers=[200,200],
                 movie_deep_layers=[96,200],
                 user_dropout_keep_layers=[0.8,0.8,0.8],
                 movie_dropout_keep_layers=[0.8,0.8,0.8],
                 windows=[2,3,4,5],
                 deep_layers_activation=tf.nn.relu,
                 epoch=10, 
                 batch_size=256,
                 learning_rate=0.001, 
                 optimizer_type="adam",
                 verbose=1, 
                 random_seed=2016,
                 loss_type="logloss", 
                 eval_metric=roc_auc_score,
                 l2_reg=0.0, 
                 isrestore=False, 
                 save_path=''):
        assert loss_type in ["logloss", "mse"], \
            "loss_type can be either 'logloss' for classification task or 'mse' for regression task"

        self.feature_size = feature_size
        self.user_size = user_size
        self.movie_size = movie_size
        self.text_size = text_size
        self.filter_size = filter_size
        self.embedding_size = embedding_size
        
        self.user_deep_layers = user_deep_layers
        self.user_dropout_keep_layers = user_dropout_keep_layers
        self.movie_deep_layers = movie_deep_layers
        self.movie_dropout_keep_layers = movie_dropout_keep_layers
        self.windows = windows
        
        self.deep_layers_activation = deep_layers_activation
        self.l2_reg = l2_reg

        self.epoch = epoch
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.optimizer_type = optimizer_type

        self.verbose = verbose
        self.random_seed = random_seed
        self.loss_type = loss_type
        self.eval_metric = eval_metric
        
        self.isrestore = isrestore
        self.save_path = save_path

        self._init_graph()


    def _init_graph(self):
        self.graph = tf.Graph()
        with self.graph.as_default():

            tf.set_random_seed(self.random_seed)

            self.user_index = tf.placeholder(tf.int32, shape=[None, None],
                                                 name="user_index")  # None * F
            self.movie_index = tf.placeholder(tf.int32, shape=[None, None],
                                                 name="movie_index")  # None * F
            self.text_index = tf.placeholder(tf.int32, shape=[None, None],
                                                 name="text_index")  # None * F
            self.label = tf.placeholder(tf.float32, shape=[None, 1], name="label")  # None * 1
            self.user_dropout_keep = tf.placeholder(tf.float32, shape=[None], name="user_dropout_keep_layer")
            self.movie_dropout_keep = tf.placeholder(tf.float32, shape=[None], name="movie_dropout_keep_layer")

            self.weights = self._initialize_weights()

            # model

            # ---------- user part ----------
            self.user_embeddings = tf.nn.embedding_lookup(self.weights["feature_embeddings"],
                                                             self.user_index)  # None * U * K
            self.user_deep = tf.reshape(self.user_embeddings, (-1, self.user_size * self.embedding_size))
            self.user_deep = tf.nn.dropout(self.user_deep, self.user_dropout_keep[0])
            for i in range(0, len(self.user_deep_layers)):
                self.user_deep = tf.add(
                    tf.matmul(self.user_deep, self.weights["user_layer_%d" %i]), self.weights["user_bias_%d"%i]) # None * layer[i] * 1
                self.user_deep = self.deep_layers_activation(self.user_deep)
                self.user_deep = tf.nn.dropout(self.user_deep, self.user_dropout_keep[1+i]) # dropout at each Deep layer
                
            # ---------- movie part ----------
            # movie genres
            self.movie_embeddings = tf.nn.embedding_lookup(self.weights["feature_embeddings"],
                                                             self.movie_index)  # None * M * K
            self.movie_deep = tf.reshape(self.movie_embeddings, (-1, self.movie_size * self.embedding_size))
            self.movie_deep = tf.nn.dropout(self.movie_deep, self.movie_dropout_keep[0])
            for i in range(0, len(self.movie_deep_layers)-1):
                self.movie_deep = tf.add(
                    tf.matmul(self.movie_deep, self.weights["movie_layer_%d" %i]), self.weights["movie_bias_%d"%i]) # None * layer[i] * 1
                self.movie_deep = self.deep_layers_activation(self.movie_deep)
                self.movie_deep = tf.nn.dropout(self.movie_deep, self.movie_dropout_keep[1+i])
            # movie text
            self.text_embeddings = tf.nn.embedding_lookup(
                self.weights["feature_embeddings"], self.text_index)  # None * T * K
            self.text_deep = tf.expand_dims(self.text_embeddings, 3)  # None * T * K * 1
            text_cnn_output = []
            for i in range(len(self.windows)):
                text_cnn = tf.nn.conv2d(
                    self.text_deep, self.weights['text_filter_%d' % i], strides=[1,1,self.embedding_size,1], 
                    padding="VALID")
                ksize = self.text_size - self.windows[i] + 1
                text_cnn_output.append(
                    tf.reshape(
                        tf.nn.max_pool2d(text_cnn, [ksize,1], strides=[1,ksize,1,1], padding="VALID"),
                        (-1, self.filter_size)))
            self.text_deep = tf.concat(text_cnn_output, axis=1) # None * (filter_size*len(windows))
            self.movie_deep = tf.concat([self.text_deep, self.movie_deep], axis=1)
            i = len(self.movie_deep_layers) - 1
            self.movie_deep = tf.add(
                tf.matmul(self.movie_deep, self.weights["movie_layer_%d" %i]), self.weights["movie_bias_%d"%i]) # None * layer[i] * 1
            self.movie_deep = self.deep_layers_activation(self.movie_deep)
            self.movie_deep = tf.nn.dropout(self.movie_deep, self.movie_dropout_keep[1+i])
            

            # ---------- Combine ----------
            self.out = tf.reduce_sum(tf.multiply(self.user_deep, self.movie_deep), axis=1, keep_dims=True)

            # loss
            if self.loss_type == "logloss":
                self.out = tf.nn.sigmoid(self.out)
                self.loss = tf.losses.log_loss(self.label, self.out)
            elif self.loss_type == "mse":
                self.loss = tf.nn.l2_loss(tf.subtract(self.label, self.out))
            # l2 regularization on weights
            if self.l2_reg > 0:
                for key in self.weights.keys():
                    if key == 'feature_embeddings':
                        continue
                    self.loss += tf.contrib.layers.l2_regularizer(
                        self.l2_reg)(self.weights[key])

            # optimizer
            if self.optimizer_type == "adam":
                self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.9, beta2=0.999,
                                                        epsilon=1e-8).minimize(self.loss)
            elif self.optimizer_type == "adagrad":
                self.optimizer = tf.train.AdagradOptimizer(learning_rate=self.learning_rate,
                                                           initial_accumulator_value=1e-8).minimize(self.loss)
            elif self.optimizer_type == "gd":
                self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
            elif self.optimizer_type == "momentum":
                self.optimizer = tf.train.MomentumOptimizer(learning_rate=self.learning_rate, momentum=0.95).minimize(
                    self.loss)
            elif self.optimizer_type == "ftrl":
                self.optimizer = tf.train.FtrlOptimizer(learning_rate=self.learning_rate).minimize(
                    self.loss)

            # init
            self.saver = tf.train.Saver()
            init = tf.global_variables_initializer()
            self.sess = self._init_session()
            if self.isrestore:
                self.saver.restore(self.sess, self.save_path)
            else:
                self.sess.run(init)

            # number of params
            total_parameters = 0
            for variable in self.weights.values():
                shape = variable.get_shape()
                variable_parameters = 1
                for dim in shape:
                    variable_parameters *= dim.value
                total_parameters += variable_parameters
            if self.verbose > 0:
                print("#params: %d" % total_parameters)


    def _init_session(self):
        config = tf.ConfigProto(device_count={"gpu": 0})
        config.gpu_options.allow_growth = True
        return tf.Session(config=config)


    def _initialize_weights(self):
        weights = dict()

        # embeddings
        weights["feature_embeddings"] = tf.Variable(
            tf.random_normal([self.feature_size, self.embedding_size], 0.0, 0.01),
            name="feature_embeddings")  # feature_size * K

        # user deep layers
        num_layer = len(self.user_deep_layers)
        input_size = self.user_size * self.embedding_size
        glorot = np.sqrt(2.0 / (input_size + self.user_deep_layers[0]))
        weights["user_layer_0"] = tf.Variable(
            np.random.normal(loc=0, scale=glorot, size=(input_size, self.user_deep_layers[0])), dtype=np.float32)
        weights["user_bias_0"] = tf.Variable(np.random.normal(loc=0, scale=glorot, size=(1, self.user_deep_layers[0])),
                                                        dtype=np.float32)  # 1 * layers[0]
        for i in range(1, num_layer):
            glorot = np.sqrt(2.0 / (self.user_deep_layers[i-1] + self.user_deep_layers[i]))
            weights["user_layer_%d" % i] = tf.Variable(
                np.random.normal(loc=0, scale=glorot, size=(self.user_deep_layers[i-1], self.user_deep_layers[i])),
                dtype=np.float32)  # layers[i-1] * layers[i]
            weights["user_bias_%d" % i] = tf.Variable(
                np.random.normal(loc=0, scale=glorot, size=(1, self.user_deep_layers[i])),
                dtype=np.float32)  # 1 * layer[i]
        
        # movie deep layers
        num_layer = len(self.movie_deep_layers)
        input_size = self.movie_size * self.embedding_size
        glorot = np.sqrt(2.0 / (input_size + self.movie_deep_layers[0]))
        weights["movie_layer_0"] = tf.Variable(
            np.random.normal(loc=0, scale=glorot, size=(input_size, self.movie_deep_layers[0])), dtype=np.float32)
        weights["movie_bias_0"] = tf.Variable(np.random.normal(loc=0, scale=glorot, size=(1, self.movie_deep_layers[0])),
                                                        dtype=np.float32)  # 1 * layers[0]
        for i in range(1, num_layer):
            text_concat_size = 0
            if i == num_layer-1:
                text_concat_size = self.filter_size * len(self.windows)
            glorot = np.sqrt(2.0 / (self.movie_deep_layers[i-1] + self.movie_deep_layers[i] + text_concat_size))
            weights["movie_layer_%d" % i] = tf.Variable(
                np.random.normal(loc=0, scale=glorot, 
                                 size=(self.movie_deep_layers[i-1]+text_concat_size, self.movie_deep_layers[i])),
                dtype=np.float32)  # layers[i-1] * layers[i]
            weights["movie_bias_%d" % i] = tf.Variable(
                np.random.normal(loc=0, scale=glorot, size=(1, self.movie_deep_layers[i])),
                dtype=np.float32)  # 1 * layer[i]
            
        # text cnn layers
        for i in range(len(self.windows)):
            weights['text_filter_%d' % i] = tf.Variable(
                tf.random_normal([self.windows[i], self.embedding_size, 1, self.filter_size], 0.0, 0.01))

        return weights

    def get_batch(self, Xu, Xm, Xt, y, batch_size, index):
        start = index * batch_size
        end = (index+1) * batch_size
        end = end if end < len(y) else len(y)
        return Xu[start:end], Xm[start:end], Xt[start:end], [[y_] for y_ in y[start:end]]


    # shuffle the four arrays simultaneously (same permutation applied to each)
    def shuffle_in_unison_scary(self, a, b, c, d):
        rng_state = np.random.get_state()
        np.random.shuffle(a)
        np.random.set_state(rng_state)
        np.random.shuffle(b)
        np.random.set_state(rng_state)
        np.random.shuffle(c)
        np.random.set_state(rng_state)
        np.random.shuffle(d)


    def fit_on_batch(self, Xu, Xm, Xt, y):
        feed_dict = {self.user_index: Xu,
                     self.movie_index: Xm,
                     self.text_index: Xt,
                     self.label: y,
                     self.user_dropout_keep: self.user_dropout_keep_layers,
                     self.movie_dropout_keep: self.movie_dropout_keep_layers,}
        opt = self.sess.run(self.optimizer, feed_dict=feed_dict)


    def fit(self, Xu_train, Xm_train, Xt_train, y_train,
            Xu_valid=None, Xm_valid=None, Xt_valid=None, y_valid=None, epoches=10, paras_save=False):
        """
        self.user_index
        self.movie_index
        self.text_index
        user_dropout_keep_layers
        movie_dropout_keep_layers
        """
        self.epoch = epoches
        has_valid = y_valid is not None
        for epoch in range(self.epoch):
            t1 = time()
            self.shuffle_in_unison_scary(Xu_train, Xm_train, Xt_train, y_train)
            total_batch = int(np.ceil(len(y_train) / self.batch_size))
            for i in range(total_batch):
                Xu_batch, Xm_batch, Xt_batch, y_batch = self.get_batch(
                    Xu_train, Xm_train, Xt_train, y_train, self.batch_size, i)
                self.fit_on_batch(Xu_batch, Xm_batch, Xt_batch, y_batch)

            # evaluate training and validation datasets
            if has_valid:
                valid_result = self.evaluate(Xu_valid, Xm_valid, Xt_valid, y_valid)
#                 self.valid_result.append(valid_result)
            if self.verbose > 0 and epoch % self.verbose == 0:
                train_result = self.evaluate(Xu_train, Xm_train, Xt_train, y_train)
#                 self.train_result.append(train_result)
                if has_valid:
                    print("[%d] train-result=%.4f, valid-result=%.4f [%.1f s]"
                        % (epoch + 1, train_result, valid_result, time() - t1))
                else:
                    print("[%d] train-result=%.4f [%.1f s]"
                        % (epoch + 1, train_result, time() - t1))
        if paras_save:
            self.saver.save(self.sess, self.save_path)

    def predict(self, Xu_train, Xm_train, Xt_train):
        """
        :param Xi: list of list of feature indices of each sample in the dataset
        :param Xv: list of list of feature values of each sample in the dataset
        :return: predicted probability of each sample
        """
        # dummy y
        dummy_y = [1] * len(Xu_train)
        total_batch = int(np.ceil(len(Xu_train) / self.batch_size))
        y_pred = None
        for i in range(total_batch):
            Xu_batch, Xm_batch, Xt_batch, y_batch = self.get_batch(
                Xu_train, Xm_train, Xt_train, dummy_y, self.batch_size, i)
            feed_dict = {self.user_index: Xu_batch,
                         self.movie_index: Xm_batch,
                         self.text_index: Xt_batch,
                         self.user_dropout_keep: [1.0]*len(self.user_dropout_keep_layers),
                         self.movie_dropout_keep: [1.0]*len(self.movie_dropout_keep_layers),}
            batch_out = self.sess.run(self.out, feed_dict=feed_dict)
            if i == 0:
                y_pred = batch_out.flatten()
            else:
                y_pred = np.concatenate((y_pred, batch_out.flatten()))
        return y_pred


    def evaluate(self, Xu, Xm, Xt, y):
        """
        :param Xi: list of list of feature indices of each sample in the dataset
        :param Xv: list of list of feature values of each sample in the dataset
        :param y: label of each sample in the dataset
        :return: metric of the evaluation
        """
        y_pred = self.predict(Xu, Xm, Xt)
        return self.eval_metric(y, y_pred)
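
Both towers end in a vector whose width is the last entry of user_deep_layers / movie_deep_layers (200 in the configuration below), and the predicted rating is simply their inner product, as in the "Combine" step above. A tiny NumPy illustration of that scoring rule, outside the graph:

import numpy as np

user_vec = np.random.rand(200).astype(np.float32)    # stand-in for the user tower output
movie_vec = np.random.rand(200).astype(np.float32)   # stand-in for the movie tower output
score = np.sum(user_vec * movie_vec)                  # same as tf.reduce_sum(tf.multiply(...))
print('inner-product score:', score)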

5 Train the Model

model = RecomCNN(feature_size=feature_num,  # total feature index space computed by load_data (15186 here)
            user_size=len(user_data[0]),
            movie_size=len(movie_data[0]),
            text_size=len(text_data[0]),
            filter_size=8,
            embedding_size=32,
            user_deep_layers=[200,200],
            movie_deep_layers=[96,200],
            user_dropout_keep_layers=[1,1,0.8],
            movie_dropout_keep_layers=[1,1,0.8],
            windows=[2,3,4,5],
            deep_layers_activation=tf.nn.relu,
            epoch=10, 
            batch_size=256,
            learning_rate=0.001, 
            optimizer_type="adam",
            verbose=1, 
            random_seed=2019,
            loss_type="mse", 
            eval_metric=mean_squared_error,
            l2_reg=0.0,  
            isrestore=False, 
            save_path='movie.ckpt')

model.fit(user_data, movie_data, text_data, label, epoches=10, paras_save=True)
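
The call above trains on the full ratings table with nothing held out. Since fit() also accepts a validation set, a hedged alternative is to split off, say, 10% first; note that train_test_split comes from sklearn.model_selection and is not imported above, so this is an assumption of the sketch:

from sklearn.model_selection import train_test_split

(Xu_tr, Xu_va, Xm_tr, Xm_va,
 Xt_tr, Xt_va, y_tr, y_va) = train_test_split(
    user_data, movie_data, text_data, label, test_size=0.1, random_state=2019)
model.fit(Xu_tr, Xm_tr, Xt_tr, y_tr,
          Xu_valid=Xu_va, Xm_valid=Xm_va, Xt_valid=Xt_va, y_valid=y_va,
          epoches=10, paras_save=True)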

6 Build User and Movie Feature Vectors

def add_feature_vector(users, movies, model, users_ori, movies_ori):
    movie_data = [[movies['MovieID'][i]]+row for i,row in enumerate(movies['Genres'])]
    text_data = [row for row in movies['Title']]
    feed_dict = {model.user_index: users.values,
                 model.movie_index: movie_data,
                 model.text_index: text_data,
                 model.user_dropout_keep: [1.0]*3,
                 model.movie_dropout_keep: [1.0]*3,
                }
    user_matrix = model.user_deep.eval(feed_dict, session=model.sess)
    movie_matrix = model.movie_deep.eval(feed_dict, session=model.sess)
    users_ori['feat_vect'] = user_matrix.tolist()
    movies_ori['feat_vect'] = movie_matrix.tolist()
    return users_ori, movies_ori

users_with_feat_vector, movies_with_feat_vector = add_feature_vector(users, movies, model, users_ori, movies_ori)
users_with_feat_vector.set_index('UserID', inplace=True)
movies_with_feat_vector.set_index('MovieID', inplace=True)
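
The feature vectors only need to be computed once, so it is convenient to persist the two DataFrames and serve recommendations later without rebuilding the TensorFlow graph. A sketch using pickle (already imported above); the file name feat_vectors.p is arbitrary:

pickle.dump((users_with_feat_vector, movies_with_feat_vector), open('feat_vectors.p', 'wb'))
# later, for the recommendation step only:
# users_with_feat_vector, movies_with_feat_vector = pickle.load(open('feat_vectors.p', 'rb'))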

7 Recommendation

# Recommend movies similar to the given movie
def recommend_same_type_movie(movie_id, movies, top_k=20, recom_size=5):
    '''
    params: 'top_k' is the number of most similar movies to consider,
    'recom_size' is how many of them are randomly sampled and shown
    '''
    movie_vec = np.asarray([movies.loc[movie_id, 'feat_vect']])
    movie_matrix = np.asarray([row for row in movies['feat_vect']])
    movie_vec = movie_vec / np.sqrt(np.sum(np.square(movie_vec)))
    movie_matrix = movie_matrix / np.sqrt(np.sum(np.square(movie_matrix), axis=1, keepdims=True))
    similar =  pd.Series((movie_matrix@movie_vec.T).flatten(), index=movies.index)
    prob = similar.sort_values(ascending=False)[1:1+top_k]
    recom_index = np.random.choice(prob.index, size=recom_size, replace=False, p=(prob/sum(prob)).values)
    print('Current movie: {}'.format(movie_id), movies.loc[movie_id, 'Title'], movies.loc[movie_id, 'Genres'])
    print('Similar movies:')
    for i in recom_index:
        print(i, movies.loc[i, 'Title'], movies.loc[i, 'Genres'],
              'similarity: {}'.format(round(similar[i], 2)))

# Recommend movies the given user may like
def recommend_your_favorite_movie(user_id, users, movies, top_k=20, recom_size=5):
    user_vec = np.asarray([users.loc[user_id, 'feat_vect']])
    movie_matrix = np.asarray([row for row in movies['feat_vect']])
    ratings = pd.Series((movie_matrix@user_vec.T).flatten(), index = movies.index)
    prob = ratings.sort_values(ascending=False)[:top_k]
    recom_index = np.random.choice(prob.index, size=recom_size, replace=False, p=(prob/sum(prob)).values)
    print('Recommended for you:')
    for i in recom_index:
        print(i, movies.loc[i, 'Title'], movies.loc[i, 'Genres'],
              'predicted preference: {}'.format(round(ratings[i], 2)))

# Recommend users who like the given movie, plus other movies those users like
def recommend_other_favorite_movie(movie_id, users, movies, top_k=20, recom_size=5):
    movie_vec = np.asarray([movies.loc[movie_id, 'feat_vect']])
    user_matrix = np.asarray([row for row in users['feat_vect']])
    movie_matrix = np.asarray([row for row in movies['feat_vect']])
    users_ratings = pd.Series((user_matrix@movie_vec.T).flatten(), index=users.index)
    user_top_ratings = users_ratings.sort_values(ascending=False)[:top_k]
    user_recom_id = np.random.choice(user_top_ratings.index, size=recom_size, replace=False,
                                   p=(user_top_ratings/sum(user_top_ratings)).values)
    user_top_matrix = np.asarray([row for row in users.loc[user_top_ratings.index, 'feat_vect']])
    # negate so argpartition puts each user's 10 highest-scored movies first
    movie_top_index = pd.Series(np.argpartition(-(movie_matrix@user_top_matrix.T), 10, axis=0)[0:10].flatten()).value_counts()
    movie_recom_index = np.random.choice(movie_top_index.index, size=recom_size, replace=False,
                                   p=(movie_top_index/sum(movie_top_index)).values)
    print('The movie you watched:', movie_id, movies.loc[movie_id].Title, movies.loc[movie_id].Genres)
    print('Users who like this movie:')
    for i in user_recom_id:
        print(i, users.loc[i, 'Gender'], users.loc[i, 'Age'])
    print('People who like this movie also like:')
    for i in movie_recom_index:
        print(movies.index[i], movies.iloc[i].Title, movies.iloc[i].Genres)

recommend_same_type_movie(1401, movies_with_feat_vector)
recommend_your_favorite_movie(1041, users_with_feat_vector, movies_with_feat_vector)
recommend_other_favorite_movie(1401, users_with_feat_vector, movies_with_feat_vector)

8 Sample Output

The movie you watched: 1401 Ghosts of Mississippi (1996) Drama
Users who like this movie:
2867 M 45
3902 M 25
2155 F 1
215 M 35
283 M 25
People who like this movie also like:
1323 Amityville 3-D (1983) Horror
1383 Adrenalin: Fear the Rush (1996) Action|Sci-Fi
3574 Carnosaur 3: Primal Species (1996) Horror|Sci-Fi
810 Kazaam (1996) Children's|Comedy|Fantasy
1495 Turbo: A Power Rangers Movie (1997) Action|Adventure|Children's

9 Reference

A movie recommendation system trained on the MovieLens dataset
