In this post, we show how to use contextual features related to users and items [1, 2]. To this end, we consider one contextual feature for users and one for items:
- User model: in addition to the user id embedding, we use the timestamps of movie ratings, both as discretized (bucketized) embeddings and as normalized continuous values.
- Movie model: in addition to the movie title embedding, we also embed the raw text of the movie title.
In the following, we briefly compare three versions of the retrieval model by enabling or disabling these optional contextual features:
- Pure user and movie models without any contextual features
- User model with the consideration of timestamps
- Using all contextual features for both user and movie models
Load the dataset
from typing import Dict, Text  # for type hints
import os
import pprint
import numpy as np
import tempfile
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
print(tf.__version__)
print(tfrs.__version__)
Output:
2.9.1
v0.7.0
# Load the movielens dataset
ratings = tfds.load('movielens/100k-ratings', split='train')
ratings = ratings.map(lambda x: {
    'movie_title': x['movie_title'],
    'user_id': x['user_id'],
    'user_rating': x['user_rating'],
    'timestamp': x['timestamp']
})
movies = tfds.load('movielens/100k-movies', split='train')
movies = movies.map(lambda x: x['movie_title'])
timestamps = np.concatenate(list(
    ratings.map(lambda x: x['timestamp']).batch(100)))
max_timestamp = timestamps.max()
min_timestamp = timestamps.min()
timestamp_buckets = np.linspace(
    min_timestamp, max_timestamp, num=1000)
unique_movie_titles = np.unique(np.concatenate(list(movies.batch(1000))))
unique_user_ids = np.unique(np.concatenate(list(ratings.batch(1_000).map(
    lambda x: x['user_id']))))
print(len(unique_movie_titles), len(unique_user_ids))
Output:
1664 943
We have 1664 unique movies and 943 unique users in the dataset.
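To make the bucketization concrete, here is a minimal sketch (the timestamp value is just an illustrative input within the dataset's range) showing how the Discretization layer, used in the user model below, maps a raw timestamp to one of the bucket indices:
# Minimal sketch: bucketize a raw rating timestamp.
# timestamp_buckets holds the 1,000 boundaries computed above,
# so the resulting indices range from 0 to 1,000.
discretize = tf.keras.layers.Discretization(timestamp_buckets.tolist())
print(discretize(tf.constant([879024327])))  # a single bucket index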
User model
As shown below, the use_timestamps parameter controls whether the contextual feature (timestamps) is used.
class UserModel(tf.keras.Model):
    # User embedding will be user id + ts + normalized ts embeddings
    def __init__(self, use_timestamps):
        super().__init__()
        self._use_timestamps = use_timestamps
        # User id embedding
        self.user_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_user_ids,
                mask_token=None),
            tf.keras.layers.Embedding(len(unique_user_ids) + 1, 32)
        ])
        if use_timestamps:
            # Bucketized timestamp embedding
            self.timestamp_embedding = tf.keras.Sequential([
                tf.keras.layers.Discretization(timestamp_buckets.tolist()),
                tf.keras.layers.Embedding(len(timestamp_buckets) + 1, 32)
            ])
            # Normalized (continuous) timestamp
            self.normalized_timestamp = tf.keras.layers.Normalization(axis=None)
            self.normalized_timestamp.adapt(timestamps)

    def call(self, inputs):
        if not self._use_timestamps:
            return self.user_embedding(inputs['user_id'])
        return tf.concat([
            self.user_embedding(inputs['user_id']),
            self.timestamp_embedding(inputs['timestamp']),
            tf.reshape(self.normalized_timestamp(inputs['timestamp']), (-1, 1))
        ], axis=1)
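As a quick sanity check (a minimal sketch), with timestamps enabled the user representation concatenates a 32-d id embedding, a 32-d bucketized-timestamp embedding, and a 1-d normalized timestamp, giving a 65-d vector:
# Minimal sketch: check the width of the concatenated user embedding.
user_model = UserModel(use_timestamps=True)
for batch in ratings.batch(1).take(1):
    print(user_model(batch).shape)  # (1, 65) = 32 (id) + 32 (bucket) + 1 (normalized)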
Movie model
Similar to the user model, the use_title_text parameter controls whether the contextual feature (title text) is used.
class MovieModel(tf.keras.Model):
    # Movie embedding: title text + id
    def __init__(self, use_title_text):
        super().__init__()
        max_tokens = 10_000
        self._use_title_text = use_title_text
        self.title_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_movie_titles, mask_token=None),
            tf.keras.layers.Embedding(len(unique_movie_titles) + 1, 32)
        ])
        if use_title_text:
            self.title_vectorizer = tf.keras.layers.TextVectorization(
                max_tokens=max_tokens)
            self.title_vectorizer.adapt(movies)
            self.title_text_embedding = tf.keras.Sequential([
                self.title_vectorizer,
                tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True),
                tf.keras.layers.GlobalAveragePooling1D()
            ])

    def call(self, inputs):
        if not self._use_title_text:
            return self.title_embedding(inputs)
        return tf.concat([
            self.title_embedding(inputs),
            self.title_text_embedding(inputs)
        ], axis=1)
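Likewise (a minimal sketch), with title text enabled the movie representation is the 32-d title embedding concatenated with the 32-d averaged text embedding, giving a 64-d vector:
# Minimal sketch: check the width of the concatenated movie embedding.
movie_model = MovieModel(use_title_text=True)
for titles in movies.batch(1).take(1):
    print(movie_model(titles).shape)  # (1, 64) = 32 (title id) + 32 (title text)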
Movielens model
Finally, we define the MovielensModel by composing the user and movie models defined above and implementing the compute_loss function.
class MovielensModel(tfrs.models.Model):
    def __init__(self, use_timestamps, use_title_text):
        super().__init__()
        self.query_model = tf.keras.Sequential([
            UserModel(use_timestamps),
            tf.keras.layers.Dense(32)
        ])
        self.candidate_model = tf.keras.Sequential([
            MovieModel(use_title_text),
            tf.keras.layers.Dense(32)
        ])
        # Define task
        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=movies.batch(128).map(self.candidate_model),
            )
        )

    # Define compute loss
    def compute_loss(self, features, training=False):
        query_embeddings = self.query_model({
            'user_id': features['user_id'],
            'timestamp': features['timestamp']
        })
        movie_embeddings = self.candidate_model(features['movie_title'])
        return self.task(query_embeddings, movie_embeddings)
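Note that both towers end with a Dense(32) layer, so the query and candidate embeddings are projected into the same 32-d space no matter which contextual features are enabled. A minimal sketch to confirm the shapes:
# Minimal sketch: both towers project to the same 32-d space.
demo_model = MovielensModel(use_timestamps=True, use_title_text=True)
for batch in ratings.batch(2).take(1):
    q = demo_model.query_model({'user_id': batch['user_id'],
                                'timestamp': batch['timestamp']})
    c = demo_model.candidate_model(batch['movie_title'])
    print(q.shape, c.shape)  # (2, 32) (2, 32)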
Experiment
Now, we are ready to run some experiments to compare the three models mentioned at the beginning of this post:
- Pure user and movie models without any contextual features
- User model with the consideration of timestamps
- Using all contextual features for both user and movie models
# -------------------------------
# Experiment
# -------------------------------
# Prepare data
tf.random.set_seed(7)
shuffled = ratings.shuffle(100_000, seed=7,
                           reshuffle_each_iteration=False)
train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)
cached_train = train.shuffle(100_000).batch(2048).cache()
cached_test = test.batch(4096).cache()
Baseline 1: user model without timestamp features, movie model without title text
# Baseline: user model without timestamp features, movie model without title text
model = MovielensModel(use_timestamps=False, use_title_text=False)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
model.fit(cached_train, epochs=3)
train_acc = model.evaluate(
    cached_train, return_dict=True)['factorized_top_k/top_100_categorical_accuracy']
test_acc = model.evaluate(
    cached_test, return_dict=True)['factorized_top_k/top_100_categorical_accuracy']
print(f'Top-100 accuracy (train): {train_acc:.2f}')
print(f'Top-100 accuracy (test): {test_acc:.2f}')
Output:
Top-100 accuracy (train): 0.27
Top-100 accuracy (test): 0.21
Model 2: user model with timestamp features, movie model without title text
tf.keras.backend.clear_session()
model = MovielensModel(use_timestamps=True, use_title_text=False)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
model.fit(cached_train, epochs=3)
train_acc = model.evaluate(
    cached_train, return_dict=True)['factorized_top_k/top_100_categorical_accuracy']
test_acc = model.evaluate(
    cached_test, return_dict=True)['factorized_top_k/top_100_categorical_accuracy']
print(f'Top-100 accuracy (train): {train_acc:.2f}')
print(f'Top-100 accuracy (test): {test_acc:.2f}')
Output:
Top-100 accuracy (train): 0.34
Top-100 accuracy (test): 0.24
Model 3: user model with timestamp features, movie model with title text
Finally, we use both the timestamps in the user model and the movie title text in the movie model.
tf.keras.backend.clear_session()
model = MovielensModel(use_timestamps=True, use_title_text=True)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
model.fit(cached_train, epochs=3)
train_acc = model.evaluate(
    cached_train, return_dict=True)['factorized_top_k/top_100_categorical_accuracy']
test_acc = model.evaluate(
    cached_test, return_dict=True)['factorized_top_k/top_100_categorical_accuracy']
print(f'Top-100 accuracy (train): {train_acc:.2f}')
print(f'Top-100 accuracy (test): {test_acc:.2f}')
Output:
Top-100 accuracy (train): 0.35
Top-100 accuracy (test): 0.25
To keep the runtime short, we train for only 3 epochs, but you can certainly try larger values. Even under these settings, it is interesting to see the performance increase as the contextual features are enabled one by one.
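As a final hedged sketch (not part of the comparison above, but following the standard TFRS serving pattern), the trained towers can be wrapped in a brute-force index to retrieve titles for a (user id, timestamp) query; the user id and timestamp below are just illustrative inputs:
# Minimal sketch: build a retrieval index from the last trained model.
index = tfrs.layers.factorized_top_k.BruteForce(model.query_model)
index.index_from_dataset(
    movies.batch(128).map(lambda title: (title, model.candidate_model(title))))
# Query with a known user id and an illustrative timestamp (int64, as in the data).
scores, titles = index({
    'user_id': tf.constant(['42']),
    'timestamp': tf.constant([879024327], dtype=tf.int64)})
print(titles[0, :3])  # top-3 recommended titles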
More TFRS tutorials can be found at https://parklize.blogspot.com/p/tensorflow.html
[1] YouTube: Leveraging context features and multitask learning (Building recommendation systems with TensorFlow).
[2] TFRS tutorial: Taking advantage of context features.