# Load the "org.grouplens.movielens.small" dataset and keep only the ratings
# whose user AND movie each have more than 10 ratings in the raw ratings file.
import torch
from datamaestro import prepare_dataset
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.metrics import auc
from sklearn.metrics import roc_auc_score

# Resolve the dataset through datamaestro, then read its two CSV files.
data_files = prepare_dataset("org.grouplens.movielens.small")
df_rate = pd.read_csv(data_files.ratings.path)
dmovies = pd.read_csv(data_files.movies.path)

# Ids of the movies / users whose non-null "rating" count is strictly above 10.
# Both selections are computed on the unfiltered ratings table.
movies_sel = (
    df_rate.groupby("movieId")
    .count()
    .query("rating>10")
    .index.values
)
users_sel = (
    df_rate.groupby("userId")
    .count()
    .query("rating>10")
    .index.values
)

# Row mask: True where both the user and the movie belong to the selections.
couples_sel = (
    df_rate["userId"].isin(users_sel)
    & df_rate["movieId"].isin(movies_sel)
)

# Filtered ratings, without the "timestamp" column.
ratings = df_rate.loc[couples_sel].drop(columns="timestamp")

# TODO: consider using groupby and aggregate ...