
Merge y_reco_list and y_true_list to the original test dataset in Evaluate.py

See original GitHub issue

Please find the updated evaluate.py script below:

import numbers
import numpy as np
import pandas as pd
from sklearn.metrics import (
    mean_squared_error,
    mean_absolute_error,
    r2_score,
    log_loss,
    balanced_accuracy_score,
    roc_auc_score,
    precision_recall_curve,
    auc
)
from ..data import TransformedSet
from .metrics import precision_at_k, recall_at_k, map_at_k, ndcg_at_k
from .metrics import POINTWISE_METRICS, LISTWISE_METRICS, ALLOWED_METRICS
from .computation import (
    compute_preds,
    compute_probs,
    compute_recommends,
    build_transformed_data
)


class EvalMixin(object):
    def __init__(self, task, data_info, eval_class=None):
        self.task = task
        self.n_users = data_info.n_users
        self.n_items = data_info.n_items
        self.eval_class = eval_class
        # added: DataFrames to accumulate evaluation results across epochs
        self.df = pd.DataFrame()
        self.rec_results = pd.DataFrame()
        self.true_results = pd.DataFrame()
        self.df_batch = pd.DataFrame()
        self.epoch_num = -1

    def _check_metrics(self, metrics, k):
        if not isinstance(metrics, (list, tuple)):
            metrics = [metrics]
        if self.task == "rating":
            for m in metrics:
                if m not in ALLOWED_METRICS["rating_metrics"]:
                    raise ValueError(
                        f"metrics {m} is not suitable for rating task...")
        elif self.task == "ranking":
            for m in metrics:
                if m not in ALLOWED_METRICS["ranking_metrics"]:
                    raise ValueError(
                        f"metrics {m} is not suitable for ranking task...")

        if not isinstance(k, numbers.Integral):
            raise TypeError("k must be integer")

        return metrics

    def print_metrics(self, train_data=None, eval_data=None, metrics=None,
                      eval_batch_size=8192, k=3, sample_user_num=2048,
                      **kwargs):
        if not metrics:
            metrics = ["loss"]
        metrics = self._check_metrics(metrics, k)
        seed = kwargs.get("seed", 42)
        if "eval_batch_size" in kwargs:
            eval_batch_size = kwargs["eval_batch_size"]
        if "k" in kwargs:
            k = kwargs["k"]
        if "sample_user_num" in kwargs:
            sample_user_num = kwargs["sample_user_num"]

        if self.task == "rating":
            if train_data:
                y_pred, y_true = compute_preds(
                    self, train_data, eval_batch_size
                )
                # y_true = train_data.labels
                print_metrics_rating(
                    metrics, y_true, y_pred, train=True, **kwargs)
            if eval_data:
                y_pred, y_true = compute_preds(
                    self, eval_data, eval_batch_size
                )
                # y_true = eval_data.labels
                print_metrics_rating(
                    metrics, y_true, y_pred, train=False, **kwargs)

        elif self.task == "ranking":
            if train_data:
                train_params = dict()
                if POINTWISE_METRICS.intersection(metrics):
                    (train_params["y_prob"],
                     train_params["y_true"]) = compute_probs(
                        self, train_data, eval_batch_size
                    )
                    # train_params["y_true"] = train_data.labels

                print_metrics_ranking(self, self.df, metrics, **train_params, train=True)

            if eval_data:
                test_params = dict()
                if POINTWISE_METRICS.intersection(metrics):
                    (test_params["y_prob"],
                     test_params["y_true"]) = compute_probs(
                        self, eval_data, eval_batch_size
                    )
                    # test_params["y_true"] = eval_data.labels

                if LISTWISE_METRICS.intersection(metrics):
                    chosen_users = sample_user(
                        eval_data, seed, sample_user_num)
                    (test_params["y_reco_list"],
                     test_params["users"]) = compute_recommends(
                        self, chosen_users, k)
                    test_params["y_true_list"] = eval_data.user_consumed

                # print_metrics_ranking(metrics, **test_params, k=k, train=False)
                ###########################################################
                # Save to disk here, to be merged with the original test set
                self.df, self.true_results, self.rec_results = print_metrics_ranking(
                    self, self.df, metrics, **test_params, k=k, train=False)
                self.df.to_parquet('C:\\Projects\\RecommendationModel\\LibRecommender-master\\LibRec\\output\\evalresults.par', engine='pyarrow', compression='snappy', index=False)
                self.true_results.to_parquet('C:\\Projects\\RecommendationModel\\LibRecommender-master\\LibRec\\output\\testresults.par', engine='pyarrow', compression='snappy', index=False)
                self.rec_results.to_parquet('C:\\Projects\\RecommendationModel\\LibRecommender-master\\LibRec\\output\\recresults.par', engine='pyarrow', compression='snappy', index=False)
 

def sample_user(data, seed, num):
    np.random.seed(seed)
    unique_users = np.unique(data.user_indices)
    if isinstance(num, numbers.Integral) and num < len(unique_users):
        # noinspection PyTypeChecker
        users = np.random.choice(unique_users, num, replace=False)
    else:
        users = unique_users
    if isinstance(users, np.ndarray):
        users = list(users)
    return users


def evaluate(model, data, eval_batch_size=8192, metrics=None, k=3,
             sample_user_num=2048, neg_sample=False, update_features=False,
             **kwargs):
    seed = kwargs.get("seed", 42)
    if isinstance(data, pd.DataFrame):
        data = build_transformed_data(
            model, data, neg_sample, update_features, seed
        )
    assert isinstance(data, TransformedSet), (
        "The data from evaluation must be TransformedSet object."
    )
    if not metrics:
        metrics = ["loss"]
    metrics = model._check_metrics(metrics, k)
    eval_result = dict()

    if model.task == "rating":
        y_pred, y_true = compute_preds(model, data, eval_batch_size,)
        for m in metrics:
            if m in ["rmse", "loss"]:
                eval_result[m] = np.sqrt(
                    mean_squared_error(y_true, y_pred))
            elif m == "mae":
                eval_result[m] = mean_absolute_error(y_true, y_pred)
            elif m == "r2":
                eval_result[m] = r2_score(y_true, y_pred)

    elif model.task == "ranking":
        if POINTWISE_METRICS.intersection(metrics):
            y_prob, y_true = compute_probs(model, data, eval_batch_size,)
        if LISTWISE_METRICS.intersection(metrics):
            chosen_users = sample_user(data, seed, sample_user_num)
            y_reco_list, users = compute_recommends(model, chosen_users, k)
            y_true_list = data.user_consumed

        for m in metrics:
            if m in ["log_loss", "loss"]:
                eval_result[m] = log_loss(y_true, y_prob, eps=1e-7)
            elif m == "balanced_accuracy":
                y_pred = np.round(y_prob)
                eval_result[m] = balanced_accuracy_score(y_true, y_pred)
            elif m == "roc_auc":
                eval_result[m] = roc_auc_score(y_true, y_prob)
            elif m == "pr_auc":
                precision, recall, _ = precision_recall_curve(y_true,
                                                              y_prob)
                eval_result[m] = auc(recall, precision)
            elif m == "precision":
                eval_result[m] = precision_at_k(y_true_list,
                                                y_reco_list,
                                                users, k)
            elif m == "recall":
                eval_result[m] = recall_at_k(y_true_list,
                                             y_reco_list,
                                             users, k)
            elif m == "map":
                eval_result[m] = map_at_k(y_true_list,
                                          y_reco_list,
                                          users, k)
            elif m == "ndcg":
                eval_result[m] = ndcg_at_k(y_true_list,
                                           y_reco_list,
                                           users, k)

    return eval_result


def print_metrics_rating(metrics, y_true, y_pred, train=True, **kwargs):
    if kwargs.get("lower_bound") and kwargs.get("upper_bound"):
        lower_bound, upper_bound = (
            kwargs.get("lower_bound"), kwargs.get("upper_bound"))
        y_pred = np.clip(y_pred, lower_bound, upper_bound)
    if train:
        for m in metrics:
            if m in ["rmse", "loss"]:
                rmse = np.sqrt(mean_squared_error(y_true, y_pred))
                print(f"\t train rmse: {rmse:.4f}")
    else:
        for m in metrics:
            if m in ["rmse", "loss"]:
                rmse = np.sqrt(mean_squared_error(y_true, y_pred))
                print(f"\t eval rmse: {rmse:.4f}")
            elif m == "mae":
                mae = mean_absolute_error(y_true, y_pred)
                print(f"\t eval mae: {mae:.4f}")
            elif m == "r2":
                r_squared = r2_score(y_true, y_pred)
                print(f"\t eval r2: {r_squared:.4f}")


def print_metrics_ranking(self, df, metrics, y_prob=None, y_true=None, y_reco_list=None,
                          y_true_list=None, users=None, k=3, train=True):
    eval_df = pd.DataFrame()
    self.epoch_num = self.epoch_num + 1
    if train:
        for m in metrics:
            if m in ["log_loss", "loss"]:
                log_loss_ = log_loss(y_true, y_prob, eps=1e-7)
                print(f"\t train log_loss: {log_loss_:.4f}")
                eval_df['log_loss'] = [log_loss_]
    else:
        for m in metrics:
            if m in ["log_loss", "loss"]:
                log_loss_ = log_loss(y_true, y_prob, eps=1e-7)
                print(f"\t eval log_loss: {log_loss_:.4f}")
                eval_df['log_loss'] = [log_loss_]
            elif m == "balanced_accuracy":
                y_pred = np.round(y_prob)
                accuracy = balanced_accuracy_score(y_true, y_pred)
                print(f"\t eval balanced accuracy: {accuracy:.4f}")
                eval_df['accuracy'] = [round(accuracy, 4)]
            elif m == "roc_auc":
                roc_auc = roc_auc_score(y_true, y_prob)
                print(f"\t eval roc_auc: {roc_auc:.4f}")
                eval_df['roc_auc'] = [round(roc_auc, 4)]
            elif m == "pr_auc":
                precision, recall, _ = precision_recall_curve(y_true, y_prob)
                pr_auc = auc(recall, precision)
                print(f"\t eval pr_auc: {pr_auc:.4f}")
                eval_df['pr_auc'] = [round(pr_auc, 4)]
            elif m == "precision":
                precision_all = precision_at_k(y_true_list, y_reco_list,
                                               users, k)
                print(f"\t eval precision@{k}: {precision_all:.4f}")
                eval_df['precision_all_@' + str(k)] = [round(precision_all, 4)]
            elif m == "recall":
                recall_all = recall_at_k(y_true_list, y_reco_list, users, k)
                print(f"\t eval recall@{k}: {recall_all:.4f}")
                eval_df['recall_all_@' + str(k)] = [round(recall_all, 4)]
            elif m == "map":
                map_all = map_at_k(y_true_list, y_reco_list, users, k)
                print(f"\t eval map@{k}: {map_all:.4f}")
                eval_df['map_all_@' + str(k)] = [round(map_all, 4)]
            elif m == "ndcg":
                ndcg_all = ndcg_at_k(y_true_list, y_reco_list, users, k)
                print(f"\t eval ndcg@{k}: {ndcg_all:.4f}")
                eval_df['ndcg_all_@' + str(k)] = [round(ndcg_all, 4)]
                
        ######################################################
        # Saving y_true_list and y_reco_list
        df = pd.concat([df, eval_df], ignore_index=True)

        rec_list = pd.DataFrame.from_dict(y_reco_list, orient="index").reset_index()
        print(rec_list)

        true_list = pd.DataFrame.from_dict(y_true_list, orient="index").reset_index()
        print(true_list)

        r_list = ['user']
        t_list = ['user']

        for i in range(1, len(rec_list.columns)):
            r_list.append('rec_' + str(i))

        for i in range(1, len(true_list.columns)):
            t_list.append('true_' + str(i))

        rec_list.columns = r_list
        true_list.columns = t_list

        self.rec_results = pd.concat([self.rec_results, rec_list], ignore_index=True)
        self.rec_results['Epoch_num'] = self.epoch_num

        self.true_results = pd.concat([self.true_results, true_list], ignore_index=True)
        print(self.true_results)
        print(self.rec_results)
        # print(self.rec_results.groupby(["user"]).count())
    return df, self.true_results, self.rec_results

I am currently trying to map y_reco_list and y_true_list back to the original test dataset using the user id. However, after printing the results, I noticed that each of y_reco_list and y_true_list has an index, and realised that this column is not the user id. How does one map back to the user id?

The other thing I noticed is that the index columns of y_reco_list and y_true_list do not match. Would this not produce incorrect results, since they are not the same sample? The print output is shown in the attached screenshots (t1, t2).
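
A minimal, self-contained pandas sketch of the mismatch described above, using made-up toy dicts rather than the library's actual output: y_reco_list only covers the sampled users, while y_true_list covers every user in the evaluation data, so converting each dict to a DataFrame independently yields rows in different orders and counts.

import pandas as pd

# Hypothetical toy data: y_reco_list covers only the sampled users,
# while y_true_list covers every user in the evaluation set.
y_reco_list = {7: [101, 205, 330], 3: [12, 44, 90]}
y_true_list = {1: [55, 60], 3: [12, 99], 7: [101, 77], 9: [5]}

rec_df = pd.DataFrame.from_dict(y_reco_list, orient="index").reset_index()
true_df = pd.DataFrame.from_dict(y_true_list, orient="index").reset_index()

print(rec_df)   # "index" column holds 7, 3       (2 rows)
print(true_df)  # "index" column holds 1, 3, 7, 9 (4 rows)

# Row i of rec_df and row i of true_df describe different users, so any
# row-wise comparison mixes samples; pairing has to happen by key (user).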

Issue Analytics

  • State: closed
  • Created: 3 years ago
  • Comments: 5 (2 by maintainers)

Top GitHub Comments

1 reaction
massquantity commented on Feb 25, 2021

Same: y_true_list has all the users in eval_data, while y_reco_list only has the sampled users. You can let the function return y_true_list and y_reco_list, then use a for loop to collect the results. y_true_list and y_reco_list are both Python dicts.

users, true_result, reco_result = [], [], []
for user in y_reco_list:
    users.append(user)
    true_result.append(y_true_list[user])
    reco_result.append(y_reco_list[user])

0 reactions
Shadz13 commented on Feb 25, 2021

I revised the code above; the updated snippet, which resides in def print_metrics_ranking, is below:

        df = pd.concat([df, eval_df], ignore_index=True)

        user_, true_result, reco_result = [], [], []
        for user in y_reco_list:
            user_.append(user)
            true_result.append(y_true_list[user])
            reco_result.append(y_reco_list[user])

        # print(true_result)
        reco_result = pd.DataFrame(reco_result)
        # print(reco_result)
        true_result = pd.DataFrame(true_result)
        # print(true_result)

        r_list = []
        t_list = []

        for i in range(1, len(reco_result.columns) + 1):
            r_list.append('rec_' + str(i))

        for i in range(1, len(true_result.columns) + 1):
            t_list.append('true_' + str(i))

        reco_result.columns = r_list
        true_result.columns = t_list
        print(reco_result)
        print(true_result)

        reco_result['user'] = pd.DataFrame(user_)
        reco_result['Epoch_num'] = self.epoch_num
        self.rec_results = pd.concat([self.rec_results, reco_result], ignore_index=True)
        true_result['user'] = pd.DataFrame(user_)
        self.true_results = pd.concat([self.true_results, true_result], ignore_index=True)
    return df, self.true_results, self.rec_results
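
Toward the goal in the issue title (merging these results back into the original test set), a minimal sketch of the final join is shown below. It is only an illustration under assumptions: test_df, its 'user' column, and the file paths are made up, and it assumes the user values stored in rec_results / true_results are the same ids that appear in the test set; if the library has remapped raw ids to internal indices, they would need to be translated back first, which the thread does not settle.

import pandas as pd

# Hypothetical inputs: the original test set plus the frames written out
# by the modified print_metrics_ranking above.
test_df = pd.read_csv('test.csv')                  # assumed path and format
rec_results = pd.read_parquet('recresults.par')    # rec_1..rec_k, user, Epoch_num
true_results = pd.read_parquet('testresults.par')  # true_1..true_n, user

# Keep the last epoch's recommendations, then left-join both frames onto the
# test rows by user id, so every test row carries its reco and true lists.
last_epoch = rec_results['Epoch_num'].max()
merged = (
    test_df
    .merge(rec_results[rec_results['Epoch_num'] == last_epoch], on='user', how='left')
    .merge(true_results, on='user', how='left')
)
print(merged.head())

If a user appears in several test rows, the left join simply repeats that user's lists on each of their rows, which is usually what you want for per-interaction inspection.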