Pandas ValueError: Невозможно установить кадр без определенного индекса и значение, которое нельзя преобразовать в серию. - PullRequest
0 голосов
/ 01 февраля 2020

Я пробую рекомендованную систему, основанную на знаниях, в python, ссылаясь на книгу Рунака Баника "Руки на системах рекомендаций с Python". У нас есть набор данных фильмов IMDB.

Я получаю ошибку при окончательном выводе. Пожалуйста, смотрите ниже весь мой код, и в функции построения диаграммы я получаю сообщение об ошибке. Пожалуйста, помогите мне решить эту проблему. Спасибо.

Мне не удалось найти ответ на аналогичный вопрос на этой платформе. Поэтому я разместил новый вопрос.

import pandas as pd
import numpy as np

df = pd.read_csv('..../RecoSys/data/movies_metadata.csv')
#print all the features(or columns) of the dataFrame
df.columns

#only keep those that we require
df = df[['title', 'genres', 'release_date', 'runtime', 'vote_average', 'vote_count']]
df.head()
#convert release_date into pandas datetime format
df['release_date'] = pd.to_datetime(df['release_date'], errors='coerce')
df['year'] = df['release_date'].apply(lambda x: str(x).split('-')[0] if x!=np.nan else np.nan)
#Helper function to convert NaT to 0 and all other years to integers.

def convert_int(x):
    try:
        return int(x)
    except:
        return 0
#Apply convert_int to the year feature
df['year'] = df['year'].apply(convert_int)

#Drop the release_date column
df = df.drop('release_date', axis=1)

#Display the dataframe
df.head()

#Print genres of the first movie
df.iloc[0]['genres']

#Import the literal_eval function from ast
from ast import literal_eval
import json

#Define a stringified list and output its type
a = "[1,2,3]"
print(type(a))

#Apply literal_eval and output type
b = literal_eval(a)
print(type(b))

#Convert all NaN into stringified empty lists
df['genres'] = df['genres'].fillna('[]')

#Apply literal_eval to convert to the list object
df['genres'] = df['genres'].apply(literal_eval)
#df['genres'] = json.loads(df['genres'])

#Convert list of dictionaries to a list of strings
df['genres'] = df['genres'].apply(lambda x: [i['name'] for i in x] if isinstance(x, list) else [])
df.head()

#Create a new feature by exploding genres
s = df.apply(lambda x: pd.Series(x['genres']),axis=1).stack().reset_index(level=1, drop=True)

#Name the new feature as 'genre'
s.name = 'genre'

#Create a new dataframe gen_df which by dropping the old 'genres' feature and adding the new 'genre'.
gen_df = df.drop('genres', axis=1).join(s)

#Print the head of the new gen_df
gen_df.head(15)

def build_chart(gen_df, percentile=0.8):
    #Ask for preferred genres
    print("Please Input preferred genre")
    genre = input()

    #Ask for lower limit of duration
    print("Please Input shortest duration")
    low_time = int(input())

    #Ask for upper limit of duration
    print("Please Input Longesr Duration")
    high_time = int(input())

    #Ask for lower limit of timeline
    print("Input earliest year")
    low_year = int(input())

    #Ask for upper limit of timeline
    print("Input latest year")
    high_year = int(input())

    #Define a new movies variable to store the preferred movies. Copy the contents of gen_df to movies
    movies = gen_df.copy()

    #Filter based on the condition
    movies = movies[(movies['genre'] == genre) & 
                    (movies['runtime'] >= low_time) & 
                    (movies['runtime'] <= high_time) & 
                    (movies['year'] >= low_year) & 
                    (movies['year'] <= high_year)]

    #Compute the values of C and m for the filtered movies
    C = movies['vote_average'].mean()
    m = movies['vote_count'].quantile(percentile)

    #Only consider movies that have higher than m votes. Save this in a new dataframe q_movies
    q_movies = movies.copy().loc[movies['vote_count'] >= m]

    #Calculate score using the IMDB formula
    q_movies['score'] = q_movies.apply(lambda x: (x['vote_count']/(x['vote_count']+m) * x['vote_average']) 
                                       + (m/(m+x['vote_count']) * C), axis=1)
    #Sort movies in descending order of their scores
    q_movies = q_movies.sort_values('score', ascending=False)

    return q_movies

build_chart(gen_df).head()
Please Input preferred genre
animation
Please Input shortest duration
30
Please Input Longesr Duration
120
Input earliest year
1990
Input latest year
2005

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\frame.py in _ensure_valid_index(self, value)
   3423             try:
-> 3424                 value = Series(value)
   3425             except (ValueError, NotImplementedError, TypeError):

~\Anaconda3\lib\site-packages\pandas\core\series.py in __init__(self, data, index, dtype, name, copy, fastpath)
    263 
--> 264                 data = SingleBlockManager(data, index, fastpath=True)
    265 

~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in __init__(self, block, axis, do_integrity_check, fastpath)
   1480         if not isinstance(block, Block):
-> 1481             block = make_block(block, placement=slice(0, len(axis)), ndim=1)
   1482 

~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in make_block(values, placement, klass, ndim, dtype, fastpath)
   3094 
-> 3095     return klass(values, ndim=ndim, placement=placement)
   3096 

~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in __init__(self, values, placement, ndim)
   2630         super(ObjectBlock, self).__init__(values, ndim=ndim,
-> 2631                                           placement=placement)
   2632 

~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in __init__(self, values, placement, ndim)
     86                 'Wrong number of items passed {val}, placement implies '
---> 87                 '{mgr}'.format(val=len(self.values), mgr=len(self.mgr_locs)))
     88 

ValueError: Wrong number of items passed 6, placement implies 0

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-53-3c3d1bc1cf24> in <module>
     45     return q_movies
     46 
---> 47 build_chart(gen_df).head()

<ipython-input-53-3c3d1bc1cf24> in build_chart(gen_df, percentile)
     39     #Calculate score using the IMDB formula
     40     q_movies['score'] = q_movies.apply(lambda x: (x['vote_count']/(x['vote_count']+m) * x['vote_average']) 
---> 41                                        + (m/(m+x['vote_count']) * C), axis=1)
     42     #Sort movies in descending order of their scores
     43     q_movies = q_movies.sort_values('score', ascending=False)

~\Anaconda3\lib\site-packages\pandas\core\frame.py in __setitem__(self, key, value)
   3368         else:
   3369             # set column
-> 3370             self._set_item(key, value)
   3371 
   3372     def _setitem_slice(self, key, value):

~\Anaconda3\lib\site-packages\pandas\core\frame.py in _set_item(self, key, value)
   3442         """
   3443 
-> 3444         self._ensure_valid_index(value)
   3445         value = self._sanitize_column(key, value)
   3446         NDFrame._set_item(self, key, value)

~\Anaconda3\lib\site-packages\pandas\core\frame.py in _ensure_valid_index(self, value)
   3424                 value = Series(value)
   3425             except (ValueError, NotImplementedError, TypeError):
-> 3426                 raise ValueError('Cannot set a frame with no defined index '
   3427                                  'and a value that cannot be converted to a '
   3428                                  'Series')

ValueError: Cannot set a frame with no defined index and a value that cannot be converted to a Series

1 Ответ

0 голосов
/ 04 февраля 2020

enter image description here

Кажется, я ввел неправильные данные, поэтому я и обнаружил эту ошибку. Ниже приведен снимок кода результатов.

Это вводы с учетом регистра. Вот почему у меня возникла эта проблема.

enter image description here

...