Замените значения столбца на основе списка словаря и укажите c условие даты - используйте if и для l oop - Pandas - PullRequest
0 голосов
/ 03 августа 2020

У меня есть df и список словарей, как показано ниже.

df:

 Date                  Tea_Good       Tea_bad    coffee_good      coffee_bad
2020-02-01             3              1           10                7
2020-02-02             3              1           10                7
2020-02-03             3              1           10                7
2020-02-04             3              1           10                7
2020-02-05             6              1           10                7
2020-02-06             6              2           10                11
2020-02-07             6              2           5                 11
2020-02-08             6              2           5                 11
2020-02-09             9              2           5                 11
2020-02-10             9              2           4                 11
2020-02-11             9              2           4                 11   
2020-02-12             9              2           4                 11         
2020-02-13             9              2           4                 11 
2020-02-14             9              2           4                 11

Dictionary:

rf = {
"tea": 
    [
      {
          "type": "df",
          "from": "2020-02-01T20:00:00.000Z",
          "to": "2020-02-03T20:00:00.000Z",
          "days":3,
          "coef":[0.1,0.1,0.1,0.1,0.1,0.1],
          "case":"bad"
      },
      {
          "type": "polynomial",
          "from": "2020-02-08T20:00:00.000Z",
          "to": "2020-02-10T20:00:00.000Z",
          "days":3,
          "coef":[0.1,0.1,0.1,0.1,0.1,0.1],
          "case":"good"
      },
      {
          "type": "linear",
          "from": "2020-02-01T20:00:00.000Z",
          "to": "2020-02-03T20:00:00.000Z",
          "days":3,
          "coef":[0.1,0.1,0.1,0.1,0.1,0.1],
          "case":"bad"
      },
      {
          "type": "constant",
          "from": "2020-02-04T20:00:00.000Z",
          "to": "2020-02-05T20:00:00.000Z",
          "days":2,
          "coef":[10,10,10,10,10,10],
          "case":"good"
      }],
  "coffee": [
          {
              "type": "quadratic",
              "from": "2020-02-01T20:00:00.000Z",
              "to": "2020-02-10T20:00:00.000Z",
              "days": 10,
              "coef": [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
              "case":"good"
          },
          {
              "type": "df",
              "from": "2020-02-11T20:00:00.000Z",
              "to": "2020-02-13T20:00:00.000Z",
              "days": 5,
              "coef": [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
              "case":"bad"
          },
          {
              "type": "linear",
              "from": "2020-02-01T20:00:00.000Z",
              "to": "2020-02-03T20:00:00.000Z",
              "days": 3,
              "coef": [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
              "case":"good"
          },
          {
              "type": "linear",
              "from": "2020-02-03T20:00:00.000Z",
              "to": "2020-02-06T20:00:00.000Z",
              "days": 4,
              "coef": [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
              "case":"bad"
          }
  ]
        }

Где мне нужно обновить столбцы на основе словарного значения. Значение по умолчанию end_date, start_date, n_days = 0, но для этого нам нужно как минимум 2. как ненулевое.

Если все не равны нулю, рассмотрите start_date и end_date и вычислите n_days, как показано ниже. n_days = end_date - start_date и обновить df.

Если любые два равны нулю, верните df, как и для этого условия.

Ниже приведены все условия, связанные с end_date, start_date и n_days

                if (start_date == 0) & (end_date == 0):
                    return df
                
                if (start_date == 0) & (end_date != 0) & (n_days == 0):
                    return df
                
                if (start_date != 0) & (end_date == 0) & (n_days == 0):
                    return df

                # if start date, end date and n_days are non zero then consider start date and n_days
                if (start_date != 0) & (end_date != 0) & (n_days != 0):
                    #n_days = (end_date - start_date).days
                    #n_days = (end_date - start_date).days
                    end_date = start_date + DT.timedelta(days=n_days)
                
                if (start_date != 0) & (end_date != 0) & (n_days == 0) :
                    n_days = (end_date - start_date)
                    print(f" n day = {n_days}")
                    end_date = end_date
                
                if (start_date != 0) & (end_date == 0) & (n_days != 0) :
                    #n_days = (end_date - start_date)
                    #print(f" n day = {n_days}")
                    end_date = start_date + DT.timedelta(days=n_days)
                    
                if (start_date == 0) & (end_date != 0) & (n_days != 0) :
                    start_date = end_date - DT.timedelta(days=n_days)
                    
                




                if (n_days != 0) & (start_date != 0):
                    end_date = start_date + DT.timedelta(days=n_days)

Я пробовал код ниже.

def rf_user_input(df, REQUEST_OBJ):
    '''
        This functions returns the tea_coffee dataframe with the user input functions for tea, coffee

        params: data : tea_coffee dataframe uploaded from user
                request_object_api: The api should contain the below params
                    start_date: start date of the user function for rf
                    end_date : end date of the user function for the rf
                    label : {'constant', 'linear', 'quadratic', 'polynomial', 'exponential', 'df'}
                    coef : list with 6 indexes [a0,a1,a2,a3,a4,a5]

        return: rf computed with user inputs
    '''
    # df.days.iloc[(df[df.Date==start_date].index[0])]
    df = df.sort_values(by='Date')
    df['days'] = (df['Date'] - df.at[0, 'Date']).dt.days + 1

    REQUIRED_KEYS = ["tea", "coffee"]

    for teacoffee_category in REQUIRED_KEYS:
        print(f" teacoffee_category - {teacoffee_category}")
        if teacoffee_category in REQUEST_OBJ.keys():
            param_obj_list = REQUEST_OBJ[teacoffee_category]

            for params_obj in param_obj_list:
                # Do the data processing
                goodbad_catgeory = params_obj['case']
                kind = teacoffee_category + '_' + goodbad_catgeory
                start_date, end_date, label, coef, n_days = params_obj['from'], params_obj['to'], params_obj['type'], \
                                                            params_obj['coef'], params_obj['days']

                start_date = DT.datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%S.%fZ")
                end_date = DT.datetime.strptime(end_date, "%Y-%m-%dT%H:%M:%S.%fZ")
                print(f" start date - {start_date}")
                print(f" end date - {end_date}")

                # Additional n_days code - Start
                first_date = df['Date'].min()
                period_days = (start_date - first_date)
                print(f" period day - {period_days}")
                # Additional n_days code - End

                # Checking 'start_date' , 'end_date' and 'n_days' conditions
                
                # If the start_date and end_date is null return the calibration df as it is
                if (start_date == 0) & (end_date == 0):
                    return df
                
                if (start_date == 0) & (end_date != 0) & (n_days == 0):
                    return df
                
                if (start_date != 0) & (end_date == 0) & (n_days == 0):
                    return df

                # if start date, end date and n_days are non zero then consider start date and n_days
                if (start_date != 0) & (end_date != 0) & (n_days != 0):
                    #n_days = (end_date - start_date).days
                    #n_days = (end_date - start_date).days
                    end_date = start_date + DT.timedelta(days=n_days)
                
                if (start_date != 0) & (end_date != 0) & (n_days == 0) :
                    n_days = (end_date - start_date)
                    print(f" n day = {n_days}")
                    end_date = end_date
                
                if (start_date != 0) & (end_date == 0) & (n_days != 0) :
                    #n_days = (end_date - start_date)
                    #print(f" n day = {n_days}")
                    end_date = start_date + DT.timedelta(days=n_days)
                    
                if (start_date == 0) & (end_date != 0) & (n_days != 0) :
                    start_date = end_date - DT.timedelta(days=n_days)
                    
                




                if (n_days != 0) & (start_date != 0):
                    end_date = start_date + DT.timedelta(days=n_days)

                    # If the start_date and end_date is null return the calibration df as it is

                if len(coef) == 6:
                        # Coefficients Index Initializations
                    a0 = coef[0]
                    a1 = coef[1]
                    a2 = coef[2]
                    a3 = coef[3]
                    a4 = coef[4]
                    a5 = coef[5]

                    # Constant
                    if label == 'constant':
                        if kind == 'tea_good':
                            df.loc[
                                (df['Date'] >= start_date) & (df['Date'] <= end_date), 'Tea_Good'] = a0 + (df['days']) - period_days
                        elif kind == 'tea_bad':
                            df.loc[
                                (df['Date'] >= start_date) & (df['Date'] <= end_date), 'Tea_bad'] = a0 + df['days'] - period_days
                        elif kind == 'coffee_good':
                            df.loc[
                                (df['Date'] >= start_date) & (df['Date'] <= end_date), 'coffee_good'] = a0 + df['days'] - period_days
                        elif kind == 'coffee_bad':
                            df.loc[
                                (df['Date'] >= start_date) & (df['Date'] <= end_date), 'coffee_bad'] = a0 + df['days'] - period_days

                    # Linear
                    if label == 'linear':
                        if kind == 'tea_good':
                            df.loc[
                                (df['Date'] >= start_date) & (df['Date'] <= end_date), 'Tea_Good'] = a0 + (
                                    a1 * ((df['days']) - period_days))
                        elif kind == 'tea_bad':
                            df.loc[
                                (df['Date'] >= start_date) & (df['Date'] <= end_date), 'Tea_bad'] = a0 + (
                                    a1 * ((df['days']) - period_days))
                        elif kind == 'coffee_good':
                            df.loc[
                                (df['Date'] >= start_date) & (df['Date'] <= end_date), 'coffee_good'] = a0 + (
                                    a1 * ((df['days']) - period_days))
                        elif kind == 'coffee_bad':
                            df.loc[
                                (df['Date'] >= start_date) & (df['Date'] <= end_date), 'coffee_bad'] = a0 + (
                                    a1 * ((df['days']) - period_days))

                    # Quadratic
                    if label == 'quadratic':
                        if kind == 'tea_good':
                            df.loc[
                                (df['Date'] >= start_date) & (df['Date'] <= end_date), 'Tea_Good'] = a0 + (
                                    a1 * ((df['days']) - period_days)) + (a2 * ((df['days']) - period_days) ** 2)
                        elif kind == 'tea_bad':
                            df.loc[
                                (df['Date'] >= start_date) & (df['Date'] <= end_date), 'Tea_bad'] = a0 + (
                                    a1 * ((df['days']) - period_days)) + (a2 * ((df['days']) - period_days) ** 2)
                        elif kind == 'coffee_good':
                            df.loc[
                                (df['Date'] >= start_date) & (df['Date'] <= end_date), 'coffee_good'] = a0 + (
                                    a1 * ((df['days']) - period_days)) + (a2 * ((df['days']) - period_days) ** 2)
                        elif kind == 'coffee_bad':
                            df.loc[
                                (df['Date'] >= start_date) & (df['Date'] <= end_date), 'coffee_bad'] = a0 + (
                                    a1 * ((df['days']) - period_days)) + (a2 * ((df['days']) - period_days) ** 2)

                    # Polynomial
                    if label == 'polynomial':
                        if kind == 'tea_good':
                            df.loc[
                                (df['Date'] >= start_date) & (df['Date'] <= end_date), 'Tea_Good'] = a0 + (
                                    a1 * ((df['days']) - period_days)) + (a2 * (
                                    (df['days']) - period_days) ** 2) + (a3 * (
                                    (df['days']) - period_days) ** 3) + (a4 * (
                                    (df['days']) - period_days) ** 4) + (a5 * ((df['days']) - period_days) ** 5)
                        elif kind == 'tea_bad':
                            df.loc[
                                (df['Date'] >= start_date) & (df['Date'] <= end_date), 'Tea_bad'] = a0 + (
                                    a1 * ((df['days']) - period_days)) + (a2 * (
                                    (df['days']) - period_days) ** 2) + (a3 * (
                                    (df['days']) - period_days) ** 3) + (a4 * (
                                    (df['days']) - period_days) ** 4) + (a5 * ((df['days']) - period_days) ** 5)
                        elif kind == 'coffee_good':
                            df.loc[
                                (df['Date'] >= start_date) & (df['Date'] <= end_date), 'coffee_good'] = a0 + (
                                    a1 * ((df['days']) - period_days)) + (a2 * (
                                    (df['days']) - period_days) ** 2) + (a3 * (
                                    (df['days']) - period_days) ** 3) + (a4 * (
                                    (df['days']) - period_days) ** 4) + (a5 * ((df['days']) - period_days) ** 5)
                        elif kind == 'coffee_bad':
                            df.loc[(df['Date'] >= start_date) & (df['Date'] <= end_date), 'coffee_bad'] = a0 + (
                                a1 * ((df['days']) - period_days)) +  (a2 * (
                                (df['days']) - period_days) ** 2) + (a3 * (
                                (df['days']) - period_days) ** 3) + (a4 * (
                                (df['days']) - period_days) ** 4) + (a5 * ((df['days']) - period_days) ** 5)

                    # Exponential
                    if label == 'exponential':
                        if kind == 'tea_good':
                            df.loc[(df['Date'] >= start_date) & (df['Date'] <= end_date), 'Tea_Good'] = np.exp(a0)
                        elif kind == 'tea_bad':
                            df.loc[(df['Date'] >= start_date) & (df['Date'] <= end_date), 'Tea_bad'] = np.exp(a0)
                        elif kind == 'coffee_good':
                            df.loc[(df['Date'] >= start_date) & (df['Date'] <= end_date), 'coffee_good'] = np.exp(a0)
                        elif kind == 'coffee_bad':
                            df.loc[(df['Date'] >= start_date) & (df['Date'] <= end_date), 'coffee_bad'] = np.exp(a0)

                    # Calibration File
                    if label == 'calibration_file':
                        pass
                    #                     return df
                else:
                    raise Exception('Coefficients index do not match. All values of coefficients should be passed')

            else:
                return df
    return df

И я столкнулся с ошибкой ниже после выполнения ниже.

df1 = rf_user_input(df, rf)

Я столкнулся с ошибкой ниже

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-146-338f614b0d56> in <module>
----> 1 df1 = rf_user_input(df, rf)

<ipython-input-144-86019ba60ca3> in rf_user_input(df, REQUEST_OBJ)
    109                             df.loc[
    110                                 (df['Date'] >= start_date) & (df['Date'] <= end_date), 'Tea_Good'] = a0 + (
--> 111                                     a1 * ((df['days']) - period_days))
    112                         elif kind == 'tea_bad':
    113                             df.loc[

~/admvenv/lib/python3.7/site-packages/pandas/core/ops/common.py in new_method(self, other)
     62         other = item_from_zerodim(other)
     63 
---> 64         return method(self, other)
     65 
     66     return new_method

~/admvenv/lib/python3.7/site-packages/pandas/core/ops/__init__.py in wrapper(left, right)
    501         lvalues = extract_array(left, extract_numpy=True)
    502         rvalues = extract_array(right, extract_numpy=True)
--> 503         result = arithmetic_op(lvalues, rvalues, op, str_rep)
    504 
    505         return _construct_result(left, result, index=left.index, name=res_name)

~/admvenv/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in arithmetic_op(left, right, op, str_rep)
    191         #  by dispatch_to_extension_op.
    192         # Timedelta is included because numexpr will fail on it, see GH#31457
--> 193         res_values = dispatch_to_extension_op(op, lvalues, rvalues)
    194 
    195     else:

~/admvenv/lib/python3.7/site-packages/pandas/core/ops/dispatch.py in dispatch_to_extension_op(op, left, right)
    123     # The op calls will raise TypeError if the op is not defined
    124     # on the ExtensionArray
--> 125     res_values = op(left, right)
    126     return res_values

TypeError: unsupported operand type(s) for -: 'numpy.ndarray' and 'Timedelta'
...