Практический смысл не таков:
# Distinct user ids taken from the 'user_id' column of orders_df.
# NOTE(review): the explicit .copy() presumably makes the array own its
# buffer so that sys.getsizeof() below counts the data as well - confirm.
users_ids = orders_df['user_id'].unique().copy()

distinct_user_count = len(np.unique(users_ids))
print("Total user_ids: ", distinct_user_count)

# sys.getsizeof is shallow: it does not follow references into the
# objects the array contains.
users_ids_nbytes = sys.getsizeof(users_ids)
print("Total size: ", users_ids_nbytes)

# Same label as the line above, but this is the size of a single element
# (the first id), not of the whole array.
first_id_nbytes = sys.getsizeof(users_ids[0])
print("Total size: ", first_id_nbytes)
# Give every distinct user id a dense integer code (0, 1, 2, ...) in the
# order in which the ids appear in users_ids.
uuid_to_int = {}
for user_id in users_ids:
    # The dict size before insertion is the next unused code; setdefault
    # leaves an id that has already been seen untouched.
    uuid_to_int.setdefault(user_id, len(uuid_to_int))

# The next code that would be handed out; kept because the name existed
# at module level before and other code may read it.
next_uuid_int = len(uuid_to_int)
def recode_uuid_to_int(uuid) -> int:
    """Return the integer code stored for *uuid* in ``uuid_to_int``.

    Args:
        uuid: A user id that is a key of the module-level ``uuid_to_int``
            mapping.

    Returns:
        The value stored in ``uuid_to_int`` for that id.

    Raises:
        KeyError: If ``uuid`` is not a key of ``uuid_to_int``.
    """
    return uuid_to_int[uuid]
# Spot-check the mapping on a single id.
# NOTE(review): the literal contains 's', which is not a hexadecimal digit,
# so UUID() looks like it would reject it; presumably the id was altered
# before publishing - confirm and substitute a real id.
sample_user_id = UUID('0sb7ff82-4ec5-4c71-9627-ca209e27df5f')
print("UUID to int: ", recode_uuid_to_int(sample_user_id))

# New column holding the integer code of each row's user id.
user_id_column = orders_df['user_id']
orders_df['user_id_as_int'] = user_id_column.apply(recode_uuid_to_int)

# Distinct integer codes, measured with the same shallow sys.getsizeof call.
users_ids_recoded = orders_df['user_id_as_int'].unique().copy()
recoded_nbytes = sys.getsizeof(users_ids_recoded)
print("After recoding: ", recoded_nbytes)
Этот код даёт следующий вывод:
Total user_ids: [cut]
Total size: 139992
Total size: 56
UUID to int: 1
After recoding: 139992