Ошибка TypeError: str argument expected при сравнении записей в recordlinkage - PullRequest
0 голосов
/ 09 апреля 2020

Я сопоставляю записи с помощью библиотеки recordlinkage (Python Record Linkage Toolkit). При вычислении признаков сравнения (вызов compare.compute) возникает ошибка TypeError: str argument expected. Я не понимаю, что не так с кодом:

# Declare the comparison rules that are evaluated for every candidate pair.
# In each rule the first name is looked up in the first frame passed to
# compute() and the second name in the second frame.
compare = recordlinkage.Compare()

# Exact-match features.
compare.exact('Sex Global', 'Sex')
compare.exact('Age', 'Age (Yrs) at presentation')

# Fuzzy string features; a threshold turns the similarity into a 0/1 flag.
compare.string('ADD_LINE_1', 'Address',
               method='levenshtein',
               threshold=0.7)
# No method given for the next two rules, so the library default is used.
compare.string('CITY', 'City',
               threshold=0.8)
compare.string('STATE_C', 'State',
               threshold=0.85)

# NOTE(review): the reported "TypeError: str argument expected" is raised by
# jellyfish.levenshtein_distance inside one of the string comparisons, so
# presumably at least one of the compared columns holds non-string values
# (e.g. numbers) -- check the dtypes of ADD_LINE_1/Address, CITY/City and
# STATE_C/State and cast the non-null values to str before this call.
features = compare.compute(combinations, phhs_abnormal, hhsc_copy)

traceback:

TypeError                                 Traceback (most recent call last)
<ipython-input-24-d84008a8ba19> in <module>
     13               threshold=0.85)
     14 features = compare.compute(combinations, phhs_abnormal,
---> 15                         hhsc_copy)

~/.local/lib/python3.7/site-packages/recordlinkage/base.py in compute(self, pairs, x, x_link)
    863 
    864         if self.n_jobs == 1:
--> 865             results = self._compute(pairs, x, x_link)
    866         elif self.n_jobs > 1:
    867             results = self._compute_parallel(

~/.local/lib/python3.7/site-packages/recordlinkage/base.py in _compute(self, pairs, x, x_link)
    727                 )
    728 
--> 729             result = feat._compute(data1, data2)
    730             features.append((result, feat.label))
    731 

~/.local/lib/python3.7/site-packages/recordlinkage/base.py in _compute(self, left_on, right_on)
    447             numpy.ndarray objects.
    448         """
--> 449         result = self._compute_vectorized(*tuple(left_on + right_on))
    450 
    451         return result

~/.local/lib/python3.7/site-packages/recordlinkage/compare.py in _compute_vectorized(self, s_left, s_right)
    148                 self.method))
    149 
--> 150         c = str_sim_alg(s_left, s_right)
    151 
    152         if self.threshold is not None:

~/.local/lib/python3.7/site-packages/recordlinkage/algorithms/string.py in levenshtein_similarity(s1, s2)
     62                 raise err
     63 
---> 64     return conc.apply(levenshtein_apply)
     65 
     66 

/usr/local/lib/python3.7/dist-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwds)
   4040             else:
   4041                 values = self.astype(object).values
-> 4042                 mapped = lib.map_infer(values, f, convert=convert_dtype)
   4043 
   4044         if len(mapped) and isinstance(mapped[0], Series):

pandas/_libs/lib.pyx in pandas._libs.lib.map_infer()

~/.local/lib/python3.7/site-packages/recordlinkage/algorithms/string.py in levenshtein_apply(x)
     60                 return np.nan
     61             else:
---> 62                 raise err
     63 
     64     return conc.apply(levenshtein_apply)

~/.local/lib/python3.7/site-packages/recordlinkage/algorithms/string.py in levenshtein_apply(x)
     54 
     55         try:
---> 56             return 1 - jellyfish.levenshtein_distance(x[0], x[1]) \
     57                 / np.max([len(x[0]), len(x[1])])
     58         except Exception as err:

TypeError: str argument expected
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...