Код для сопоставления с использованием инструментария связывания записей. Когда я устанавливаю сравнение, это дает мне ошибку TypeError: str argument expected
. Я не понимаю, что не так с кодом:
compare = recordlinkage.Compare()
compare.exact('Sex Global', 'Sex')
compare.exact('Age', 'Age (Yrs) at presentation')
compare.string('ADD_LINE_1', 'Address',
method='levenshtein',
threshold=0.7)
compare.string('CITY', 'City',
threshold=0.8)
compare.string('STATE_C', 'State',
threshold=0.85)
features = compare.compute(combinations, phhs_abnormal, hhsc_copy)
traceback:
TypeError Traceback (most recent call last)
<ipython-input-24-d84008a8ba19> in <module>
13 threshold=0.85)
14 features = compare.compute(combinations, phhs_abnormal,
---> 15 hhsc_copy)
~/.local/lib/python3.7/site-packages/recordlinkage/base.py in compute(self, pairs, x, x_link)
863
864 if self.n_jobs == 1:
--> 865 results = self._compute(pairs, x, x_link)
866 elif self.n_jobs > 1:
867 results = self._compute_parallel(
~/.local/lib/python3.7/site-packages/recordlinkage/base.py in _compute(self, pairs, x, x_link)
727 )
728
--> 729 result = feat._compute(data1, data2)
730 features.append((result, feat.label))
731
~/.local/lib/python3.7/site-packages/recordlinkage/base.py in _compute(self, left_on, right_on)
447 numpy.ndarray objects.
448 """
--> 449 result = self._compute_vectorized(*tuple(left_on + right_on))
450
451 return result
~/.local/lib/python3.7/site-packages/recordlinkage/compare.py in _compute_vectorized(self, s_left, s_right)
148 self.method))
149
--> 150 c = str_sim_alg(s_left, s_right)
151
152 if self.threshold is not None:
~/.local/lib/python3.7/site-packages/recordlinkage/algorithms/string.py in levenshtein_similarity(s1, s2)
62 raise err
63
---> 64 return conc.apply(levenshtein_apply)
65
66
/usr/local/lib/python3.7/dist-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwds)
4040 else:
4041 values = self.astype(object).values
-> 4042 mapped = lib.map_infer(values, f, convert=convert_dtype)
4043
4044 if len(mapped) and isinstance(mapped[0], Series):
pandas/_libs/lib.pyx in pandas._libs.lib.map_infer()
~/.local/lib/python3.7/site-packages/recordlinkage/algorithms/string.py in levenshtein_apply(x)
60 return np.nan
61 else:
---> 62 raise err
63
64 return conc.apply(levenshtein_apply)
~/.local/lib/python3.7/site-packages/recordlinkage/algorithms/string.py in levenshtein_apply(x)
54
55 try:
---> 56 return 1 - jellyfish.levenshtein_distance(x[0], x[1]) \
57 / np.max([len(x[0]), len(x[1])])
58 except Exception as err:
TypeError: str argument expected