Как уже упоминалось в комментариях, это может быть достигнуто за счет использования табличной функции с разбивкой строк. Лично я использую один, основанный на очень эффективном подходе на основе таблиц подсчета, составленном Джеффом Моденом , который находится в конце моего ответа.
Использование этой функции позволяет сравнивать отдельные слова, разделенные пробелом, и подсчитывать количество совпадений по сравнению с общим количеством слов в двух значениях.
Заметьте, однако, что это решение подходит для любых значений с начальными пробелами. Если это будет проблемой, очистите ваши данные перед запуском этого скрипта или настройте их для обработки :
declare @t1 table(v nvarchar(50));
declare @t2 table(v nvarchar(50));
insert into @t1 values('Microsoft SQL Server'),('Office Microsoft'),('Other values'); -- Add in some extra values, with the same number of words and some with the same number of characters
insert into @t2 values('SQL Server Microsoft'),('Microsoft Office'),('that matched'),('that didn''t'),('Other valuee');
with c as
(
select t1.v as v1
,t2.v as v2
,len(t1.v) - len(replace(t1.v,' ','')) + 1 as NumWords -- String Length - String Length without spaces = Number of words - 1
from @t1 as t1
cross join @t2 as t2 -- Cross join the two tables to get all comparisons
where len(replace(t1.v,' ','')) = len(replace(t2.v,' ','')) -- Where the length without spaces is the same. Can't have the same words in a different order if the number of non space characters in the whole string is different
)
select c.v1
,c.v2
,c.NumWords
,sum(case when s1.item = s2.item then 1 else 0 end) as MatchedWords
from c
cross apply dbo.fn_StringSplit4k(c.v1,' ',null) as s1
cross apply dbo.fn_StringSplit4k(c.v2,' ',null) as s2
group by c.v1
,c.v2
,c.NumWords
having c.NumWords = sum(case when s1.item = s2.item then 1 else 0 end);
Выход
+----------------------+----------------------+----------+--------------+
| v1 | v2 | NumWords | MatchedWords |
+----------------------+----------------------+----------+--------------+
| Microsoft SQL Server | SQL Server Microsoft | 3 | 3 |
| Office Microsoft | Microsoft Office | 2 | 2 |
+----------------------+----------------------+----------+--------------+
Функция
create function dbo.fn_StringSplit4k
(
@str nvarchar(4000) = ' ' -- String to split.
,@delimiter as nvarchar(1) = ',' -- Delimiting value to split on.
,@num as int = null -- Which value to return.
)
returns table
as
return
-- Start tally table with 10 rows.
with n(n) as (select 1 union all select 1 union all select 1 union all select 1 union all select 1 union all select 1 union all select 1 union all select 1 union all select 1 union all select 1)
-- Select the same number of rows as characters in @str as incremental row numbers.
-- Cross joins increase exponentially to a max possible 10,000 rows to cover largest @str length.
,t(t) as (select top (select len(isnull(@str,'')) a) row_number() over (order by (select null)) from n n1,n n2,n n3,n n4)
-- Return the position of every value that follows the specified delimiter.
,s(s) as (select 1 union all select t+1 from t where substring(isnull(@str,''),t,1) = @delimiter)
-- Return the start and length of every value, to use in the SUBSTRING function.
-- ISNULL/NULLIF combo handles the last value where there is no delimiter at the end of the string.
,l(s,l) as (select s,isnull(nullif(charindex(@delimiter,isnull(@str,''),s),0)-s,4000) from s)
select rn
,item
from(select row_number() over(order by s) as rn
,substring(@str,s,l) as item
from l
) a
where rn = @num
or @num is null;