PL SQL до T SQL - REGEXP - PullRequest
       38

PL SQL до T SQL - REGEXP

0 голосов
/ 17 февраля 2020

Я пытаюсь преобразовать сценарий из PL SQL в T SQL, и теперь у меня несколько строк

table(cast(multiset(select level from dual connect by level <= len (regexp_replace(t.image, '[^**]+'))/2) as sys.OdciNumberList)) levels

where substr(REGEXP_SUBSTR (t.image,  '[^**]+',1, levels.column_value),1,instr( REGEXP_SUBSTR (t.image,  '[^**]+',1, levels.column_value),'=',1) -1)

IMAGE

Любая помощь была бы отличной.

Крис

1 Ответ

0 голосов
/ 17 февраля 2020

Для лучшего ответа было бы хорошо включить некоторые примеры ввода и желаемых результатов. Особенно при обращении к другой версии SQL. Возможно, включение тега PL / SQL поможет найти человека, который понимает PL / SQL и T- SQL. Также было бы полезно включить DDL, в частности тип данных для «Уровень». Опять же, я говорю, что это не критично, а скорее поможет вам получить лучшие ответы здесь.

Тем не менее, то, что вы пытаетесь сделать, можно выполнить в T-SQL, используя таблицу чисел (tally table), функцию N-грамм и пару других функций, которые я привёл в конце этого поста.

regexp_replace

Чтобы заменить или удалить символы, которые соответствуют шаблону в t- SQL, вы можете использовать patreplace8k. Вот пример того, как использовать его для замены чисел на *:

-- Replace every digit of the sample sentence with an asterisk.
SELECT masked.NewString
FROM   samd.patReplace8K('My phone number is 555-2211', '[0-9]', '*') AS masked;

Возвращает: My phone number is ***-****

regexp_substr

Вот пример того, как извлечь все телефонные номера из строки:

-- Sample input. The literal spans two lines, so the line break and the
-- indentation that follows it are part of the string value.
DECLARE @string VARCHAR(8000) = 'Call me later at 222-3333 or tomorrow at 312.555.2222, 
                             (313)555-6789, or at 1+800-555-4444 before noon. Thanks!';
-- Matches any single character that is NOT a digit or one of ( ) + . -
DECLARE @pattern VARCHAR(50) = '%[^0-9()+.-]%';

-- EXTRACTOR
WITH itemStart (position, token) AS
(
  -- One row per position where a run of allowed characters begins;
  -- token is the remainder of @string from that position onward.
  SELECT ng.position,
         SUBSTRING(@string, ng.position, DATALENGTH(@string))
  FROM   samd.NGrams8k(@string, 1) AS ng
  WHERE  PATINDEX(@pattern, ng.token)                                  -- 0 when this character is an allowed one
       < ( CASE WHEN ng.position = 1 THEN 1 ELSE 0 END                 -- the first character has no predecessor
         + PATINDEX(@pattern, SUBSTRING(@string, ng.position - 1, 1))  -- 1 when the previous character is not allowed
         )
)
SELECT ItemNumber = ROW_NUMBER() OVER (ORDER BY s.position),
       ItemIndex  = s.position,
       ItemLength = run.l,
       Item       = SUBSTRING(s.token, 1, run.l)
FROM   itemStart AS s
CROSS APPLY
(
  -- Length of the run: distance to the next disallowed character, or the
  -- rest of the string when no disallowed character follows.
  VALUES (ISNULL(NULLIF(PATINDEX(@pattern, s.token), 0),
                 DATALENGTH(@string) + 2 - s.position) - 1)
) AS run(l)
WHERE    run.l > 6   -- keep only runs longer than six characters
ORDER BY ItemNumber;

T- SQL Функция INSTR

В конце этого поста я привёл T-SQL-версию функции INSTR из Oracle. Обратите внимание на следующие примеры:

-- Sample inputs shared by the three examples below.
DECLARE @string    VARCHAR(8000) = 'AABBCC-AA123-AAXYZPDQ-AA-54321';
DECLARE @search    VARCHAR(8000) = '-AA';
DECLARE @position  INT           = 1;
DECLARE @occurance INT           = 2;

-- 1.1. Locate the 2nd occurrence of "-AA" in @string, searching from position 1
SELECT hit.ItemIndex,
       hit.ItemLength,
       hit.ItemCount
FROM   samd.instr8k(@string, @search, @position, @occurance) AS hit;

-- 1.2. Retrieve everything *BEFORE* the second instance of "-AA"
SELECT ItemIndex = hit.ItemIndex,
       Item      = SUBSTRING(@string, 1, hit.ItemIndex - 1)
FROM   samd.instr8k(@string, @search, @position, @occurance) AS hit;

-- 1.3. Retrieve everything *AFTER* the second instance of "-AA"
SELECT ItemIndex = MAX(hit.ItemIndex),
       Item      = MAX(SUBSTRING(@string, hit.ItemIndex + hit.ItemLength, 8000))
FROM   samd.instr8k(@string, @search, @position, @occurance) AS hit;

regexp_replace (ADVANCED)

Вот более сложный пример, использующий ngrams8k для замены телефонных номеров текстом «УДАЛЕНО»

-- Replace every phone-number-like run in @string with the text <REMOVED>.
-- @pattern matches a single "phone" character: a digit or one of ( ) + . -
DECLARE
  @string  VARCHAR(8000)  = 'Call me later at 222-3333 or tomorrow at 312.555.2222, (313)555-6789, or at 1+800-555-4444 before noon. Thanks!',
  @pattern VARCHAR(50)    = '%[0-9()+.-]%';

SELECT NewString = (
  -- A run is redacted only when it is a matching run AND holds 3+ consecutive digits,
  -- so lone punctuation such as the "." after "noon" is left untouched.
  SELECT IIF(patSplit.IsMatch = 1 AND patSplit.Item LIKE '%[0-9][0-9][0-9]%',
             '<REMOVED>', patSplit.Item)
  FROM
  (
    -- Leading non-matching segment (only present when the 1st character does not match).
    -- When nothing in @string matches, PATINDEX returns 0; fall back to the full length
    -- so SUBSTRING never receives a negative length.
    SELECT 1, i.Idx, SUBSTRING(@string,1,i.Idx), CAST(0 AS BIT)
    FROM   (VALUES(ISNULL(NULLIF(PATINDEX(@pattern,@string),0),DATALENGTH(@string)+1)-1)) AS i(Idx)
    WHERE  SUBSTRING(@string,1,1) NOT LIKE @pattern
    UNION ALL
    -- One row per boundary: a position whose match state differs from the previous
    -- character. LEAD finds the next boundary, which gives the segment length.
    SELECT r.RN,
           itemLength = LEAD(r.RN,1,DATALENGTH(@string)+1) OVER (ORDER BY r.RN)-r.RN,
           item       = SUBSTRING(@string,r.RN,
                        LEAD(r.RN,1,DATALENGTH(@string)+1) OVER (ORDER BY r.RN)-r.RN),
           isMatch    = 1 - t.p   -- previous char did not match => this row starts a matching run
    FROM   core.rangeAB(1,DATALENGTH(@string),1,1) AS r
    CROSS APPLY (VALUES (
      CAST(PATINDEX(@pattern,SUBSTRING(@string,r.RN,1))   AS BIT),   -- c: current char matches
      CAST(PATINDEX(@pattern,SUBSTRING(@string,r.RN-1,1)) AS BIT)))  -- p: previous char matches
      AS t(c,p)
    WHERE t.c^t.p = 1
  ) AS patSplit(ItemIndex, ItemLength, Item, IsMatch)
  ORDER BY patSplit.ItemIndex   -- concatenate the pieces in string order; without this the order is not guaranteed
  FOR XML PATH(''), TYPE).value('.','varchar(8000)');

Возвращает:

Call me later at <REMOVED> or tomorrow at <REMOVED>, <REMOVED>, or at <REMOVED> before noon. Thanks!

CREATE FUNCTION core.rangeAB
(
  @Low  BIGINT, -- (start) Lowest  number in the set
  @High BIGINT, -- (stop)  Highest number in the set
  @Gap  BIGINT, -- (step)  Difference between each number in the set
  @Row1 BIT     -- Base: 0 or 1; should RN begin with 0 or 1?
)
/****************************************************************************************
[Purpose]:
 Creates a lazy, in-memory, forward-ordered sequence of up to 531,441,000,000 integers
 starting with @Low and ending with @High (inclusive). RangeAB is a pure, 100% set-based
 alternative to solving SQL problems using iterative methods such as loops, cursors and
 recursive CTEs. RangeAB is based on Itzik Ben-Gan's getnums function for producing a
 sequence of integers and uses logic from Jeff Moden's fnTally function which includes a
 parameter for determining if the "row-number" (RN) should begin with 0 or 1.

 I wanted to use the name "Range" because it functions and performs almost identically to
 the Range function built into Python and Clojure. RANGE is a reserved SQL keyword so I 
 went with "RangeAB". Functions/Algorithms developed using rangeAB can be easily ported
 over to Python, Clojure or any other programming language that leverages a lazy sequence.
 The two major differences between RangeAB and the Python/Clojure versions are:
   1. RangeAB is *Inclusive* where the other two are *Exclusive*. range(0,3) in Python and
      Clojure return [0 1 2], core.rangeAB(0,3) returns [0 1 2 3].
   2. RangeAB has a fourth Parameter (@Row1) to determine if RN should begin with 0 or 1.

[Author]:
 Alan Burstein

[Compatibility]: 
 SQL Server 2008+

[Syntax]:
 SELECT r.RN, r.OP, r.N1, r.N2
 FROM   core.rangeAB(@Low,@High,@Gap,@Row1) AS r;

[Parameters]:
 @Low  = BIGINT; represents the lowest  value for N1.
 @High = BIGINT; represents the highest value for N1.
 @Gap  = BIGINT; represents how much N1 and N2 will increase each row. @Gap is also the 
                 difference between N1 and N2.
 @Row1 = BIT;    represents the base (first) value of RN. When @Row1 = 0, RN begins with 0,
                 when @Row1 = 1 then RN begins with 1.

[Returns]:
 Inline Table Valued Function returns:
 RN = BIGINT; a row number that works just like T-SQL ROW_NUMBER() except that it can 
      start at 0 or 1 which is dictated by @Row1. If you need the numbers: 
      (0 or 1) through @High, then use RN as your "N" value, (@Row1=0 for 0, @Row1=1 for 1),
      otherwise use N1.
 OP = BIGINT; returns the "finite opposite" of RN. When RN begins with 0 the first number 
      in the set will be 0 for RN, the last number in will be 0 for OP. When returning the
      numbers 1 to 10, 1 to 10 is returned in ascending order for RN and in descending 
      order for OP.
      Given the Numbers 1 to 3, 3 is the opposite of 1, 2 the opposite of 2, and 1 is the
      opposite of 3. Given the numbers -1 to 2, the opposite of -1 is 2, the opposite of 0
      is 1, and the opposite of 1 is 0.
      The best practice is to only use OP when @Gap > 1; use core.O instead. Doing so will
      improve performance by 1-2% (not huge but every little bit counts)      
 N1 = BIGINT; This is the "N" in your tally table/numbers function. This is your *Lazy* 
      sequence of numbers starting at @Low and incrementing by @Gap until the next number
      in the sequence is greater than @High.
 N2 = BIGINT; a lazy sequence of numbers starting @Low+@Gap and incrementing by @Gap. N2
      will always be greater than N1 by @Gap. N2 can also be thought of as:
      LEAD(N1,1,N1+@Gap) OVER (ORDER BY RN)

[Dependencies]:
 N/A

[Developer Notes]:
 1.  core.rangeAB returns one billion rows in exactly 90 seconds on my laptop:
     4X 2.7GHz CPU's, 32 GB - multiple versions of SQL Server (2005-2019)       
 2.  The lowest and highest possible numbers returned are whatever is allowable by a 
     bigint. The function, however, returns no more than 531,441,000,000 rows (8100^3). 
 3.  @Gap does not affect RN, RN will begin at @Row1 and increase by 1 until the last row
     unless it's used in a subquery where a filter is applied to RN.
 4.  @Gap must be greater than 0 or the function will not return any rows.
 5.  Keep in mind that when @Row1 is 0 then the highest RN value (ROWNUMBER) will be the 
     number of rows returned minus 1
 6.  If all you need is a sequential set beginning at 0 or 1 then, for best performance
     use the RN column. Use N1 and/or N2 when you need to begin your sequence at any 
     number other than 0 or 1 or if you need a gap between your sequence of numbers. 
 7.  Although @Gap is a bigint it must be a positive integer or the function will
     not return any rows.
 8.  The function will not return any rows when one of the following conditions are true:
       * any of the input parameters are NULL
       * @High is less than @Low 
       * @Gap is not greater than 0
     To force the function to return all NULLs instead of not returning anything you can
     add the following code to the end of the query:

       UNION ALL 
       SELECT NULL, NULL, NULL, NULL
       WHERE NOT (@High&@Low&@Gap&@Row1 IS NOT NULL AND @High >= @Low AND @Gap > 0)

     This code was excluded as it adds a ~5% performance penalty.
 9.  There is no performance penalty for sorting by RN ASC; there is a large performance 
     penalty, however for sorting in descending order. If you need a descending sort then
     use OP in place of RN then sort by rn ASC. 
 10. When setting the @Row1 to 0 and sorting by RN you will see that the 0 is added via
     MERGE JOIN concatenation. Under the hood the function is essentially concatenating
     but, because it's using a MERGE JOIN operator instead of concatenation the cost 
     estimations are needlessly high. You can circumvent this problem by changing:
     ORDER BY core.rangeAB.RN to: ORDER BY ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
 11. The TOP expression substitutes 1 for a NULL or zero @Gap so that computing the row
     goal can never divide by zero. Such calls still return no rows because of the final
     WHERE clause (see note 8).

[Examples]:
--===== 1. The numbers 1 to 5
 SELECT r.RN, r.OP, r.N1, r.N2 FROM core.rangeAB(1,5,1,1) AS r;

--===== 2. 5, 10, 15, 20 (N1) with a zero-based row number (RN)
 SELECT r.RN, r.OP, r.N1, r.N2 FROM core.rangeAB(5,20,5,0) AS r;

-----------------------------------------------------------------------------------------
[Revision History]:
 Rev 00 - 20140518 - Initial Development - AJB
 Rev 05 - 20191122 - Developed this "core" version for open source distribution;
                     updated notes and did some final code clean-up 
*****************************************************************************************/
RETURNS TABLE WITH SCHEMABINDING AS RETURN
WITH
L1(N) AS 
(
  SELECT 1
  FROM (VALUES
   ($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),
   ($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),
   ($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),
   ($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),($),
   ($),($)) T(N) -- 90 values
),
L2(N)      AS (SELECT 1 FROM L1 a CROSS JOIN L1 b CROSS JOIN L1 c),  -- 90^3 rows
iTally(RN) AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 1)) FROM L2 a CROSS JOIN L2 b)
SELECT r.RN, r.OP, r.N1, r.N2
FROM
(
  -- Optional zero-based first row; only produced when @Row1 = 0
  SELECT
    RN = 0,
    OP = (@High-@Low)/@Gap,
    N1 = @Low,
    N2 = @Gap+@Low
  WHERE @Row1 = 0
  UNION ALL   -- (@High-@Low)/@Gap+1:
  -- The ISNULL/NULLIF wrappers only keep the TOP expression valid for bad input
  -- (NULLs, @Gap = 0); the final WHERE clause is what actually rejects such calls.
  SELECT TOP (ABS((ISNULL(@High,0)-ISNULL(@Low,0))/ISNULL(NULLIF(@Gap,0),1)+ISNULL(@Row1,1)))
    RN = i.RN,
    OP = (@High-@Low)/@Gap+(2*@Row1)-i.RN,
    N1 = (i.rn-@Row1)*@Gap+@Low,
    N2 = (i.rn-(@Row1-1))*@Gap+@Low
  FROM       iTally AS i
  ORDER BY   i.RN
) AS r
WHERE @High&@Low&@Gap&@Row1 IS NOT NULL AND @High >= @Low 
AND   @Gap > 0;
GO

CREATE FUNCTION samd.ngrams8k
(
  @String VARCHAR(8000), -- Input string
  @N      INT            -- requested token size
)
/*****************************************************************************************
[Purpose]:
 A character-level N-Grams function that outputs a contiguous stream of @N-sized tokens
 based on an input string (@String). Accepts strings up to 8000 varchar characters long.
 For more information about N-Grams see: http://en.wikipedia.org/wiki/N-gram.

[Author]: 
 Alan Burstein

[Compatibility]:
 SQL Server 2008+, Azure SQL Database

[Syntax]:
--===== Autonomous
 SELECT ng.Position, ng.Token 
 FROM   samd.ngrams8k(@String,@N) AS ng;

--===== Against a table using APPLY
 SELECT      s.SomeID, ng.Position, ng.Token
 FROM        dbo.SomeTable                 AS s
 CROSS APPLY samd.ngrams8k(s.SomeValue,@N) AS ng;

[Parameters]:
 @String  = The input string to split into tokens.
 @N       = The size of each token returned.

[Returns]:
 Position = BIGINT; the position of the token in the input string
 token    = VARCHAR(8000); a @N-sized character-level N-Gram token

[Dependencies]:
 1. core.rangeAB (iTVF)

[Developer Notes]:
 1. ngrams8k is not case sensitive;

 2. Many functions that use ngrams8k will see a huge performance gain when the optimizer
    creates a parallel execution plan. One way to get a parallel query plan (if the
    optimizer does not choose one) is to use make_parallel by Adam Machanic which can be
    found here:
 sqlblog.com/blogs/adam_machanic/archive/2013/07/11/next-level-parallel-plan-porcing.aspx

 3. When @N is less than 1 or greater than the datalength of the input string then no
    tokens (rows) are returned. If either @String or @N are NULL no rows are returned.
    This is a debatable topic but the thinking behind this decision is that: because you
    can't split 'xxx' into 4-grams, you can't split a NULL value into unigrams and you
    can't turn anything into NULL-grams, no rows should be returned.

    For people who would prefer that a NULL input forces the function to return a single
    NULL output you could add this code to the end of the function:

    UNION ALL
    SELECT 1, NULL
    WHERE NOT(@N > 0 AND @N <= DATALENGTH(@String)) OR (@N IS NULL OR @String IS NULL)

 4. ngrams8k is deterministic. For more about deterministic functions see:
    https://msdn.microsoft.com/en-us/library/ms178091.aspx

 5. The string length is measured with DATALENGTH, not LEN. LEN ignores trailing spaces,
    which would silently drop the tokens that cover trailing blanks (for example the
    third unigram of 'ab '). DATALENGTH counts bytes, so this assumes a collation that
    stores one byte per VARCHAR character.
    NOTE(review): confirm before using with UTF-8 or double-byte collations.

[Examples]:
--===== 1. Split the string, "abcd" into unigrams, bigrams and trigrams
 SELECT ng.Position, ng.Token FROM samd.ngrams8k('abcd',1) AS ng; -- unigrams (@N=1)
 SELECT ng.Position, ng.Token FROM samd.ngrams8k('abcd',2) AS ng; -- bigrams  (@N=2)
 SELECT ng.Position, ng.Token FROM samd.ngrams8k('abcd',3) AS ng; -- trigrams (@N=3)    

[Revision History]:
------------------------------------------------------------------------------------------
 Rev 00 - 20140310 - Initial Development - Alan Burstein
 Rev 01 - 20150522 - Removed DQS N-Grams functionality, improved iTally logic. Also Added
                     conversion to bigint in the TOP logic to remove implicit conversion
                     to bigint - Alan Burstein
 Rev 05 - 20171228 - Small simplification; changed: 
                (ABS(CONVERT(BIGINT,(DATALENGTH(ISNULL(@String,''))-(ISNULL(@N,1)-1)),0)))
                                           to:
                (ABS(CONVERT(BIGINT,(DATALENGTH(ISNULL(@String,''))+1-ISNULL(@N,1)),0)))
 Rev 06 - 20180612 - Using CHECKSUM(N) to convert N in the token output instead of
                     using (CAST N as int). CHECKSUM removes the need to convert to int.
 Rev 07 - 20180612 - re-designed to: Use core.rangeAB - Alan Burstein
*****************************************************************************************/
RETURNS TABLE WITH SCHEMABINDING AS RETURN
SELECT
  Position   = r.RN,
  Token      = SUBSTRING(@String,CHECKSUM(r.RN),@N)
FROM  core.rangeAB(1,DATALENGTH(@String)+1-@N,1,1) AS r  -- one row per possible token start
WHERE @N > 0 AND @N <= DATALENGTH(@String);              -- NULL inputs fail this test => no rows
GO

CREATE FUNCTION samd.patReplace8K
(
  @string  VARCHAR(8000),
  @pattern VARCHAR(50),
  @replace VARCHAR(20)
) 
/*****************************************************************************************
[Purpose]:
 Walks @string one character at a time and swaps every character that matches @pattern
 for @replace; characters that do not match are passed through unchanged. The pieces
 are then re-assembled, in their original order, into a single string.

[Author]:
 Alan Burstein

[Compatibility]:
  SQL Server 2008+

[Syntax]:
--===== Basic Syntax Example
 SELECT pr.NewString
 FROM   samd.patReplace8K(@String,@Pattern,@Replace) AS pr;

[Parameters]:
 @string  = VARCHAR(8000); the text to process
 @pattern = VARCHAR(50);   PATINDEX pattern tested against each single character
 @replace = VARCHAR(20);   text emitted in place of each matching character

[Returns]:
 newString = VARCHAR(8000); the re-assembled string

[Dependencies]:
 1. samd.NGrams8K (iTVF)

[Developer Notes]:
 1. Requires SQL Server 2008+
 2. @Pattern IS case sensitive (each character is compared using the Latin1_General_BIN
    collation) but can be easily modified to make it case insensitive
 3. There is no need to include the "%" before and/or after your pattern since we are
    evaluating each character individually
 4. The original notes state that special characters such as "$" and "%" need to be
    escaped with a "/" like so: [/$/%]
    NOTE(review): PATINDEX has no ESCAPE clause; verify this advice before relying on it.

[Examples]:
--===== 1. Replace numeric characters with a "*"
 SELECT pr.NewString
 FROM  samd.patReplace8K('My phone number is 555-2211','[0-9]','*') AS pr;

[Revision History]:
 Rev 00 - 10/27/2014 Initial Development - Alan Burstein
 Rev 01 - 10/29/2014 Mar 2007 - Alan Burstein
        - Redesigned based on the dbo.STRIP_NUM_EE by Eirikur Eiriksson
          (see: http://www.sqlservercentral.com/Forums/Topic1585850-391-2.aspx)
        - change how the cte tally table is created 
        - put the include/exclude logic in a CASE statement instead of a WHERE clause
        - Added Latin1_General_BIN Collation
        - Add code to use the pattern as a parameter.
 Rev 02 - 20141106
        - Added final performance enhancement (more kudos to Eirikur Eiriksson)
        - Put 0 = PATINDEX filter logic into the WHERE clause
 Rev 03 - 20150516
        - Updated to deal with special XML characters
 Rev 04 - 20170320
        - changed @replace from char(1) to varchar(1) to address how spaces are handled
 Rev 05 - Re-write using samd.NGrams
*****************************************************************************************/
RETURNS TABLE WITH SCHEMABINDING AS RETURN
SELECT newString =
  (
    SELECT
      CASE
        WHEN @string = CAST('' AS VARCHAR(8000))
          THEN CAST('' AS VARCHAR(8000))
        WHEN @pattern + @replace + @string IS NOT NULL   -- any NULL input => NULL piece
          THEN CASE
                 WHEN PATINDEX(@pattern, ch.token COLLATE Latin1_General_BIN) > 0
                   THEN @replace   -- character matches the pattern: swap it
                 ELSE ch.token     -- otherwise keep the character as-is
               END
      END
    FROM     samd.NGrams8K(@string, 1) AS ch
    ORDER BY ch.position            -- re-assemble in original character order
    FOR XML PATH(''), TYPE          -- TYPE + .value() undoes XML entity encoding
  ).value('text()[1]', 'VARCHAR(8000)');
GO

CREATE FUNCTION samd.Instr8k
(
  @string    VARCHAR(8000),
  @search    VARCHAR(8000),
  @position  INT,
  @occurance INT 
) 
/*****************************************************************************************
[Purpose]:
  Returns the position (ItemIndex) of the Nth (@occurance) occurrence of one string
  (@search) within another (@string). Similar to Oracle's PL/SQL INSTR function. 
  https://www.techonthenet.com/oracle/functions/instr.php

[Author]: 
 Alan Burstein

[Compatibility]:
 SQL Server 2008+

[Syntax]:
--===== Autonomous
 SELECT ins.ItemIndex, ins.ItemLength, ins.ItemCount
 FROM   samd.Instr8k(@string,@search,@position,@occurance) AS ins;

--===== Against a table using APPLY
 SELECT      s.SomeID, ins.ItemIndex, ins.ItemLength, ins.ItemCount
 FROM        dbo.SomeTable                 AS s
 CROSS APPLY samd.Instr8k(s.string,@search,@position,@occurance) AS ins

[Parameters]:
  @string    = VARCHAR(8000); Input string to evaluate
  @search    = VARCHAR(8000); Token to search for inside of @string
  @position  = INT; Where to begin searching for @search; identical to the third 
                     parameter in SQL Server CHARINDEX [, start_location].
                     A NULL @position is treated as 1.
  @occurance = INT; Represents the Nth instance of the search string (@search)

[Returns]:
  Always exactly one row (the SELECT is a scalar aggregate):
  ItemIndex  = Position, relative to the start of @string, of the Nth (@occurance)
               instance of @search; 0 when no qualifying instance exists
  ItemLength = Length of @search (in case you need it, no need to re-evaluate the string)
  ItemCount  = Number of times @search appears in the portion of @string that starts at
               @position (as counted by samd.substringCount8K_lazy); 0 when no
               qualifying instance exists

[Dependencies]:
 1. samd.ngrams8k
   1.1. core.rangeAB (iTVF)
 2. samd.substringCount8K_lazy

[Developer Notes]:
 1. samd.Instr8k does not treat the input strings (@string and @search) as case sensitive.
 2. Don't use instr8k for "SubstringBetween" functionality; for better performance use
    samd.SubstringBetween8k instead. 
 3. The @position parameter is the key benefit of this function when dealing with long
    strings where the search item is towards the back of the string. For example, take a
    5000 character string where, what you are looking for is always *at least* 3000
    characters deep. Setting @position to 3000 will dramatically improve performance.
 4. Unlike Oracle's PL/SQL INSTR function, Instr8k does not accept numbers less than 1.

[Examples]:
--===== 1. Position of the 2nd "-AA", searching from the 1st character
 SELECT ins.ItemIndex, ins.ItemLength, ins.ItemCount
 FROM   samd.Instr8k('AABBCC-AA123-AAXYZPDQ-AA-54321','-AA',1,2) AS ins;

[Revision History]:
------------------------------------------------------------------------------------------
 Rev 00 - 20191112 - Initial Development - Alan Burstein

*****************************************************************************************/
RETURNS TABLE WITH SCHEMABINDING AS RETURN
SELECT
  -- hit.position is relative to the searched tail; shift it back to a position in @string
  ItemIndex  = ISNULL(MAX(ISNULL(hit.position,1)+(arg.Pos-1)),0),
  ItemLength = ISNULL(MAX(LEN(@search)),LEN(@search)),
  ItemCount  = ISNULL(MAX(cnt.SubstringCount),0)
FROM
  (VALUES(ISNULL(@position,1),LEN(@search))) AS arg(Pos,SrchLn)          -- normalized arguments
CROSS APPLY
  (VALUES(SUBSTRING(@string,arg.Pos,8000))) AS tail(String)              -- part of @string to search
CROSS APPLY
  samd.substringCount8K_lazy(tail.String,@search) AS cnt                 -- how many instances exist
CROSS APPLY
(
  -- Number the matching tokens in string order and stop after the Nth one
  SELECT TOP (@occurance)
         RN = ROW_NUMBER() OVER (ORDER BY ng.position),
         ng.position
  FROM   samd.ngrams8k(tail.String,arg.SrchLn) AS ng
  WHERE  ng.token = @search
  ORDER BY ng.position
) AS hit
WHERE arg.Pos > 0
AND   cnt.SubstringCount >= @occurance
AND   hit.RN = @occurance;
GO

CREATE FUNCTION samd.substringCount8K_lazy
(
  @string       varchar(8000),
  @searchstring varchar(1000)
)
/*****************************************************************************************
[Purpose]:
 Scans the input string (@string) and counts how many times the search string
 (@searchstring) appears. The count is derived from how much shorter @string becomes
 once every instance of @searchstring has been removed with REPLACE.

[Compatibility]: 
 SQL Server 2008+
 Uses TABLE VALUES constructor (not available pre-2008)

[Author]: Alan Burstein

[Syntax]:
--===== Autonomous
 SELECT f.substringCount
 FROM   samd.substringCount8K_lazy(@string,@searchString) AS f;

--===== Against a table using APPLY
 SELECT      f.substringCount
 FROM        dbo.someTable AS t
 CROSS APPLY samd.substringCount8K_lazy(t.col, @searchString) AS f;

[Parameters]:
  @string       = VARCHAR(8000); input string to analyze
  @searchString = VARCHAR(1000); substring to search for

[Returns]:
 Inline table valued function returns -
 substringCount = int; Number of times that @searchString appears in @string

[Developer Notes]:
 1. substringCount8K_lazy does NOT take overlapping values into consideration. For 
    example, this query will return a 1 but the correct result is 2:

      SELECT substringCount FROM samd.substringCount8K_lazy('xxx','xx')

    When overlapping values are a possibility or concern then use substringCountAdvanced8k

 2. substringCount8K_lazy is what is referred to as an "inline" scalar UDF. Technically 
    it's an inline table valued function (iTVF) but performs the same task as a scalar 
    valued user defined function (UDF); the difference is that it requires the APPLY table 
    operator to accept column values as a parameter. For more about "inline" scalar UDFs 
    see this article by SQL MVP Jeff Moden: 
      http://www.sqlservercentral.com/articles/T-SQL/91724/
    and for more about how to use APPLY see this article by SQL MVP Paul White:
      http://www.sqlservercentral.com/articles/APPLY/69953/.

    Note the above syntax example and usage examples below to better understand how to
    use the function. Although the function is slightly more complicated to use than a
    scalar UDF it will yield notably better performance for many reasons. For example,
    unlike a scalar UDF or multi-line table valued function, the inline scalar UDF does
    not restrict the query optimizer's ability to generate a parallel query execution plan.

 3. substringCount8K_lazy returns NULL when either input parameter is NULL and returns 0 
    when either input parameter is blank.

 4. substringCount8K_lazy does not treat parameters as case sensitive

 5. substringCount8K_lazy is deterministic. For more deterministic functions see:
    https://msdn.microsoft.com/en-us/library/ms178091.aspx

 6. All lengths are measured with DATALENGTH rather than LEN. LEN ignores trailing
    spaces, which under-counts instances that sit at the end of @string whenever the
    search string is, or ends with, a space. DATALENGTH counts bytes, so this assumes a
    collation that stores one byte per VARCHAR character.
    NOTE(review): confirm before using with UTF-8 or double-byte collations.

[Examples]:
--===== 1. How many times does the substring "abc" appear?    
 SELECT f.* FROM samd.substringCount8k_lazy('abc123xxxabc','abc') AS f;

--===== 2. Return records from a table where the substring "ab" appears more than once
 DECLARE @table TABLE (string varchar(8000));
 DECLARE @searchString varchar(1000) = 'ab';
 INSERT  @table VALUES ('abcabc'),('abcd'),('bababab'),('baba'),(NULL);

 SELECT      searchString = @searchString, t.string, f.substringCount
 FROM        @table                                  AS t
 CROSS APPLY samd.substringCount8k_lazy(string,'ab') AS f
 WHERE       f.substringCount > 1;

-----------------------------------------------------------------------------------------
[Revision History]:
 Rev 00 - 20180625 - Initial Development - Alan Burstein
 Rev 01 - 20190102 - Added logic to better handle @searchstring = char(32) - Alan Burstein
*****************************************************************************************/
RETURNS TABLE WITH SCHEMABINDING AS RETURN 
SELECT substringCount =
         CASE
           -- Blank search string: nothing to count (documented result is 0);
           -- a NULL @string still yields NULL through the ELSE branch.
           WHEN d.l = 0 AND @string IS NOT NULL THEN 0
           -- (bytes removed by REPLACE) / (bytes per instance) = number of instances.
           -- NULLIF keeps the divisor safe; any NULL input propagates to a NULL result.
           ELSE (DATALENGTH(@string) - DATALENGTH(REPLACE(@string,@searchstring,'')))
                / NULLIF(d.l,0)
         END
FROM   (VALUES(DATALENGTH(@searchstring))) AS d(l);  -- d.l = length of the search string
GO
...