Как разделить поле с помощью динамического разделителя - PullRequest
0 голосов
/ 20 декабря 2018

У меня есть таблица в BigQuery с контактными адресами электронной почты.

name_family@company.com

name-family@company.com

name.family@company.com

Мне нужно извлечь имя и фамилию в отдельные столбцы. Я написал этот SQL-код, но ищу другие, более удачные способы сделать это.


-- Splits the local part (the text before '@') of every contact e-mail into a
-- first name and a last name.
--
-- Separators are tried in priority order: '_', then '-', then '.'. A separator
-- is accepted only when the piece right after its first occurrence is
-- non-empty; firstName/lastName are then the first and second pieces of that
-- split. When no separator qualifies, firstName is the whole local part and
-- lastName is the empty string.
--
-- The table is read once and every candidate split is computed per row, so no
-- self-joins on the e-mail value are needed (such joins multiply rows whenever
-- the same address occurs more than once).
WITH local_parts AS
      (SELECT string_field_0,
              SPLIT(string_field_0, '@')[SAFE_OFFSET(0)] AS local_part
       FROM `project.dataset.contacts`),
     split_candidates AS
      (SELECT string_field_0,
              local_part,
              SPLIT(local_part, '_') AS by_underscore,
              SPLIT(local_part, '-') AS by_minus,
              SPLIT(local_part, '.') AS by_dot
       FROM local_parts)
SELECT string_field_0 AS Email,
       -- LENGTH(NULL) > 0 evaluates to NULL, which CASE treats as "no match",
       -- so a missing second piece simply falls through to the next separator.
       CASE
           WHEN LENGTH(by_underscore[SAFE_OFFSET(1)]) > 0 THEN by_underscore[SAFE_OFFSET(0)]
           WHEN LENGTH(by_minus[SAFE_OFFSET(1)]) > 0 THEN by_minus[SAFE_OFFSET(0)]
           WHEN LENGTH(by_dot[SAFE_OFFSET(1)]) > 0 THEN by_dot[SAFE_OFFSET(0)]
           ELSE local_part
       END AS firstName,
       CASE
           WHEN LENGTH(by_underscore[SAFE_OFFSET(1)]) > 0 THEN by_underscore[SAFE_OFFSET(1)]
           WHEN LENGTH(by_minus[SAFE_OFFSET(1)]) > 0 THEN by_minus[SAFE_OFFSET(1)]
           WHEN LENGTH(by_dot[SAFE_OFFSET(1)]) > 0 THEN by_dot[SAFE_OFFSET(1)]
           ELSE ''
       END AS lastName
FROM split_candidates
ORDER BY Email DESC

1 Ответ

0 голосов
/ 20 декабря 2018
#standardSQL
-- Extracts first and last name from the local part (before '@') of an e-mail,
-- treating '_', '-' and '.' as interchangeable separators.
-- The CTE below shadows the real table with inline sample rows for the demo.
WITH `project.dataset.contacts` AS (
  SELECT 'name_family@company.com' AS email UNION ALL
  SELECT 'name-family@company.com' UNION ALL
  SELECT 'name.family@company.com'
)
SELECT
  email,
  -- Anchored at the start and stops at the first separator or '@', so an
  -- address with no separator (e.g. 'name@company.com') yields 'name' instead
  -- of matching up to the dot inside the domain.
  REGEXP_EXTRACT(email, r'^([^_\-.@]*)') AS firstName,
  -- Everything between the first separator of the local part and '@';
  -- NULL when the local part contains no separator.
  REGEXP_EXTRACT(email, r'^[^_\-.@]*[_\-.]([^@]*)@') AS lastName
FROM `project.dataset.contacts`

Результат:

Row     email                       firstName   lastName     
1       name_family@company.com     name        family   
2       name-family@company.com     name        family   
3       name.family@company.com     name        family   
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...