Я преобразовываю следующий запрос Oracle в BigQuery, но количество записей в результатах отличается - PullRequest
0 голосов
/ 03 октября 2019

Я преобразовываю следующий запрос Oracle в запрос BigQuery, но результаты (количество записей) разные, хотя базовые таблицы, участвующие в запросе, имеют одинаковое количество записей в Oracle и в bq.

Oracle:

-- Oracle version of the query, written with legacy comma joins in FROM and
-- the (+) outer-join operator in WHERE.
SELECT
to_char(R_PROJECT_S.PROJECT_COPYRIGHT_YEAR),
R_PROJECT_S.PROJECT_TITLE,
to_char(R_PROJECT_S.EDITION),
R_PROJECT_S.CIRCULATION_DESC,
R_PROJECT_S.DISTRIBUTION_DESC,
R_PROJECT_S.PROJECT_ID,
DB.R_USAGE_INFO_S.OBJECT_ID,
UPPER(DB.R_INFO_S.PHOTOGRAPHER),
UPPER(DB.R_INFO_S.SOURCE_CAPTION),
ADMIN.BIC_APHEISBN00_BO_VW.BIC_ZCHEAU,
ADMIN.BIC_APHEISBN00_BO_VW.BIC_ZCHECPYR,
ADMIN.BIC_APHEISBN00_BO_VW.BIC_ZCHEED,
ADMIN.BIC_APHEISBN00_BO_VW.BIC_ZCHEPRDDE,
ADMIN.BIC_APHEISBN00_BO_VW.BIC_ZCHEGRDE,
ADMIN.BIC_APHEISBN00_BO_VW.BIC_ZCHSODE,
R_PROJECT_S.CHARGE_TO_ISBN,
ADMIN.BIC_APHEISBN00_BO_VW.BIC_ZCHEPTIT,
DB.R_INFO_S.SOURCE_NAME,
R_PROJECT_S.LANGUAGE_DESC,
R_PROJECT_S.PROJECT_FORMAT_DESC,
DB.R_USAGE_INFO_S.USAGE_ID,
DB.R_USAGE_INFO_S.PAGE,
DB.R_USAGE_INFO_S.CHAPTER,
DB.R_INFO_S.WORK_PROJECT_ID,
DB.R_INFO_S.IMAGE_TYPE_DESC,
DB.R_INFO_S.IMAGE_DESC,
DB.R_USAGE_INFO_S.PERMISSION_TYPE_DESC,
DB.R_USAGE_INFO_S.PERMISSION_STATUS_DESC,
DB.R_USAGE_INFO_S.PERMISSION_USAGE_DESC,
DB.R_USAGE_INFO_S.USAGE_LABEL,
DB.R_USAGE_INFO_S.QUOTED_COST,
DB.R_INFO_S.SOURCE_OBJECT_ID,
DB.R_USAGE_INFO_S.USAGE_TYPE_DESC,
GHEPM_TITLE_PSPP.TITLE_DESCRIPTION,
ADMIN.BIC_APHEISBN00_BO_VW.BIC_ZCHESOAB,
ADMIN.BIC_APHEISBN00_BO_VW.BIC_ZCHEGRCD
-- Comma-separated table list: all join conditions live in the WHERE clause.
FROM
DB.R_PROJECT_S_VW  R_PROJECT_S,
DB.R_USAGE_INFO_S,
DB.R_INFO_S,
ADMIN.BIC_APHEISBN00_BO_VW,
DB.GHEPM_TITLE  GHEPM_TITLE_PSPP
WHERE
-- (+) on the R_USAGE_INFO_S side: outer join from project to usage rows.
( R_PROJECT_S.PROJECT_ID=DB.R_USAGE_INFO_S.PROJECT_ID(+)
)
-- No (+) here: null-extended usage rows from the outer join above cannot
-- satisfy this equality, so only projects with matching usage/info rows remain.
AND  ( DB.R_USAGE_INFO_S.OBJECT_ID=DB.R_INFO_S.OBJECT_ID  )
-- Optional (outer-joined) lookup keyed on the project id.
AND  ( R_PROJECT_S.PROJECT_ID=ADMIN.BIC_APHEISBN00_BO_VW.BIC_ZCHETIIS(+)  )
-- NOTE(review): GHEPM_TITLE_PSPP is declared as a table alias in FROM, so the
-- DB. prefix on it looks like a leftover from renaming — confirm.
AND  ( R_PROJECT_S.PROJECT_ID=DB.GHEPM_TITLE_PSPP.ISBN10(+)  ) 
-- Row filter without (+): rows whose USAGE_LABEL contains KILL (or is NULL)
-- are removed from the result.
AND  UPPER(DB.R_USAGE_INFO_S.USAGE_LABEL)  NOT LIKE  UNISTR('%KILL%')

BQ:

-- BigQuery translation of the Oracle query, using explicit JOIN syntax.
SELECT
CAST(R_PROJECT_S.PROJECT_COPYRIGHT_YEAR AS string) COPYRIGHT_YEAR,
R_PROJECT_S.PROJECT_TITLE,
CAST(R_PROJECT_S.EDITION AS string) EDITION,
R_PROJECT_S.CIRCULATION_DESC,
R_PROJECT_S.DISTRIBUTION_DESC,
R_PROJECT_S.PROJECT_ID,
R_USAGE_INFO_S.OBJECT_ID,
UPPER(R_INFO_S.PHOTOGRAPHER) PHOTOGRAPHER,
UPPER(R_INFO_S.SOURCE_CAPTION) SOURCE_CAPTION,
BIC_APHEISBN00_BO._BIC_ZCHEAU,
BIC_APHEISBN00_BO._BIC_ZCHECPYR,
BIC_APHEISBN00_BO._BIC_ZCHEED,
BIC_APHEISBN00_BO._BIC_ZCHEPRDDE,
BIC_APHEISBN00_BO._BIC_ZCHEGRDE,
BIC_APHEISBN00_BO._BIC_ZCHSODE,
R_PROJECT_S.CHARGE_TO_ISBN,
BIC_APHEISBN00_BO._BIC_ZCHEPTIT,
R_INFO_S.SOURCE_NAME,
R_PROJECT_S.LANGUAGE_DESC,
R_PROJECT_S.PROJECT_FORMAT_DESC,
R_USAGE_INFO_S.USAGE_ID,
R_USAGE_INFO_S.PAGE,
R_USAGE_INFO_S.CHAPTER,
R_INFO_S.WORK_PROJECT_ID,
R_INFO_S.IMAGE_TYPE_DESC,
R_INFO_S.IMAGE_DESC,
R_USAGE_INFO_S.PERMISSION_TYPE_DESC,
R_USAGE_INFO_S.PERMISSION_STATUS_DESC,
R_USAGE_INFO_S.PERMISSION_USAGE_DESC,
R_USAGE_INFO_S.USAGE_LABEL,
R_USAGE_INFO_S.QUOTED_COST,
R_INFO_S.SOURCE_OBJECT_ID,
R_USAGE_INFO_S.USAGE_TYPE_DESC,
GHEPM_TITLE_PSPP.TITLE_DESCRIPTION,
BIC_APHEISBN00_BO._BIC_ZCHESOAB,
BIC_APHEISBN00_BO._BIC_ZCHEGRCD  
FROM
`domain-rr.oracle_DB_DB.R_info_s` R_INFO_S
-- Info and usage rows are paired first on OBJECT_ID.
inner join 
`domain-rr.oracle_DB_DB.R_usage_info_s` R_USAGE_INFO_S
on 
R_USAGE_INFO_S.OBJECT_ID=R_INFO_S.OBJECT_ID
-- RIGHT OUTER JOIN keeps every project row, including projects that have no
-- matching usage/info pair (those columns come back as NULL).
right outer join
`domain-rr.DB_RPT.R_PROJECT_S_VW` R_PROJECT_S
on 
R_PROJECT_S.PROJECT_ID=R_USAGE_INFO_S.PROJECT_ID
left outer join
`domain-rr.DB_RPT.BIC_APHEISBN00_BO_VW` BIC_APHEISBN00_BO
ON
R_PROJECT_S.PROJECT_ID=BIC_APHEISBN00_BO._BIC_ZCHETIIS
left outer join
`domain-rr.oracle_DB_DB.ghepm_title` GHEPM_TITLE_PSPP
ON
R_PROJECT_S.PROJECT_ID=GHEPM_TITLE_PSPP.ISBN10
-- This predicate belongs to the ON clause of the last LEFT JOIN, so it only
-- decides whether a GHEPM_TITLE row matches; it does not remove any rows from
-- the result the way a WHERE filter would.
AND UPPER(R_USAGE_INFO_S.USAGE_LABEL)  NOT LIKE '%KILL%'

Количество записей в Oracle — 1553437, в BQ — 2414413.

Пожалуйста, помогите мне разобраться, как получить одинаковые результаты в Oracle и в BQ.

Спасибо, Нарен

1 Ответ

1 голос
/ 03 октября 2019

Если использовать более читаемые, короткие псевдонимы таблиц, можно выделить несколько различий:

  • В запросе Oracle нет RIGHT JOIN;
  • В GBQ выражение UPPER(...) следует поместить в WHERE, а не в последнее предложение LEFT JOIN, либо перенести его в INNER JOIN с таблицей ui (без тестирования трудно сказать, повлияет ли это на результат, но читаемость улучшится);
  • Порядок таблиц может привести к различиям, особенно при совместном использовании объединений INNER и OUTER;

Oracle (с использованием устаревших неявных объединений)

...
-- FROM/WHERE part of the Oracle query rewritten with short table aliases
-- (the SELECT list is elided above).
FROM
      GRDW.RMS_IMAGE_PROJECT_S_VW p,
      GRDW.RMS_IMAGE_USAGE_INFO_S ui,
      GRDW.RMS_IMAGE_INFO_S i,
      BOADMIN.BIC_APHEISBN00_BO_VW b,
      GRDW.GHEPM_TITLE g
WHERE
  ( p.PROJECT_ID = ui.PROJECT_ID(+)                      -- LEFT JOIN
  )
  -- Neither this predicate nor the USAGE_LABEL filter below carries (+), so
  -- null-extended ui rows produced by the outer join above do not survive.
  AND  ( ui.OBJECT_ID = i.OBJECT_ID  )                   -- INNER JOIN
  AND  ( p.PROJECT_ID = b.BIC_ZCHETIIS(+) )              -- LEFT JOIN
  AND  ( p.PROJECT_ID = g.ISBN10(+)  )                   -- LEFT JOIN
  -- Plain row filter applied to the whole result.
  AND  UPPER(ui.USAGE_LABEL)  NOT LIKE  UNISTR('%KILL%')

Google BigQuery (с использованием современного стандарта явных объединений)

...
-- FROM part of the asker's BigQuery query rewritten with short table aliases
-- (the SELECT list is elided above). Join order and conditions are unchanged.
FROM
      `pearson-rr.oracle_grdw_grdw.rms_image_info_s` i
INNER JOIN 
      `pearson-rr.oracle_grdw_grdw.rms_image_usage_info_s` ui 
   ON ui.OBJECT_ID = i.OBJECT_ID
-- Keeps every project row, even when no (i, ui) pair matches it.
RIGHT OUTER JOIN
      `pearson-rr.GRDW_RPT.RMS_IMAGE_PROJECT_S_VW` p 
   ON p.PROJECT_ID = ui.PROJECT_ID
LEFT OUTER JOIN
      `pearson-rr.GRDW_RPT.BIC_APHEISBN00_BO_VW` b 
   ON p.PROJECT_ID = b._BIC_ZCHETIIS
LEFT OUTER JOIN
      `pearson-rr.oracle_grdw_grdw.ghepm_title` g 
   ON p.PROJECT_ID = g.ISBN10
   -- Part of the LEFT JOIN's ON clause: it only restricts which g rows match
   -- and does not filter rows out of the result.
   AND UPPER(ui.USAGE_LABEL) NOT LIKE '%KILL%'

Поэтому, чтобы учесть порядок таблиц и соответствующие типы JOIN, рассмотрим приведённый ниже скорректированный запрос Google BigQuery:

...
-- Adjusted BigQuery FROM clause (the SELECT list is elided above).
-- Tables are joined in the same order as in the Oracle query, starting from
-- the project view, so no RIGHT JOIN is needed.
FROM
     `pearson-rr.GRDW_RPT.RMS_IMAGE_PROJECT_S_VW` p
LEFT OUTER JOIN
      `pearson-rr.oracle_grdw_grdw.rms_image_usage_info_s` ui
   ON p.PROJECT_ID = ui.PROJECT_ID
-- Plain INNER JOIN ("INNER OUTER JOIN" is not valid join syntax).
-- Because this join is inner, both ON predicates act as row filters:
-- null-extended ui rows from the LEFT JOIN above and rows whose USAGE_LABEL
-- contains KILL (or is NULL) are dropped, as the un-(+)'d Oracle predicates do.
INNER JOIN
      `pearson-rr.oracle_grdw_grdw.rms_image_info_s` i
   ON ui.OBJECT_ID = i.OBJECT_ID AND UPPER(ui.USAGE_LABEL) NOT LIKE '%KILL%'
-- Optional lookups keyed on the project id; unmatched projects are kept.
LEFT OUTER JOIN
      `pearson-rr.GRDW_RPT.BIC_APHEISBN00_BO_VW` b
   ON p.PROJECT_ID = b._BIC_ZCHETIIS
LEFT OUTER JOIN
      `pearson-rr.oracle_grdw_grdw.ghepm_title` g
   ON p.PROJECT_ID = g.ISBN10
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...