Дублирование записей при соединении (JOIN) - PullRequest
0 голосов
/ 17 мая 2019

Я новичок в BigQuery.

Гранулярность таблицы inventory — одна строка на пару depot_id и product_id, а в таблице inventorytransaction хранится журнал всех операций (прибавления или вычитания), выполненных над таблицей inventory.

Требуется получить суммарное количество за каждый месяц (январь–декабрь) текущего года в виде дополнительных столбцов к таблице inventory, например:

SELECT inventory.*, janTotalQuantity, febTotalQuantity, marTotalQuantity,...

Я попробовал соединить таблицу inventory через LEFT JOIN с подзапросом, который считает общее количество по каждому складу и продукту за месяц-год (например, Jan-2019, Feb-2019, Mar-2019, ...). Ниже приведён SQL-запрос, который делает именно это.

-- Lists inventory rows with product/depot attributes plus one quantity column per month
-- of the templated year. The joined subquery yields one row per month-year label,
-- product and depot, so an inventory row is emitted once for each matching month.
SELECT
    inv.inventory_id,
    p.product_name,
    p.product_type,
    p.product_distributor AS distributor,
    p.product_category AS category,
    d.depot_name AS location,
    inv.quantity,
    inv.lower_limit,
    inv.unit_cost,
    inv.quantity * inv.unit_cost AS value,
    p.product_id,
    d.depot_id,
    TIMESTAMP_SECONDS(inv.update_date) AS last_update,
    inv.delete_status,
    -- Each month column carries the joined total only when the row's label is that month.
    CASE WHEN monthly.mon_year = "Jan-{{ execution_date.year }}" THEN monthly.totalQuantity ELSE 0 END AS janQuantityTotal,
    CASE WHEN monthly.mon_year = "Feb-{{ execution_date.year }}" THEN monthly.totalQuantity ELSE 0 END AS febQuantityTotal,
    CASE WHEN monthly.mon_year = "Mar-{{ execution_date.year }}" THEN monthly.totalQuantity ELSE 0 END AS marQuantityTotal,
    CASE WHEN monthly.mon_year = "Apr-{{ execution_date.year }}" THEN monthly.totalQuantity ELSE 0 END AS aprQuantityTotal,
    CASE WHEN monthly.mon_year = "May-{{ execution_date.year }}" THEN monthly.totalQuantity ELSE 0 END AS mayQuantityTotal,
    CASE WHEN monthly.mon_year = "Jun-{{ execution_date.year }}" THEN monthly.totalQuantity ELSE 0 END AS junQuantityTotal,
    CASE WHEN monthly.mon_year = "Jul-{{ execution_date.year }}" THEN monthly.totalQuantity ELSE 0 END AS julQuantityTotal,
    CASE WHEN monthly.mon_year = "Aug-{{ execution_date.year }}" THEN monthly.totalQuantity ELSE 0 END AS augQuantityTotal,
    CASE WHEN monthly.mon_year = "Sep-{{ execution_date.year }}" THEN monthly.totalQuantity ELSE 0 END AS sepQuantityTotal,
    CASE WHEN monthly.mon_year = "Oct-{{ execution_date.year }}" THEN monthly.totalQuantity ELSE 0 END AS octQuantityTotal,
    CASE WHEN monthly.mon_year = "Nov-{{ execution_date.year }}" THEN monthly.totalQuantity ELSE 0 END AS novQuantityTotal,
    CASE WHEN monthly.mon_year = "Dec-{{ execution_date.year }}" THEN monthly.totalQuantity ELSE 0 END AS decQuantityTotal
FROM iprocure_stage.inventory AS inv
INNER JOIN iprocure_ods.product AS p
    ON p.product_id = inv.product_id
INNER JOIN iprocure_ods.depot AS d
    ON d.depot_id = inv.depot_id
LEFT JOIN (
    -- Quantity summed per month-year label, product and depot for the templated year.
    SELECT
        FORMAT_TIMESTAMP('%b-%Y', transaction_date) AS mon_year,
        product_id,
        depot_id,
        SUM(quantity) AS totalQuantity
    FROM `iprocure_ods.inventorytransaction`
    WHERE EXTRACT(YEAR FROM transaction_date) = {{ execution_date.year }}
        AND transaction_type = 1
        AND reference_type IN (1, 6)
        AND delete_status = 0
    GROUP BY mon_year, product_id, depot_id
) AS monthly
    ON monthly.product_id = inv.product_id
    AND monthly.depot_id = inv.depot_id

Проблема приведённого выше запроса в том, что запись inventory дублируется для каждого месяца, в котором есть итог по данному продукту и складу, например:

----------------------------------------------------------------------------------
  inventory_id    depot_id    product_id    janTotalQuantity    febTotalQuantity
-------------------------------------------------------------------------------------
    123             2             3              56                   0
    123             2             3              0                    65

Как в BigQuery избежать дублирования строк таблицы inventory и добавить столбцы с общим количеством за каждый месяц?

Ответы [ 2 ]

2 голосов
/ 17 мая 2019

Вы можете сгруппировать результат по всем столбцам, кроме частичных сумм, а к самим частичным суммам применить агрегатную функцию SUM. Это схлопнет дублирующиеся строки в одну:

-- Collapses the per-month rows produced by the LEFT JOIN into one row per inventory
-- record: every descriptive column is a grouping key, and each month column sums the
-- joined monthly totals (rows for other months contribute 0).
SELECT
    inv.inventory_id,
    p.product_name,
    p.product_type,
    p.product_distributor AS distributor,
    p.product_category AS category,
    d.depot_name AS location,
    inv.quantity,
    inv.lower_limit,
    inv.unit_cost,
    inv.quantity * inv.unit_cost AS value,
    p.product_id,
    d.depot_id,
    TIMESTAMP_SECONDS(inv.update_date) AS last_update,
    inv.delete_status,
    SUM(IF(agg_sd.mon_year = "Jan-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS janQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Feb-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS febQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Mar-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS marQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Apr-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS aprQuantityTotal,
    SUM(IF(agg_sd.mon_year = "May-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS mayQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Jun-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS junQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Jul-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS julQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Aug-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS augQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Sep-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS sepQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Oct-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS octQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Nov-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS novQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Dec-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS decQuantityTotal
FROM iprocure_stage.inventory inv
JOIN iprocure_ods.product p ON p.product_id = inv.product_id
JOIN iprocure_ods.depot d ON d.depot_id = inv.depot_id
LEFT JOIN (
    -- Quantity summed per month-year label, product and depot for the templated year.
    SELECT
        FORMAT_TIMESTAMP('%b-%Y', transaction_date) AS mon_year,
        product_id,
        depot_id,
        SUM(quantity) AS totalQuantity
    FROM `iprocure_ods.inventorytransaction`
    WHERE EXTRACT(YEAR FROM transaction_date) = {{ execution_date.year }}
        AND transaction_type = 1
        AND (reference_type = 1 OR reference_type = 6)
        AND delete_status = 0
    GROUP BY mon_year, product_id, depot_id
) AS agg_sd ON agg_sd.product_id = inv.product_id AND agg_sd.depot_id = inv.depot_id
-- GROUP BY takes bare expressions: "AS alias" is only valid in the SELECT list, so the
-- grouping keys repeat the SELECT expressions without their aliases.
GROUP BY
    inv.inventory_id,
    p.product_name,
    p.product_type,
    p.product_distributor,
    p.product_category,
    d.depot_name,
    inv.quantity,
    inv.lower_limit,
    inv.unit_cost,
    inv.quantity * inv.unit_cost,
    p.product_id,
    d.depot_id,
    TIMESTAMP_SECONDS(inv.update_date),
    inv.delete_status
1 голос
/ 17 мая 2019

Вы пытаетесь имитировать сводную таблицу (pivot); для этого следует использовать (фиктивную) агрегатную функцию:

-- Pivots the joined monthly totals into one row per inventory record. The subquery
-- returns at most one row per month for a product/depot pair, so MAX just picks that
-- month's value. Rows for other months are NULL placeholders, which MAX ignores, so a
-- negative monthly total is not masked by a 0 placeholder; COALESCE turns "no row for
-- this month" back into 0.
SELECT inv.inventory_id
  , p.product_name
  , p.product_type
  , p.product_distributor AS distributor
  , p.product_category AS category
  , d.depot_name AS location
  , inv.quantity
  , inv.lower_limit
  , inv.unit_cost
  , inv.quantity * inv.unit_cost AS value
  , p.product_id
  , d.depot_id
  , TIMESTAMP_SECONDS(inv.update_date) AS last_update
  , inv.delete_status
  , COALESCE(MAX(IF(agg_sd.mon_year = "Jan-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS janQuantityTotal
  , COALESCE(MAX(IF(agg_sd.mon_year = "Feb-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS febQuantityTotal
  , COALESCE(MAX(IF(agg_sd.mon_year = "Mar-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS marQuantityTotal
  , COALESCE(MAX(IF(agg_sd.mon_year = "Apr-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS aprQuantityTotal
  , COALESCE(MAX(IF(agg_sd.mon_year = "May-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS mayQuantityTotal
  , COALESCE(MAX(IF(agg_sd.mon_year = "Jun-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS junQuantityTotal
  , COALESCE(MAX(IF(agg_sd.mon_year = "Jul-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS julQuantityTotal
  , COALESCE(MAX(IF(agg_sd.mon_year = "Aug-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS augQuantityTotal
  , COALESCE(MAX(IF(agg_sd.mon_year = "Sep-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS sepQuantityTotal
  , COALESCE(MAX(IF(agg_sd.mon_year = "Oct-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS octQuantityTotal
  , COALESCE(MAX(IF(agg_sd.mon_year = "Nov-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS novQuantityTotal
  , COALESCE(MAX(IF(agg_sd.mon_year = "Dec-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS decQuantityTotal
FROM iprocure_stage.inventory inv
JOIN iprocure_ods.product p ON p.product_id = inv.product_id
JOIN iprocure_ods.depot d ON d.depot_id = inv.depot_id
LEFT JOIN (
     -- Quantity summed per month-year label, product and depot for the templated year.
     SELECT FORMAT_TIMESTAMP('%b-%Y', transaction_date) AS mon_year
       , product_id
       , depot_id
       , SUM(quantity) AS totalQuantity
     FROM `iprocure_ods.inventorytransaction`
     WHERE EXTRACT(YEAR FROM transaction_date) = {{ execution_date.year }}
       AND transaction_type = 1
       AND (reference_type = 1 OR reference_type = 6)
       AND delete_status = 0
     GROUP BY mon_year, product_id, depot_id
 ) AS agg_sd ON agg_sd.product_id = inv.product_id AND agg_sd.depot_id = inv.depot_id
-- GROUP BY takes bare expressions: "AS alias" is only valid in the SELECT list, so the
-- grouping keys repeat the SELECT expressions without their aliases.
GROUP BY inv.inventory_id
  , p.product_name
  , p.product_type
  , p.product_distributor
  , p.product_category
  , d.depot_name
  , inv.quantity
  , inv.lower_limit
  , inv.unit_cost
  , inv.quantity * inv.unit_cost
  , p.product_id
  , d.depot_id
  , TIMESTAMP_SECONDS(inv.update_date)
  , inv.delete_status
...