Я пытаюсь понять, достаточно ли эффективно то, что я делаю. Я намеренно не использовал plpgsql, так что, возможно, ответ на этот вопрос — написать какую-нибудь функцию?
У меня есть симуляция, которая сейчас работает в памяти и опирается на базу данных; я пытаюсь понять, не лучше ли выполнять отдельные сегменты симуляции целиком в базе данных.
Вот упрощённые определения таблиц: в public.household ~1 миллион строк, а в public.peep ~4 миллиона строк.
-- One row per household. This is the reduced definition given by the author;
-- the query plans further down show the real table also has created_at and
-- updated_at columns.
CREATE TABLE public.household
(
    id integer NOT NULL DEFAULT nextval('household_id_seq'::regclass),
    -- Pool of money that peeps of this household contribute to.
    discretionary_funds integer NOT NULL,
    CONSTRAINT household_pkey PRIMARY KEY (id)
);
-- One row per person ("peep"); every peep belongs to exactly one household.
-- This is the reduced definition given by the author; the queries and plans
-- further down also reference name, created_at and updated_at columns.
CREATE TABLE public.peep
(
    id integer NOT NULL DEFAULT nextval('peep_id_seq'::regclass),
    cash integer NOT NULL,
    household_id integer NOT NULL,
    CONSTRAINT peep_pkey PRIMARY KEY (id),
    CONSTRAINT "peep_household_id_fkey" FOREIGN KEY (household_id)
        REFERENCES public.household (id) MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE NO ACTION
);
Таким образом, с каждым household связано от 2 до 5 peep. Если cash у peep больше текущего discretionary_funds его домохозяйства, нужно взять 10% его cash, округлив в меньшую сторону, и добавить эту сумму к discretionary_funds.
Ниже приведены примеры запросов, которые я запускал, с соответствующим временем выполнения (я не уверен, как правильно приложить планы запросов):
## Первая попытка (удалена из-за нехватки места)
Немного вложенности + сортировка
Я ожидал, что этот вариант будет быстрее, и был удивлён, что это не так.
execution time (min): 5.65
planning time (ms): 10.48
slowest node (min): 2.49
largest node (rows): 4,000,001
costliest node: 326,739.75
-- Moves 10% of a peep's cash into the household's discretionary funds when the
-- peep holds at least as much cash as the household has in funds.
-- Both UPDATEs are data-modifying CTEs of one statement, so they read the same
-- snapshot and share the single `contribution` result.
WITH contribution AS (
    -- One row per peep with non-zero cash, plus the amount that peep pays in.
    SELECT
        h.id AS h_pid,
        h.discretionary_funds,
        p.id AS p_pid,
        p.name,
        p.cash,
        p.household_id,
        CASE
            -- cash / 10 is integer division, so the amount is already whole.
            WHEN h.discretionary_funds <= p.cash THEN CAST(FLOOR(p.cash / 10) AS int)
            WHEN h.discretionary_funds > p.cash THEN 0
        END AS amount_in
    FROM household AS h
    INNER JOIN peep AS p
        ON p.household_id = h.id
    WHERE p.cash <> 0
    ORDER BY p.household_id
),
agg_contributions AS (
    -- Total paid in per household.
    SELECT
        c.household_id,
        sum(c.amount_in) AS total_contributions
    FROM contribution AS c
    GROUP BY c.household_id
    ORDER BY c.household_id
),
house_update AS (
    UPDATE household AS h
    SET discretionary_funds = h.discretionary_funds + a.total_contributions
    FROM agg_contributions AS a
    WHERE h.id = a.household_id
    RETURNING *
),
peep_update AS (
    UPDATE peep AS p
    SET cash = p.cash - c.amount_in
    FROM contribution AS c
    WHERE p.id = c.p_pid
    RETURNING *
)
-- Report how many rows each UPDATE touched.
SELECT
    (SELECT count(*) FROM house_update) AS hu,
    (SELECT count(*) FROM peep_update) AS pu;
Всё вложено
Я также ожидал, что этот вариант будет быстрее, и был удивлён, что это не так. Кроме того, я не уверен, безопасно ли такое обновление; проверить это у меня не было возможности. Под безопасностью я подразумеваю следующее: я хочу быть уверен, что в начале и в конце запроса сумма discretionary_funds + cash для каждого домохозяйства одинакова, т. е. мы не теряем и не получаем деньги.
execution time (min): 5.19
planning time (ms): 10.28
slowest node (min): 4.13
largest node (rows): 4,000,000
costliest node: 282,401.16
-- Same transfer as the CTE variant, but with the per-peep contribution
-- subquery written out separately inside each UPDATE instead of being shared.
WITH house_update AS (
    UPDATE household AS h
    SET discretionary_funds = h.discretionary_funds + agg_contributions.total_contributions
    FROM (
        -- Total paid in per household.
        SELECT
            contribution.household_id,
            sum(contribution.amount_in) AS total_contributions
        FROM (
            -- One row per peep with non-zero cash and the amount it pays in.
            SELECT
                hh.id AS h_pid,
                hh.discretionary_funds,
                p.id AS p_pid,
                p.name,
                p.cash,
                p.household_id,
                CASE
                    WHEN hh.discretionary_funds <= p.cash THEN CAST(FLOOR(p.cash / 10) AS int)
                    WHEN hh.discretionary_funds > p.cash THEN 0
                END AS amount_in
            FROM household AS hh
            INNER JOIN peep AS p
                ON p.household_id = hh.id
            WHERE p.cash <> 0
        ) AS contribution
        GROUP BY contribution.household_id
    ) AS agg_contributions
    WHERE h.id = agg_contributions.household_id
    RETURNING *
),
peep_update AS (
    UPDATE peep AS tgt
    SET cash = tgt.cash - contribution.amount_in
    FROM (
        -- Same per-peep contribution as above, repeated for the peep side.
        SELECT
            hh.id AS h_pid,
            hh.discretionary_funds,
            p.id AS p_pid,
            p.name,
            p.cash,
            p.household_id,
            CASE
                WHEN hh.discretionary_funds <= p.cash THEN CAST(FLOOR(p.cash / 10) AS int)
                WHEN hh.discretionary_funds > p.cash THEN 0
            END AS amount_in
        FROM household AS hh
        INNER JOIN peep AS p
            ON p.household_id = hh.id
        WHERE p.cash <> 0
    ) AS contribution
    WHERE tgt.id = contribution.p_pid
    RETURNING *
)
-- Report how many rows each UPDATE touched.
SELECT
    (SELECT count(*) FROM house_update) AS hu,
    (SELECT count(*) FROM peep_update) AS pu;
Полностью вложенный запрос с фильтром + базовая сортировка
Сортировка, однако, дала эффект, чему я рад, но я всё ещё не уверен, что это безопасно.
execution time (min): 2.32
planning time (ms): 28.77
slowest node (min): 1.32
largest node (rows): 4,000,000
costliest node: 308,096.72
-- Nested variant that skips zero contributions and sorts each source set on
-- the key of the table it is about to update.
WITH house_update AS (
    UPDATE household AS h
    SET discretionary_funds = h.discretionary_funds + agg_contributions.total_contributions
    FROM (
        -- Total of the non-zero contributions per household.
        SELECT
            contribution.household_id,
            sum(contribution.amount_in) AS total_contributions
        FROM (
            -- One row per peep with non-zero cash and the amount it pays in.
            SELECT
                hh.id AS h_pid,
                hh.discretionary_funds,
                p.id AS p_pid,
                p.name,
                p.cash,
                p.household_id,
                CASE
                    WHEN hh.discretionary_funds <= p.cash THEN CAST(FLOOR(p.cash / 10) AS int)
                    WHEN hh.discretionary_funds > p.cash THEN 0
                END AS amount_in
            FROM household AS hh
            INNER JOIN peep AS p
                ON p.household_id = hh.id
            WHERE p.cash <> 0
            ORDER BY p.household_id
        ) AS contribution
        WHERE contribution.amount_in <> 0
        GROUP BY contribution.household_id
        ORDER BY contribution.household_id
    ) AS agg_contributions
    WHERE h.id = agg_contributions.household_id
    RETURNING *
),
peep_update AS (
    UPDATE peep AS tgt
    SET cash = tgt.cash - contribution.amount_in
    FROM (
        -- Same per-peep contribution, ordered by peep id for the peep update.
        SELECT
            hh.id AS h_pid,
            hh.discretionary_funds,
            p.id AS p_pid,
            p.name,
            p.cash,
            p.household_id,
            CASE
                WHEN hh.discretionary_funds <= p.cash THEN CAST(FLOOR(p.cash / 10) AS int)
                WHEN hh.discretionary_funds > p.cash THEN 0
            END AS amount_in
        FROM household AS hh
        INNER JOIN peep AS p
            ON p.household_id = hh.id
        WHERE p.cash <> 0
        ORDER BY p.id
    ) AS contribution
    WHERE contribution.amount_in <> 0
      AND tgt.id = contribution.p_pid
    RETURNING *
)
-- Report how many rows each UPDATE touched.
SELECT
    (SELECT count(*) FROM house_update) AS hu,
    (SELECT count(*) FROM peep_update) AS pu;
ПРАВКА: добавляю ещё несколько подходов.
Секционирование (PARTITION BY) с обновлением
Сегодня утром меня посетило небольшое озарение, и я решил покопаться в оконных функциях, но результат оказался не совсем таким, как я надеялся.
execution time (min): 5.71
planning time (ms): 14.53
slowest node (min): 4.33
largest node (rows): 4,000,001
costliest node: 374,090.01
-- Window-function variant: `contribution` carries, for every peep with
-- non-zero cash, both its own amount and its household's total.
-- Both UPDATEs are data-modifying CTEs of one statement, so they read the same
-- snapshot and the same `contribution` rows.
WITH contribution AS (
    SELECT
        per_peep.h_pid,
        per_peep.p_pid,
        per_peep.household_id,
        per_peep.amount_in,
        sum(per_peep.amount_in) OVER (PARTITION BY per_peep.h_pid) AS total_contributions
    FROM (
        -- Compute the per-peep amount once so the window sum can reuse it.
        SELECT
            h.id AS h_pid,
            p.id AS p_pid,
            p.household_id,
            CASE
                WHEN h.discretionary_funds > p.cash THEN 0
                -- cash / 10 is integer division, so the amount is already whole.
                WHEN h.discretionary_funds <= p.cash THEN CAST(FLOOR(p.cash / 10) AS int)
            END AS amount_in
        FROM household AS h
        INNER JOIN peep AS p
            ON p.household_id = h.id
        WHERE p.cash <> 0
    ) AS per_peep
    ORDER BY per_peep.h_pid
),
household_total AS (
    -- `contribution` repeats the household total on every peep row. Collapse
    -- it to one row per household so the UPDATE below never sees several
    -- candidate rows for the same target row.
    SELECT DISTINCT
        c.household_id,
        c.total_contributions
    FROM contribution AS c
),
house_update AS (
    UPDATE household AS h
    SET discretionary_funds = h.discretionary_funds + t.total_contributions
    FROM household_total AS t
    WHERE h.id = t.household_id
    RETURNING *
),
peep_update AS (
    UPDATE peep AS p
    SET cash = p.cash - c.amount_in
    FROM contribution AS c
    WHERE p.id = c.p_pid
    RETURNING *
)
-- Report how many rows each UPDATE touched.
SELECT
    (SELECT count(*) FROM house_update) AS hu,
    (SELECT count(*) FROM peep_update) AS pu;
Секционирование (PARTITION BY) с обновлением и фильтром
Не могу поверить, что я сразу не догадался фильтровать обновляемые строки и возвращать только идентификаторы, но это мало что дало.
execution time (min): 5.29
planning time (ms): 22.68
slowest node (min): 4.17
largest node (rows): 4,000,000
costliest node: 283,708.71
-- Window-function variant that skips rows with nothing to transfer and
-- returns only ids from the UPDATEs.
-- Both UPDATEs are data-modifying CTEs of one statement, so they read the same
-- snapshot and the same `contribution` rows.
WITH contribution AS (
    SELECT
        per_peep.h_pid,
        per_peep.p_pid,
        per_peep.household_id,
        per_peep.amount_in,
        sum(per_peep.amount_in) OVER (PARTITION BY per_peep.h_pid) AS total_contributions
    FROM (
        -- Compute the per-peep amount once so the window sum can reuse it.
        SELECT
            h.id AS h_pid,
            p.id AS p_pid,
            p.household_id,
            CASE
                WHEN h.discretionary_funds > p.cash THEN 0
                -- cash / 10 is integer division, so the amount is already whole.
                WHEN h.discretionary_funds <= p.cash THEN CAST(FLOOR(p.cash / 10) AS int)
            END AS amount_in
        FROM household AS h
        INNER JOIN peep AS p
            ON p.household_id = h.id
        WHERE p.cash <> 0
    ) AS per_peep
    ORDER BY per_peep.h_pid
),
household_total AS (
    -- `contribution` repeats the household total on every peep row. Collapse
    -- it to one row per household so the UPDATE below never sees several
    -- candidate rows for the same target row.
    SELECT DISTINCT
        c.household_id,
        c.total_contributions
    FROM contribution AS c
    WHERE c.total_contributions > 0
),
house_update AS (
    UPDATE household AS h
    SET discretionary_funds = h.discretionary_funds + t.total_contributions
    FROM household_total AS t
    WHERE h.id = t.household_id
    RETURNING h.id
),
peep_update AS (
    UPDATE peep AS p
    SET cash = p.cash - c.amount_in
    FROM contribution AS c
    WHERE p.id = c.p_pid
      AND c.amount_in > 0
    RETURNING p.id
)
-- Report how many rows each UPDATE touched.
SELECT
    (SELECT count(id) FROM house_update) AS hu,
    (SELECT count(id) FROM peep_update) AS pu;
Попробовал pgAdmin 4, чтобы получить текстовую версию плана запроса; похоже, просто так скопировать её оттуда не получается.
Result (cost=1034781.00..1034781.01 rows=1 width=16) (actual time=280192.117..280192.118 rows=1 loops=1)
Output: $5, $6
Buffers: shared hit=17468306 read=2912087 dirtied=2855350 written=2744384, temp read=104324 written=108898
CTE contribution
-> WindowAgg (cost=393473.81..578308.15 rows=2006933 width=24) (actual time=5131.325..20632.030 rows=2000000 loops=1)
Output: household.id, p.id, p.household_id, CASE WHEN (household.discretionary_funds > p.cash) THEN 0 WHEN (household.discretionary_funds <= p.cash) THEN (floor(((p.cash / 10))::double precision))::integer ELSE NULL::integer END, sum(CASE WHEN (household.discretionary_funds > p.cash) THEN 0 WHEN (household.discretionary_funds <= p.cash) THEN (floor(((p.cash / 10))::double precision))::integer ELSE NULL::integer END) OVER (?)
Buffers: shared hit=516299 read=73074 dirtied=64781 written=57289, temp read=8832 written=8867
-> Merge Join (cost=393473.81..487996.16 rows=2006933 width=20) (actual time=5128.430..14241.654 rows=2000000 loops=1)
Output: household.id, p.id, p.household_id, household.discretionary_funds, p.cash
Merge Cond: (household.id = p.household_id)
Buffers: shared hit=516299 read=73074 dirtied=64781 written=57289, temp read=8832 written=8867
-> Index Scan using household_pkey on public.household (cost=0.42..57420.33 rows=1004727 width=8) (actual time=0.045..4655.783 rows=1000000 loops=1)
Output: household.id, household.discretionary_funds, household.created_at, household.updated_at
Buffers: shared hit=505404 read=19188 dirtied=4 written=3435
-> Materialize (cost=393472.38..403507.04 rows=2006933 width=12) (actual time=5128.373..7281.130 rows=2000000 loops=1)
Output: p.id, p.household_id, p.cash
Buffers: shared hit=10895 read=53886 dirtied=64777 written=53854, temp read=8832 written=8867
-> Sort (cost=393472.38..398489.71 rows=2006933 width=12) (actual time=5128.366..6217.474 rows=2000000 loops=1)
Output: p.id, p.household_id, p.cash
Sort Key: p.household_id
Sort Method: external merge Disk: 43104kB
Buffers: shared hit=10895 read=53886 dirtied=64777 written=53854, temp read=8832 written=8867
-> Seq Scan on public.peep p (cost=0.00..114781.00 rows=2006933 width=12) (actual time=0.099..3814.237 rows=2000000 loops=1)
Output: p.id, p.household_id, p.cash
Filter: (p.cash <> 0)
Rows Removed by Filter: 2000000
Buffers: shared hit=10895 read=53886 dirtied=64777 written=53854
CTE house_update
-> Update on public.household household_1 (cost=53801.36..126611.32 rows=668978 width=93) (actual time=9455.836..61211.020 rows=956489 loops=1)
Output: household_1.id
Buffers: shared hit=8182847 read=126650 dirtied=134506 written=96652, temp read=36652 written=44987
-> Hash Join (cost=53801.36..126611.32 rows=668978 width=93) (actual time=9449.946..37916.530 rows=1912978 loops=1)
Output: household_1.id, (household_1.discretionary_funds + contribution.total_contributions), household_1.created_at, household_1.updated_at, household_1.ctid, contribution.*
Inner Unique: true
Hash Cond: (contribution.household_id = household_1.id)
Buffers: shared hit=516303 read=93472 dirtied=84947 written=77455, temp read=36652 written=44987
-> CTE Scan on contribution (cost=0.00..45155.99 rows=668978 width=48) (actual time=5132.944..25709.420 rows=1912978 loops=1)
Output: contribution.total_contributions, contribution.*, contribution.household_id
Filter: (contribution.total_contributions > 0)
Rows Removed by Filter: 87022
Buffers: shared hit=516299 read=73074 dirtied=64781 written=57289, temp read=8832 written=17167
-> Hash (cost=30449.27..30449.27 rows=1004727 width=57) (actual time=4301.211..4301.211 rows=1000000 loops=1)
Output: household_1.id, household_1.discretionary_funds, household_1.created_at, household_1.updated_at, household_1.ctid
Buckets: 65536 Batches: 32 Memory Usage: 3391kB
Buffers: shared hit=4 read=20398 dirtied=20166 written=20166, temp written=9680
-> Seq Scan on public.household household_1 (cost=0.00..30449.27 rows=1004727 width=57) (actual time=0.024..3174.650 rows=1000000 loops=1)
Output: household_1.id, household_1.discretionary_funds, household_1.created_at, household_1.updated_at, household_1.ctid
Buffers: shared hit=4 read=20398 dirtied=20166 written=20166
CTE peep_update
-> Update on public.peep (cost=197750.00..299757.50 rows=668978 width=95) (actual time=6322.135..207612.987 rows=1700378 loops=1)
Output: peep.id
Buffers: shared hit=9285459 read=2785437 dirtied=2720844 written=2647732, temp read=67672 written=59372
-> Hash Join (cost=197750.00..299757.50 rows=668978 width=95) (actual time=6320.456..19916.057 rows=1700378 loops=1)
Output: peep.id, peep.name, (peep.cash - contribution_1.amount_in), peep.household_id, peep.created_at, peep.updated_at, peep.ctid, contribution_1.*
Inner Unique: true
Hash Cond: (contribution_1.p_pid = peep.id)
Buffers: shared hit=32 read=64749, temp read=67672 written=59372
-> CTE Scan on contribution contribution_1 (cost=0.00..45155.99 rows=668978 width=40) (actual time=0.055..1778.241 rows=1700378 loops=1)
Output: contribution_1.amount_in, contribution_1.*, contribution_1.p_pid
Filter: (contribution_1.amount_in > 0)
Rows Removed by Filter: 299622
Buffers: temp read=8301 written=1
-> Hash (cost=104781.00..104781.00 rows=4000000 width=63) (actual time=6308.143..6308.144 rows=4000000 loops=1)
Output: peep.id, peep.name, peep.cash, peep.created_at, peep.updated_at, peep.ctid
Buckets: 65536 Batches: 128 Memory Usage: 3633kB
Buffers: shared hit=32 read=64749, temp written=43533
-> Seq Scan on public.peep (cost=0.00..104781.00 rows=4000000 width=63) (actual time=0.026..2479.475 rows=4000000 loops=1)
Output: peep.id, peep.name, peep.cash, peep.created_at, peep.updated_at, peep.ctid
Buffers: shared hit=32 read=64749
InitPlan 4 (returns $5)
-> Aggregate (cost=15052.00..15052.01 rows=1 width=8) (actual time=64078.313..64078.313 rows=1 loops=1)
Output: count(house_update.id)
Buffers: shared hit=8182847 read=126650 dirtied=134506 written=96652, temp read=36652 written=46621
-> CTE Scan on house_update (cost=0.00..13379.56 rows=668978 width=4) (actual time=9455.842..63466.574 rows=956489 loops=1)
Output: house_update.id
Buffers: shared hit=8182847 read=126650 dirtied=134506 written=96652, temp read=36652 written=46621
InitPlan 5 (returns $6)
-> Aggregate (cost=15052.00..15052.01 rows=1 width=8) (actual time=216113.793..216113.794 rows=1 loops=1)
Output: count(peep_update.id)
Buffers: shared hit=9285459 read=2785437 dirtied=2720844 written=2647732, temp read=67672 written=62277
-> CTE Scan on peep_update (cost=0.00..13379.56 rows=668978 width=4) (actual time=6322.140..214750.542 rows=1700378 loops=1)
Output: peep_update.id
Buffers: shared hit=9285459 read=2785437 dirtied=2720844 written=2647732, temp read=67672 written=62277
Planning Time: 30.176 ms
Execution Time: 280306.887 ms
Глядя на всё это, я не понимаю, как избавиться от подзапросов SELECT в конце, поскольку они выглядят очень дорогими.
Я не уверен, как ещё можно безопасно обновить две таблицы.
ПРАВКА: теперь пробую материализованные представления, в основном потому, что их, по-видимому, можно индексировать.
Сначала создаём представление:
-- Snapshot of the transfer to apply: one row per peep with non-zero cash,
-- holding the peep's own amount and the total for its household.
CREATE MATERIALIZED VIEW contribution AS
SELECT
    per_peep.h_pid,
    per_peep.p_pid,
    per_peep.amount_in,
    sum(per_peep.amount_in) OVER (PARTITION BY per_peep.h_pid) AS total_contributions
FROM (
    SELECT
        h.id AS h_pid,
        p.id AS p_pid,
        CASE
            WHEN h.discretionary_funds <= p.cash THEN CAST(FLOOR(p.cash / 10) AS int)
            WHEN h.discretionary_funds > p.cash THEN 0
        END AS amount_in
    FROM household AS h
    INNER JOIN peep AS p
        ON p.household_id = h.id
    WHERE p.cash <> 0
) AS per_peep
ORDER BY per_peep.h_pid, per_peep.p_pid;

-- Indexes on the two columns the follow-up UPDATEs join on.
CREATE INDEX contribution_household_id
    ON contribution (h_pid);
CREATE INDEX contribution_peep_id
    ON contribution (p_pid);
Сначала я просто смотрю, как это работает без обновления (refresh) представления:
-- Apply the transfer stored in the `contribution` materialized view.
-- Both UPDATEs run in one transaction so the money leaves the peeps and
-- reaches the households atomically: either both sides change or neither.
BEGIN;

-- Credit each household once with its total.
-- The view repeats the total on every peep row, so it is collapsed to one
-- row per household before joining.
UPDATE household AS h
SET discretionary_funds = h.discretionary_funds + t.total_contributions
FROM (
    SELECT DISTINCT
        c.h_pid,
        c.total_contributions
    FROM contribution AS c
    WHERE c.total_contributions > 0
) AS t
WHERE h.id = t.h_pid;

-- Debit every peep that actually contributes.
UPDATE peep AS p
SET cash = p.cash - c.amount_in
FROM contribution AS c
WHERE p.id = c.p_pid
  AND c.amount_in > 0;

COMMIT;
К сожалению, получить подробные данные о времени выполнения оказалось не так просто. Вот сообщение и текстовая версия плана, которую выдал pgAdmin:
UPDATE 1700378 Query returned successfully in 1 min 28 secs.
# | Node | Timings | Rows |
| | Exclusive | Inclusive | Rows X | Actual | Plan | Loops
1. Result (cost=1015096.96..1015096.97 rows=1 width=16) (actual=248174.424..248174.425 rows=1 loops=1) -257587.769 ms 248174.425 ms ↑ 1 1 1 1
2. Window Aggregate (cost=393473.81..568421.45 rows=2006933 width=24) (actual=3175.12..17048.861 rows=2000000 loops=1) 6666.448 ms 17048.861 ms ↑ 1.01 2000000 2006933 1
3. Merge Inner Join (cost=393473.81..478109.46 rows=2006933 width=20) (actual=3175.075..10382.413 rows=2000000 loops=1) 2394.144 ms 10382.413 ms ↑ 1.01 2000000 2006933 1
4. Index Scan using household_pkey on public.household as household (cost=0.42..47428.77 rows=1011423 width=8) (actual=0.014..2564.915 rows=1000000 loops=1) 2564.915 ms 2564.915 ms ↑ 1.02 1000000 1011423 1
5. Materialize (cost=393472.38..403507.04 rows=2006933 width=12) (actual=3175.05..5423.354 rows=2000000 loops=1) 1074.397 ms 5423.354 ms ↑ 1.01 2000000 2006933 1
6. Sort (cost=393472.38..398489.71 rows=2006933 width=12) (actual=3175.045..4348.958 rows=2000000 loops=1) 2619.425 ms 4348.958 ms ↑ 1.01 2000000 2006933 1
7. Seq Scan on public.peep as p (cost=0..114781 rows=2006933 width=12) (actual=0.025..1729.533 rows=2000000 loops=1)
Filter: (p.cash <> 0)
Rows Removed by Filter: 2000000
1729.533 ms 1729.533 ms ↑ 1.01 2000000 2006933 1
8. Update on public.household as household_1 (cost=43932.02..116813.98 rows=668978 width=93) (actual=5075.679..54061.744 rows=956489 loops=1) 24086.957 ms 54061.744 ms ↓ 1.43 956489 668978 1
9. Hash Inner Join (cost=43932.02..116813.98 rows=668978 width=93) (actual=5075.455..29974.787 rows=1912978 loops=1)
Hash Cond: (contribution.household_id = household_1.id)
6600.692 ms 29974.787 ms ↓ 2.86 1912978 668978 1
10. CTE Scan (cost=0..45155.99 rows=668978 width=48) (actual=3175.148..21475.966 rows=1912978 loops=1)
Filter: (contribution.total_contributions > 0)
Rows Removed by Filter: 87022
21475.966 ms 21475.966 ms ↓ 2.86 1912978 668978 1
11. Hash (cost=20424.23..20424.23 rows=1011423 width=57) (actual=1898.129..1898.129 rows=1000000 loops=1)
Buckets: 65536 Batches: 32 Memory Usage: 3391 kB
1145.786 ms 1898.129 ms ↑ 1.02 1000000 1011423 1
12. Seq Scan on public.household as household_1 (cost=0..20424.23 rows=1011423 width=57) (actual=0.018..752.343 rows=1000000 loops=1) 752.343 ms 752.343 ms ↑ 1.02 1000000 1011423 1
13. Update on public.peep as peep (cost=197750..299757.5 rows=668978 width=95) (actual=7282.961..186477.175 rows=1700378 loops=1) 167143.251 ms 186477.175 ms ↓ 2.55 1700378 668978 1
14. Hash Inner Join (cost=197750..299757.5 rows=668978 width=95) (actual=7281.223..19333.924 rows=1700378 loops=1)
Hash Cond: (contribution_1.p_pid = peep.id)
10225.572 ms 19333.924 ms ↓ 2.55 1700378 668978 1
15. CTE Scan (cost=0..45155.99 rows=668978 width=40) (actual=0.056..1851.061 rows=1700378 loops=1)
Filter: (contribution_1.amount_in > 0)
Rows Removed by Filter: 299622
1851.061 ms 1851.061 ms ↓ 2.55 1700378 668978 1
16. Hash (cost=104781..104781 rows=4000000 width=63) (actual=7257.291..7257.291 rows=4000000 loops=1)
Buckets: 65536 Batches: 128 Memory Usage: 3633 kB
4367.015 ms 7257.291 ms ↑ 1 4000000 4000000 1
17. Seq Scan on public.peep as peep (cost=0..104781 rows=4000000 width=63) (actual=0.039..2890.276 rows=4000000 loops=1) 2890.276 ms 2890.276 ms ↑ 1 4000000 4000000 1
18. Aggregate (cost=15052..15052.01 rows=1 width=8) (actual=56067.549..56067.55 rows=1 loops=1) 622.37 ms 56067.55 ms ↑ 1 1 1 1
19. CTE Scan (cost=0..13379.56 rows=668978 width=4) (actual=5075.683..55445.181 rows=956489 loops=1) 55445.181 ms 55445.181 ms ↓ 1.43 956489 668978 1
20. Aggregate (cost=15052..15052.01 rows=1 width=8) (actual=192106.863..192106.864 rows=1 loops=1) 1368.867 ms 192106.864 ms ↑ 1 1 1 1
21. CTE Scan (cost=0..13379.56 rows=668978 width=4) (actual=7282.967..190737.997 rows=1700378 loops=1) 190737.997 ms 190737.997 ms ↓ 2.55 1700378 668978 1
Эти два фильтра натолкнули меня на мысль попробовать ещё пару индексов:
Filter: (contribution_1.amount_in > 0)
Rows Removed by Filter: 299622
Filter: (contribution.total_contributions > 0)
Rows Removed by Filter: 87022
-- Extra indexes on the two columns used by the "> 0" filters of the UPDATEs.
-- btree is PostgreSQL's default index method, spelled out here for clarity.
CREATE INDEX contribution_amount_in
    ON contribution USING btree (amount_in);
CREATE INDEX contribution_total_contributions
    ON contribution USING btree (total_contributions);
Но, похоже, это только усугубило проблему:
UPDATE 1700378 Query returned successfully in 1 min 43 secs.
# | Node | Timings | Rows |
| | Exclusive | Inclusive | Rows X | Actual | Plan | Loops
1. Result (cost=1015096.96..1015096.97 rows=1 width=16) (actual=248174.424..248174.425 rows=1 loops=1) -257587.769 ms 248174.425 ms ↑ 1 1 1 1
2. Window Aggregate (cost=393473.81..568421.45 rows=2006933 width=24) (actual=3175.12..17048.861 rows=2000000 loops=1) 6666.448 ms 17048.861 ms ↑ 1.01 2000000 2006933 1
3. Merge Inner Join (cost=393473.81..478109.46 rows=2006933 width=20) (actual=3175.075..10382.413 rows=2000000 loops=1) 2394.144 ms 10382.413 ms ↑ 1.01 2000000 2006933 1
4. Index Scan using household_pkey on public.household as household (cost=0.42..47428.77 rows=1011423 width=8) (actual=0.014..2564.915 rows=1000000 loops=1) 2564.915 ms 2564.915 ms ↑ 1.02 1000000 1011423 1
5. Materialize (cost=393472.38..403507.04 rows=2006933 width=12) (actual=3175.05..5423.354 rows=2000000 loops=1) 1074.397 ms 5423.354 ms ↑ 1.01 2000000 2006933 1
6. Sort (cost=393472.38..398489.71 rows=2006933 width=12) (actual=3175.045..4348.958 rows=2000000 loops=1) 2619.425 ms 4348.958 ms ↑ 1.01 2000000 2006933 1
7. Seq Scan on public.peep as p (cost=0..114781 rows=2006933 width=12) (actual=0.025..1729.533 rows=2000000 loops=1)
Filter: (p.cash <> 0)
Rows Removed by Filter: 2000000
1729.533 ms 1729.533 ms ↑ 1.01 2000000 2006933 1
8. Update on public.household as household_1 (cost=43932.02..116813.98 rows=668978 width=93) (actual=5075.679..54061.744 rows=956489 loops=1) 24086.957 ms 54061.744 ms ↓ 1.43 956489 668978 1
13. Update on public.peep as peep (cost=197750..299757.5 rows=668978 width=95) (actual=7282.961..186477.175 rows=1700378 loops=1) 167143.251 ms 186477.175 ms ↓ 2.55 1700378 668978 1
14. Hash Inner Join (cost=197750..299757.5 rows=668978 width=95) (actual=7281.223..19333.924 rows=1700378 loops=1)
Hash Cond: (contribution_1.p_pid = peep.id)
10225.572 ms 19333.924 ms ↓ 2.55 1700378 668978 1
15. CTE Scan (cost=0..45155.99 rows=668978 width=40) (actual=0.056..1851.061 rows=1700378 loops=1)
Filter: (contribution_1.amount_in > 0)
Rows Removed by Filter: 299622
1851.061 ms 1851.061 ms ↓ 2.55 1700378 668978 1
16. Hash (cost=104781..104781 rows=4000000 width=63) (actual=7257.291..7257.291 rows=4000000 loops=1)
Buckets: 65536 Batches: 128 Memory Usage: 3633 kB
4367.015 ms 7257.291 ms ↑ 1 4000000 4000000 1
17. Seq Scan on public.peep as peep (cost=0..104781 rows=4000000 width=63) (actual=0.039..2890.276 rows=4000000 loops=1) 2890.276 ms 2890.276 ms ↑ 1 4000000 4000000 1
18. Aggregate (cost=15052..15052.01 rows=1 width=8) (actual=56067.549..56067.55 rows=1 loops=1) 622.37 ms 56067.55 ms ↑ 1 1 1 1
19. CTE Scan (cost=0..13379.56 rows=668978 width=4) (actual=5075.683..55445.181 rows=956489 loops=1) 55445.181 ms 55445.181 ms ↓ 1.43 956489 668978 1
20. Aggregate (cost=15052..15052.01 rows=1 width=8) (actual=192106.863..192106.864 rows=1 loops=1) 1368.867 ms 192106.864 ms ↑ 1 1 1 1
21. CTE Scan (cost=0..13379.56 rows=668978 width=4) (actual=7282.967..190737.997 rows=1700378 loops=1) 190737.997 ms 190737.997 ms ↓ 2.55 1700378 668978 1
Поскольку индекс по household_id больше не используется.
Hash Cond: (contribution.household_id = household_1.id)