Оговорка: я использую Entity Framework Core, поэтому несколько ограничен в том, какую форму может принимать приведённый ниже запрос.
У меня большая база данных клиентов (1,1 миллиона записей), и я использую API, чтобы выбрать одного клиента по mca_id и вернуть его вместе со всеми связанными данными из contact_information, address и т. д. Сейчас это занимает около 500–700 мс, что кажется очень медленным для получения одной записи.
Вот запрос (предназначен для возврата всей информации, связанной с этим клиентом. Обратите внимание, что Entity Framework Core (.NET / C#) применяет ORDER BY внизу, поэтому я мало что могу с этим поделать).
-- Fetch one customer (looked up by mca_id) together with every related row
-- from its child tables. The statement shape (derived table + LEFT JOINs +
-- trailing ORDER BY) is what Entity Framework Core emits for this lookup.
SELECT
    -- customer (derived table t)
    t.customer_internal_id,
    t.business_partner_id,
    t.created_date,
    t.customer_type,
    t.date_of_birth,
    t.first_name,
    t.gender,
    t.home_store_id,
    t.home_store_updated,
    t.last_name,
    t.loyalty_db_id,
    t.mca_id,
    t.status,
    t.status_reason,
    t.store_joined,
    t.title,
    t.updated_by,
    t.updated_date,
    t.updating_store,
    -- contact_information (c0)
    c0.contact_internal_id,
    c0.contact_type,
    c0.contact_value,
    c0.created_date,
    c0.customer_internal_id,
    c0.updated_by,
    c0.updated_date,
    c0.updating_store,
    c0.validated,
    -- address (a)
    a.address_internal_id,
    a.address_line_1,
    a.address_line_2,
    a.address_type,
    a.address_undeliverable,
    a.address_validated,
    a.country,
    a.created_date,
    a.customer_internal_id,
    a.postcode,
    a.region,
    a.suburb,
    a.updated_by,
    a.updated_date,
    a.updating_store,
    -- marketing_preferences (m)
    m.customer_internal_id,
    m.channel_id,
    m.created_date,
    m.opt_in,
    m.updated_by,
    m.updated_date,
    m.updating_store,
    m.valid_from_date,
    -- content_type_preferences (c1)
    c1.customer_internal_id,
    c1.channel_id,
    c1.type_id,
    c1.created_date,
    c1.opt_in,
    c1.updated_by,
    c1.updated_date,
    c1.updating_store,
    c1.valid_from_date,
    -- external_cards (e)
    e.customer_internal_id,
    e.card_number,
    e.card_design,
    e.card_status,
    e.card_type,
    e.created_date,
    e.updated_by,
    e.updated_date,
    e.updating_store
FROM (
    -- At most one customer row matching the requested mca_id.
    SELECT
        c.customer_internal_id,
        c.business_partner_id,
        c.created_date,
        c.customer_type,
        c.date_of_birth,
        c.first_name,
        c.gender,
        c.home_store_id,
        c.home_store_updated,
        c.last_name,
        c.loyalty_db_id,
        c.mca_id,
        c.status,
        c.status_reason,
        c.store_joined,
        c.title,
        c.updated_by,
        c.updated_date,
        c.updating_store
    FROM customer AS c
    WHERE c.mca_id = '2701159742879@priceline.com.au'
    LIMIT 1
) AS t
-- Every child table is joined on customer_internal_id; LEFT JOIN keeps the
-- customer row even when a child table has no rows for it.
LEFT JOIN contact_information AS c0
    ON c0.customer_internal_id = t.customer_internal_id
LEFT JOIN address AS a
    ON a.customer_internal_id = t.customer_internal_id
LEFT JOIN marketing_preferences AS m
    ON m.customer_internal_id = t.customer_internal_id
LEFT JOIN content_type_preferences AS c1
    ON c1.customer_internal_id = t.customer_internal_id
LEFT JOIN external_cards AS e
    ON e.customer_internal_id = t.customer_internal_id
-- Ordering appended by Entity Framework Core: the customer key followed by
-- the key columns of each joined table.
ORDER BY
    t.customer_internal_id,
    c0.contact_internal_id,
    c0.contact_type,
    a.address_internal_id,
    m.customer_internal_id,
    m.channel_id,
    c1.customer_internal_id,
    c1.channel_id,
    c1.type_id,
    e.customer_internal_id,
    e.card_number;
Ниже приведены первичные / внешние ключи:
customer PRIMARY KEY ("customer_internal_id")
address PRIMARY KEY ("address_internal_id"), CONSTRAINT "address_customer_internal_id_fkey" FOREIGN KEY ("customer_internal_id") REFERENCES "public"."customer" ("customer_internal_id")
contact_information PRIMARY KEY ("contact_internal_id", "contact_type"), CONSTRAINT "contact_information_customer_internal_id_fkey" FOREIGN KEY ("customer_internal_id") REFERENCES "public"."customer" ("customer_internal_id")
external_cards PRIMARY KEY ("customer_internal_id", "card_number"), CONSTRAINT "external_cards_customer_internal_id_fkey" FOREIGN KEY ("customer_internal_id") REFERENCES "public"."customer" ("customer_internal_id")
Имеются следующие индексы:
-- Secondary indexes on the customer_internal_id join column of each child
-- table, plus the index backing the mca_id lookup on customer.
-- btree is the default access method; it is spelled out only for clarity.
-- NOTE(review): the plans shown in this document reach external_cards,
-- marketing_preferences and content_type_preferences through their *_pkey
-- indexes, so idx_cust_cards / idx_cust_mkpref / idx_cust_content may be
-- redundant - confirm before dropping anything.
CREATE INDEX idx_cust_contact
    ON contact_information USING btree (customer_internal_id);
CREATE INDEX idx_cust_address
    ON address USING btree (customer_internal_id);
CREATE INDEX idx_cust_mkpref
    ON marketing_preferences USING btree (customer_internal_id);
CREATE INDEX idx_cust_content
    ON content_type_preferences USING btree (customer_internal_id);
CREATE INDEX idx_cust_cards
    ON external_cards USING btree (customer_internal_id);
CREATE INDEX idx_cust_mcaid
    ON customer USING btree (mca_id);
Вот вывод EXPLAIN для этого запроса:
"Sort (cost=103957.16..103957.20 rows=18 width=687)"
" Sort Key: c.customer_internal_id, c0.contact_internal_id, c0.contact_type, a.address_internal_id, m.customer_internal_id, m.channel_id, c1.customer_internal_id, c1.channel_id, c1.type_id, e.customer_internal_id, e.card_number"
" -> Nested Loop Left Join (cost=35817.63..103956.78 rows=18 width=687)"
" -> Nested Loop Left Join (cost=35813.32..103867.36 rows=6 width=631)"
" -> Nested Loop Left Join (cost=35809.02..103833.42 rows=3 width=506)"
" -> Hash Right Join (cost=35808.74..103808.50 rows=3 width=433)"
" Hash Cond: (c0.customer_internal_id = c.customer_internal_id)"
" -> Seq Scan on contact_information c0 (cost=0.00..59117.35 rows=2368635 width=115)"
" -> Hash (cost=35808.73..35808.73 rows=1 width=318)"
" -> Hash Right Join (cost=8.47..35808.73 rows=1 width=318)"
" Hash Cond: (a.customer_internal_id = c.customer_internal_id)"
" -> Seq Scan on address a (cost=0.00..31425.09 rows=1166709 width=148)"
" -> Hash (cost=8.46..8.46 rows=1 width=170)"
" -> Limit (cost=0.43..8.45 rows=1 width=170)"
" -> Index Scan using idx_cust_mcaid on customer c (cost=0.43..8.45 rows=1 width=170)"
" Index Cond: ((mca_id)::text = '2701159742879@priceline.com.au'::text)"
" -> Index Scan using external_cards_pkey on external_cards e (cost=0.28..8.30 rows=1 width=73)"
" Index Cond: (c.customer_internal_id = customer_internal_id)"
" -> Bitmap Heap Scan on content_type_preferences c1 (cost=4.30..11.29 rows=2 width=125)"
" Recheck Cond: (c.customer_internal_id = customer_internal_id)"
" -> Bitmap Index Scan on content_type_preferences_pkey (cost=0.00..4.30 rows=2 width=0)"
" Index Cond: (c.customer_internal_id = customer_internal_id)"
" -> Bitmap Heap Scan on marketing_preferences m (cost=4.31..14.87 rows=3 width=56)"
" Recheck Cond: (c.customer_internal_id = customer_internal_id)"
" -> Bitmap Index Scan on marketing_preferences_pkey (cost=0.00..4.31 rows=3 width=0)"
" Index Cond: (c.customer_internal_id = customer_internal_id)"
Похоже, большая часть затрат приходится на эти соединения вложенными циклами (Nested Loop), но я не уверен, как подступиться к этой проблеме. Первоначально выполнялось последовательное сканирование (Seq Scan) таблицы customer, и я устранил его, добавив индекс по mca_id, но это практически не повлияло на время выполнения.
EDIT: Обновление. Я добавил пару хеш-индексов, чтобы обслуживать соединения (JOIN) по условию customer_internal_id = customer_internal_id,
-- Hash indexes on the join column of the two tables that the first plan
-- read with a sequential scan (contact_information and address).
-- NOTE(review): b-tree indexes on the same column are also listed for both
-- tables; presumably they could serve the same equality lookup - verify that
-- they exist and that table statistics are current before relying on these.
CREATE INDEX idx_contact_hash
    ON contact_information USING hash (customer_internal_id);
CREATE INDEX idx_address_hash
    ON address USING hash (customer_internal_id);
, и время запроса сократилось примерно до 70 мс. Это здорово, но я припоминаю, что хеш-индексы считались устаревшими или не рекомендовались к использованию? Кто-нибудь может помочь? Вот новый вывод EXPLAIN ANALYZE:
"Sort (cost=119.30..119.33 rows=12 width=687) (actual time=0.082..0.082 rows=2 loops=1)"
" Sort Key: c.customer_internal_id, c0.contact_internal_id, c0.contact_type, a.address_internal_id, m.customer_internal_id, m.channel_id, c1.customer_internal_id, c1.channel_id, c1.type_id, e.customer_internal_id, e.card_number"
" Sort Method: quicksort Memory: 26kB"
" Buffers: shared hit=18"
" -> Nested Loop Left Join (cost=9.31..119.08 rows=12 width=687) (actual time=0.062..0.070 rows=2 loops=1)"
" Buffers: shared hit=18"
" -> Nested Loop Left Join (cost=5.01..59.47 rows=4 width=631) (actual time=0.054..0.059 rows=2 loops=1)"
" Buffers: shared hit=14"
" -> Nested Loop Left Join (cost=0.71..36.85 rows=2 width=506) (actual time=0.045..0.048 rows=2 loops=1)"
" Buffers: shared hit=10"
" -> Nested Loop Left Join (cost=0.71..24.79 rows=1 width=391) (actual time=0.039..0.040 rows=1 loops=1)"
" Buffers: shared hit=8"
" -> Nested Loop Left Join (cost=0.43..16.48 rows=1 width=318) (actual time=0.031..0.033 rows=1 loops=1)"
" Buffers: shared hit=6"
" -> Limit (cost=0.43..8.45 rows=1 width=170) (actual time=0.023..0.024 rows=1 loops=1)"
" Buffers: shared hit=4"
" -> Index Scan using idx_cust_mcaid on customer c (cost=0.43..8.45 rows=1 width=170) (actual time=0.022..0.022 rows=1 loops=1)"
" Index Cond: ((mca_id)::text = '2701159742879@priceline.com.au'::text)"
" Buffers: shared hit=4"
" -> Index Scan using idx_address_hash on address a (cost=0.00..8.02 rows=1 width=148) (actual time=0.006..0.006 rows=1 loops=1)"
" Index Cond: (c.customer_internal_id = customer_internal_id)"
" Buffers: shared hit=2"
" -> Index Scan using external_cards_pkey on external_cards e (cost=0.28..8.30 rows=1 width=73) (actual time=0.006..0.006 rows=0 loops=1)"
" Index Cond: (c.customer_internal_id = customer_internal_id)"
" Buffers: shared hit=2"
" -> Index Scan using idx_contact_hash on contact_information c0 (cost=0.00..12.04 rows=2 width=115) (actual time=0.004..0.005 rows=2 loops=1)"
" Index Cond: (c.customer_internal_id = customer_internal_id)"
" Buffers: shared hit=2"
" -> Bitmap Heap Scan on content_type_preferences c1 (cost=4.30..11.29 rows=2 width=125) (actual time=0.004..0.004 rows=0 loops=2)"
" Recheck Cond: (c.customer_internal_id = customer_internal_id)"
" Buffers: shared hit=4"
" -> Bitmap Index Scan on content_type_preferences_pkey (cost=0.00..4.30 rows=2 width=0) (actual time=0.002..0.002 rows=0 loops=2)"
" Index Cond: (c.customer_internal_id = customer_internal_id)"
" Buffers: shared hit=4"
" -> Bitmap Heap Scan on marketing_preferences m (cost=4.31..14.87 rows=3 width=56) (actual time=0.004..0.004 rows=0 loops=2)"
" Recheck Cond: (c.customer_internal_id = customer_internal_id)"
" Buffers: shared hit=4"
" -> Bitmap Index Scan on marketing_preferences_pkey (cost=0.00..4.31 rows=3 width=0) (actual time=0.002..0.002 rows=0 loops=2)"
" Index Cond: (c.customer_internal_id = customer_internal_id)"
" Buffers: shared hit=4"
"Planning Time: 0.770 ms"
"Execution Time: 0.181 ms"