У меня есть набор данных в BigQuery для различных пар цен на акции и их относительных zscores (упорядоченных по дате и в миллионах строк):
+-----+--------+--------+------------+---------------+------------------+---------------+
1 | Row | stock1 | stock2 | date | spreadclose | logspreadreturn | zscore20 |
+-----+--------+--------+------------+---------------+------------------+---------------+
2 | 1 | AJRD | MAS | 27/12/2010 | 0.537230704 | 0.017358199 | -0.251654379 |
3 | 2 | ABEV | EFOI | 27/12/2010 | 41.00585106 | 0.014929275 | 1.950810153 |
4 | 3 | AIRI | REFR | 27/12/2010 | 0.537688889 | 0.003638617 | 0.707555834 |
5 | 4 | AEO | RJF | 27/12/2010 | 0.35009565 | -0.004321474 | 0.265411543 |
6 | 5 | AFL | TSU | 27/12/2010 | 0.771122788 | -0.028202112 | 0.247268645 |
+----+---------+--------+------------+---------------+------------------+---------------+
Затем я беру образец цены на акции согласно ниже. Я пытаюсь вычислить записи по сделке в соответствии со столбцом «Что я хочу» в запросе Big, но может показаться, что они достигают только числа в соответствии со столбцом «(Текущий (неправильный))».
Stock1 Stock2 date zscore20 (Current wrong) What I want
1 FITB MS 4/01/2010 -1.5 1 0
2 FITB MS 5/01/2010 -1.9 1 0
3 FITB MS 6/01/2010 -2.3 1 1
4 FITB MS 7/01/2010 -2.0 1 1
5 FITB MS 8/01/2010 -1.0 1 1
6 FITB MS 11/01/2010 0.5 -1 0
7 FITB MS 12/01/2010 1.5 -1 0
8 FITB MS 13/01/2010 1.8 -1 0
9 FITB MS 14/01/2010 2.1 -1 -1
10 FITB MS 15/01/2010 1.5 -1 -1
11 FITB MS 19/01/2010 1.3 -1 -1
12 FITB MS 20/01/2010 0.4 -1 -1
13 FITB MS 21/01/2010 -0.1 1 0
Логика c в столбце «Что я хочу» выглядит следующим образом:
- Во временном ряду, когда zscore> 2.0, флаг -1; затем выйти (0), когда zscore затем пересекает ноль в отрицательное значение
- Во временном ряду, когда zscore <-2.0, флаг +1; затем выйти (0), когда zscore next пересекает ноль в postitve </li>
Мой текущий запрос, который я выполняю для большего набора данных (установка года обработки на 2010):
DECLARE processyear date;
SET processyear = '2012-01-01';
INSERT INTO `dataset.main.tradetime`(
-- Calculate tradetimeparameters
WITH tradetimeparameters AS (
SELECT
stock1,
stock2,
date,
spreadclose,
LN(spreadclose) - LN(LAG(spreadclose) OVER (PARTITION BY stock1, stock2 ORDER BY date ASC)) as logspreadreturn,
SAFE_DIVIDE((spreadclose - sma20), stdev20) as zscore20
FROM `rw-algotrader-264713.mlbprices.dailyspreads`
WHERE date >= DATE_SUB(processyear, INTERVAL 1 WEEK) and date < DATE_ADD(processyear, INTERVAL 1 YEAR)
),
secondtradetimeparameters AS (
SELECT
stock1,
stock2,
date,
spreadclose,
logspreadreturn,
zscore20,
CASE WHEN zscore20 > 0 THEN -1 WHEN zscore20 < 0 THEN 1 END as tradesignal20
FROM tradetimeparameters
WHERE date >= DATE_SUB(processyear, INTERVAL 1 WEEK) and date < DATE_ADD(processyear, INTERVAL 1 YEAR)
),
thirdtradetimeparameters AS (
SELECT
stock1,
stock2,
date,
spreadclose,
logspreadreturn,
zscore20,
LEAD(tradesignal20) OVER (PARTITION BY stock1, stock2 ORDER BY date ASC) as tradesignal20
FROM secondtradetimeparameters
WHERE date >= DATE_SUB(processyear, INTERVAL 1 WEEK) and date < DATE_ADD(processyear, INTERVAL 1 YEAR)
),
fourthtradetimeparameters AS (
SELECT
stock1,
stock2,
date,
spreadclose,
logspreadreturn,
zscore20,
tradesignal20,
CASE WHEN tradesignal20 < 0 THEN 0 ELSE tradesignal20 END as positivesignals,
CASE WHEN tradesignal20 > 0 THEN 0 ELSE tradesignal20 END as negativesignals
FROM thirdtradetimeparameters
WHERE date >= DATE_SUB(processyear, INTERVAL 1 WEEK) and date < DATE_ADD(processyear, INTERVAL 1 YEAR)
),
fifthtradetimeparameters AS (
SELECT
stock1,
stock2,
date,
spreadclose,
logspreadreturn,
zscore20,
tradesignal20,
positivesignals,
negativesignals,
(case when positivesignals = 0 then 0
else row_number() over (partition by stock1,stock2, positivesignals, grp order by date ASC)
end) as positivedaysintrade
from (select stock1,
stock2,
date,
spreadclose,
logspreadreturn,
zscore20,
tradesignal20,
positivesignals,
negativesignals, countif(positivesignals = 0) over (partition by stock1,stock2 order by date ASC) as grp
from fourthtradetimeparameters
) fourthtradetimeparameters
WHERE date >= DATE_SUB(processyear, INTERVAL 1 WEEK) and date < DATE_ADD(processyear, INTERVAL 1 YEAR)
),
sixthtradetimeparameters AS (
SELECT
stock1,
stock2,
date,
spreadclose,
logspreadreturn,
zscore20,
tradesignal20,
positivesignals,
negativesignals,
positivedaysintrade,
(case when negativesignals = 0 then 0
else row_number() over (partition by stock1,stock2, negativesignals, grp order by date ASC)
end) as negativedaysintrade
from (select stock1,
stock2,
date,
spreadclose,
logspreadreturn,
zscore20,
tradesignal20,
positivesignals,
negativesignals,
positivedaysintrade,countif(negativesignals = 0) over (partition by stock1,stock2 order by date ASC) as grp
from fifthtradetimeparameters
) fifthtradetimeparameters
WHERE date >= DATE_SUB(processyear, INTERVAL 1 WEEK) and date < DATE_ADD(processyear, INTERVAL 1 YEAR)
),
seventhtradetimeparameters AS (
SELECT
stock1,
stock2,
date,
spreadclose,
logspreadreturn,
zscore20,
tradesignal20,
positivesignals,
negativesignals,
positivedaysintrade,
negativedaysintrade,
(positivedaysintrade + negativedaysintrade) as daysintrade,
(tradesignal20 * logspreadreturn) as sdcreturn20
FROM sixthtradetimeparameters
WHERE date >= DATE_SUB(processyear, INTERVAL 1 WEEK) and date < DATE_ADD(processyear, INTERVAL 1 YEAR)
)
SELECT
stock1,
stock2,
date,
spreadclose,
logspreadreturn,
zscore20,
tradesignal20,
positivesignals,
negativesignals,
positivedaysintrade,
negativedaysintrade,
daysintrade,
sdcreturn20
FROM seventhtradetimeparameters)
Второй более короткая таблица выглядит следующим образом:
SELECT * FROM `dataset.tradetime` WHERE stock1 = 'FITB' and stock2 = 'MS' and date >= '2010-01-01'
Буду признателен, если кто-то может подсказать мне код Bigquery / SQL, который может достичь вышеуказанного , Большое спасибо заранее,