MySQL: выбрать случайные N процентов строк из каждой группы и обновить поле - PullRequest
0 голосов
/ 20 сентября 2018

У меня есть таблица MySQL с именем tbltaskrecord, и мне нужно сделать случайную выборку 10% строк для каждого UserId за каждую дату ReviewDate и записать в поле AuditStatus значение «Check». Вот скрипт создания таблицы и примеры данных:

-- Task review log: one row per task, keyed by TaskId.
CREATE TABLE tbltaskrecord (
    ReviewDate  DATE        NOT NULL,  -- calendar date of the review
    UserId      VARCHAR(50) NOT NULL,  -- user the task belongs to
    TaskId      VARCHAR(50) NOT NULL,  -- unique task identifier
    AuditStatus VARCHAR(50),           -- nullable status column
    PRIMARY KEY (TaskId)
);

-- Sample data: 80 tasks, 20 per (ReviewDate, UserId) combination, all with
-- AuditStatus left NULL. Loaded with a single multi-row INSERT.
INSERT INTO tbltaskrecord (ReviewDate, UserId, TaskId, AuditStatus)
VALUES
    -- 2018-09-19 / jdoe1
    ('2018-09-19', 'jdoe1', 'R110000001', NULL),
    ('2018-09-19', 'jdoe1', 'R110000002', NULL),
    ('2018-09-19', 'jdoe1', 'R110000003', NULL),
    ('2018-09-19', 'jdoe1', 'R110000004', NULL),
    ('2018-09-19', 'jdoe1', 'R110000005', NULL),
    ('2018-09-19', 'jdoe1', 'R110000006', NULL),
    ('2018-09-19', 'jdoe1', 'R110000007', NULL),
    ('2018-09-19', 'jdoe1', 'R110000008', NULL),
    ('2018-09-19', 'jdoe1', 'R110000009', NULL),
    ('2018-09-19', 'jdoe1', 'R110000010', NULL),
    ('2018-09-19', 'jdoe1', 'R110000011', NULL),
    ('2018-09-19', 'jdoe1', 'R110000012', NULL),
    ('2018-09-19', 'jdoe1', 'R110000013', NULL),
    ('2018-09-19', 'jdoe1', 'R110000014', NULL),
    ('2018-09-19', 'jdoe1', 'R110000015', NULL),
    ('2018-09-19', 'jdoe1', 'R110000016', NULL),
    ('2018-09-19', 'jdoe1', 'R110000017', NULL),
    ('2018-09-19', 'jdoe1', 'R110000018', NULL),
    ('2018-09-19', 'jdoe1', 'R110000019', NULL),
    ('2018-09-19', 'jdoe1', 'R110000020', NULL),
    -- 2018-09-19 / jdoe2
    ('2018-09-19', 'jdoe2', 'R110000021', NULL),
    ('2018-09-19', 'jdoe2', 'R110000022', NULL),
    ('2018-09-19', 'jdoe2', 'R110000023', NULL),
    ('2018-09-19', 'jdoe2', 'R110000024', NULL),
    ('2018-09-19', 'jdoe2', 'R110000025', NULL),
    ('2018-09-19', 'jdoe2', 'R110000026', NULL),
    ('2018-09-19', 'jdoe2', 'R110000027', NULL),
    ('2018-09-19', 'jdoe2', 'R110000028', NULL),
    ('2018-09-19', 'jdoe2', 'R110000029', NULL),
    ('2018-09-19', 'jdoe2', 'R110000030', NULL),
    ('2018-09-19', 'jdoe2', 'R110000031', NULL),
    ('2018-09-19', 'jdoe2', 'R110000032', NULL),
    ('2018-09-19', 'jdoe2', 'R110000033', NULL),
    ('2018-09-19', 'jdoe2', 'R110000034', NULL),
    ('2018-09-19', 'jdoe2', 'R110000035', NULL),
    ('2018-09-19', 'jdoe2', 'R110000036', NULL),
    ('2018-09-19', 'jdoe2', 'R110000037', NULL),
    ('2018-09-19', 'jdoe2', 'R110000038', NULL),
    ('2018-09-19', 'jdoe2', 'R110000039', NULL),
    ('2018-09-19', 'jdoe2', 'R110000040', NULL),
    -- 2018-09-20 / jdoe1
    ('2018-09-20', 'jdoe1', 'R110000041', NULL),
    ('2018-09-20', 'jdoe1', 'R110000042', NULL),
    ('2018-09-20', 'jdoe1', 'R110000043', NULL),
    ('2018-09-20', 'jdoe1', 'R110000044', NULL),
    ('2018-09-20', 'jdoe1', 'R110000045', NULL),
    ('2018-09-20', 'jdoe1', 'R110000046', NULL),
    ('2018-09-20', 'jdoe1', 'R110000047', NULL),
    ('2018-09-20', 'jdoe1', 'R110000048', NULL),
    ('2018-09-20', 'jdoe1', 'R110000049', NULL),
    ('2018-09-20', 'jdoe1', 'R110000050', NULL),
    ('2018-09-20', 'jdoe1', 'R110000051', NULL),
    ('2018-09-20', 'jdoe1', 'R110000052', NULL),
    ('2018-09-20', 'jdoe1', 'R110000053', NULL),
    ('2018-09-20', 'jdoe1', 'R110000054', NULL),
    ('2018-09-20', 'jdoe1', 'R110000055', NULL),
    ('2018-09-20', 'jdoe1', 'R110000056', NULL),
    ('2018-09-20', 'jdoe1', 'R110000057', NULL),
    ('2018-09-20', 'jdoe1', 'R110000058', NULL),
    ('2018-09-20', 'jdoe1', 'R110000059', NULL),
    ('2018-09-20', 'jdoe1', 'R110000060', NULL),
    -- 2018-09-20 / jdoe2
    ('2018-09-20', 'jdoe2', 'R110000061', NULL),
    ('2018-09-20', 'jdoe2', 'R110000062', NULL),
    ('2018-09-20', 'jdoe2', 'R110000063', NULL),
    ('2018-09-20', 'jdoe2', 'R110000064', NULL),
    ('2018-09-20', 'jdoe2', 'R110000065', NULL),
    ('2018-09-20', 'jdoe2', 'R110000066', NULL),
    ('2018-09-20', 'jdoe2', 'R110000067', NULL),
    ('2018-09-20', 'jdoe2', 'R110000068', NULL),
    ('2018-09-20', 'jdoe2', 'R110000069', NULL),
    ('2018-09-20', 'jdoe2', 'R110000070', NULL),
    ('2018-09-20', 'jdoe2', 'R110000071', NULL),
    ('2018-09-20', 'jdoe2', 'R110000072', NULL),
    ('2018-09-20', 'jdoe2', 'R110000073', NULL),
    ('2018-09-20', 'jdoe2', 'R110000074', NULL),
    ('2018-09-20', 'jdoe2', 'R110000075', NULL),
    ('2018-09-20', 'jdoe2', 'R110000076', NULL),
    ('2018-09-20', 'jdoe2', 'R110000077', NULL),
    ('2018-09-20', 'jdoe2', 'R110000078', NULL),
    ('2018-09-20', 'jdoe2', 'R110000079', NULL),
    ('2018-09-20', 'jdoe2', 'R110000080', NULL);

Генеральная совокупность:

+-------------+-----------+-----------+-------------+
|   UserId    | 9/19/2018 | 9/20/2018 | Grand Total |
+-------------+-----------+-----------+-------------+
| jdoe1       |        20 |        20 |          40 |
| jdoe2       |        20 |        20 |          40 |
| Grand Total |        40 |        40 |          80 |
+-------------+-----------+-----------+-------------+

Выборка:

+-------------+-----------+-----------+-------------+
|   UserId    | 9/19/2018 | 9/20/2018 | Grand Total |
+-------------+-----------+-----------+-------------+
| jdoe1       |         2 |         2 |           4 |
| jdoe2       |         2 |         2 |           4 |
| Grand Total |         4 |         4 |           8 |
+-------------+-----------+-----------+-------------+

Вот что я пытаюсь получить:

+------------+--------+------------+-------------+
| ReviewDate | UserId |   TaskId   | AuditStatus |
+------------+--------+------------+-------------+
| 2018-09-19 | jdoe1  | R110000008 | Check       |
| 2018-09-19 | jdoe1  | R110000020 | Check       |
| 2018-09-19 | jdoe2  | R110000029 | Check       |
| 2018-09-19 | jdoe2  | R110000037 | Check       |
| 2018-09-20 | jdoe1  | R110000052 | Check       |
| 2018-09-20 | jdoe1  | R110000057 | Check       |
| 2018-09-20 | jdoe2  | R110000070 | Check       |
| 2018-09-20 | jdoe2  | R110000074 | Check       |
+------------+--------+------------+-------------+

Вот запрос, который я пробовал:

-- Keeps each row independently when RAND() < 0.10, so the number of rows
-- returned (overall and per user/date) changes from run to run.
SELECT ReviewDate,
       UserId,
       TaskId,
       AuditStatus
  FROM tbltaskrecord
 WHERE RAND() < 0.10;

Но за 4 запуска он вернул 5, 6, 9 и 8 строк. Вот результат последнего запуска: только 2 строки для jdoe1, и ни одна из них не относится к 2018-09-19.

+------------+--------+-------------+-------------+
| ReviewDate | UserId |   TaskId    | AuditStatus |
+------------+--------+-------------+-------------+
| 2018-09-20 |  jdoe1 |  R110000043 |  NULL       |
| 2018-09-20 |  jdoe1 |  R110000052 |  NULL       |
| 2018-09-19 |  jdoe2 |  R110000022 |  NULL       |
| 2018-09-19 |  jdoe2 |  R110000028 |  NULL       |
| 2018-09-19 |  jdoe2 |  R110000031 |  NULL       |
| 2018-09-20 |  jdoe2 |  R110000062 |  NULL       |
| 2018-09-20 |  jdoe2 |  R110000064 |  NULL       |
| 2018-09-20 |  jdoe2 |  R110000080 |  NULL       |
+------------+--------+-------------+-------------+

Как мне это сделать?

Ответы [ 2 ]

0 голосов
/ 20 сентября 2018

Вот итоговая хранимая процедура.

-- spPickSample: flags a random sample of each user's tasks for one review date.
--
-- Parameter:
--   pReviewDate  VARCHAR(50), compared directly with the DATE column ReviewDate.
--                NOTE(review): presumably callers pass 'YYYY-MM-DD' text; confirm.
--
-- Effect:
--   Sets AuditStatus = 'Check' on every tbltaskrecord row whose TaskId appears in
--   the sample built by the joined subquery q. Rows outside the sample are not
--   touched, so earlier 'Check' flags are not cleared by a later call.
--
-- Sample size:
--   For each userid, rows with rank i <= COUNT(*)/10 are kept (e.g. 20 rows -> 2).
--   A user with fewer than 10 rows on that date has COUNT(*)/10 < 1, so none of
--   that user's rows are selected.
CREATE PROCEDURE `spPickSample`(IN `pReviewDate` VARCHAR(50))
BEGIN
  -- q: the sampled rows; only q.TaskId is used by the UPDATE join below.
  UPDATE tbltaskrecord JOIN (SELECT a.ReviewDate 
       , a.UserId 
       , a.TaskId     
       , a.AuditStatus
    FROM 
       -- a: every row for the date plus i, a counter that restarts at 1 each
       -- time userid differs from the previous row's userid (held in @prev).
       ( SELECT x.*
              , CASE WHEN @prev = userid THEN @i:=@i+1 ELSE @i:=1 END i
              , @prev := userid
           FROM
              -- x: rows for the requested date, grouped by user and shuffled
              -- within each user by RAND().
              ( SELECT *
                  FROM tbltaskrecord
                   WHERE ReviewDate = pReviewDate
                 ORDER
                    BY userid,RAND()
              ) x -- my understanding is that this bit shouldn't work. But it does.
              -- NOTE(review): the counter only ranks correctly if rows reach the
              -- outer SELECT in x's order and the CASE runs before @prev is
              -- reassigned; neither looks guaranteed by MySQL -- confirm on the
              -- target server version.
              -- vars: initialises @prev and @i for this statement.
              , (SELECT @prev:=null,@i:=0) vars
          ORDER
             BY userid
              , i
       ) a
    JOIN
       -- b: per-user quota for the date, one tenth of that user's row count.
       (SELECT userid, COUNT(*)/10 pct FROM tbltaskrecord WHERE ReviewDate = pReviewDate GROUP BY userid) b
      ON b.userid = a.userid
     -- keep only rows whose rank is within the user's quota
     AND b.pct >= a.i) q ON tbltaskrecord.TaskId=q.TaskId SET tbltaskrecord.AuditStatus='Check';
END
0 голосов
/ 20 сентября 2018

Это решение балансирует на грани «хака», но я не могу сказать, действительно ли оно переходит эту черту...

В любом случае, хорошо это или плохо, вот одна идея...

-- Returns a random sample of rows per userid: rows are shuffled within each
-- user, numbered 1..n, and those with number i <= COUNT(*)/10 are kept.
--
-- NOTE(review): both the ranking and the quota are per userid only; ReviewDate
-- is not part of the ordering, the counter reset, or the GROUP BY. The 10% is
-- therefore taken over all of a user's rows, and the split between dates is
-- left to chance.
SELECT a.ReviewDate 
     , a.UserId 
     , a.TaskId     
     , a.AuditStatus
  FROM 
     -- a: every row plus i, a counter that restarts at 1 each time userid
     -- differs from the previous row's userid (held in @prev).
     ( SELECT x.*
            , CASE WHEN @prev = userid THEN @i:=@i+1 ELSE @i:=1 END i
            , @prev := userid
         FROM
            -- x: all rows, grouped by user and shuffled within each user.
            ( SELECT *
                FROM tbltaskrecord 
               ORDER
                  BY userid,RAND()
            ) x -- my understanding is that this bit shouldn't work. But it does.
            -- NOTE(review): the counter only ranks correctly if rows reach the
            -- outer SELECT in x's order and the CASE runs before @prev is
            -- reassigned; neither looks guaranteed by MySQL -- confirm on the
            -- target server version.
            -- vars: initialises @prev and @i for this statement.
            , (SELECT @prev:=null,@i:=0) vars
        ORDER
           BY userid
            , i
     ) a
  JOIN
     -- b: per-user quota, one tenth of that user's total row count.
     (SELECT userid, COUNT(*)/10 pct FROM tbltaskrecord GROUP BY userid) b
    ON b.userid = a.userid
   -- keep only rows whose rank is within the user's quota
   AND b.pct >= a.i;

+------------+--------+------------+-------------+
| ReviewDate | UserId | TaskId     | AuditStatus |
+------------+--------+------------+-------------+
| 2018-09-20 | jdoe1  | R110000046 | NULL        |
| 2018-09-20 | jdoe1  | R110000042 | NULL        |
| 2018-09-19 | jdoe1  | R110000012 | NULL        |
| 2018-09-19 | jdoe1  | R110000016 | NULL        |
| 2018-09-20 | jdoe2  | R110000077 | NULL        |
| 2018-09-19 | jdoe2  | R110000034 | NULL        |
| 2018-09-19 | jdoe2  | R110000022 | NULL        |
| 2018-09-19 | jdoe2  | R110000026 | NULL        |
+------------+--------+------------+-------------+
8 rows in set (0.01 sec)

SqlFiddle http://sqlfiddle.com/#!9/bd3256/1

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...