сокращение таблицы, чтобы избежать неэффективности пространства - PullRequest
1 голос
/ 08 января 2012

У меня есть следующая таблица:

 id    study_start_time  study_end_time 
____________________________________________
1234          168              256
2345          175              233
1234          256              300
1234          300              389
2345          400              425
4567          200              225 

Я хочу получить приведённую ниже таблицу:

  id    study_start_time  study_end_time 
____________________________________________
1234          168              389               

Процесс:

1234 starts at 168. minutes  , work until to 256. minutes
1234   "    "  256.  "       "  "    "    "  300.    "
1234   "    "  300.  "       "  "    "    "  389.    "

но фактическая таблица должна быть уменьшена до значения, показанного ниже

1234 starts at 168. minutes ,  work until to 389. minutes

как я могу это сделать?

Ответы [ 4 ]

3 голоса
/ 08 января 2012

Не получит награду за красоту, но работает:

-- Collapse chained study intervals stored in table c.
-- Two rows are "chained" when they share an id and one of them starts exactly
-- at the other's study_end_time.
--
-- NOTE: part 1 groups by id only, so an id yields at most one merged row
-- (earliest chained start .. latest chained end). An id whose intervals form
-- several separate chains is therefore still reduced to a single row.
--
-- Part 1: one row per id that has at least one chained pair.
-- The join matches on id as well as on the boundary; matching on the boundary
-- alone would chain intervals that belong to different ids.
SELECT c1.id                    AS id,
       MIN(c1.study_start_time) AS study_start_time,
       MAX(c2.study_end_time)   AS study_end_time
FROM   c c1
       INNER JOIN c c2
               ON c2.id = c1.id
              AND c2.study_start_time = c1.study_end_time
GROUP  BY c1.id

UNION ALL

-- Part 2: ids without any chained pair keep every interval unchanged.
-- For such an id no row has a successor, so a single NOT EXISTS on the id
-- covers both "no successor" and "not already returned by part 1".
-- NOT EXISTS avoids the three-valued-logic pitfalls of NOT IN on a nullable id.
SELECT c1.id,
       c1.study_start_time,
       c1.study_end_time
FROM   c c1
WHERE  NOT EXISTS (SELECT 1
                   FROM   c a
                          INNER JOIN c b
                                  ON b.id = a.id
                                 AND b.study_start_time = a.study_end_time
                   WHERE  a.id = c1.id);

Первая часть запроса (до UNION ALL) возвращает только курсы без разрывов, то есть идущие подряд интервалы. Вторая часть выбирает интервалы, после которых есть разрыв, и исключает курсы, уже полученные в первой части.

1 голос
/ 08 января 2012

Похоже, я отправил этот ответ не на тот вопрос («Оптимизация запросов MySQL — внутренние запросы») и даже получил за него очки! Продублирую его здесь:

-- Merge back-to-back study intervals of table tab into islands, per id.
-- An interval continues another one when both have the same id and it starts
-- exactly at the other's study_end_time.
-- Uncomment the next line to get the execution plan instead of the rows.
-- EXPLAIN ANALYZE
WITH RECURSIVE tree AS (
    -- Anchor: intervals that no interval of the same id leads into,
    -- i.e. the first interval of every island.
    SELECT t0.id
        , t0.study_start_time
        , t0.study_end_time
    FROM tab t0
    WHERE NOT EXISTS (
        SELECT *
        FROM tab nx
        WHERE nx.id = t0.id
          AND nx.study_end_time = t0.study_start_time
    )
    -- UNION (not UNION ALL) is kept on purpose: it discards rows that the
    -- recursion has already produced.
    UNION
    -- Step: extend a partial island with the interval that starts where it
    -- currently ends, keeping the island's original start.
    SELECT tt.id
        , tt.study_start_time
        , t1.study_end_time
    FROM tab t1
    JOIN tree tt
        ON t1.id = tt.id
       AND t1.study_start_time = tt.study_end_time
)
-- tree also holds every partially extended island; keep only the rows that
-- cannot be extended any further.
SELECT tree.id
    , tree.study_start_time
    , tree.study_end_time
FROM tree
WHERE NOT EXISTS (
    SELECT *
    FROM tab nx
    WHERE nx.id = tree.id
      AND tree.study_end_time = nx.study_start_time
)
-- study_start_time is part of the sort key so that several islands of one id
-- come back in a defined order.
ORDER BY tree.id, tree.study_start_time
    ;

Результаты:

DROP TABLE
NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "tab_pkey" for table "tab"
CREATE TABLE
CREATE INDEX
INSERT 0 15
  id  | study_start_time | study_end_time 
------+------------------+----------------
 1234 |              168 |            480
 2345 |              175 |            233
 2345 |              400 |            425
 4567 |              200 |            225
 4567 |              250 |            289
 4567 |              300 |            310
 4567 |              320 |            340
 4567 |              360 |            390
(8 rows)

План запроса:

 Merge Anti Join  (cost=16209.59..16292.13 rows=6386 width=12) (actual time=0.393..0.406 rows=8 loops=1)
   Merge Cond: ((tree.id = nx.id) AND (tree.study_end_time = nx.study_start_time))
   CTE tree
     ->  Recursive Union  (cost=0.00..15348.09 rows=8515 width=12) (actual time=0.038..0.287 rows=15 loops=1)
           ->  Merge Anti Join  (cost=0.00..175.04 rows=1455 width=12) (actual time=0.031..0.081 rows=8 loops=1)
                 Merge Cond: ((t0.id = nx.id) AND (t0.study_start_time = nx.study_end_time))
                 ->  Index Scan using tab_pkey on tab t0  (cost=0.00..77.35 rows=1940 width=12) (actual time=0.015..0.030 rows=15 loops=1)
                 ->  Index Scan using sssss on tab nx  (cost=0.00..77.35 rows=1940 width=8) (actual time=0.006..0.015 rows=14 loops=1)
           ->  Merge Join  (cost=1297.04..1500.28 rows=706 width=12) (actual time=0.021..0.028 rows=1 loops=6)
                 Merge Cond: ((t1.id = tt.id) AND (t1.study_start_time = tt.study_end_time))
                 ->  Index Scan using tab_pkey on tab t1  (cost=0.00..77.35 rows=1940 width=12) (actual time=0.004..0.008 rows=9 loops=6)
                 ->  Sort  (cost=1297.04..1333.42 rows=14550 width=12) (actual time=0.011..0.011 rows=2 loops=6)
                       Sort Key: tt.id, tt.study_end_time
                       Sort Method: quicksort  Memory: 25kB
                       ->  WorkTable Scan on tree tt  (cost=0.00..291.00 rows=14550 width=12) (actual time=0.001..0.001 rows=2 loops=6)
   ->  Sort  (cost=726.15..747.44 rows=8515 width=12) (actual time=0.342..0.346 rows=15 loops=1)
         Sort Key: tree.id, tree.study_end_time
         Sort Method: quicksort  Memory: 25kB
         ->  CTE Scan on tree  (cost=0.00..170.30 rows=8515 width=12) (actual time=0.041..0.306 rows=15 loops=1)
   ->  Sort  (cost=135.34..140.19 rows=1940 width=8) (actual time=0.037..0.040 rows=15 loops=1)
         Sort Key: nx.id, nx.study_start_time
         Sort Method: quicksort  Memory: 25kB
         ->  Seq Scan on tab nx  (cost=0.00..29.40 rows=1940 width=8) (actual time=0.005..0.012 rows=15 loops=1)
 Total runtime: 0.925 ms
(24 rows)
1 голос
/ 08 января 2012

Подготовка образцов данных:

-- Sample data: study intervals (start/end given as plain integers) per id.
create table tab (id int, study_start_time int, study_end_time int);

-- Explicit column lists on both sides keep the load independent of the
-- physical column order of tab.
-- UNION ALL of single-row selects is used instead of a multi-row VALUES list,
-- which not every engine supports.
insert into tab (id, study_start_time, study_end_time)
select t.id, t.study_start_time, t.study_end_time
from (
    select 1234 as id, 168 as study_start_time, 256 as study_end_time union all
    select 2345, 175, 233 union all
    select 1234, 256, 300 union all
    select 1234, 300, 389 union all
    select 1234, 389, 439 union all
    select 1234, 439, 460 union all
    select 1234, 460, 480 union all
    select 2345, 400, 425 union all
    select 4567, 200, 225 union all
    select 4567, 250, 270 union all
    select 4567, 270, 289 union all
    select 4567, 300, 310 union all
    select 4567, 320, 340 union all
    select 4567, 360, 370 union all
    select 4567, 370, 390
) t;

Способ "слияния" строк:

Sample data

Возможно, я слишком всё усложнил, но результат получается ожидаемый :-). В SQL Server 2005+ запрос можно упростить с помощью CTE, сохранив его «однострочником», либо использовать временную таблицу (подходит для большинства СУБД) и решить задачу в несколько запросов.

-- Merge back-to-back study intervals of tab into islands, per id.
with linked as (
    -- Every interval annotated with its neighbours for the same id:
    --   next_end   : end of the interval starting where this one ends (null if none)
    --   prev_start : start of the interval ending where this one starts (null if none)
    select cur.id, cur.study_start_time, cur.study_end_time,
        nxt.study_end_time as next_end,
        prv.study_start_time as prev_start
    from tab cur
    left join tab nxt on nxt.id = cur.id and nxt.study_start_time = cur.study_end_time
    left join tab prv on prv.id = cur.id and prv.study_end_time = cur.study_start_time
)
select merged.id, merged.study_start_time, merged.study_end_time
from (
    -- Islands made of two or more intervals: pair each island head (has a
    -- successor, no predecessor) with the nearest island tail (has a
    -- predecessor, no successor) of the same id that ends at or after it.
    select head.id, head.study_start_time, tail.study_end_time
    from linked head
    join linked tail
        on tail.id = head.id
        and tail.next_end is null
        and tail.prev_start is not null
        and tail.study_end_time = (
            select min(cand.study_end_time)
            from linked cand
            where cand.id = head.id
                and cand.study_end_time >= head.study_end_time
                and cand.next_end is null
                and cand.prev_start is not null
        )
    where head.next_end is not null
        and head.prev_start is null
    union
    -- Stand-alone intervals: neither a predecessor nor a successor.
    select solo.id, solo.study_start_time, solo.study_end_time
    from linked solo
    where solo.next_end is null
        and solo.prev_start is null
) merged
order by merged.id, merged.study_start_time;

Окончательный результат:

id          study_start_time study_end_time
----------- ---------------- --------------
1234        168              480
2345        175              233
2345        400              425
4567        200              225
4567        250              289
4567        300              310
4567        320              340
4567        360              390
0 голосов
/ 08 января 2012
 (Select id, study_start_time where MIN(study_start_time) from (Select id, 
 study_start_time from table)) UNION 
 (Select id, study_end_time where
 MAX(study_end_time) from (Select id, study_end_time from table)
...