Наивная функциональная реализация Union Find Disjoint Set имеет плохую производительность - PullRequest
0 голосов
/ 23 мая 2018

Следующая реализация UFDS имеет низкую производительность.Может ли кто-нибудь объяснить мне, почему это может быть?Вот отчет по профилированию:

    total time  =        0.10 secs   (98 ticks @ 1000 us, 1 processor)
    total alloc =  78,869,168 bytes  (excludes profiling overheads)

COST CENTRE        MODULE    SRC                                        %time %alloc

x.\                Main      src/merging_communities.hs:67:54-71         37.8    0.0
foldMap            Main      src/merging_communities.hs:(31,3)-(32,55)   22.4    0.0
x                  Main      src/merging_communities.hs:(65,1)-(68,79)   20.4   83.2
getElemTree        Main      src/merging_communities.hs:40:1-43          19.4    0.0
main.initialForest Main      src/merging_communities.hs:103:7-51          0.0   16.2

main.hs

   module Main where

import Control.Monad
import Control.Monad.State.Lazy
import Data.Foldable
import Data.Functor
import Data.List
import Data.Maybe
import Data.Monoid
import Prelude
import System.IO
import Text.Pretty.Simple

--import Text.Pretty.Simple (pPrint)
--The Union-Find algorithm and Disjoint Sets (UFDS) data structureare used which is able to efficiently (i.e. in nearly constant time) determine which set an item belongs to, 
--test if two items belong to the same set, and union two disjoint sets into one when needed. 
--It can be used to find connected components in an undirected graph, and can hence be used as part of Kruskal's algorithm for the Minimum Spanning Tree (MST) problem.
data Tree a =
  Node a
       [Tree a]
  deriving (Show)

instance (Eq a) => Eq (Tree a) where
  (Node a forestA) == (Node b forestB) = a == b && forestA == forestB

instance Functor Tree where
  fmap f (Node a []) = Node (f a) []
  fmap f (Node a forest) = Node (f a) (fmap (fmap f) forest)

instance Foldable Tree where
  foldMap f (Node a []) = f a
  foldMap f (Node a xs) = f a <> foldMap (foldMap f) xs

-- each disjoint set has a representative element which is used to uniquely identify the set. We can use a tree to represent a disjoint set where
-- the representative element is the root node of the tree
makeSet :: a -> Tree a
makeSet a = Node a []

getElemTree :: Eq a => a -> [Tree a] -> Maybe (Tree a)
getElemTree a forest = find (elem a) forest

size :: Tree a -> Int
size (Node a []) = 1
size (Node a forest) = 1 + (sum $ fmap size forest)

depth :: Tree a -> Int
depth (Node a []) = 1
depth (Node a forest) = 1 + (maximum $ fmap ((+ 1) . depth) forest)

flatten :: Tree a -> [a]
flatten (Node a forest) = [a] ++ (foldMap flatten forest)

-- set the parent of one of the roots to the other tree's root - which one we choose is based on our weighting
unWeightedUnion :: Eq a => a -> a -> [Tree a] -> [Tree a]
unWeightedUnion a b forest
  | isNothing treeA || treeA == treeB = forest
  | otherwise =
    let tA@(Node rootA forestA) = fromJust $ treeA
        tB@(Node rootB forestB) = fromJust $ treeB
     in changeRoot tA tB forest
  where
    treeA = getElemTree a forest
    treeB = getElemTree b forest

changeRoot tA@(Node rootA forestA) tB@(Node rootB forestB) forest =
  if (size tA <= size tB)
    then (Node rootA (tB : forestA)) : filter (\t -> t /= tB && t /= tA) forest
    else (Node rootB (tA : forestB)) : filter (\t -> t /= tB && t /= tA) forest

-- union by rank is a weighting which keeps our trees as shallow as possible When we weight by rank or tree depth we make the shallower tree root the child of the deeper tree's root
getRoot :: Tree a -> a -- get the root node
getRoot (Node a _) = a

-- return the name of the set containing the node x ie the root node of the set containing node x
-- use path compression - if parent is not the root then set the parent of the node to the root
data Query
  = M Int
      Int
  | Q Int
  deriving (Show, Read)

executeQuery :: [Query] -> Int -> StateT [Tree Int] IO Query
executeQuery [] _ = return $ M 1 1
executeQuery qs pop = do
  forest <- get
  case head qs of
    (M a b) -> do
      let newForest = unWeightedUnion a b forest
      put newForest
      executeQuery (tail qs) pop
    (Q a) -> do
      liftIO $ print $ size $ fromJust $ getElemTree a forest
      executeQuery (tail qs) pop

main = do
  contents <- readFile "queries.txt"
  print $ lines contents
  let population = head $ words contents
  let queries = map read $ tail $ lines contents :: [Query]
  let population = read $ head $ words contents :: Int
  let initialForest = map makeSet [1 .. population]
  execStateT (executeQuery queries population) initialForest

questions.txt

100000 200000
M 68770 97917
M 65906 74478
M 78744 21384
M 36186 31560
Q 43063
M 12923 73331
M 91542 54702
M 62459 96133
M 13196 56121
M 1648 86052
M 99517 97247
M 59768 66017
Q 48274
Q 96430
M 44341 70873
Q 74989
Q 71357
M 72482 16677
Q 8219
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...