Implement flexible Kahn topsort for use on git commit graphs

This commit is contained in:
fr33domlover 2016-02-29 01:59:33 +00:00
parent 1fe41edabe
commit ec9fc486ee
2 changed files with 135 additions and 0 deletions

View file

@ -0,0 +1,133 @@
{- This file is part of Vervis.
-
- Written in 2016 by fr33domlover <fr33domlover@riseup.net>.
-
- Copying is an act of love. Please copy, reuse and share.
-
- The author(s) have dedicated all copyright and related and neighboring
- rights to this software to the public domain worldwide. This software is
- distributed without any warranty.
-
- You should have received a copy of the CC0 Public Domain Dedication along
- with this software. If not, see
- <http://creativecommons.org/publicdomain/zero/1.0/>.
-}
-- | Graph tools for use with the @fgl@ package.
module Data.Graph.Inductive.Local
( NodeSet (..)
, TraversalOrder (..)
, ResultList (..)
, topsortKahn
)
where
import Prelude
import Data.Foldable (foldl')
import Data.Graph.Inductive.Graph
import Data.List (sortBy)
-- | A graph node container to be used with Kanh's topsort algorithm.
class NodeSet s where
-- | Take a graph node and a container, insert the node into it and return
-- the resulting container.
--insert :: LNode a -> s a -> s a
insertNode :: Node -> s -> s
-- | Remove a node from the container. Return the removed node and the
-- resulting container after removal. If the container is empty (i.e. there
-- is no node to remove), return 'Nothing'.
--extract :: s a -> Maybe (LNode a, s a)
extractNode :: s -> Maybe (Node, s)
-- | Specification of the order in which a node's outgoing edges should be
-- traversed.
data TraversalOrder b
-- | The order in which they're listed by FGL functions. The FGL
-- documentation doesn't seem to specify the order, which means it may
-- depend entirely on the 'Graph' instance you are using.
= InOrder
-- | Reverse of 'InOrder'.
| ReverseOrder
-- | Sort the outgoing edge list before traversal, using the given ordering
-- function. It takes two pairs, each pair having a labeled node and the
-- label of the edge, and determines the order they should be visited. 'LT'
-- means the first edge is visited first. 'GT' means the second edge is
-- visited first. 'EQ' means it doesn't matter and the implementation can
-- choose arbitrarily.
| SortedOrder ((Node, b) -> (Node, b) -> Ordering)
-- | Lets you reorder the edge list in an arbitrary way before it gets
-- traversed. Note that it's up to you to make sure the list you return
-- really contains all the items of the input list.
| CustomOrder ([(Node, b)] -> [(Node, b)])
sortNodes :: TraversalOrder b -> [(Node, b)] -> [(Node, b)]
sortNodes InOrder = id
sortNodes ReverseOrder = reverse
sortNodes (SortedOrder f) = sortBy f
sortNodes (CustomOrder f) = f
-- | A container for storing the result of the sorting. Kahn's algorithm begins
-- with an empty structure and then appends nodes to produce the result.
-- Therefore almost any sequence container could work.
--
-- You can also use a regular Haskell list. Implement 'append' using list
-- prepend and remember to 'reverse' the list returned by the algorithm.
class ResultList l where
emptyList :: l a
appendItem :: a -> l a -> l a
-- | Flexible topological sort using Kahn's algorithm.
--
-- It seems that Haskell graph libraries (and perhaps graph libraries in
-- general) tend to implement topological sort using depth-first search (DFS).
-- While it's probably easier (since these libraries also implement DFS), the
-- result is that you pass a graph to a function and get back the sorted list.
-- There is no room left for specifying variable parts of the algorithm, which
-- means you can't control which topsort order (out of potentially many orders
-- possible) you get. Sometimes you don't care, but sometimes you do.
--
-- Kahn's algorithm has room for variations in two places:
--
-- (1) When traversing a node's outgoing edges, the order in which this
-- traversal happens isn't specified.
-- (2) The internals of structure S, the set of nodes with no inbound edges,
-- aren't specified. Therefore, so is the order in which nodes are removed
-- from it.
--
-- https://en.wikipedia.org/wiki/Topological_sort#Kahn.27s_algorithm
topsortKahn
:: (DynGraph g, NodeSet s, ResultList l)
=> g a b
-- ^ Graph whose nodes to sort
-> s
-- ^ The set of graph nodes which don't have inbound edges
-> TraversalOrder b
-- ^ In which order to go over the outgoing edges of a node
-> Maybe (l Node)
-- ^ Topologically sorted list. For each edge from node @u@ to node @v@,
-- @u@ appears before @v@ in this list. If the graph is empty or the
-- initial node set is empty, an empty list is returned. If the graph
-- contains a cycle, 'Nothing' is returned.
topsortKahn graph set order = f graph set emptyList
where
nEdges = length . labEdges
sort = sortNodes order
visit n (g, s) m =
let g' = delEdge (n, m) g
s' =
if indeg g' m > 0
then s
else insertNode m s
in (g', s')
f g s l =
case extractNode s of
Nothing ->
if nEdges g > 0
then Nothing
else Just l
Just (n, s') ->
let l' = appendItem n l
children = map fst $ sort $ lsuc g n
(g', s'') = foldl' (visit n) (g, s') children
in f g' s'' l'

View file

@ -35,6 +35,7 @@ flag library-only
library library
exposed-modules: Data.Char.Local exposed-modules: Data.Char.Local
Data.Graph.Inductive.Local
Data.List.Local Data.List.Local
Vervis.Application Vervis.Application
Vervis.Field.Person Vervis.Field.Person
@ -97,6 +98,7 @@ library
, directory >= 1.1 && < 1.3 , directory >= 1.1 && < 1.3
, esqueleto , esqueleto
, fast-logger >= 2.2 && < 2.5 , fast-logger >= 2.2 && < 2.5
, fgl
, file-embed , file-embed
, filepath , filepath
, hit , hit