From 461283ba6e510645923746b640b73d09d6e4d776 Mon Sep 17 00:00:00 2001
From: fr33domlover <fr33domlover@rel4tion.org>
Date: Wed, 20 Jul 2016 11:24:01 +0000
Subject: [PATCH] Simpler path selection in SQL transitive reduction

The transitive reduction query works by removing every edge which isn't
the only path between its two nodes, i.e. every edge for which a longer
path between the same nodes exists. The first step is to pick all the
paths which include 2 or more edges.

The initial code did that by appending in-edges to all paths, which
results in unnecessary duplicates and an extra INNER JOIN. Now, instead,
just pick all the paths with a length of more than 2 nodes (i.e. at
least 2 edges), using array_length on the path. This is hopefully not
just simpler, but also faster.
---
 .../Persist/Sql/Graph/TransitiveReduction.hs  | 56 +++++++++----------
 1 file changed, 26 insertions(+), 30 deletions(-)

diff --git a/src/Database/Persist/Sql/Graph/TransitiveReduction.hs b/src/Database/Persist/Sql/Graph/TransitiveReduction.hs
index 2b26df8..da34a30 100644
--- a/src/Database/Persist/Sql/Graph/TransitiveReduction.hs
+++ b/src/Database/Persist/Sql/Graph/TransitiveReduction.hs
@@ -58,11 +58,12 @@ import Database.Persist.Local.Sql.Orphan.Common
 -- >
 -- > EXCEPT
 -- >
--- > SELECT e.*
--- > FROM            edge AS pre
--- >      INNER JOIN temp        ON pre.dest = temp.path[1]
--- >      INNER JOIN edge AS e   ON e.source = pre.source AND e.dest = temp.id
--- > WHERE NOT temp.cycle
+-- > SELECT edge.*
+-- > FROM edge INNER JOIN temp
+-- > ON edge.source = temp.path[1] AND
+-- >    edge.dest = temp.id
+-- > WHERE array_length(temp.path, 1) > 2 AND
+-- >       NOT temp.cycle
 trrSelect
     :: ( MonadIO m
        , PersistEntityGraph node edge
@@ -81,8 +82,6 @@ trrSelect proxy = do
         tid = DBName "id"
         tpath = DBName "path"
         tcycle = DBName "cycle"
-        edgeP = DBName "pre"
-        edgeE = DBName "e"
         dbname = connEscapeName conn
         ecols = T.intercalate ", " $ entityColumnNames tEdge conn
         qecols name =
@@ -121,14 +120,13 @@ trrSelect proxy = do
             , " SELECT ", ecols
             , " FROM ", dbname $ entityDB tEdge
             , " EXCEPT "
-            , " SELECT ", qecols edgeE
-            , " FROM ", dbname $ entityDB tEdge, " AS ", dbname edgeP
-            , " INNER JOIN ", dbname temp
-            , " ON ", edgeP ^* fieldDB fwd, " = ", temp ^* tpath, "[1]"
-            , " INNER JOIN ", dbname $ entityDB tEdge, " AS ", dbname edgeE
-            , " ON ", edgeE ^* fieldDB bwd, " = ", edgeP ^* fieldDB bwd
-            , " AND ", edgeE ^* fieldDB fwd, " = ", temp ^* tid
-            , " WHERE NOT ", temp ^* tcycle
+            , " SELECT ", qecols $ entityDB tEdge
+            , " FROM ", entityDB tEdge <#> temp
+            , " ON "
+            , entityDB tEdge ^* fieldDB bwd, " = ", temp ^* tpath, "[1] AND "
+            , entityDB tEdge ^* fieldDB fwd, " = ", temp ^* tid
+            , " WHERE array_length(", temp ^* tpath, ", 1) > 2 AND NOT "
+            , temp ^* tcycle
             ]
     rawSql sql []
 
@@ -148,11 +146,12 @@ trrSelect proxy = do
 -- >   )
 -- > DELETE FROM edge
 -- > WHERE id IN (
--- >   SELECT e.id
--- >   FROM            edge AS pre
--- >        INNER JOIN temp        ON pre.dest = temp.path[1]
--- >        INNER JOIN edge AS e   ON e.source = pre.source AND e.dest = temp.id
--- >   WHERE NOT temp.cycle
+-- >   SELECT edge.id
+-- >   FROM edge INNER JOIN temp
+-- >   ON edge.source = temp.path[1] AND
+-- >      edge.dest = temp.id
+-- >   WHERE array_length(temp.path, 1) > 2 AND
+-- >         NOT temp.cycle
 -- > )
 trrApply
     :: ( MonadIO m
@@ -172,8 +171,6 @@ trrApply proxy = do
         tid = DBName "id"
         tpath = DBName "path"
         tcycle = DBName "cycle"
-        edgeP = DBName "pre"
-        edgeE = DBName "e"
         dbname = connEscapeName conn
         t ^* f = dbname t <> "." <> dbname f
         t <#> s = dbname t <> " INNER JOIN " <> dbname s
@@ -205,14 +202,13 @@ trrApply proxy = do
             , sqlStep fwd bwd
             , " ) DELETE FROM ", dbname $ entityDB tEdge
             , " WHERE ", entityDB tEdge ^* fieldDB (entityId tEdge), " IN ("
-                , " SELECT ", edgeE ^* fieldDB (entityId tEdge)
-                , " FROM ", dbname $ entityDB tEdge, " AS ", dbname edgeP
-                , " INNER JOIN ", dbname temp
-                , " ON ", edgeP ^* fieldDB fwd, " = ", temp ^* tpath, "[1]"
-                , " INNER JOIN ", dbname $ entityDB tEdge, " AS ", dbname edgeE
-                , " ON ", edgeE ^* fieldDB bwd, " = ", edgeP ^* fieldDB bwd
-                , " AND ", edgeE ^* fieldDB fwd, " = ", temp ^* tid
-                , " WHERE NOT ", temp ^* tcycle
+                , " SELECT ", entityDB tEdge ^* fieldDB (entityId tEdge)
+                , " FROM ", entityDB tEdge <#> temp
+                , " ON "
+                , entityDB tEdge ^* fieldDB bwd, " = ", temp ^* tpath
+                , "[1] AND ", entityDB tEdge ^* fieldDB fwd, " = ", temp ^* tid
+                , " WHERE array_length(", temp ^* tpath, ", 1) > 2 AND NOT "
+                , temp ^* tcycle
             , " )"
             ]
     rawExecuteCount sql []