Darcs inventory parser: Add hash types and parse previous inventory

2016-05-16 14:02:43 +00:00 · 2016-05-16 14:02:43 +00:00 · 9ba6761459
commit 9ba6761459
parent f8ae122da7
4 changed files with 164 additions and 93 deletions
--- a/src/Darcs/Local/PatchInfo/Parser.hs
+++ b/src/Darcs/Local/PatchInfo/Parser.hs
@ -27,12 +27,12 @@ module Darcs.Local.PatchInfo.Parser
    )
 where
-import Prelude hiding (takeWhile)
+import Prelude hiding (take, takeWhile)
-import Control.Applicative (many)
+import Control.Applicative (many, optional, liftA2)
-import Control.Monad (replicateM, replicateM_)
+import Control.Arrow (second)
 import Control.Monad (replicateM_)
 import Crypto.Hash
 import Crypto.Hash.Algorithms (SHA1 (SHA1))
 import Data.Attoparsec.ByteString
 import Data.ByteArray (convert)
 import Data.ByteString (ByteString)
@ -43,7 +43,8 @@ import System.FilePath ((</>))
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Char8 as BC
-import qualified Storage.Hashed.Hash as H
+import qualified Data.ByteString.Base16 as B16
 import qualified Data.ByteString.Lex.Integral as BX
 import Control.Applicative.Local
 import Darcs.Local.PatchInfo.Types
@ -51,15 +52,22 @@ import Data.Attoparsec.ByteString.Local
 import Data.ByteString.Local (stripPrefix)
 import Data.Text.UTF8.Local (decodeStrict)
-lf, space, star, dash, zero, nine, sqrOpen, sqrClose :: Word8
+lf       :: Word8
 lf       = 10
 space    :: Word8
 space    = 32
 star     :: Word8
 star     = 42
 dash     :: Word8
 dash     = 45
 zero     :: Word8
 zero     = 48
 nine     :: Word8
 nine     = 57
 sqrOpen  :: Word8
 sqrOpen  = 91
-sqrClose = 93
+--sqrClose :: Word8
 --sqrClose = 93
 digit :: Parser Word8
 digit = satisfy $ \ w -> zero <= w && w <= nine
@ -87,67 +95,92 @@ patchTimeP = do
    month <- decimal2P
    day <- decimal2P
-    hour <- decimal2P
+    hours <- decimal2P
-    minute <- decimal2P
+    minutes <- decimal2P
-    second <- decimal2P
+    seconds <- decimal2P
    case fromGregorianValid year month day of
        Nothing -> fail "Invalid patch date"
        Just uday -> return UTCTime
            { utctDay     = uday
            , utctDayTime =
-                secondsToDiffTime $ 3600 * hour + 60 * minute + second
+                secondsToDiffTime $ 3600 * hours + 60 * minutes + seconds
            }
 line :: Parser ByteString
 line = restOfLine
 restOfLine :: Parser ByteString
 restOfLine = takeWhile (/= lf)
 eol :: Parser ()
 eol = skip (== lf)
 skipLine :: Parser ()
-skipLine = do
+skipLine = skipWhile (/= lf)
    skipWhile (/= lf)
    skip (== lf)
 skipRestOfLine :: Parser ()
 skipRestOfLine = skipLine
 skipPatchP :: Parser ()
-skipPatchP = do
+skipPatchP =
    -- title
-    skipLine
+    skipLine *> eol *>
    -- author, inverted, time
-    skipLine
+    skipLine *> eol *>
    -- ignore, description
-    skipMany $ do
+    (skipMany $ skip (== space) *> skipRestOfLine *> eol) *>
        skip (== space)
        skipRestOfLine
    -- end of info
-    string "] \n"
+    (string "] \n") *>
    -- hash
-    skipWhile (/= lf)
+    skipLine
-pristineP :: Parser (Maybe Int, H.Hash)
+sha256P :: Parser ByteString
-pristineP = do
+sha256P = do
-    string "pristine:"
+    bs <- take 64
-    (,) Nothing . H.decodeBase16 <$> takeWhile (/= lf)
+    case second B.null $ B16.decode bs of
        (h, True) -> return h
        _         -> fail "SHA256 decoding from hex failed"
 sizeP :: Parser Int
 sizeP = do
    bs <- take 10
    case second B.null <$> BX.readDecimal bs of
        Just (n, True) -> return n
        _              -> fail "sizeP failed"
 sizeSha256P :: Parser (Int, ByteString)
 sizeSha256P = liftA2 (,) sizeP (skip (== dash) *> sha256P)
 pristineP :: Parser PristineHash
 pristineP = string "pristine:" *> (PristineHash <$> sha256P)
 prevInvP :: Parser InventoryHash
 prevInvP =
    string "Starting with inventory" *> eol *>
    (uncurry InventoryHash <$> sizeSha256P)
 patchInfoRawP :: Parser PatchInfoRaw
 patchInfoRawP = do
    word8 sqrOpen
    title <- takeWhile1 (/= lf)
-    word8 lf
+    eol
    author <- takeWhile1 (/= star)
    word8 star
    inverted <- (/= star) <$> (satisfy $ \ c -> c == star || c == dash)
    (timeRaw, time) <- match patchTimeP
-    word8 lf
+    eol
    word8 space
    junkp <- string "Ignore-this: "
    junkc <- takeWhile1 (/= lf)
-    word8 lf
+    eol
-    lines <- many $ word8 space *> takeWhile (/= lf) <* word8 lf
+    lines <- many $ word8 space *> takeWhile (/= lf) <* eol
    string "] \nhash: "
-    hash <- takeWhile1 (/= lf)
+    hash <- sizeSha256P
    return PatchInfoRaw
        { pirAuthor      = author
@ -185,7 +218,7 @@ refinePatchInfo pir =
            l -> Just $ BC.unlines l
    in  PatchInfo
            { piAuthor      = decodeStrict $ pirAuthor pir
-            , piHash        = pirHash pir
+            , piHash        = uncurry ContentHash $ pirHash pir
            , piTitle       = decodeStrict title
            , piDescription = decodeStrict <$> description
            , piTag         = tag
@ -194,71 +227,54 @@ refinePatchInfo pir =
 -- | Parse patch metadata and compute the metadata's hash, which can be used as
 -- a patch identifier for lookup and matching.
-patchInfoP :: Parser (PatchInfo, ByteString)
+patchInfoP :: Parser (PatchInfo, PatchHash)
 patchInfoP = do
    pir <- patchInfoRawP
-    return (refinePatchInfo pir, convert $ hashPatchInfo SHA1 pir)
+    return (refinePatchInfo pir, PatchHash $ convert $ hashPatchInfo SHA1 pir)
 -- NEXT current plan:
 --
 -- (0) all parses are incremental!!!
 -- (1) use CountP below to determine number of patches
 -- (2) given pagination details determine offset and limit
 -- (3) parse the patches we want
 -- (4) reverse their order...
 -- (5) generate a whamlet widget from that
 -- (*) FOR NOW no pagination, just read the entire thing once...
 patchInfosCountP :: Parser Int
 patchInfosCountP = do
-    skipWhile (/= lf) -- pristine hash
+    -- pristine hash
-    n <- length <$> (many $ word8 lf >> skipPatchP)
+    skipLine
-    word8 lf
+    -- previous inventory
    optional $
        eol *> string "Starting" *> skipRestOfLine *> eol *>
        skipLine
    -- patch info
    n <- length <$> (many $ eol *> skipPatchP)
    eol
    return n
 patchInfosAllP :: Parser PatchSeq
-patchInfosAllP = do
+patchInfosAllP = PatchSeq
-    (psize, phash) <- pristineP
+    <$> pristineP
-    ps <- many $ word8 lf >> patchInfoP
+    <*> optional (eol *> prevInvP)
-    word8 lf
+    <*> many (eol *> patchInfoP)
-    return PatchSeq
+    <*  eol
        { psPristineHash = phash
        , psPristineSize = psize
        , psPatches      = ps
        }
 patchInfosOffsetP :: Int -> Parser PatchSeq
-patchInfosOffsetP off = do
+patchInfosOffsetP off = PatchSeq
-    (psize, phash) <- pristineP
+    <$> pristineP
-    replicateM_ off $ word8 lf >> skipPatchP
+    <*> optional (eol *> prevInvP)
-    ps <- many $ word8 lf >> patchInfoP
+    <*> ( replicateM_ off (eol *> skipPatchP) *>
-    word8 lf
+          many (eol *> patchInfoP)
-    return PatchSeq
+        )
-        { psPristineHash = phash
+    <* eol
-        , psPristineSize = psize
+
        , psPatches      = ps
        }
 patchInfosLimitP :: Int -> Parser PatchSeq
-patchInfosLimitP lim = do
+patchInfosLimitP lim = PatchSeq
-    (psize, phash) <- pristineP
+    <$> pristineP
-    ps <- atMost lim $ word8 lf >> patchInfoP
+    <*> optional (eol *> prevInvP)
-    return PatchSeq
+    <*> atMost lim (eol *> patchInfoP)
        { psPristineHash = phash
        , psPristineSize = psize
        , psPatches      = ps
        }
 patchInfosOffsetLimitP :: Int -> Int -> Parser PatchSeq
-patchInfosOffsetLimitP off lim = do
+patchInfosOffsetLimitP off lim = PatchSeq
-    (psize, phash) <- pristineP
+    <$> pristineP
-    replicateM_ off $ word8 lf >> skipPatchP
+    <*> optional (eol *> prevInvP)
-    ps <- atMost lim $ word8 lf >> patchInfoP
+    <*> ( replicateM_ off (eol *> skipPatchP) *>
-    return PatchSeq
+          atMost lim (eol *> patchInfoP)
-        { psPristineHash = phash
+        )
        , psPristineSize = psize
        , psPatches      = ps
        }
 darcsDir :: FilePath
 darcsDir = "_darcs"
--- a/src/Darcs/Local/PatchInfo/Types.hs
+++ b/src/Darcs/Local/PatchInfo/Types.hs
@ -15,23 +15,54 @@
 module Darcs.Local.PatchInfo.Types
    ( PatchInfoRaw (..)
    , PatchHash (..)
    , ContentHash (..)
    , InventoryHash (..)
    , PristineHash (..)
    , PatchInfo (..)
    , PatchSeq (..)
    )
 where
 -- TODO
 --
 -- Apparently, after a while, some of the patches are moved from
 -- hashed_inventory into the inventories/ dir. So the patch set contains more
 -- than one group. This means I need to extend my parser to cover this case.
 -- Sources for info about this thing:
 --
 -- * Darcs source code
 -- * Darcs wiki
 -- * Local Darcs repos I have
 --
 -- From Darcs source code:
 --
 -- > The patches in a repository are stored in chunks broken up at \"clean\"
 -- > tags. A tag is clean if the only patches before it in the current
 -- > repository ordering are ones that the tag depends on (either directly
 -- > or indirectly). Each chunk is stored in a separate inventory file on disk.
 -- >
 -- > A 'PatchSet' represents a repo's history as the list of patches since the
 -- > last clean tag, and then a list of patch lists each delimited by clean tags.
 -- >
 -- > A 'Tagged' is a single chunk of a 'PatchSet'.  It has a 'PatchInfo'
 -- > representing a clean tag, the hash of the previous inventory (if it exists),
 -- > and the list of patches since that previous inventory.
 --
 -- Let's start with finding out the format of the binary inventories and
 -- parsing them.
 import Prelude
 import Data.ByteString (ByteString)
 import Data.Text (Text)
 import Data.Time.Clock (UTCTime)
 import Storage.Hashed.Hash (Hash)
 -- | Patch metadata in raw form. This is intended for accurate hashing of the
 -- patch info.
 data PatchInfoRaw = PatchInfoRaw
    { pirAuthor      :: ByteString
-    , pirHash        :: ByteString
+    , pirHash        :: (Int, ByteString)
    , pirTitle       :: ByteString
    , pirDescription :: [ByteString]
    , pirJunkPrefix  :: ByteString
@ -40,14 +71,36 @@ data PatchInfoRaw = PatchInfoRaw
    , pirInverted    :: Bool
    }
 -- | A SHA1 hash of the patch info (author, title, description including junk,
 -- timestamp). The hash is in binary form, not hex, i.e. its size is always 20
 -- bytes.
 newtype PatchHash = PatchHash { unPatchHash :: ByteString }
 -- | Content size and SHA256 hash of a patch's info and content. The hash is in
 -- binary form, not hex, i.e. its size is always 32 bytes.
 data ContentHash = ContentHash
    { chSize :: Int
    , chHash :: ByteString
    }
 -- | Content size and SHA256 hash of an inventory (a patch set in a single
 -- invetory file). The hash is in binary form, not hex, i.e. its size is always
 -- 32 bytes.
 data InventoryHash = InventoryHash
    { ihSize :: Int
    , ihHash :: ByteString
    }
 -- | A SHA256 hash of the entire recorded state of the repo. The hash is in
 -- binary form, not hex, i.e. its size is always 32 bytes.
 newtype PristineHash = PristineHash { unPristineHash :: ByteString }
 -- | Patch metadata read from the inventory file.
 data PatchInfo = PatchInfo
    { -- | Author name and email
      piAuthor      :: Text
-      -- | Currently this is the patch hash in textual form. Possibly I'll
+      -- | Patch content hash
-      -- change to binary form when I find out the encoding scheme of the hash
+    , piHash        :: ContentHash
      -- string.
    , piHash        :: ByteString
      -- | Single message line
    , piTitle       :: Text
      -- | Optional description, may contain several lines
@ -60,7 +113,7 @@ data PatchInfo = PatchInfo
 -- | The information from the hashed inventory file.
 data PatchSeq = PatchSeq
-    { psPristineHash :: Hash
+    { psPristineHash :: PristineHash
-    , psPristineSize :: Maybe Int
+    , psPrevious     :: Maybe InventoryHash
-    , psPatches      :: [(PatchInfo, ByteString)]
+    , psPatches      :: [(PatchInfo, PatchHash)]
    }
--- a/src/Vervis/Darcs.hs
+++ b/src/Vervis/Darcs.hs
@ -132,7 +132,7 @@ readChangesView path off lim = fmap maybeRight $ runExceptT $ do
    let toLE pi h = LogEntry
            { leAuthor  =
                T.stripEnd $ T.takeWhile (/= '<') $ piAuthor pi
-            , leHash    = decodeStrict $ B16.encode h
+            , leHash    = decodeStrict $ B16.encode $ unPatchHash h
            , leMessage = piTitle pi
            , leTime    =
                intervalToEventTime $
--- a/vervis.cabal
+++ b/vervis.cabal
@ -134,6 +134,8 @@ library
                     , blaze-markup
                     , byteable
                     , bytestring
                       -- for Darcs.Local.PatchInfo.Parser
                     , bytestring-lexing
                     , case-insensitive
                     , classy-prelude
                     , classy-prelude-conduit