Darcs inventory parser: Add hash types and parse previous inventory

This commit is contained in:
fr33domlover 2016-05-16 14:02:43 +00:00
parent f8ae122da7
commit 9ba6761459
4 changed files with 164 additions and 93 deletions

View file

@ -27,12 +27,12 @@ module Darcs.Local.PatchInfo.Parser
)
where
import Prelude hiding (takeWhile)
import Prelude hiding (take, takeWhile)
import Control.Applicative (many)
import Control.Monad (replicateM, replicateM_)
import Control.Applicative (many, optional, liftA2)
import Control.Arrow (second)
import Control.Monad (replicateM_)
import Crypto.Hash
import Crypto.Hash.Algorithms (SHA1 (SHA1))
import Data.Attoparsec.ByteString
import Data.ByteArray (convert)
import Data.ByteString (ByteString)
@ -43,7 +43,8 @@ import System.FilePath ((</>))
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Storage.Hashed.Hash as H
import qualified Data.ByteString.Base16 as B16
import qualified Data.ByteString.Lex.Integral as BX
import Control.Applicative.Local
import Darcs.Local.PatchInfo.Types
@ -51,15 +52,22 @@ import Data.Attoparsec.ByteString.Local
import Data.ByteString.Local (stripPrefix)
import Data.Text.UTF8.Local (decodeStrict)
lf, space, star, dash, zero, nine, sqrOpen, sqrClose :: Word8
lf :: Word8
lf = 10
space :: Word8
space = 32
star :: Word8
star = 42
dash :: Word8
dash = 45
zero :: Word8
zero = 48
nine :: Word8
nine = 57
sqrOpen :: Word8
sqrOpen = 91
sqrClose = 93
--sqrClose :: Word8
--sqrClose = 93
digit :: Parser Word8
digit = satisfy $ \ w -> zero <= w && w <= nine
@ -87,67 +95,92 @@ patchTimeP = do
month <- decimal2P
day <- decimal2P
hour <- decimal2P
minute <- decimal2P
second <- decimal2P
hours <- decimal2P
minutes <- decimal2P
seconds <- decimal2P
case fromGregorianValid year month day of
Nothing -> fail "Invalid patch date"
Just uday -> return UTCTime
{ utctDay = uday
, utctDayTime =
secondsToDiffTime $ 3600 * hour + 60 * minute + second
secondsToDiffTime $ 3600 * hours + 60 * minutes + seconds
}
line :: Parser ByteString
line = restOfLine
restOfLine :: Parser ByteString
restOfLine = takeWhile (/= lf)
eol :: Parser ()
eol = skip (== lf)
skipLine :: Parser ()
skipLine = do
skipWhile (/= lf)
skip (== lf)
skipLine = skipWhile (/= lf)
skipRestOfLine :: Parser ()
skipRestOfLine = skipLine
skipPatchP :: Parser ()
skipPatchP = do
skipPatchP =
-- title
skipLine
skipLine *> eol *>
-- author, inverted, time
skipLine
skipLine *> eol *>
-- ignore, description
skipMany $ do
skip (== space)
skipRestOfLine
(skipMany $ skip (== space) *> skipRestOfLine *> eol) *>
-- end of info
string "] \n"
(string "] \n") *>
-- hash
skipWhile (/= lf)
skipLine
pristineP :: Parser (Maybe Int, H.Hash)
pristineP = do
string "pristine:"
(,) Nothing . H.decodeBase16 <$> takeWhile (/= lf)
sha256P :: Parser ByteString
sha256P = do
bs <- take 64
case second B.null $ B16.decode bs of
(h, True) -> return h
_ -> fail "SHA256 decoding from hex failed"
sizeP :: Parser Int
sizeP = do
bs <- take 10
case second B.null <$> BX.readDecimal bs of
Just (n, True) -> return n
_ -> fail "sizeP failed"
sizeSha256P :: Parser (Int, ByteString)
sizeSha256P = liftA2 (,) sizeP (skip (== dash) *> sha256P)
pristineP :: Parser PristineHash
pristineP = string "pristine:" *> (PristineHash <$> sha256P)
prevInvP :: Parser InventoryHash
prevInvP =
string "Starting with inventory" *> eol *>
(uncurry InventoryHash <$> sizeSha256P)
patchInfoRawP :: Parser PatchInfoRaw
patchInfoRawP = do
word8 sqrOpen
title <- takeWhile1 (/= lf)
word8 lf
eol
author <- takeWhile1 (/= star)
word8 star
inverted <- (/= star) <$> (satisfy $ \ c -> c == star || c == dash)
(timeRaw, time) <- match patchTimeP
word8 lf
eol
word8 space
junkp <- string "Ignore-this: "
junkc <- takeWhile1 (/= lf)
word8 lf
eol
lines <- many $ word8 space *> takeWhile (/= lf) <* word8 lf
lines <- many $ word8 space *> takeWhile (/= lf) <* eol
string "] \nhash: "
hash <- takeWhile1 (/= lf)
hash <- sizeSha256P
return PatchInfoRaw
{ pirAuthor = author
@ -185,7 +218,7 @@ refinePatchInfo pir =
l -> Just $ BC.unlines l
in PatchInfo
{ piAuthor = decodeStrict $ pirAuthor pir
, piHash = pirHash pir
, piHash = uncurry ContentHash $ pirHash pir
, piTitle = decodeStrict title
, piDescription = decodeStrict <$> description
, piTag = tag
@ -194,71 +227,54 @@ refinePatchInfo pir =
-- | Parse patch metadata and compute the metadata's hash, which can be used as
-- a patch identifier for lookup and matching.
patchInfoP :: Parser (PatchInfo, ByteString)
patchInfoP :: Parser (PatchInfo, PatchHash)
patchInfoP = do
pir <- patchInfoRawP
return (refinePatchInfo pir, convert $ hashPatchInfo SHA1 pir)
-- NEXT current plan:
--
-- (0) all parses are incremental!!!
-- (1) use CountP below to determine number of patches
-- (2) given pagination details determine offset and limit
-- (3) parse the patches we want
-- (4) reverse their order...
-- (5) generate a whamlet widget from that
-- (*) FOR NOW no pagination, just read the entire thing once...
return (refinePatchInfo pir, PatchHash $ convert $ hashPatchInfo SHA1 pir)
patchInfosCountP :: Parser Int
patchInfosCountP = do
skipWhile (/= lf) -- pristine hash
n <- length <$> (many $ word8 lf >> skipPatchP)
word8 lf
-- pristine hash
skipLine
-- previous inventory
optional $
eol *> string "Starting" *> skipRestOfLine *> eol *>
skipLine
-- patch info
n <- length <$> (many $ eol *> skipPatchP)
eol
return n
patchInfosAllP :: Parser PatchSeq
patchInfosAllP = do
(psize, phash) <- pristineP
ps <- many $ word8 lf >> patchInfoP
word8 lf
return PatchSeq
{ psPristineHash = phash
, psPristineSize = psize
, psPatches = ps
}
patchInfosAllP = PatchSeq
<$> pristineP
<*> optional (eol *> prevInvP)
<*> many (eol *> patchInfoP)
<* eol
patchInfosOffsetP :: Int -> Parser PatchSeq
patchInfosOffsetP off = do
(psize, phash) <- pristineP
replicateM_ off $ word8 lf >> skipPatchP
ps <- many $ word8 lf >> patchInfoP
word8 lf
return PatchSeq
{ psPristineHash = phash
, psPristineSize = psize
, psPatches = ps
}
patchInfosOffsetP off = PatchSeq
<$> pristineP
<*> optional (eol *> prevInvP)
<*> ( replicateM_ off (eol *> skipPatchP) *>
many (eol *> patchInfoP)
)
<* eol
patchInfosLimitP :: Int -> Parser PatchSeq
patchInfosLimitP lim = do
(psize, phash) <- pristineP
ps <- atMost lim $ word8 lf >> patchInfoP
return PatchSeq
{ psPristineHash = phash
, psPristineSize = psize
, psPatches = ps
}
patchInfosLimitP lim = PatchSeq
<$> pristineP
<*> optional (eol *> prevInvP)
<*> atMost lim (eol *> patchInfoP)
patchInfosOffsetLimitP :: Int -> Int -> Parser PatchSeq
patchInfosOffsetLimitP off lim = do
(psize, phash) <- pristineP
replicateM_ off $ word8 lf >> skipPatchP
ps <- atMost lim $ word8 lf >> patchInfoP
return PatchSeq
{ psPristineHash = phash
, psPristineSize = psize
, psPatches = ps
}
patchInfosOffsetLimitP off lim = PatchSeq
<$> pristineP
<*> optional (eol *> prevInvP)
<*> ( replicateM_ off (eol *> skipPatchP) *>
atMost lim (eol *> patchInfoP)
)
darcsDir :: FilePath
darcsDir = "_darcs"

View file

@ -15,23 +15,54 @@
module Darcs.Local.PatchInfo.Types
( PatchInfoRaw (..)
, PatchHash (..)
, ContentHash (..)
, InventoryHash (..)
, PristineHash (..)
, PatchInfo (..)
, PatchSeq (..)
)
where
-- TODO
--
-- Apparently, after a while, some of the patches are moved from
-- hashed_inventory into the inventories/ dir. So the patch set contains more
-- than one group. This means I need to extend my parser to cover this case.
-- Sources for info about this thing:
--
-- * Darcs source code
-- * Darcs wiki
-- * Local Darcs repos I have
--
-- From Darcs source code:
--
-- > The patches in a repository are stored in chunks broken up at \"clean\"
-- > tags. A tag is clean if the only patches before it in the current
-- > repository ordering are ones that the tag depends on (either directly
-- > or indirectly). Each chunk is stored in a separate inventory file on disk.
-- >
-- > A 'PatchSet' represents a repo's history as the list of patches since the
-- > last clean tag, and then a list of patch lists each delimited by clean tags.
-- >
-- > A 'Tagged' is a single chunk of a 'PatchSet'. It has a 'PatchInfo'
-- > representing a clean tag, the hash of the previous inventory (if it exists),
-- > and the list of patches since that previous inventory.
--
-- Let's start with finding out the format of the binary inventories and
-- parsing them.
import Prelude
import Data.ByteString (ByteString)
import Data.Text (Text)
import Data.Time.Clock (UTCTime)
import Storage.Hashed.Hash (Hash)
-- | Patch metadata in raw form. This is intended for accurate hashing of the
-- patch info.
data PatchInfoRaw = PatchInfoRaw
{ pirAuthor :: ByteString
, pirHash :: ByteString
, pirHash :: (Int, ByteString)
, pirTitle :: ByteString
, pirDescription :: [ByteString]
, pirJunkPrefix :: ByteString
@ -40,14 +71,36 @@ data PatchInfoRaw = PatchInfoRaw
, pirInverted :: Bool
}
-- | A SHA1 hash of the patch info (author, title, description including junk,
-- timestamp). The hash is in binary form, not hex, i.e. its size is always 20
-- bytes.
newtype PatchHash = PatchHash { unPatchHash :: ByteString }
-- | Content size and SHA256 hash of a patch's info and content. The hash is in
-- binary form, not hex, i.e. its size is always 32 bytes.
data ContentHash = ContentHash
{ chSize :: Int
, chHash :: ByteString
}
-- | Content size and SHA256 hash of an inventory (a patch set in a single
-- invetory file). The hash is in binary form, not hex, i.e. its size is always
-- 32 bytes.
data InventoryHash = InventoryHash
{ ihSize :: Int
, ihHash :: ByteString
}
-- | A SHA256 hash of the entire recorded state of the repo. The hash is in
-- binary form, not hex, i.e. its size is always 32 bytes.
newtype PristineHash = PristineHash { unPristineHash :: ByteString }
-- | Patch metadata read from the inventory file.
data PatchInfo = PatchInfo
{ -- | Author name and email
piAuthor :: Text
-- | Currently this is the patch hash in textual form. Possibly I'll
-- change to binary form when I find out the encoding scheme of the hash
-- string.
, piHash :: ByteString
-- | Patch content hash
, piHash :: ContentHash
-- | Single message line
, piTitle :: Text
-- | Optional description, may contain several lines
@ -60,7 +113,7 @@ data PatchInfo = PatchInfo
-- | The information from the hashed inventory file.
data PatchSeq = PatchSeq
{ psPristineHash :: Hash
, psPristineSize :: Maybe Int
, psPatches :: [(PatchInfo, ByteString)]
{ psPristineHash :: PristineHash
, psPrevious :: Maybe InventoryHash
, psPatches :: [(PatchInfo, PatchHash)]
}

View file

@ -132,7 +132,7 @@ readChangesView path off lim = fmap maybeRight $ runExceptT $ do
let toLE pi h = LogEntry
{ leAuthor =
T.stripEnd $ T.takeWhile (/= '<') $ piAuthor pi
, leHash = decodeStrict $ B16.encode h
, leHash = decodeStrict $ B16.encode $ unPatchHash h
, leMessage = piTitle pi
, leTime =
intervalToEventTime $

View file

@ -134,6 +134,8 @@ library
, blaze-markup
, byteable
, bytestring
-- for Darcs.Local.PatchInfo.Parser
, bytestring-lexing
, case-insensitive
, classy-prelude
, classy-prelude-conduit