Darcs inventory parser: Add hash types and parse previous inventory

This commit is contained in:
fr33domlover 2016-05-16 14:02:43 +00:00
parent f8ae122da7
commit 9ba6761459
4 changed files with 164 additions and 93 deletions

View file

@ -27,12 +27,12 @@ module Darcs.Local.PatchInfo.Parser
) )
where where
import Prelude hiding (takeWhile) import Prelude hiding (take, takeWhile)
import Control.Applicative (many) import Control.Applicative (many, optional, liftA2)
import Control.Monad (replicateM, replicateM_) import Control.Arrow (second)
import Control.Monad (replicateM_)
import Crypto.Hash import Crypto.Hash
import Crypto.Hash.Algorithms (SHA1 (SHA1))
import Data.Attoparsec.ByteString import Data.Attoparsec.ByteString
import Data.ByteArray (convert) import Data.ByteArray (convert)
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
@ -43,7 +43,8 @@ import System.FilePath ((</>))
import qualified Data.ByteString as B import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC import qualified Data.ByteString.Char8 as BC
import qualified Storage.Hashed.Hash as H import qualified Data.ByteString.Base16 as B16
import qualified Data.ByteString.Lex.Integral as BX
import Control.Applicative.Local import Control.Applicative.Local
import Darcs.Local.PatchInfo.Types import Darcs.Local.PatchInfo.Types
@ -51,15 +52,22 @@ import Data.Attoparsec.ByteString.Local
import Data.ByteString.Local (stripPrefix) import Data.ByteString.Local (stripPrefix)
import Data.Text.UTF8.Local (decodeStrict) import Data.Text.UTF8.Local (decodeStrict)
lf, space, star, dash, zero, nine, sqrOpen, sqrClose :: Word8 lf :: Word8
lf = 10 lf = 10
space :: Word8
space = 32 space = 32
star :: Word8
star = 42 star = 42
dash :: Word8
dash = 45 dash = 45
zero :: Word8
zero = 48 zero = 48
nine :: Word8
nine = 57 nine = 57
sqrOpen :: Word8
sqrOpen = 91 sqrOpen = 91
sqrClose = 93 --sqrClose :: Word8
--sqrClose = 93
digit :: Parser Word8 digit :: Parser Word8
digit = satisfy $ \ w -> zero <= w && w <= nine digit = satisfy $ \ w -> zero <= w && w <= nine
@ -87,67 +95,92 @@ patchTimeP = do
month <- decimal2P month <- decimal2P
day <- decimal2P day <- decimal2P
hour <- decimal2P hours <- decimal2P
minute <- decimal2P minutes <- decimal2P
second <- decimal2P seconds <- decimal2P
case fromGregorianValid year month day of case fromGregorianValid year month day of
Nothing -> fail "Invalid patch date" Nothing -> fail "Invalid patch date"
Just uday -> return UTCTime Just uday -> return UTCTime
{ utctDay = uday { utctDay = uday
, utctDayTime = , utctDayTime =
secondsToDiffTime $ 3600 * hour + 60 * minute + second secondsToDiffTime $ 3600 * hours + 60 * minutes + seconds
} }
line :: Parser ByteString
line = restOfLine
restOfLine :: Parser ByteString
restOfLine = takeWhile (/= lf)
eol :: Parser ()
eol = skip (== lf)
skipLine :: Parser () skipLine :: Parser ()
skipLine = do skipLine = skipWhile (/= lf)
skipWhile (/= lf)
skip (== lf)
skipRestOfLine :: Parser () skipRestOfLine :: Parser ()
skipRestOfLine = skipLine skipRestOfLine = skipLine
skipPatchP :: Parser () skipPatchP :: Parser ()
skipPatchP = do skipPatchP =
-- title -- title
skipLine skipLine *> eol *>
-- author, inverted, time -- author, inverted, time
skipLine skipLine *> eol *>
-- ignore, description -- ignore, description
skipMany $ do (skipMany $ skip (== space) *> skipRestOfLine *> eol) *>
skip (== space)
skipRestOfLine
-- end of info -- end of info
string "] \n" (string "] \n") *>
-- hash -- hash
skipWhile (/= lf) skipLine
pristineP :: Parser (Maybe Int, H.Hash) sha256P :: Parser ByteString
pristineP = do sha256P = do
string "pristine:" bs <- take 64
(,) Nothing . H.decodeBase16 <$> takeWhile (/= lf) case second B.null $ B16.decode bs of
(h, True) -> return h
_ -> fail "SHA256 decoding from hex failed"
sizeP :: Parser Int
sizeP = do
bs <- take 10
case second B.null <$> BX.readDecimal bs of
Just (n, True) -> return n
_ -> fail "sizeP failed"
sizeSha256P :: Parser (Int, ByteString)
sizeSha256P = liftA2 (,) sizeP (skip (== dash) *> sha256P)
pristineP :: Parser PristineHash
pristineP = string "pristine:" *> (PristineHash <$> sha256P)
prevInvP :: Parser InventoryHash
prevInvP =
string "Starting with inventory" *> eol *>
(uncurry InventoryHash <$> sizeSha256P)
patchInfoRawP :: Parser PatchInfoRaw patchInfoRawP :: Parser PatchInfoRaw
patchInfoRawP = do patchInfoRawP = do
word8 sqrOpen word8 sqrOpen
title <- takeWhile1 (/= lf) title <- takeWhile1 (/= lf)
word8 lf eol
author <- takeWhile1 (/= star) author <- takeWhile1 (/= star)
word8 star word8 star
inverted <- (/= star) <$> (satisfy $ \ c -> c == star || c == dash) inverted <- (/= star) <$> (satisfy $ \ c -> c == star || c == dash)
(timeRaw, time) <- match patchTimeP (timeRaw, time) <- match patchTimeP
word8 lf eol
word8 space word8 space
junkp <- string "Ignore-this: " junkp <- string "Ignore-this: "
junkc <- takeWhile1 (/= lf) junkc <- takeWhile1 (/= lf)
word8 lf eol
lines <- many $ word8 space *> takeWhile (/= lf) <* word8 lf lines <- many $ word8 space *> takeWhile (/= lf) <* eol
string "] \nhash: " string "] \nhash: "
hash <- takeWhile1 (/= lf) hash <- sizeSha256P
return PatchInfoRaw return PatchInfoRaw
{ pirAuthor = author { pirAuthor = author
@ -185,7 +218,7 @@ refinePatchInfo pir =
l -> Just $ BC.unlines l l -> Just $ BC.unlines l
in PatchInfo in PatchInfo
{ piAuthor = decodeStrict $ pirAuthor pir { piAuthor = decodeStrict $ pirAuthor pir
, piHash = pirHash pir , piHash = uncurry ContentHash $ pirHash pir
, piTitle = decodeStrict title , piTitle = decodeStrict title
, piDescription = decodeStrict <$> description , piDescription = decodeStrict <$> description
, piTag = tag , piTag = tag
@ -194,71 +227,54 @@ refinePatchInfo pir =
-- | Parse patch metadata and compute the metadata's hash, which can be used as -- | Parse patch metadata and compute the metadata's hash, which can be used as
-- a patch identifier for lookup and matching. -- a patch identifier for lookup and matching.
patchInfoP :: Parser (PatchInfo, ByteString) patchInfoP :: Parser (PatchInfo, PatchHash)
patchInfoP = do patchInfoP = do
pir <- patchInfoRawP pir <- patchInfoRawP
return (refinePatchInfo pir, convert $ hashPatchInfo SHA1 pir) return (refinePatchInfo pir, PatchHash $ convert $ hashPatchInfo SHA1 pir)
-- NEXT current plan:
--
-- (0) all parses are incremental!!!
-- (1) use CountP below to determine number of patches
-- (2) given pagination details determine offset and limit
-- (3) parse the patches we want
-- (4) reverse their order...
-- (5) generate a whamlet widget from that
-- (*) FOR NOW no pagination, just read the entire thing once...
patchInfosCountP :: Parser Int patchInfosCountP :: Parser Int
patchInfosCountP = do patchInfosCountP = do
skipWhile (/= lf) -- pristine hash -- pristine hash
n <- length <$> (many $ word8 lf >> skipPatchP) skipLine
word8 lf -- previous inventory
optional $
eol *> string "Starting" *> skipRestOfLine *> eol *>
skipLine
-- patch info
n <- length <$> (many $ eol *> skipPatchP)
eol
return n return n
patchInfosAllP :: Parser PatchSeq patchInfosAllP :: Parser PatchSeq
patchInfosAllP = do patchInfosAllP = PatchSeq
(psize, phash) <- pristineP <$> pristineP
ps <- many $ word8 lf >> patchInfoP <*> optional (eol *> prevInvP)
word8 lf <*> many (eol *> patchInfoP)
return PatchSeq <* eol
{ psPristineHash = phash
, psPristineSize = psize
, psPatches = ps
}
patchInfosOffsetP :: Int -> Parser PatchSeq patchInfosOffsetP :: Int -> Parser PatchSeq
patchInfosOffsetP off = do patchInfosOffsetP off = PatchSeq
(psize, phash) <- pristineP <$> pristineP
replicateM_ off $ word8 lf >> skipPatchP <*> optional (eol *> prevInvP)
ps <- many $ word8 lf >> patchInfoP <*> ( replicateM_ off (eol *> skipPatchP) *>
word8 lf many (eol *> patchInfoP)
return PatchSeq )
{ psPristineHash = phash <* eol
, psPristineSize = psize
, psPatches = ps
}
patchInfosLimitP :: Int -> Parser PatchSeq patchInfosLimitP :: Int -> Parser PatchSeq
patchInfosLimitP lim = do patchInfosLimitP lim = PatchSeq
(psize, phash) <- pristineP <$> pristineP
ps <- atMost lim $ word8 lf >> patchInfoP <*> optional (eol *> prevInvP)
return PatchSeq <*> atMost lim (eol *> patchInfoP)
{ psPristineHash = phash
, psPristineSize = psize
, psPatches = ps
}
patchInfosOffsetLimitP :: Int -> Int -> Parser PatchSeq patchInfosOffsetLimitP :: Int -> Int -> Parser PatchSeq
patchInfosOffsetLimitP off lim = do patchInfosOffsetLimitP off lim = PatchSeq
(psize, phash) <- pristineP <$> pristineP
replicateM_ off $ word8 lf >> skipPatchP <*> optional (eol *> prevInvP)
ps <- atMost lim $ word8 lf >> patchInfoP <*> ( replicateM_ off (eol *> skipPatchP) *>
return PatchSeq atMost lim (eol *> patchInfoP)
{ psPristineHash = phash )
, psPristineSize = psize
, psPatches = ps
}
darcsDir :: FilePath darcsDir :: FilePath
darcsDir = "_darcs" darcsDir = "_darcs"

View file

@ -15,23 +15,54 @@
module Darcs.Local.PatchInfo.Types module Darcs.Local.PatchInfo.Types
( PatchInfoRaw (..) ( PatchInfoRaw (..)
, PatchHash (..)
, ContentHash (..)
, InventoryHash (..)
, PristineHash (..)
, PatchInfo (..) , PatchInfo (..)
, PatchSeq (..) , PatchSeq (..)
) )
where where
-- TODO
--
-- Apparently, after a while, some of the patches are moved from
-- hashed_inventory into the inventories/ dir. So the patch set contains more
-- than one group. This means I need to extend my parser to cover this case.
-- Sources for info about this thing:
--
-- * Darcs source code
-- * Darcs wiki
-- * Local Darcs repos I have
--
-- From Darcs source code:
--
-- > The patches in a repository are stored in chunks broken up at \"clean\"
-- > tags. A tag is clean if the only patches before it in the current
-- > repository ordering are ones that the tag depends on (either directly
-- > or indirectly). Each chunk is stored in a separate inventory file on disk.
-- >
-- > A 'PatchSet' represents a repo's history as the list of patches since the
-- > last clean tag, and then a list of patch lists each delimited by clean tags.
-- >
-- > A 'Tagged' is a single chunk of a 'PatchSet'. It has a 'PatchInfo'
-- > representing a clean tag, the hash of the previous inventory (if it exists),
-- > and the list of patches since that previous inventory.
--
-- Let's start with finding out the format of the binary inventories and
-- parsing them.
import Prelude import Prelude
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
import Data.Text (Text) import Data.Text (Text)
import Data.Time.Clock (UTCTime) import Data.Time.Clock (UTCTime)
import Storage.Hashed.Hash (Hash)
-- | Patch metadata in raw form. This is intended for accurate hashing of the -- | Patch metadata in raw form. This is intended for accurate hashing of the
-- patch info. -- patch info.
data PatchInfoRaw = PatchInfoRaw data PatchInfoRaw = PatchInfoRaw
{ pirAuthor :: ByteString { pirAuthor :: ByteString
, pirHash :: ByteString , pirHash :: (Int, ByteString)
, pirTitle :: ByteString , pirTitle :: ByteString
, pirDescription :: [ByteString] , pirDescription :: [ByteString]
, pirJunkPrefix :: ByteString , pirJunkPrefix :: ByteString
@ -40,14 +71,36 @@ data PatchInfoRaw = PatchInfoRaw
, pirInverted :: Bool , pirInverted :: Bool
} }
-- | A SHA1 hash of the patch info (author, title, description including junk,
-- timestamp). The hash is in binary form, not hex, i.e. its size is always 20
-- bytes.
newtype PatchHash = PatchHash { unPatchHash :: ByteString }
-- | Content size and SHA256 hash of a patch's info and content. The hash is in
-- binary form, not hex, i.e. its size is always 32 bytes.
data ContentHash = ContentHash
{ chSize :: Int
, chHash :: ByteString
}
-- | Content size and SHA256 hash of an inventory (a patch set in a single
-- invetory file). The hash is in binary form, not hex, i.e. its size is always
-- 32 bytes.
data InventoryHash = InventoryHash
{ ihSize :: Int
, ihHash :: ByteString
}
-- | A SHA256 hash of the entire recorded state of the repo. The hash is in
-- binary form, not hex, i.e. its size is always 32 bytes.
newtype PristineHash = PristineHash { unPristineHash :: ByteString }
-- | Patch metadata read from the inventory file. -- | Patch metadata read from the inventory file.
data PatchInfo = PatchInfo data PatchInfo = PatchInfo
{ -- | Author name and email { -- | Author name and email
piAuthor :: Text piAuthor :: Text
-- | Currently this is the patch hash in textual form. Possibly I'll -- | Patch content hash
-- change to binary form when I find out the encoding scheme of the hash , piHash :: ContentHash
-- string.
, piHash :: ByteString
-- | Single message line -- | Single message line
, piTitle :: Text , piTitle :: Text
-- | Optional description, may contain several lines -- | Optional description, may contain several lines
@ -60,7 +113,7 @@ data PatchInfo = PatchInfo
-- | The information from the hashed inventory file. -- | The information from the hashed inventory file.
data PatchSeq = PatchSeq data PatchSeq = PatchSeq
{ psPristineHash :: Hash { psPristineHash :: PristineHash
, psPristineSize :: Maybe Int , psPrevious :: Maybe InventoryHash
, psPatches :: [(PatchInfo, ByteString)] , psPatches :: [(PatchInfo, PatchHash)]
} }

View file

@ -132,7 +132,7 @@ readChangesView path off lim = fmap maybeRight $ runExceptT $ do
let toLE pi h = LogEntry let toLE pi h = LogEntry
{ leAuthor = { leAuthor =
T.stripEnd $ T.takeWhile (/= '<') $ piAuthor pi T.stripEnd $ T.takeWhile (/= '<') $ piAuthor pi
, leHash = decodeStrict $ B16.encode h , leHash = decodeStrict $ B16.encode $ unPatchHash h
, leMessage = piTitle pi , leMessage = piTitle pi
, leTime = , leTime =
intervalToEventTime $ intervalToEventTime $

View file

@ -134,6 +134,8 @@ library
, blaze-markup , blaze-markup
, byteable , byteable
, bytestring , bytestring
-- for Darcs.Local.PatchInfo.Parser
, bytestring-lexing
, case-insensitive , case-insensitive
, classy-prelude , classy-prelude
, classy-prelude-conduit , classy-prelude-conduit