Darcs inventory parser: Add hash types and parse previous inventory
This commit is contained in:
parent
f8ae122da7
commit
9ba6761459
4 changed files with 164 additions and 93 deletions
|
@ -27,12 +27,12 @@ module Darcs.Local.PatchInfo.Parser
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
|
|
||||||
import Prelude hiding (takeWhile)
|
import Prelude hiding (take, takeWhile)
|
||||||
|
|
||||||
import Control.Applicative (many)
|
import Control.Applicative (many, optional, liftA2)
|
||||||
import Control.Monad (replicateM, replicateM_)
|
import Control.Arrow (second)
|
||||||
|
import Control.Monad (replicateM_)
|
||||||
import Crypto.Hash
|
import Crypto.Hash
|
||||||
import Crypto.Hash.Algorithms (SHA1 (SHA1))
|
|
||||||
import Data.Attoparsec.ByteString
|
import Data.Attoparsec.ByteString
|
||||||
import Data.ByteArray (convert)
|
import Data.ByteArray (convert)
|
||||||
import Data.ByteString (ByteString)
|
import Data.ByteString (ByteString)
|
||||||
|
@ -43,7 +43,8 @@ import System.FilePath ((</>))
|
||||||
|
|
||||||
import qualified Data.ByteString as B
|
import qualified Data.ByteString as B
|
||||||
import qualified Data.ByteString.Char8 as BC
|
import qualified Data.ByteString.Char8 as BC
|
||||||
import qualified Storage.Hashed.Hash as H
|
import qualified Data.ByteString.Base16 as B16
|
||||||
|
import qualified Data.ByteString.Lex.Integral as BX
|
||||||
|
|
||||||
import Control.Applicative.Local
|
import Control.Applicative.Local
|
||||||
import Darcs.Local.PatchInfo.Types
|
import Darcs.Local.PatchInfo.Types
|
||||||
|
@ -51,15 +52,22 @@ import Data.Attoparsec.ByteString.Local
|
||||||
import Data.ByteString.Local (stripPrefix)
|
import Data.ByteString.Local (stripPrefix)
|
||||||
import Data.Text.UTF8.Local (decodeStrict)
|
import Data.Text.UTF8.Local (decodeStrict)
|
||||||
|
|
||||||
lf, space, star, dash, zero, nine, sqrOpen, sqrClose :: Word8
|
lf :: Word8
|
||||||
lf = 10
|
lf = 10
|
||||||
|
space :: Word8
|
||||||
space = 32
|
space = 32
|
||||||
|
star :: Word8
|
||||||
star = 42
|
star = 42
|
||||||
|
dash :: Word8
|
||||||
dash = 45
|
dash = 45
|
||||||
|
zero :: Word8
|
||||||
zero = 48
|
zero = 48
|
||||||
|
nine :: Word8
|
||||||
nine = 57
|
nine = 57
|
||||||
|
sqrOpen :: Word8
|
||||||
sqrOpen = 91
|
sqrOpen = 91
|
||||||
sqrClose = 93
|
--sqrClose :: Word8
|
||||||
|
--sqrClose = 93
|
||||||
|
|
||||||
digit :: Parser Word8
|
digit :: Parser Word8
|
||||||
digit = satisfy $ \ w -> zero <= w && w <= nine
|
digit = satisfy $ \ w -> zero <= w && w <= nine
|
||||||
|
@ -87,67 +95,92 @@ patchTimeP = do
|
||||||
month <- decimal2P
|
month <- decimal2P
|
||||||
day <- decimal2P
|
day <- decimal2P
|
||||||
|
|
||||||
hour <- decimal2P
|
hours <- decimal2P
|
||||||
minute <- decimal2P
|
minutes <- decimal2P
|
||||||
second <- decimal2P
|
seconds <- decimal2P
|
||||||
|
|
||||||
case fromGregorianValid year month day of
|
case fromGregorianValid year month day of
|
||||||
Nothing -> fail "Invalid patch date"
|
Nothing -> fail "Invalid patch date"
|
||||||
Just uday -> return UTCTime
|
Just uday -> return UTCTime
|
||||||
{ utctDay = uday
|
{ utctDay = uday
|
||||||
, utctDayTime =
|
, utctDayTime =
|
||||||
secondsToDiffTime $ 3600 * hour + 60 * minute + second
|
secondsToDiffTime $ 3600 * hours + 60 * minutes + seconds
|
||||||
}
|
}
|
||||||
|
|
||||||
|
line :: Parser ByteString
|
||||||
|
line = restOfLine
|
||||||
|
|
||||||
|
restOfLine :: Parser ByteString
|
||||||
|
restOfLine = takeWhile (/= lf)
|
||||||
|
|
||||||
|
eol :: Parser ()
|
||||||
|
eol = skip (== lf)
|
||||||
|
|
||||||
skipLine :: Parser ()
|
skipLine :: Parser ()
|
||||||
skipLine = do
|
skipLine = skipWhile (/= lf)
|
||||||
skipWhile (/= lf)
|
|
||||||
skip (== lf)
|
|
||||||
|
|
||||||
skipRestOfLine :: Parser ()
|
skipRestOfLine :: Parser ()
|
||||||
skipRestOfLine = skipLine
|
skipRestOfLine = skipLine
|
||||||
|
|
||||||
skipPatchP :: Parser ()
|
skipPatchP :: Parser ()
|
||||||
skipPatchP = do
|
skipPatchP =
|
||||||
-- title
|
-- title
|
||||||
skipLine
|
skipLine *> eol *>
|
||||||
-- author, inverted, time
|
-- author, inverted, time
|
||||||
skipLine
|
skipLine *> eol *>
|
||||||
-- ignore, description
|
-- ignore, description
|
||||||
skipMany $ do
|
(skipMany $ skip (== space) *> skipRestOfLine *> eol) *>
|
||||||
skip (== space)
|
|
||||||
skipRestOfLine
|
|
||||||
-- end of info
|
-- end of info
|
||||||
string "] \n"
|
(string "] \n") *>
|
||||||
-- hash
|
-- hash
|
||||||
skipWhile (/= lf)
|
skipLine
|
||||||
|
|
||||||
pristineP :: Parser (Maybe Int, H.Hash)
|
sha256P :: Parser ByteString
|
||||||
pristineP = do
|
sha256P = do
|
||||||
string "pristine:"
|
bs <- take 64
|
||||||
(,) Nothing . H.decodeBase16 <$> takeWhile (/= lf)
|
case second B.null $ B16.decode bs of
|
||||||
|
(h, True) -> return h
|
||||||
|
_ -> fail "SHA256 decoding from hex failed"
|
||||||
|
|
||||||
|
sizeP :: Parser Int
|
||||||
|
sizeP = do
|
||||||
|
bs <- take 10
|
||||||
|
case second B.null <$> BX.readDecimal bs of
|
||||||
|
Just (n, True) -> return n
|
||||||
|
_ -> fail "sizeP failed"
|
||||||
|
|
||||||
|
sizeSha256P :: Parser (Int, ByteString)
|
||||||
|
sizeSha256P = liftA2 (,) sizeP (skip (== dash) *> sha256P)
|
||||||
|
|
||||||
|
pristineP :: Parser PristineHash
|
||||||
|
pristineP = string "pristine:" *> (PristineHash <$> sha256P)
|
||||||
|
|
||||||
|
prevInvP :: Parser InventoryHash
|
||||||
|
prevInvP =
|
||||||
|
string "Starting with inventory" *> eol *>
|
||||||
|
(uncurry InventoryHash <$> sizeSha256P)
|
||||||
|
|
||||||
patchInfoRawP :: Parser PatchInfoRaw
|
patchInfoRawP :: Parser PatchInfoRaw
|
||||||
patchInfoRawP = do
|
patchInfoRawP = do
|
||||||
word8 sqrOpen
|
word8 sqrOpen
|
||||||
title <- takeWhile1 (/= lf)
|
title <- takeWhile1 (/= lf)
|
||||||
word8 lf
|
eol
|
||||||
|
|
||||||
author <- takeWhile1 (/= star)
|
author <- takeWhile1 (/= star)
|
||||||
word8 star
|
word8 star
|
||||||
inverted <- (/= star) <$> (satisfy $ \ c -> c == star || c == dash)
|
inverted <- (/= star) <$> (satisfy $ \ c -> c == star || c == dash)
|
||||||
(timeRaw, time) <- match patchTimeP
|
(timeRaw, time) <- match patchTimeP
|
||||||
word8 lf
|
eol
|
||||||
|
|
||||||
word8 space
|
word8 space
|
||||||
junkp <- string "Ignore-this: "
|
junkp <- string "Ignore-this: "
|
||||||
junkc <- takeWhile1 (/= lf)
|
junkc <- takeWhile1 (/= lf)
|
||||||
word8 lf
|
eol
|
||||||
|
|
||||||
lines <- many $ word8 space *> takeWhile (/= lf) <* word8 lf
|
lines <- many $ word8 space *> takeWhile (/= lf) <* eol
|
||||||
string "] \nhash: "
|
string "] \nhash: "
|
||||||
|
|
||||||
hash <- takeWhile1 (/= lf)
|
hash <- sizeSha256P
|
||||||
|
|
||||||
return PatchInfoRaw
|
return PatchInfoRaw
|
||||||
{ pirAuthor = author
|
{ pirAuthor = author
|
||||||
|
@ -185,7 +218,7 @@ refinePatchInfo pir =
|
||||||
l -> Just $ BC.unlines l
|
l -> Just $ BC.unlines l
|
||||||
in PatchInfo
|
in PatchInfo
|
||||||
{ piAuthor = decodeStrict $ pirAuthor pir
|
{ piAuthor = decodeStrict $ pirAuthor pir
|
||||||
, piHash = pirHash pir
|
, piHash = uncurry ContentHash $ pirHash pir
|
||||||
, piTitle = decodeStrict title
|
, piTitle = decodeStrict title
|
||||||
, piDescription = decodeStrict <$> description
|
, piDescription = decodeStrict <$> description
|
||||||
, piTag = tag
|
, piTag = tag
|
||||||
|
@ -194,71 +227,54 @@ refinePatchInfo pir =
|
||||||
|
|
||||||
-- | Parse patch metadata and compute the metadata's hash, which can be used as
|
-- | Parse patch metadata and compute the metadata's hash, which can be used as
|
||||||
-- a patch identifier for lookup and matching.
|
-- a patch identifier for lookup and matching.
|
||||||
patchInfoP :: Parser (PatchInfo, ByteString)
|
patchInfoP :: Parser (PatchInfo, PatchHash)
|
||||||
patchInfoP = do
|
patchInfoP = do
|
||||||
pir <- patchInfoRawP
|
pir <- patchInfoRawP
|
||||||
return (refinePatchInfo pir, convert $ hashPatchInfo SHA1 pir)
|
return (refinePatchInfo pir, PatchHash $ convert $ hashPatchInfo SHA1 pir)
|
||||||
|
|
||||||
-- NEXT current plan:
|
|
||||||
--
|
|
||||||
-- (0) all parses are incremental!!!
|
|
||||||
-- (1) use CountP below to determine number of patches
|
|
||||||
-- (2) given pagination details determine offset and limit
|
|
||||||
-- (3) parse the patches we want
|
|
||||||
-- (4) reverse their order...
|
|
||||||
-- (5) generate a whamlet widget from that
|
|
||||||
-- (*) FOR NOW no pagination, just read the entire thing once...
|
|
||||||
|
|
||||||
patchInfosCountP :: Parser Int
|
patchInfosCountP :: Parser Int
|
||||||
patchInfosCountP = do
|
patchInfosCountP = do
|
||||||
skipWhile (/= lf) -- pristine hash
|
-- pristine hash
|
||||||
n <- length <$> (many $ word8 lf >> skipPatchP)
|
skipLine
|
||||||
word8 lf
|
-- previous inventory
|
||||||
|
optional $
|
||||||
|
eol *> string "Starting" *> skipRestOfLine *> eol *>
|
||||||
|
skipLine
|
||||||
|
-- patch info
|
||||||
|
n <- length <$> (many $ eol *> skipPatchP)
|
||||||
|
eol
|
||||||
return n
|
return n
|
||||||
|
|
||||||
patchInfosAllP :: Parser PatchSeq
|
patchInfosAllP :: Parser PatchSeq
|
||||||
patchInfosAllP = do
|
patchInfosAllP = PatchSeq
|
||||||
(psize, phash) <- pristineP
|
<$> pristineP
|
||||||
ps <- many $ word8 lf >> patchInfoP
|
<*> optional (eol *> prevInvP)
|
||||||
word8 lf
|
<*> many (eol *> patchInfoP)
|
||||||
return PatchSeq
|
<* eol
|
||||||
{ psPristineHash = phash
|
|
||||||
, psPristineSize = psize
|
|
||||||
, psPatches = ps
|
|
||||||
}
|
|
||||||
|
|
||||||
patchInfosOffsetP :: Int -> Parser PatchSeq
|
patchInfosOffsetP :: Int -> Parser PatchSeq
|
||||||
patchInfosOffsetP off = do
|
patchInfosOffsetP off = PatchSeq
|
||||||
(psize, phash) <- pristineP
|
<$> pristineP
|
||||||
replicateM_ off $ word8 lf >> skipPatchP
|
<*> optional (eol *> prevInvP)
|
||||||
ps <- many $ word8 lf >> patchInfoP
|
<*> ( replicateM_ off (eol *> skipPatchP) *>
|
||||||
word8 lf
|
many (eol *> patchInfoP)
|
||||||
return PatchSeq
|
)
|
||||||
{ psPristineHash = phash
|
<* eol
|
||||||
, psPristineSize = psize
|
|
||||||
, psPatches = ps
|
|
||||||
}
|
|
||||||
|
|
||||||
patchInfosLimitP :: Int -> Parser PatchSeq
|
patchInfosLimitP :: Int -> Parser PatchSeq
|
||||||
patchInfosLimitP lim = do
|
patchInfosLimitP lim = PatchSeq
|
||||||
(psize, phash) <- pristineP
|
<$> pristineP
|
||||||
ps <- atMost lim $ word8 lf >> patchInfoP
|
<*> optional (eol *> prevInvP)
|
||||||
return PatchSeq
|
<*> atMost lim (eol *> patchInfoP)
|
||||||
{ psPristineHash = phash
|
|
||||||
, psPristineSize = psize
|
|
||||||
, psPatches = ps
|
|
||||||
}
|
|
||||||
|
|
||||||
patchInfosOffsetLimitP :: Int -> Int -> Parser PatchSeq
|
patchInfosOffsetLimitP :: Int -> Int -> Parser PatchSeq
|
||||||
patchInfosOffsetLimitP off lim = do
|
patchInfosOffsetLimitP off lim = PatchSeq
|
||||||
(psize, phash) <- pristineP
|
<$> pristineP
|
||||||
replicateM_ off $ word8 lf >> skipPatchP
|
<*> optional (eol *> prevInvP)
|
||||||
ps <- atMost lim $ word8 lf >> patchInfoP
|
<*> ( replicateM_ off (eol *> skipPatchP) *>
|
||||||
return PatchSeq
|
atMost lim (eol *> patchInfoP)
|
||||||
{ psPristineHash = phash
|
)
|
||||||
, psPristineSize = psize
|
|
||||||
, psPatches = ps
|
|
||||||
}
|
|
||||||
|
|
||||||
darcsDir :: FilePath
|
darcsDir :: FilePath
|
||||||
darcsDir = "_darcs"
|
darcsDir = "_darcs"
|
||||||
|
|
|
@ -15,23 +15,54 @@
|
||||||
|
|
||||||
module Darcs.Local.PatchInfo.Types
|
module Darcs.Local.PatchInfo.Types
|
||||||
( PatchInfoRaw (..)
|
( PatchInfoRaw (..)
|
||||||
|
, PatchHash (..)
|
||||||
|
, ContentHash (..)
|
||||||
|
, InventoryHash (..)
|
||||||
|
, PristineHash (..)
|
||||||
, PatchInfo (..)
|
, PatchInfo (..)
|
||||||
, PatchSeq (..)
|
, PatchSeq (..)
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
|
|
||||||
|
-- TODO
|
||||||
|
--
|
||||||
|
-- Apparently, after a while, some of the patches are moved from
|
||||||
|
-- hashed_inventory into the inventories/ dir. So the patch set contains more
|
||||||
|
-- than one group. This means I need to extend my parser to cover this case.
|
||||||
|
-- Sources for info about this thing:
|
||||||
|
--
|
||||||
|
-- * Darcs source code
|
||||||
|
-- * Darcs wiki
|
||||||
|
-- * Local Darcs repos I have
|
||||||
|
--
|
||||||
|
-- From Darcs source code:
|
||||||
|
--
|
||||||
|
-- > The patches in a repository are stored in chunks broken up at \"clean\"
|
||||||
|
-- > tags. A tag is clean if the only patches before it in the current
|
||||||
|
-- > repository ordering are ones that the tag depends on (either directly
|
||||||
|
-- > or indirectly). Each chunk is stored in a separate inventory file on disk.
|
||||||
|
-- >
|
||||||
|
-- > A 'PatchSet' represents a repo's history as the list of patches since the
|
||||||
|
-- > last clean tag, and then a list of patch lists each delimited by clean tags.
|
||||||
|
-- >
|
||||||
|
-- > A 'Tagged' is a single chunk of a 'PatchSet'. It has a 'PatchInfo'
|
||||||
|
-- > representing a clean tag, the hash of the previous inventory (if it exists),
|
||||||
|
-- > and the list of patches since that previous inventory.
|
||||||
|
--
|
||||||
|
-- Let's start with finding out the format of the binary inventories and
|
||||||
|
-- parsing them.
|
||||||
|
|
||||||
import Prelude
|
import Prelude
|
||||||
|
|
||||||
import Data.ByteString (ByteString)
|
import Data.ByteString (ByteString)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import Data.Time.Clock (UTCTime)
|
import Data.Time.Clock (UTCTime)
|
||||||
import Storage.Hashed.Hash (Hash)
|
|
||||||
|
|
||||||
-- | Patch metadata in raw form. This is intended for accurate hashing of the
|
-- | Patch metadata in raw form. This is intended for accurate hashing of the
|
||||||
-- patch info.
|
-- patch info.
|
||||||
data PatchInfoRaw = PatchInfoRaw
|
data PatchInfoRaw = PatchInfoRaw
|
||||||
{ pirAuthor :: ByteString
|
{ pirAuthor :: ByteString
|
||||||
, pirHash :: ByteString
|
, pirHash :: (Int, ByteString)
|
||||||
, pirTitle :: ByteString
|
, pirTitle :: ByteString
|
||||||
, pirDescription :: [ByteString]
|
, pirDescription :: [ByteString]
|
||||||
, pirJunkPrefix :: ByteString
|
, pirJunkPrefix :: ByteString
|
||||||
|
@ -40,14 +71,36 @@ data PatchInfoRaw = PatchInfoRaw
|
||||||
, pirInverted :: Bool
|
, pirInverted :: Bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
-- | A SHA1 hash of the patch info (author, title, description including junk,
|
||||||
|
-- timestamp). The hash is in binary form, not hex, i.e. its size is always 20
|
||||||
|
-- bytes.
|
||||||
|
newtype PatchHash = PatchHash { unPatchHash :: ByteString }
|
||||||
|
|
||||||
|
-- | Content size and SHA256 hash of a patch's info and content. The hash is in
|
||||||
|
-- binary form, not hex, i.e. its size is always 32 bytes.
|
||||||
|
data ContentHash = ContentHash
|
||||||
|
{ chSize :: Int
|
||||||
|
, chHash :: ByteString
|
||||||
|
}
|
||||||
|
|
||||||
|
-- | Content size and SHA256 hash of an inventory (a patch set in a single
|
||||||
|
-- invetory file). The hash is in binary form, not hex, i.e. its size is always
|
||||||
|
-- 32 bytes.
|
||||||
|
data InventoryHash = InventoryHash
|
||||||
|
{ ihSize :: Int
|
||||||
|
, ihHash :: ByteString
|
||||||
|
}
|
||||||
|
|
||||||
|
-- | A SHA256 hash of the entire recorded state of the repo. The hash is in
|
||||||
|
-- binary form, not hex, i.e. its size is always 32 bytes.
|
||||||
|
newtype PristineHash = PristineHash { unPristineHash :: ByteString }
|
||||||
|
|
||||||
-- | Patch metadata read from the inventory file.
|
-- | Patch metadata read from the inventory file.
|
||||||
data PatchInfo = PatchInfo
|
data PatchInfo = PatchInfo
|
||||||
{ -- | Author name and email
|
{ -- | Author name and email
|
||||||
piAuthor :: Text
|
piAuthor :: Text
|
||||||
-- | Currently this is the patch hash in textual form. Possibly I'll
|
-- | Patch content hash
|
||||||
-- change to binary form when I find out the encoding scheme of the hash
|
, piHash :: ContentHash
|
||||||
-- string.
|
|
||||||
, piHash :: ByteString
|
|
||||||
-- | Single message line
|
-- | Single message line
|
||||||
, piTitle :: Text
|
, piTitle :: Text
|
||||||
-- | Optional description, may contain several lines
|
-- | Optional description, may contain several lines
|
||||||
|
@ -60,7 +113,7 @@ data PatchInfo = PatchInfo
|
||||||
|
|
||||||
-- | The information from the hashed inventory file.
|
-- | The information from the hashed inventory file.
|
||||||
data PatchSeq = PatchSeq
|
data PatchSeq = PatchSeq
|
||||||
{ psPristineHash :: Hash
|
{ psPristineHash :: PristineHash
|
||||||
, psPristineSize :: Maybe Int
|
, psPrevious :: Maybe InventoryHash
|
||||||
, psPatches :: [(PatchInfo, ByteString)]
|
, psPatches :: [(PatchInfo, PatchHash)]
|
||||||
}
|
}
|
||||||
|
|
|
@ -132,7 +132,7 @@ readChangesView path off lim = fmap maybeRight $ runExceptT $ do
|
||||||
let toLE pi h = LogEntry
|
let toLE pi h = LogEntry
|
||||||
{ leAuthor =
|
{ leAuthor =
|
||||||
T.stripEnd $ T.takeWhile (/= '<') $ piAuthor pi
|
T.stripEnd $ T.takeWhile (/= '<') $ piAuthor pi
|
||||||
, leHash = decodeStrict $ B16.encode h
|
, leHash = decodeStrict $ B16.encode $ unPatchHash h
|
||||||
, leMessage = piTitle pi
|
, leMessage = piTitle pi
|
||||||
, leTime =
|
, leTime =
|
||||||
intervalToEventTime $
|
intervalToEventTime $
|
||||||
|
|
|
@ -134,6 +134,8 @@ library
|
||||||
, blaze-markup
|
, blaze-markup
|
||||||
, byteable
|
, byteable
|
||||||
, bytestring
|
, bytestring
|
||||||
|
-- for Darcs.Local.PatchInfo.Parser
|
||||||
|
, bytestring-lexing
|
||||||
, case-insensitive
|
, case-insensitive
|
||||||
, classy-prelude
|
, classy-prelude
|
||||||
, classy-prelude-conduit
|
, classy-prelude-conduit
|
||||||
|
|
Loading…
Reference in a new issue