Darcs inventory parser: Add hash types and parse previous inventory
This commit is contained in:
parent
f8ae122da7
commit
9ba6761459
4 changed files with 164 additions and 93 deletions
|
@ -27,12 +27,12 @@ module Darcs.Local.PatchInfo.Parser
|
|||
)
|
||||
where
|
||||
|
||||
import Prelude hiding (takeWhile)
|
||||
import Prelude hiding (take, takeWhile)
|
||||
|
||||
import Control.Applicative (many)
|
||||
import Control.Monad (replicateM, replicateM_)
|
||||
import Control.Applicative (many, optional, liftA2)
|
||||
import Control.Arrow (second)
|
||||
import Control.Monad (replicateM_)
|
||||
import Crypto.Hash
|
||||
import Crypto.Hash.Algorithms (SHA1 (SHA1))
|
||||
import Data.Attoparsec.ByteString
|
||||
import Data.ByteArray (convert)
|
||||
import Data.ByteString (ByteString)
|
||||
|
@ -43,7 +43,8 @@ import System.FilePath ((</>))
|
|||
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Char8 as BC
|
||||
import qualified Storage.Hashed.Hash as H
|
||||
import qualified Data.ByteString.Base16 as B16
|
||||
import qualified Data.ByteString.Lex.Integral as BX
|
||||
|
||||
import Control.Applicative.Local
|
||||
import Darcs.Local.PatchInfo.Types
|
||||
|
@ -51,15 +52,22 @@ import Data.Attoparsec.ByteString.Local
|
|||
import Data.ByteString.Local (stripPrefix)
|
||||
import Data.Text.UTF8.Local (decodeStrict)
|
||||
|
||||
lf, space, star, dash, zero, nine, sqrOpen, sqrClose :: Word8
|
||||
lf :: Word8
|
||||
lf = 10
|
||||
space :: Word8
|
||||
space = 32
|
||||
star :: Word8
|
||||
star = 42
|
||||
dash :: Word8
|
||||
dash = 45
|
||||
zero :: Word8
|
||||
zero = 48
|
||||
nine :: Word8
|
||||
nine = 57
|
||||
sqrOpen :: Word8
|
||||
sqrOpen = 91
|
||||
sqrClose = 93
|
||||
--sqrClose :: Word8
|
||||
--sqrClose = 93
|
||||
|
||||
digit :: Parser Word8
|
||||
digit = satisfy $ \ w -> zero <= w && w <= nine
|
||||
|
@ -87,67 +95,92 @@ patchTimeP = do
|
|||
month <- decimal2P
|
||||
day <- decimal2P
|
||||
|
||||
hour <- decimal2P
|
||||
minute <- decimal2P
|
||||
second <- decimal2P
|
||||
hours <- decimal2P
|
||||
minutes <- decimal2P
|
||||
seconds <- decimal2P
|
||||
|
||||
case fromGregorianValid year month day of
|
||||
Nothing -> fail "Invalid patch date"
|
||||
Just uday -> return UTCTime
|
||||
{ utctDay = uday
|
||||
, utctDayTime =
|
||||
secondsToDiffTime $ 3600 * hour + 60 * minute + second
|
||||
secondsToDiffTime $ 3600 * hours + 60 * minutes + seconds
|
||||
}
|
||||
|
||||
line :: Parser ByteString
|
||||
line = restOfLine
|
||||
|
||||
restOfLine :: Parser ByteString
|
||||
restOfLine = takeWhile (/= lf)
|
||||
|
||||
eol :: Parser ()
|
||||
eol = skip (== lf)
|
||||
|
||||
skipLine :: Parser ()
|
||||
skipLine = do
|
||||
skipWhile (/= lf)
|
||||
skip (== lf)
|
||||
skipLine = skipWhile (/= lf)
|
||||
|
||||
skipRestOfLine :: Parser ()
|
||||
skipRestOfLine = skipLine
|
||||
|
||||
skipPatchP :: Parser ()
|
||||
skipPatchP = do
|
||||
skipPatchP =
|
||||
-- title
|
||||
skipLine
|
||||
skipLine *> eol *>
|
||||
-- author, inverted, time
|
||||
skipLine
|
||||
skipLine *> eol *>
|
||||
-- ignore, description
|
||||
skipMany $ do
|
||||
skip (== space)
|
||||
skipRestOfLine
|
||||
(skipMany $ skip (== space) *> skipRestOfLine *> eol) *>
|
||||
-- end of info
|
||||
string "] \n"
|
||||
(string "] \n") *>
|
||||
-- hash
|
||||
skipWhile (/= lf)
|
||||
skipLine
|
||||
|
||||
pristineP :: Parser (Maybe Int, H.Hash)
|
||||
pristineP = do
|
||||
string "pristine:"
|
||||
(,) Nothing . H.decodeBase16 <$> takeWhile (/= lf)
|
||||
sha256P :: Parser ByteString
|
||||
sha256P = do
|
||||
bs <- take 64
|
||||
case second B.null $ B16.decode bs of
|
||||
(h, True) -> return h
|
||||
_ -> fail "SHA256 decoding from hex failed"
|
||||
|
||||
sizeP :: Parser Int
|
||||
sizeP = do
|
||||
bs <- take 10
|
||||
case second B.null <$> BX.readDecimal bs of
|
||||
Just (n, True) -> return n
|
||||
_ -> fail "sizeP failed"
|
||||
|
||||
sizeSha256P :: Parser (Int, ByteString)
|
||||
sizeSha256P = liftA2 (,) sizeP (skip (== dash) *> sha256P)
|
||||
|
||||
pristineP :: Parser PristineHash
|
||||
pristineP = string "pristine:" *> (PristineHash <$> sha256P)
|
||||
|
||||
prevInvP :: Parser InventoryHash
|
||||
prevInvP =
|
||||
string "Starting with inventory" *> eol *>
|
||||
(uncurry InventoryHash <$> sizeSha256P)
|
||||
|
||||
patchInfoRawP :: Parser PatchInfoRaw
|
||||
patchInfoRawP = do
|
||||
word8 sqrOpen
|
||||
title <- takeWhile1 (/= lf)
|
||||
word8 lf
|
||||
eol
|
||||
|
||||
author <- takeWhile1 (/= star)
|
||||
word8 star
|
||||
inverted <- (/= star) <$> (satisfy $ \ c -> c == star || c == dash)
|
||||
(timeRaw, time) <- match patchTimeP
|
||||
word8 lf
|
||||
eol
|
||||
|
||||
word8 space
|
||||
junkp <- string "Ignore-this: "
|
||||
junkc <- takeWhile1 (/= lf)
|
||||
word8 lf
|
||||
eol
|
||||
|
||||
lines <- many $ word8 space *> takeWhile (/= lf) <* word8 lf
|
||||
lines <- many $ word8 space *> takeWhile (/= lf) <* eol
|
||||
string "] \nhash: "
|
||||
|
||||
hash <- takeWhile1 (/= lf)
|
||||
hash <- sizeSha256P
|
||||
|
||||
return PatchInfoRaw
|
||||
{ pirAuthor = author
|
||||
|
@ -185,7 +218,7 @@ refinePatchInfo pir =
|
|||
l -> Just $ BC.unlines l
|
||||
in PatchInfo
|
||||
{ piAuthor = decodeStrict $ pirAuthor pir
|
||||
, piHash = pirHash pir
|
||||
, piHash = uncurry ContentHash $ pirHash pir
|
||||
, piTitle = decodeStrict title
|
||||
, piDescription = decodeStrict <$> description
|
||||
, piTag = tag
|
||||
|
@ -194,71 +227,54 @@ refinePatchInfo pir =
|
|||
|
||||
-- | Parse patch metadata and compute the metadata's hash, which can be used as
|
||||
-- a patch identifier for lookup and matching.
|
||||
patchInfoP :: Parser (PatchInfo, ByteString)
|
||||
patchInfoP :: Parser (PatchInfo, PatchHash)
|
||||
patchInfoP = do
|
||||
pir <- patchInfoRawP
|
||||
return (refinePatchInfo pir, convert $ hashPatchInfo SHA1 pir)
|
||||
|
||||
-- NEXT current plan:
|
||||
--
|
||||
-- (0) all parses are incremental!!!
|
||||
-- (1) use CountP below to determine number of patches
|
||||
-- (2) given pagination details determine offset and limit
|
||||
-- (3) parse the patches we want
|
||||
-- (4) reverse their order...
|
||||
-- (5) generate a whamlet widget from that
|
||||
-- (*) FOR NOW no pagination, just read the entire thing once...
|
||||
return (refinePatchInfo pir, PatchHash $ convert $ hashPatchInfo SHA1 pir)
|
||||
|
||||
patchInfosCountP :: Parser Int
|
||||
patchInfosCountP = do
|
||||
skipWhile (/= lf) -- pristine hash
|
||||
n <- length <$> (many $ word8 lf >> skipPatchP)
|
||||
word8 lf
|
||||
-- pristine hash
|
||||
skipLine
|
||||
-- previous inventory
|
||||
optional $
|
||||
eol *> string "Starting" *> skipRestOfLine *> eol *>
|
||||
skipLine
|
||||
-- patch info
|
||||
n <- length <$> (many $ eol *> skipPatchP)
|
||||
eol
|
||||
return n
|
||||
|
||||
patchInfosAllP :: Parser PatchSeq
|
||||
patchInfosAllP = do
|
||||
(psize, phash) <- pristineP
|
||||
ps <- many $ word8 lf >> patchInfoP
|
||||
word8 lf
|
||||
return PatchSeq
|
||||
{ psPristineHash = phash
|
||||
, psPristineSize = psize
|
||||
, psPatches = ps
|
||||
}
|
||||
patchInfosAllP = PatchSeq
|
||||
<$> pristineP
|
||||
<*> optional (eol *> prevInvP)
|
||||
<*> many (eol *> patchInfoP)
|
||||
<* eol
|
||||
|
||||
patchInfosOffsetP :: Int -> Parser PatchSeq
|
||||
patchInfosOffsetP off = do
|
||||
(psize, phash) <- pristineP
|
||||
replicateM_ off $ word8 lf >> skipPatchP
|
||||
ps <- many $ word8 lf >> patchInfoP
|
||||
word8 lf
|
||||
return PatchSeq
|
||||
{ psPristineHash = phash
|
||||
, psPristineSize = psize
|
||||
, psPatches = ps
|
||||
}
|
||||
patchInfosOffsetP off = PatchSeq
|
||||
<$> pristineP
|
||||
<*> optional (eol *> prevInvP)
|
||||
<*> ( replicateM_ off (eol *> skipPatchP) *>
|
||||
many (eol *> patchInfoP)
|
||||
)
|
||||
<* eol
|
||||
|
||||
|
||||
patchInfosLimitP :: Int -> Parser PatchSeq
|
||||
patchInfosLimitP lim = do
|
||||
(psize, phash) <- pristineP
|
||||
ps <- atMost lim $ word8 lf >> patchInfoP
|
||||
return PatchSeq
|
||||
{ psPristineHash = phash
|
||||
, psPristineSize = psize
|
||||
, psPatches = ps
|
||||
}
|
||||
patchInfosLimitP lim = PatchSeq
|
||||
<$> pristineP
|
||||
<*> optional (eol *> prevInvP)
|
||||
<*> atMost lim (eol *> patchInfoP)
|
||||
|
||||
patchInfosOffsetLimitP :: Int -> Int -> Parser PatchSeq
|
||||
patchInfosOffsetLimitP off lim = do
|
||||
(psize, phash) <- pristineP
|
||||
replicateM_ off $ word8 lf >> skipPatchP
|
||||
ps <- atMost lim $ word8 lf >> patchInfoP
|
||||
return PatchSeq
|
||||
{ psPristineHash = phash
|
||||
, psPristineSize = psize
|
||||
, psPatches = ps
|
||||
}
|
||||
patchInfosOffsetLimitP off lim = PatchSeq
|
||||
<$> pristineP
|
||||
<*> optional (eol *> prevInvP)
|
||||
<*> ( replicateM_ off (eol *> skipPatchP) *>
|
||||
atMost lim (eol *> patchInfoP)
|
||||
)
|
||||
|
||||
darcsDir :: FilePath
|
||||
darcsDir = "_darcs"
|
||||
|
|
|
@ -15,23 +15,54 @@
|
|||
|
||||
module Darcs.Local.PatchInfo.Types
|
||||
( PatchInfoRaw (..)
|
||||
, PatchHash (..)
|
||||
, ContentHash (..)
|
||||
, InventoryHash (..)
|
||||
, PristineHash (..)
|
||||
, PatchInfo (..)
|
||||
, PatchSeq (..)
|
||||
)
|
||||
where
|
||||
|
||||
-- TODO
|
||||
--
|
||||
-- Apparently, after a while, some of the patches are moved from
|
||||
-- hashed_inventory into the inventories/ dir. So the patch set contains more
|
||||
-- than one group. This means I need to extend my parser to cover this case.
|
||||
-- Sources for info about this thing:
|
||||
--
|
||||
-- * Darcs source code
|
||||
-- * Darcs wiki
|
||||
-- * Local Darcs repos I have
|
||||
--
|
||||
-- From Darcs source code:
|
||||
--
|
||||
-- > The patches in a repository are stored in chunks broken up at \"clean\"
|
||||
-- > tags. A tag is clean if the only patches before it in the current
|
||||
-- > repository ordering are ones that the tag depends on (either directly
|
||||
-- > or indirectly). Each chunk is stored in a separate inventory file on disk.
|
||||
-- >
|
||||
-- > A 'PatchSet' represents a repo's history as the list of patches since the
|
||||
-- > last clean tag, and then a list of patch lists each delimited by clean tags.
|
||||
-- >
|
||||
-- > A 'Tagged' is a single chunk of a 'PatchSet'. It has a 'PatchInfo'
|
||||
-- > representing a clean tag, the hash of the previous inventory (if it exists),
|
||||
-- > and the list of patches since that previous inventory.
|
||||
--
|
||||
-- Let's start with finding out the format of the binary inventories and
|
||||
-- parsing them.
|
||||
|
||||
import Prelude
|
||||
|
||||
import Data.ByteString (ByteString)
|
||||
import Data.Text (Text)
|
||||
import Data.Time.Clock (UTCTime)
|
||||
import Storage.Hashed.Hash (Hash)
|
||||
|
||||
-- | Patch metadata in raw form. This is intended for accurate hashing of the
|
||||
-- patch info.
|
||||
data PatchInfoRaw = PatchInfoRaw
|
||||
{ pirAuthor :: ByteString
|
||||
, pirHash :: ByteString
|
||||
, pirHash :: (Int, ByteString)
|
||||
, pirTitle :: ByteString
|
||||
, pirDescription :: [ByteString]
|
||||
, pirJunkPrefix :: ByteString
|
||||
|
@ -40,14 +71,36 @@ data PatchInfoRaw = PatchInfoRaw
|
|||
, pirInverted :: Bool
|
||||
}
|
||||
|
||||
-- | A SHA1 hash of the patch info (author, title, description including junk,
|
||||
-- timestamp). The hash is in binary form, not hex, i.e. its size is always 20
|
||||
-- bytes.
|
||||
newtype PatchHash = PatchHash { unPatchHash :: ByteString }
|
||||
|
||||
-- | Content size and SHA256 hash of a patch's info and content. The hash is in
|
||||
-- binary form, not hex, i.e. its size is always 32 bytes.
|
||||
data ContentHash = ContentHash
|
||||
{ chSize :: Int
|
||||
, chHash :: ByteString
|
||||
}
|
||||
|
||||
-- | Content size and SHA256 hash of an inventory (a patch set in a single
|
||||
-- invetory file). The hash is in binary form, not hex, i.e. its size is always
|
||||
-- 32 bytes.
|
||||
data InventoryHash = InventoryHash
|
||||
{ ihSize :: Int
|
||||
, ihHash :: ByteString
|
||||
}
|
||||
|
||||
-- | A SHA256 hash of the entire recorded state of the repo. The hash is in
|
||||
-- binary form, not hex, i.e. its size is always 32 bytes.
|
||||
newtype PristineHash = PristineHash { unPristineHash :: ByteString }
|
||||
|
||||
-- | Patch metadata read from the inventory file.
|
||||
data PatchInfo = PatchInfo
|
||||
{ -- | Author name and email
|
||||
piAuthor :: Text
|
||||
-- | Currently this is the patch hash in textual form. Possibly I'll
|
||||
-- change to binary form when I find out the encoding scheme of the hash
|
||||
-- string.
|
||||
, piHash :: ByteString
|
||||
-- | Patch content hash
|
||||
, piHash :: ContentHash
|
||||
-- | Single message line
|
||||
, piTitle :: Text
|
||||
-- | Optional description, may contain several lines
|
||||
|
@ -60,7 +113,7 @@ data PatchInfo = PatchInfo
|
|||
|
||||
-- | The information from the hashed inventory file.
|
||||
data PatchSeq = PatchSeq
|
||||
{ psPristineHash :: Hash
|
||||
, psPristineSize :: Maybe Int
|
||||
, psPatches :: [(PatchInfo, ByteString)]
|
||||
{ psPristineHash :: PristineHash
|
||||
, psPrevious :: Maybe InventoryHash
|
||||
, psPatches :: [(PatchInfo, PatchHash)]
|
||||
}
|
||||
|
|
|
@ -132,7 +132,7 @@ readChangesView path off lim = fmap maybeRight $ runExceptT $ do
|
|||
let toLE pi h = LogEntry
|
||||
{ leAuthor =
|
||||
T.stripEnd $ T.takeWhile (/= '<') $ piAuthor pi
|
||||
, leHash = decodeStrict $ B16.encode h
|
||||
, leHash = decodeStrict $ B16.encode $ unPatchHash h
|
||||
, leMessage = piTitle pi
|
||||
, leTime =
|
||||
intervalToEventTime $
|
||||
|
|
|
@ -134,6 +134,8 @@ library
|
|||
, blaze-markup
|
||||
, byteable
|
||||
, bytestring
|
||||
-- for Darcs.Local.PatchInfo.Parser
|
||||
, bytestring-lexing
|
||||
, case-insensitive
|
||||
, classy-prelude
|
||||
, classy-prelude-conduit
|
||||
|
|
Loading…
Reference in a new issue