Initial implementation of Darcs patch reader

This commit is contained in:
fr33domlover 2018-07-08 14:45:35 +00:00
parent c8b085fbc8
commit 7782e83419

View file

@ -31,23 +31,29 @@ import Darcs.Util.Path
import Darcs.Util.Tree
import Darcs.Util.Tree.Hashed
import Data.Bool (bool)
import Data.Maybe (listToMaybe, mapMaybe)
import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe (listToMaybe, mapMaybe, fromMaybe)
import Data.Text (Text)
import Data.Text.Encoding (encodeUtf8, decodeUtf8With)
import Data.Text.Encoding (encodeUtf8, decodeUtf8With, decodeUtf8)
import Data.Text.Encoding.Error (strictDecode)
import Data.Time.Clock (UTCTime, getCurrentTime, diffUTCTime)
import Data.Traversable (for)
import Development.Darcs.Internal.Hash.Codec
import Development.Darcs.Internal.Hash.Types
import Development.Darcs.Internal.Inventory.Parser
import Development.Darcs.Internal.Inventory.Read
import Development.Darcs.Internal.Inventory.Types
import Development.Darcs.Internal.Patch.Types
import System.FilePath ((</>))
import Text.Email.Validate (emailAddress)
import qualified Data.Attoparsec.Text as A
import qualified Data.ByteString.Lazy as BL (ByteString)
import qualified Data.ByteString.Base16 as B16 (encode)
import qualified Data.ByteString.Base16 as B16 (encode, decode)
import qualified Data.Foldable as F (find)
import qualified Data.Text as T (takeWhile, stripEnd)
import qualified Data.Text as T (unpack, takeWhile, stripEnd)
import qualified Data.Vector as V (empty)
import qualified Development.Darcs.Internal.Patch.Parser as P
import Darcs.Local.Repository
import Data.Either.Local (maybeRight)
@ -221,40 +227,73 @@ lastChange path now = fmap maybeRight $ runExceptT $ do
FriendlyConvert $
now `diffUTCTime` piTime pi
-- TODO
-- | Read patch content, both metadata and the actual diff, from a given Darcs
-- repository. Preconditions:
--
-- * The repository is assumed to exist. If it doesn't, an exception is thrown.
-- * The repository is assumed to be in a consistent state, all the expected
-- inventory files and patch files and so on are assumed to exist and have
-- the expected format. If not, an exception is thrown.
-- * The hash may or may not be found in the repo. If there's no patch in the
-- repo with the given hash, 'Nothing' is returned.
readPatch :: FilePath -> Text -> IO (Maybe Patch)
readPatch path hash = error "Not implemented"
-- I'm not sure what's the fastest way to find a patch file given its info
-- hash, maybe Darcs keeps some cache or something. But assuming there are
-- no tricks like that, here's an idea how to grab the patch:
--
-- (1) Start going over the whole inventory, whose order is from latest to
-- oldest, looking for a patch with the given hash.
-- (2) Once found, determine the patch filename from its size and content
-- hash
-- (3) Run the patch parser on that file, through a zlib decompressor
-- though (check how I did that for the inventories parser)
--
-- TODO idea: Use hints to speed up finding the patch! In the repo history
-- log page, embed hints into the hyperlinks to the patches, and in the
-- patch page handler, use the hint to figure out the patch location.
-- Actually, since the inventory file contains patch content hashes, I can
-- use that as a hint and skip the whole step of looking for the patch!
--
-- TODO maybe start by finding the patch hash in patch_ids and use the
-- position as a hint to its location in the inventories
--
-- TODO maybe I can figure out from darcs source code how a given patch
-- hash is found? Just in case there's a faster way
--
-- TODO find out what's the index and patch_index files under _darcs and
-- maybe other files there, possibly there's a way to patch the info hash
-- with the content hash.
--
-- UPDATE: I read about index and patch_index, looks like they won't help.
-- But possibly the global cache system will? However interesting note:
-- Vervis on my laptop has a patch_index, but on the server it doesn't.
-- Probably because `darcs log` never runs on the server since I parse
-- patches manually. If I end up using the patch index for something, it
-- may be a good idea to trigger its generation, so that it's available
-- when people browser repo pages.
readPatch path hash = do
let pih = PatchInfoHash $ fst $ B16.decode $ encodeUtf8 hash
li <- handle =<< readLatestInventory path latestInventoryAllP
mp <- loop pih (liPatches li) (fst <$> liPrevTag li)
for mp $ \ (pi, pch) -> do
(_pir, hunks) <- handle =<< readCompressedPatch path pch P.patch
let (an, ae) =
either error id $
A.parseOnly (author <* A.endOfInput) $ piAuthor pi
return Patch
{ patchAuthorName = an
, patchAuthorEmail = ae
, patchTime = piTime pi
, patchTitle = piTitle pi
, patchDescription = fromMaybe "" $ piDescription pi
, patchDiff = map mkedit hunks
}
where
handle = either (const $ fail "readPatch failed") pure
lookup' pih ps = case F.find (\ (_pi, pih', _pch) -> pih' == pih) ps of
Nothing -> Nothing
Just (pi, _pih, pch) -> Just (pi, pch)
loop pih ps mih = case lookup' pih ps of
Just p -> return $ Just p
Nothing -> case mih of
Nothing -> return Nothing
Just ih -> do
i <- handle =<< readCompressedInventory path ih earlyInventoryAllP
case i of
Left ei -> loop pih (eiPatches ei) Nothing
Right mi -> loop pih (miPatches mi) (Just $ miPrevious mi)
email = maybe (fail "invalid email") pure . emailAddress . encodeUtf8
author = (,)
<$> A.takeWhile1 (const True)
<* " <"
<*> (A.takeWhile1 (/= '>') >>= email)
<* A.skip (== '>')
mkhunk h = case P.hunkRemove h of
[] -> Hunk
{ hunkAddFirst = map decodeUtf8 $ P.hunkAdd h
, hunkRemoveAdd = []
, hunkRemoveLast = []
}
r:rs -> case P.hunkAdd h of
[] -> Hunk
{ hunkAddFirst = []
, hunkRemoveAdd = []
, hunkRemoveLast = map decodeUtf8 $ r : rs
}
a:as -> Hunk
{ hunkAddFirst = []
, hunkRemoveAdd = [(decodeUtf8 <$> r :| rs, decodeUtf8 <$> a :| as)]
, hunkRemoveLast = []
}
mkedit hunk =
EditTextFile
(T.unpack $ decodeUtf8 $ P.hunkFile hunk)
V.empty
((False, P.hunkLine hunk, mkhunk hunk) :| [])
0 0