[PATCH RFC] convert: add support for recode in filemaps

Martin Geisler mg at lazybytes.net
Mon Dec 19 12:21:50 CST 2011


# HG changeset patch
# User Martin Geisler <mg at lazybytes.net>
# Date 1324318815 -3600
# Node ID 5bf6234ff33f997486c85210c6d7cec58f1fa524
# Parent  4841035f37b6df368682460d8a7cbf10276b8d1b
convert: add support for recode in filemaps

This command is used in a filemap like

  recode OLD NEW

and will make convert recode all file names from OLD to NEW.

This patch is not 100% done -- there could be a warning if recode is
specified twice, for example.

Also, the recoding is done before renames are taken into account. It
should probably be done after since the filemap seems to work on
source path names only.

I made the patch to better support the case discussed here:

  http://serverfault.com/a/342446/14103

diff --git a/hgext/convert/filemap.py b/hgext/convert/filemap.py
--- a/hgext/convert/filemap.py
+++ b/hgext/convert/filemap.py
@@ -4,7 +4,7 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
-import shlex
+import shlex, codecs
 from mercurial.i18n import _
 from mercurial import util
 from common import SKIPREV, converter_source
@@ -26,6 +26,7 @@
         self.include = {}
         self.exclude = {}
         self.rename = {}
+        self.recode = None
         if path:
             if self.parse(path):
                 raise util.Abort(_('errors in filemap'))
@@ -68,6 +69,14 @@
                 self.rename[src] = dest
             elif cmd == 'source':
                 errs += self.parse(lex.get_token())
+            elif cmd == 'recode':
+                self.recode = (lex.get_token(), lex.get_token())
+                try:
+                    codecs.getdecoder(self.recode[0])
+                    codecs.getencoder(self.recode[1])
+                except LookupError, e:
+                    self.ui.warn('%s:%d: %s\n' % (lex.infile, lex.lineno, e))
+                    errs += 1
             else:
                 self.ui.warn(_('%s:%d: unknown directive %r\n') %
                              (lex.infile, lex.lineno, cmd))
@@ -84,6 +93,9 @@
         return '', name, ''
 
     def __call__(self, name):
+        if self.recode:
+            name = name.decode(self.recode[0]).encode(self.recode[1])
+
         if self.include:
             inc = self.lookup(name, self.include)[0]
         else:
@@ -106,7 +118,7 @@
         return name
 
     def active(self):
-        return bool(self.include or self.exclude or self.rename)
+        return bool(self.include or self.exclude or self.rename or self.recode)
 
 # This class does two additional things compared to a regular source:
 #
diff --git a/tests/test-convert-filemap.t b/tests/test-convert-filemap.t
--- a/tests/test-convert-filemap.t
+++ b/tests/test-convert-filemap.t
@@ -375,3 +375,31 @@
   |
   o  0 "addb" files: b
 
+
+Test recode command:
+
+  $ hg init latin-1
+  $ cd latin-1
+  >>> open("p\xe6rer.txt", "w").write("pears\n")
+  $ hg commit -A -m Latin-1
+  adding p\xe6rer.txt (esc)
+  $ cd ..
+  $ echo "recode latin-1 utf-8" > recode
+  $ hg convert latin-1 utf-8 --filemap recode
+  initializing destination utf-8 repository
+  scanning source...
+  sorting...
+  converting...
+  0 Latin-1
+  $ hg -R utf-8 manifest -r tip
+  p\xc3\xa6rer.txt (esc)
+
+Errors:
+
+  $ echo "recode foo utf-8" >> recode
+  $ echo "recode latin-1 bar" >> recode
+  $ hg convert latin-1 utf-8 --filemap recode
+  recode:3: unknown encoding: foo
+  recode:4: unknown encoding: bar
+  abort: errors in filemap
+  [255]


More information about the Mercurial-devel mailing list