# File: genwincodec.py (1.7 KB, mode -rw-r--r--)
"""This script generates a Python codec module from a Windows Code Page.
It uses the function MultiByteToWideChar to generate a decoding table. """
import ctypes
import unicodedata
from ctypes import wintypes

from gencodec import codegen
def genwinmap(codepage):
    """Build a byte -> (code point, character name) table for a code page.

    Every single-byte value 0..255 is decoded with the Win32 function
    MultiByteToWideChar under *codepage*.

    Args:
        codepage: Windows code page identifier handed to MultiByteToWideChar.

    Returns:
        A dict mapping each byte value 0..255 to ``(ord(char), name)``,
        where ``char`` is the decoded character.  ``name`` is
        ``unicodedata.name(char)``; if the character has no Unicode name it
        is 'CONTROL CHARACTER' for bytes 0..31 and 127, and '' otherwise.

    Raises:
        AssertionError: if a byte does not decode to exactly one UTF-16
            code unit ("invalid code page").
    """
    MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
    MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
                                    wintypes.LPCSTR, ctypes.c_int,
                                    wintypes.LPWSTR, ctypes.c_int]
    MultiByteToWideChar.restype = ctypes.c_int

    # Pre-seed fallback names for bytes 0..31 and 127; every entry is
    # overwritten by the loop below, which only reads the name back when
    # unicodedata has no name for the decoded character.
    enc2uni = {i: (i, 'CONTROL CHARACTER') for i in (*range(32), 127)}

    for i in range(256):
        buf = ctypes.create_unicode_buffer(2)
        ret = MultiByteToWideChar(codepage, 0, bytes([i]), 1, buf, 2)
        # Raised explicitly (not via `assert`) so the check on the API result
        # is still performed under `python -O`; the exception type is kept
        # as AssertionError for backward compatibility.
        if ret != 1:
            raise AssertionError("invalid code page")
        # Internal sanity check: only one code unit was written, so the
        # second slot of the freshly created buffer must still be NUL.
        assert buf[1] == '\x00'
        try:
            name = unicodedata.name(buf[0])
        except ValueError:
            # unicodedata.name() raises ValueError for unnamed characters.
            name = enc2uni.get(i, (i, ''))[1]

        enc2uni[i] = (ord(buf[0]), name)

    return enc2uni
def genwincodec(codepage):
    """Print generated codec source for Windows code page *codepage*.

    The table returned by genwinmap() is passed to gencodec's codegen()
    under the encoding name 'cp<codepage>'.  Everything in the generated
    text up to and including the first docstring-terminator marker (three
    double quotes followed by ``#"``) is replaced with a header that records
    the encoding name, the platform.win32_ver() fields and the command line
    used.  The result is written to standard output.

    Args:
        codepage: Windows code page number (formatted with ``%d``).
    """
    import platform

    decoding_map = genwinmap(codepage)
    encodingname = 'cp%d' % codepage
    generated = codegen("", decoding_map, encodingname)

    # Replace first lines with our own docstring
    windows_version = ' '.join(platform.win32_ver())
    header = '''\
"""Python Character Mapping Codec %s generated on Windows:
%s with the command:
  python Tools/unicode/genwincodec.py %s
"""#"
''' % (encodingname, windows_version, codepage)

    # Keep only what follows the first marker in the generated source.
    remainder = generated.split('"""#"', 1)[1]

    print(header + remainder)
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the numeric
    # Windows code page to generate a codec for.
    import sys

    # Exit with a usage message instead of an IndexError traceback when the
    # code page argument is missing.
    if len(sys.argv) < 2:
        sys.exit('usage: python Tools/unicode/genwincodec.py CODEPAGE')
    genwincodec(int(sys.argv[1]))
|