Glibc 确实提供了带显式状态的语言环境（locale）API。下面是用 ctypes 对这组 API 做的一个简易封装。
# -*- coding: utf-8
import ctypes
class Locale(object):
    """Thin ctypes wrapper around glibc's explicit-state locale API.

    Each instance owns one ``locale_t`` handle obtained from ``newlocale``
    and released with ``freelocale`` when the instance is destroyed.
    """

    def __init__(self, locale):
        """Create a locale handle for *locale* (e.g. ``'C'``, ``'mk_MK.UTF-8'``).

        Args:
            locale: Locale name, as ``bytes`` or text. Text is encoded to
                UTF-8 before being passed to ``newlocale``.

        Raises:
            OSError: if ``newlocale`` returns NULL (for instance because the
                requested locale is not installed).
        """
        # glibc's LC_ALL_MASK: every category bit set.
        # (LC_COLLATE_MASK alone would be 8.)
        LC_ALL_MASK = 8127
        # Set first so __del__ stays safe if anything below raises.
        self.ctx = None
        self.libc = ctypes.CDLL("libc.so.6", use_errno=True)

        # Declare prototypes explicitly: without them ctypes squeezes the
        # locale_t pointer and the size_t result through a C int, which
        # truncates them on 64-bit platforms.
        self.libc.newlocale.argtypes = [
            ctypes.c_int, ctypes.c_char_p, ctypes.c_void_p]
        self.libc.newlocale.restype = ctypes.c_void_p
        self.libc.strxfrm_l.argtypes = [
            ctypes.c_char_p, ctypes.c_char_p, ctypes.c_size_t,
            ctypes.c_void_p]
        self.libc.strxfrm_l.restype = ctypes.c_size_t
        self.libc.freelocale.argtypes = [ctypes.c_void_p]
        self.libc.freelocale.restype = None

        if not isinstance(locale, bytes):
            locale = locale.encode('utf-8')
        ctx = self.libc.newlocale(LC_ALL_MASK, locale, None)
        if not ctx:
            raise OSError(ctypes.get_errno(),
                          'newlocale failed for locale %r' % (locale,))
        self.ctx = ctx

    def strxfrm(self, src, iteration=1):
        """Return the collation sort key of *src* in this locale.

        Comparing two returned keys byte-wise orders the original strings
        according to the locale's collation rules, so the method can be
        used directly as a ``key=`` function for ``sorted``.

        Args:
            src: String to transform, as ``bytes`` or text. Text is encoded
                to UTF-8 first (assumes the locale's codeset is UTF-8).
            iteration: Multiplier for the first-guess buffer size
                (``3 * iteration * len(src)`` bytes). Kept for backward
                compatibility; if the guess is too small the call is simply
                repeated with the exact size reported by ``strxfrm_l``.

        Returns:
            The transformed key as ``bytes``.
        """
        if not isinstance(src, bytes):
            src = src.encode('utf-8')
        # +1 leaves room for the terminating NUL, so an empty input works.
        size = max(1, 3 * iteration * len(src) + 1)
        dest = ctypes.create_string_buffer(size)
        n = self.libc.strxfrm_l(dest, src, size, self.ctx)
        if n >= size:
            # Output did not fit; n is the exact length needed (without
            # the NUL), so one retry with n + 1 bytes always suffices.
            size = n + 1
            dest = ctypes.create_string_buffer(size)
            self.libc.strxfrm_l(dest, src, size, self.ctx)
        return dest.value

    def __del__(self):
        """Release the underlying ``locale_t`` handle, if one was created."""
        ctx = getattr(self, 'ctx', None)
        if ctx:
            # Clear before freeing so the handle can never be freed twice.
            self.ctx = None
            self.libc.freelocale(ctx)
以及一个简短的测试：
# Smoke test: sort the same letters under two different locale handles.
# Needs the mk_MK.UTF-8 locale to be installed on the system.
locale1 = Locale('C')
locale2 = Locale('mk_MK.UTF-8')
# NOTE(review): three letters had been mangled into '?' by an encoding
# round-trip. They are restored here as ј, ќ, џ -- presumably the originals,
# since they follow ш in code-point order yet precede it in the Macedonian
# alphabet, exactly as the two asserts below require. Confirm.
a_list = ['а', 'б', 'в', 'ј', 'ќ', 'џ', 'ш']
import random
random.shuffle(a_list)
# "C" locale: plain byte order, so ш (U+0448) sorts before ј/ќ/џ.
assert sorted(a_list, key=locale1.strxfrm) == ['а', 'б', 'в', 'ш', 'ј', 'ќ', 'џ']
# Macedonian collation: alphabet order, in which ш is the last letter.
assert sorted(a_list, key=locale2.strxfrm) == ['а', 'б', 'в', 'ј', 'ќ', 'џ', 'ш']
剩下要做的就是：实现其余所有的语言环境函数，支持 Python 的 unicode 字符串（我猜需要改用 wchar_t* 系列函数），并自动从头文件中导入常量定义等内容。