Added utf8.encode

Here's a function that takes a Unicode code point and returns the corresponding UTF-8 encoded character bytes. Example:

    utf8.encode(0x265c) -- returns '♜'

Please feel free to edit or revert if it's not your style. It might not be relevant to Quickie, though it's a handy UTF-8 utility.
This commit is contained in:
hryx 2013-12-31 04:37:04 -08:00
parent b63895a2c7
commit 52d06037d1

View file

@ -123,6 +123,40 @@ local function reverse(s)
return table.concat(t)
end
-- Convert a Unicode code point to a UTF-8 byte sequence
-- Logic stolen from this page:
-- http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=iws-appendixa
--
-- Arguments:
-- Number representing the Unicode code point (e.g. 0x265c).
--
-- Returns:
-- UTF-8 encoded string of the given character (1 to 4 bytes).
-- Code points above 0x10ffff produce a blank string.
--
-- Raises:
-- An error if the code point is negative.
local function encode(code)
	-- Every quotient below is floored explicitly: `/` produces a non-integer
	-- number, which Lua 5.1/LuaJIT silently truncate inside string.char but
	-- Lua 5.3+ rejects ("number has no integer representation").
	local floor = math.floor

	if code < 0 then
		error('Code point must not be negative.')
	elseif code <= 0x7f then
		-- 1 byte: 0xxxxxxx
		return string.char(code)
	elseif code <= 0x7ff then
		-- 2 bytes: 110xxxxx 10xxxxxx
		local c1 = floor(code / 64) + 192
		local c2 = code % 64 + 128
		return string.char(c1, c2)
	elseif code <= 0xffff then
		-- 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
		local c1 = floor(code / 4096) + 224
		local c2 = floor(code % 4096 / 64) + 128
		local c3 = code % 64 + 128
		return string.char(c1, c2, c3)
	elseif code <= 0x10ffff then
		-- 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		local c1 = floor(code / 262144) + 240
		local c2 = floor(code % 262144 / 4096) + 128
		local c3 = floor(code % 4096 / 64) + 128
		local c4 = code % 64 + 128
		return string.char(c1, c2, c3, c4)
	end

	-- Beyond the last valid Unicode code point.
	return ''
end
return {
iter = iter,
chars = chars,
@ -130,4 +164,5 @@ return {
sub = sub,
split = split,
reverse = reverse,
encode = encode
}