Added utf8.encode
Here's a function that takes a Unicode code point and returns the corresponding UTF-8 encoded character bytes. Example: utf8.encode(0x265c) -- returns '♜' Please feel free to edit or revert if it's not your style. It might not be relevant to Quickie, though it's a handy UTF-8 utility.
This commit is contained in:
parent
b63895a2c7
commit
52d06037d1
1 changed files with 35 additions and 0 deletions
35
utf8.lua
35
utf8.lua
|
@ -123,6 +123,40 @@ local function reverse(s)
|
|||
return table.concat(t)
|
||||
end
|
||||
|
||||
-- Convert a Unicode code point to a UTF-8 byte sequence.
-- Logic adapted from this page:
-- http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=iws-appendixa
--
-- Arguments:
--   code  Number representing the Unicode code point (e.g. 0x265c).
--
-- Returns:
--   UTF-8 encoded string (1 to 4 bytes) of the given character.
--   Code points above 0x10ffff produce an empty string.
--
-- Raises:
--   An error if the code point is negative.
--
-- Note: surrogate code points (0xd800-0xdfff) are not rejected; they are
-- encoded as ordinary three-byte sequences.
local function encode(code)
  local floor = math.floor

  if code < 0 then
    error('Code point must not be negative.')
  elseif code <= 0x7f then
    -- ASCII range: a single byte equal to the code point.
    return string.char(code)
  elseif code <= 0x7ff then
    -- Two bytes: 110xxxxx 10xxxxxx
    -- Every quotient is floored explicitly: `/` yields a fractional value,
    -- which string.char rejects on Lua 5.3+ ("number has no integer
    -- representation") and only silently truncates on Lua 5.1.
    local c1 = floor(code / 64) + 192
    local c2 = code % 64 + 128
    return string.char(c1, c2)
  elseif code <= 0xffff then
    -- Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
    local c1 = floor(code / 4096) + 224
    local c2 = floor(code % 4096 / 64) + 128
    local c3 = code % 64 + 128
    return string.char(c1, c2, c3)
  elseif code <= 0x10ffff then
    -- Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    local c1 = floor(code / 262144) + 240
    local c2 = floor(code % 262144 / 4096) + 128
    local c3 = floor(code % 4096 / 64) + 128
    local c4 = code % 64 + 128
    return string.char(c1, c2, c3, c4)
  end

  -- Beyond the Unicode range (> 0x10ffff): nothing to encode.
  return ''
end
|
||||
|
||||
return {
|
||||
iter = iter,
|
||||
chars = chars,
|
||||
|
@ -130,4 +164,5 @@ return {
|
|||
sub = sub,
|
||||
split = split,
|
||||
reverse = reverse,
|
||||
encode = encode
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue