Added utf8.encode
Here's a function that takes a Unicode code point and returns the corresponding UTF-8 encoded character bytes. Example: `utf8.encode(0x265c)` returns '♜'. Please feel free to edit or revert if it's not your style. It might not be relevant to Quickie, though it's a handy UTF-8 utility.
This commit is contained in:
parent
b63895a2c7
commit
52d06037d1
1 changed files with 35 additions and 0 deletions
35
utf8.lua
35
utf8.lua
|
@ -123,6 +123,40 @@ local function reverse(s)
|
||||||
return table.concat(t)
|
return table.concat(t)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Convert a Unicode code point to a UTF-8 byte sequence
-- Logic stolen from this page:
-- http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=iws-appendixa
--
-- Arguments:
--   Number representing the Unicode code point (e.g. 0x265c).
--
-- Returns:
--   UTF-8 encoded string of the given character.
--   Code points above 0x10ffff produce a blank string.
--
-- Raises:
--   An error if the code point is negative.
local function encode(code)
	-- Every division below is floored so string.char always receives whole
	-- byte values. Lua 5.1/LuaJIT truncate fractional arguments silently,
	-- but Lua 5.3+ rejects them ("number has no integer representation").
	local floor = math.floor

	if code < 0 then
		error('Code point must not be negative.')
	elseif code <= 0x7f then
		-- 1 byte: 0xxxxxxx
		return string.char(code)
	elseif code <= 0x7ff then
		-- 2 bytes: 110xxxxx 10xxxxxx
		local c1 = floor(code / 64) + 192
		local c2 = code % 64 + 128
		return string.char(c1, c2)
	elseif code <= 0xffff then
		-- 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
		local c1 = floor(code / 4096) + 224
		local c2 = floor(code % 4096 / 64) + 128
		local c3 = code % 64 + 128
		return string.char(c1, c2, c3)
	elseif code <= 0x10ffff then
		-- 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		local c1 = floor(code / 262144) + 240
		local c2 = floor(code % 262144 / 4096) + 128
		local c3 = floor(code % 4096 / 64) + 128
		local c4 = code % 64 + 128
		return string.char(c1, c2, c3, c4)
	end

	-- Beyond the Unicode range (> 0x10ffff): nothing to encode.
	return ''
end
|
||||||
|
|
||||||
return {
|
return {
|
||||||
iter = iter,
|
iter = iter,
|
||||||
chars = chars,
|
chars = chars,
|
||||||
|
@ -130,4 +164,5 @@ return {
|
||||||
sub = sub,
|
sub = sub,
|
||||||
split = split,
|
split = split,
|
||||||
reverse = reverse,
|
reverse = reverse,
|
||||||
|
encode = encode
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue