diff --git a/druid/base/text.lua b/druid/base/text.lua
index e6ccf07..1641626 100644
--- a/druid/base/text.lua
+++ b/druid/base/text.lua
@@ -64,6 +64,18 @@ local function update_text_area_size(self)
 end
 
 
+-- calculate space width with font
+local function get_space_width(self, font)
+	if not self._space_width[font] then
+		local no_space = gui.get_text_metrics(font, "1", 0, false, 0, 0).width
+		local with_space = gui.get_text_metrics(font, " 1", 0, false, 0, 0).width
+		self._space_width[font] = with_space - no_space
+	end
+
+	return self._space_width[font]
+end
+
+
 --- Component init function
 -- @function text:init
 -- @tparam node node Gui text node
@@ -88,11 +100,34 @@ function M.init(self, node, value, no_adjust)
 	self.on_set_pivot = Event()
 	self.on_update_text_scale = Event()
 
+	self._space_width = {}
+
 	self:set_to(value or gui.get_text(self.node))
 	return self
 end
 
 
+--- Calculate text width with font with respect to trailing space
+-- @function text:get_text_width
+-- @tparam[opt] string text
+function M.get_text_width(self, text)
+	text = text or self.last_value
+	local font = gui.get_font(self.node)
+	local scale = gui.get_scale(self.node)
+	local result = gui.get_text_metrics(font, text, 0, false, 0, 0).width
+	for i = #text, 1, -1 do
+		local c = string.sub(text, i, i)
+		if c ~= ' ' then
+			break
+		end
+
+		result = result + get_space_width(self, font)
+	end
+
+	return result * scale.x
+end
+
+
 --- Set text to text field
 -- @function text:set_to
 -- @tparam string set_to Text for node
diff --git a/druid/system/utf8.lua b/druid/system/utf8.lua
new file mode 100644
index 0000000..7f13914
--- /dev/null
+++ b/druid/system/utf8.lua
@@ -0,0 +1,1045 @@
+-- $Id: utf8.lua 179 2009-04-03 18:10:03Z pasta $
+--
+-- Provides UTF-8 aware string functions implemented in pure lua:
+-- * utf8len(s)
+-- * utf8sub(s, i, j)
+-- * utf8reverse(s)
+-- * utf8char(unicode)
+-- * utf8unicode(s, i, j)
+-- * utf8gensub(s, sub_len)
+-- * utf8find(str, regex, init, plain)
+-- * utf8match(str, regex, init)
+-- * utf8gmatch(str, regex, all)
+-- * utf8gsub(str, regex, repl, limit)
+--
+-- If utf8data.lua (containing the lower<->upper case mappings) is loaded, these
+-- additional functions are available:
+-- * utf8upper(s)
+-- * utf8lower(s)
+--
+-- All functions behave as their non UTF-8 aware counterparts with the exception
+-- that UTF-8 characters are used instead of bytes for all units.
+
+--[[
+Copyright (c) 2006-2007, Kyle Smith
+All rights reserved.
+
+Contributors:
+	Alimov Stepan
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the author nor the names of its contributors may be
+      used to endorse or promote products derived from this software without
+      specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+--]]
+
+-- ABNF from RFC 3629
+--
+-- UTF8-octets = *( UTF8-char )
+-- UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
+-- UTF8-1      = %x00-7F
+-- UTF8-2      = %xC2-DF UTF8-tail
+-- UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
+--               %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
+-- UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
+--               %xF4 %x80-8F 2( UTF8-tail )
+-- UTF8-tail   = %x80-BF
+--
+
+local byte    = string.byte
+local char    = string.char
+local dump    = string.dump
+local find    = string.find
+local format  = string.format
+local len     = string.len
+local lower   = string.lower
+local rep     = string.rep
+local sub     = string.sub
+local upper   = string.upper
+
+-- returns the number of bytes used by the UTF-8 character at byte i in s
+-- also doubles as a UTF-8 character validator
+local function utf8charbytes (s, i)
+	-- argument defaults
+	i = i or 1
+
+	-- argument checking
+	if type(s) ~= "string" then
+		error("bad argument #1 to 'utf8charbytes' (string expected, got ".. type(s).. ")")
+	end
+	if type(i) ~= "number" then
+		error("bad argument #2 to 'utf8charbytes' (number expected, got ".. type(i).. ")")
+	end
+
+	local c = byte(s, i)
+
+	-- determine bytes needed for character, based on RFC 3629
+	-- validate byte 1
+	if c > 0 and c <= 127 then
+		-- UTF8-1
+		return 1
+
+	elseif c >= 194 and c <= 223 then
+		-- UTF8-2
+		local c2 = byte(s, i + 1)
+
+		if not c2 then
+			error("UTF-8 string terminated early")
+		end
+
+		-- validate byte 2
+		if c2 < 128 or c2 > 191 then
+			error("Invalid UTF-8 character")
+		end
+
+		return 2
+
+	elseif c >= 224 and c <= 239 then
+		-- UTF8-3
+		local c2 = byte(s, i + 1)
+		local c3 = byte(s, i + 2)
+
+		if not c2 or not c3 then
+			error("UTF-8 string terminated early")
+		end
+
+		-- validate byte 2
+		if c == 224 and (c2 < 160 or c2 > 191) then
+			error("Invalid UTF-8 character")
+		elseif c == 237 and (c2 < 128 or c2 > 159) then
+			error("Invalid UTF-8 character")
+		elseif c2 < 128 or c2 > 191 then
+			error("Invalid UTF-8 character")
+		end
+
+		-- validate byte 3
+		if c3 < 128 or c3 > 191 then
+			error("Invalid UTF-8 character")
+		end
+
+		return 3
+
+	elseif c >= 240 and c <= 244 then
+		-- UTF8-4
+		local c2 = byte(s, i + 1)
+		local c3 = byte(s, i + 2)
+		local c4 = byte(s, i + 3)
+
+		if not c2 or not c3 or not c4 then
+			error("UTF-8 string terminated early")
+		end
+
+		-- validate byte 2
+		if c == 240 and (c2 < 144 or c2 > 191) then
+			error("Invalid UTF-8 character")
+		elseif c == 244 and (c2 < 128 or c2 > 143) then
+			error("Invalid UTF-8 character")
+		elseif c2 < 128 or c2 > 191 then
+			error("Invalid UTF-8 character")
+		end
+
+		-- validate byte 3
+		if c3 < 128 or c3 > 191 then
+			error("Invalid UTF-8 character")
+		end
+
+		-- validate byte 4
+		if c4 < 128 or c4 > 191 then
+			error("Invalid UTF-8 character")
+		end
+
+		return 4
+
+	else
+		error("Invalid UTF-8 character")
+	end
+end
+
+-- returns the number of characters in a UTF-8 string
+local function utf8len (s)
+	-- argument checking
+	if type(s) ~= "string" then
+		for k,v in pairs(s) do print('"',tostring(k),'"',tostring(v),'"') end
+		error("bad argument #1 to 'utf8len' (string expected, got ".. type(s).. ")")
+	end
+
+	local pos = 1
+	local bytes = len(s)
+	local length = 0
+
+	while pos <= bytes do
+		length = length + 1
+		pos = pos + utf8charbytes(s, pos)
+	end
+
+	return length
+end
+
+-- functions identically to string.sub except that i and j are UTF-8 characters
+-- instead of bytes
+local function utf8sub (s, i, j)
+	-- argument defaults
+	j = j or -1
+
+	local pos = 1
+	local bytes = len(s)
+	local length = 0
+
+	-- only set l if i or j is negative
+	local l = (i >= 0 and j >= 0) or utf8len(s)
+	local startChar = (i >= 0) and i or l + i + 1
+	local endChar   = (j >= 0) and j or l + j + 1
+
+	-- can't have start before end!
+	if startChar > endChar then
+		return ""
+	end
+
+	-- byte offsets to pass to string.sub
+	local startByte,endByte = 1,bytes
+
+	while pos <= bytes do
+		length = length + 1
+
+		if length == startChar then
+			startByte = pos
+		end
+
+		pos = pos + utf8charbytes(s, pos)
+
+		if length == endChar then
+			endByte = pos - 1
+			break
+		end
+	end
+
+	if startChar > length then startByte = bytes+1   end
+	if endChar   < 1      then endByte   = 0         end
+
+	return sub(s, startByte, endByte)
+end
+
+--[[
+-- replace UTF-8 characters based on a mapping table
+local function utf8replace (s, mapping)
+	-- argument checking
+	if type(s) ~= "string" then
+		error("bad argument #1 to 'utf8replace' (string expected, got ".. type(s).. ")")
+	end
+	if type(mapping) ~= "table" then
+		error("bad argument #2 to 'utf8replace' (table expected, got ".. type(mapping).. ")")
+	end
+
+	local pos = 1
+	local bytes = len(s)
+	local charbytes
+	local newstr = ""
+
+	while pos <= bytes do
+		charbytes = utf8charbytes(s, pos)
+		local c = sub(s, pos, pos + charbytes - 1)
+
+		newstr = newstr .. (mapping[c] or c)
+
+		pos = pos + charbytes
+	end
+
+	return newstr
+end
+
+
+-- identical to string.upper except it knows about unicode simple case conversions
+local function utf8upper (s)
+	return utf8replace(s, utf8_lc_uc)
+end
+
+-- identical to string.lower except it knows about unicode simple case conversions
+local function utf8lower (s)
+	return utf8replace(s, utf8_uc_lc)
+end
+]]
+
+-- identical to string.reverse except that it supports UTF-8
+local function utf8reverse (s)
+	-- argument checking
+	if type(s) ~= "string" then
+		error("bad argument #1 to 'utf8reverse' (string expected, got ".. type(s).. ")")
+	end
+
+	local bytes = len(s)
+	local pos = bytes
+	local charbytes
+	local newstr = ""
+
+	while pos > 0 do
+		local c = byte(s, pos)
+		while c >= 128 and c <= 191 do
+			pos = pos - 1
+			c = byte(s, pos)
+		end
+
+		charbytes = utf8charbytes(s, pos)
+
+		newstr = newstr .. sub(s, pos, pos + charbytes - 1)
+
+		pos = pos - 1
+	end
+
+	return newstr
+end
+
+-- http://en.wikipedia.org/wiki/Utf8
+-- http://developer.coronalabs.com/code/utf-8-conversion-utility
+local function utf8char(unicode)
+	if unicode <= 0x7F then return char(unicode) end
+
+	if (unicode <= 0x7FF) then
+		local Byte0 = 0xC0 + math.floor(unicode / 0x40);
+		local Byte1 = 0x80 + (unicode % 0x40);
+		return char(Byte0, Byte1);
+	end;
+
+	if (unicode <= 0xFFFF) then
+		local Byte0 = 0xE0 +  math.floor(unicode / 0x1000);
+		local Byte1 = 0x80 + (math.floor(unicode / 0x40) % 0x40);
+		local Byte2 = 0x80 + (unicode % 0x40);
+		return char(Byte0, Byte1, Byte2);
+	end;
+
+	if (unicode <= 0x10FFFF) then
+		local code = unicode
+		local Byte3= 0x80 + (code % 0x40);
+		code       = math.floor(code / 0x40)
+		local Byte2= 0x80 + (code % 0x40);
+		code       = math.floor(code / 0x40)
+		local Byte1= 0x80 + (code % 0x40);
+		code       = math.floor(code / 0x40)
+		local Byte0= 0xF0 + code;
+
+		return char(Byte0, Byte1, Byte2, Byte3);
+	end;
+
+	error 'Unicode cannot be greater than U+10FFFF!'
+end
+
+local shift_6  = 2^6
+local shift_12 = 2^12
+local shift_18 = 2^18
+
+local utf8unicode
+utf8unicode = function(str, i, j, byte_pos)
+	i = i or 1
+	j = j or i
+
+	if i > j then return end
+
+	local ch,bytes
+
+	if byte_pos then
+		bytes = utf8charbytes(str,byte_pos)
+		ch  = sub(str,byte_pos,byte_pos-1+bytes)
+	else
+		ch,byte_pos = utf8sub(str,i,i), 0
+		bytes       = #ch
+	end
+
+	local unicode
+
+	if bytes == 1 then unicode = byte(ch) end
+	if bytes == 2 then
+		local byte0,byte1 = byte(ch,1,2)
+		local code0,code1 = byte0-0xC0,byte1-0x80
+		unicode = code0*shift_6 + code1
+	end
+	if bytes == 3 then
+		local byte0,byte1,byte2 = byte(ch,1,3)
+		local code0,code1,code2 = byte0-0xE0,byte1-0x80,byte2-0x80
+		unicode = code0*shift_12 + code1*shift_6 + code2
+	end
+	if bytes == 4 then
+		local byte0,byte1,byte2,byte3 = byte(ch,1,4)
+		local code0,code1,code2,code3 = byte0-0xF0,byte1-0x80,byte2-0x80,byte3-0x80
+		unicode = code0*shift_18 + code1*shift_12 + code2*shift_6 + code3
+	end
+
+	return unicode,utf8unicode(str, i+1, j, byte_pos+bytes)
+end
+
+-- Returns an iterator which returns the next substring and its byte interval
+local function utf8gensub(str, sub_len)
+	sub_len        = sub_len or 1
+	local byte_pos = 1
+	local length   = #str
+	return function(skip)
+		if skip then byte_pos = byte_pos + skip end
+		local char_count = 0
+		local start      = byte_pos
+		repeat
+			if byte_pos > length then return end
+			char_count  = char_count + 1
+			local bytes = utf8charbytes(str,byte_pos)
+			byte_pos    = byte_pos+bytes
+
+		until char_count == sub_len
+
+		local last  = byte_pos-1
+		local slice = sub(str,start,last)
+		return slice, start, last
+	end
+end
+
+local function binsearch(sortedTable, item, comp)
+	local head, tail = 1, #sortedTable
+	local mid = math.floor((head + tail)/2)
+	if not comp then
+		while (tail - head) > 1 do
+			if sortedTable[tonumber(mid)] > item then
+				tail = mid
+			else
+				head = mid
+			end
+			mid = math.floor((head + tail)/2)
+		end
+	end
+	if sortedTable[tonumber(head)] == item then
+		return true, tonumber(head)
+	elseif sortedTable[tonumber(tail)] == item then
+		return true, tonumber(tail)
+	else
+		return false
+	end
+end
+local function classMatchGenerator(class, plain)
+	local codes = {}
+	local ranges = {}
+	local ignore = false
+	local range = false
+	local firstletter = true
+	local unmatch = false
+
+	local it = utf8gensub(class)
+
+	local skip
+	for c, _, be in it do
+		skip = be
+		if not ignore and not plain then
+			if c == "%" then
+				ignore = true
+			elseif c == "-" then
+				table.insert(codes, utf8unicode(c))
+				range = true
+			elseif c == "^" then
+				if not firstletter then
+					error('!!!')
+				else
+					unmatch = true
+				end
+			elseif c == ']' then
+				break
+			else
+				if not range then
+					table.insert(codes, utf8unicode(c))
+				else
+					table.remove(codes) -- removing '-'
+					table.insert(ranges, {table.remove(codes), utf8unicode(c)})
+					range = false
+				end
+			end
+		elseif ignore and not plain then
+			if c == 'a' then -- %a: represents all letters. (ONLY ASCII)
+				table.insert(ranges, {65, 90}) -- A - Z
+				table.insert(ranges, {97, 122}) -- a - z
+			elseif c == 'c' then -- %c: represents all control characters.
+				table.insert(ranges, {0, 31})
+				table.insert(codes, 127)
+			elseif c == 'd' then -- %d: represents all digits.
+				table.insert(ranges, {48, 57}) -- 0 - 9
+			elseif c == 'g' then -- %g: represents all printable characters except space.
+				table.insert(ranges, {1, 8})
+				table.insert(ranges, {14, 31})
+				table.insert(ranges, {33, 132})
+				table.insert(ranges, {134, 159})
+				table.insert(ranges, {161, 5759})
+				table.insert(ranges, {5761, 8191})
+				table.insert(ranges, {8203, 8231})
+				table.insert(ranges, {8234, 8238})
+				table.insert(ranges, {8240, 8286})
+				table.insert(ranges, {8288, 12287})
+			elseif c == 'l' then -- %l: represents all lowercase letters. (ONLY ASCII)
+				table.insert(ranges, {97, 122}) -- a - z
+			elseif c == 'p' then -- %p: represents all punctuation characters. (ONLY ASCII)
+				table.insert(ranges, {33, 47})
+				table.insert(ranges, {58, 64})
+				table.insert(ranges, {91, 96})
+				table.insert(ranges, {123, 126})
+			elseif c == 's' then -- %s: represents all space characters.
+				table.insert(ranges, {9, 13})
+				table.insert(codes, 32)
+				table.insert(codes, 133)
+				table.insert(codes, 160)
+				table.insert(codes, 5760)
+				table.insert(ranges, {8192, 8202})
+				table.insert(codes, 8232)
+				table.insert(codes, 8233)
+				table.insert(codes, 8239)
+				table.insert(codes, 8287)
+				table.insert(codes, 12288)
+			elseif c == 'u' then -- %u: represents all uppercase letters. (ONLY ASCII)
+				table.insert(ranges, {65, 90}) -- A - Z
+			elseif c == 'w' then -- %w: represents all alphanumeric characters. (ONLY ASCII)
+				table.insert(ranges, {48, 57}) -- 0 - 9
+				table.insert(ranges, {65, 90}) -- A - Z
+				table.insert(ranges, {97, 122}) -- a - z
+			elseif c == 'x' then -- %x: represents all hexadecimal digits.
+				table.insert(ranges, {48, 57}) -- 0 - 9
+				table.insert(ranges, {65, 70}) -- A - F
+				table.insert(ranges, {97, 102}) -- a - f
+			else
+				if not range then
+					table.insert(codes, utf8unicode(c))
+				else
+					table.remove(codes) -- removing '-'
+					table.insert(ranges, {table.remove(codes), utf8unicode(c)})
+					range = false
+				end
+			end
+			ignore = false
+		else
+			if not range then
+				table.insert(codes, utf8unicode(c))
+			else
+				table.remove(codes) -- removing '-'
+				table.insert(ranges, {table.remove(codes), utf8unicode(c)})
+				range = false
+			end
+			ignore = false
+		end
+
+		firstletter = false
+	end
+
+	table.sort(codes)
+
+	local function inRanges(charCode)
+		for _,r in ipairs(ranges) do
+			if r[1] <= charCode and charCode <= r[2] then
+				return true
+			end
+		end
+		return false
+	end
+	if not unmatch then
+		return function(charCode)
+			return binsearch(codes, charCode) or inRanges(charCode)
+		end, skip
+	else
+		return function(charCode)
+			return charCode ~= -1 and not (binsearch(codes, charCode) or inRanges(charCode))
+		end, skip
+	end
+end
+
+--[[
+-- utf8sub with extra argument, and extra result value
+local function utf8subWithBytes (s, i, j, sb)
+	-- argument defaults
+	j = j or -1
+
+	local pos = sb or 1
+	local bytes = len(s)
+	local length = 0
+
+	-- only set l if i or j is negative
+	local l = (i >= 0 and j >= 0) or utf8len(s)
+	local startChar = (i >= 0) and i or l + i + 1
+	local endChar   = (j >= 0) and j or l + j + 1
+
+	-- can't have start before end!
+	if startChar > endChar then
+		return ""
+	end
+
+	-- byte offsets to pass to string.sub
+	local startByte,endByte = 1,bytes
+
+	while pos <= bytes do
+		length = length + 1
+
+		if length == startChar then
+			startByte = pos
+		end
+
+		pos = pos + utf8charbytes(s, pos)
+
+		if length == endChar then
+			endByte = pos - 1
+			break
+		end
+	end
+
+	if startChar > length then startByte = bytes+1   end
+	if endChar   < 1      then endByte   = 0         end
+
+	return sub(s, startByte, endByte), endByte + 1
+end
+]]
+
+local cache = setmetatable({},{
+	__mode = 'kv'
+})
+local cachePlain = setmetatable({},{
+	__mode = 'kv'
+})
+local function matcherGenerator(regex, plain)
+	local matcher = {
+		functions = {},
+		captures = {}
+	}
+	if not plain then
+		cache[regex] =  matcher
+	else
+		cachePlain[regex] = matcher
+	end
+	local function simple(func)
+		return function(cC)
+			if func(cC) then
+				matcher:nextFunc()
+				matcher:nextStr()
+			else
+				matcher:reset()
+			end
+		end
+	end
+	local function star(func)
+		return function(cC)
+			if func(cC) then
+				matcher:fullResetOnNextFunc()
+				matcher:nextStr()
+			else
+				matcher:nextFunc()
+			end
+		end
+	end
+	local function minus(func)
+		return function(cC)
+			if func(cC) then
+				matcher:fullResetOnNextStr()
+			end
+			matcher:nextFunc()
+		end
+	end
+	local function question(func)
+		return function(cC)
+			if func(cC) then
+				matcher:fullResetOnNextFunc()
+				matcher:nextStr()
+			end
+			matcher:nextFunc()
+		end
+	end
+
+	local function capture(id)
+		return function(_)
+			local l = matcher.captures[id][2] - matcher.captures[id][1]
+			local captured = utf8sub(matcher.string, matcher.captures[id][1], matcher.captures[id][2])
+			local check = utf8sub(matcher.string, matcher.str, matcher.str + l)
+			if captured == check then
+				for _ = 0, l do
+					matcher:nextStr()
+				end
+				matcher:nextFunc()
+			else
+				matcher:reset()
+			end
+		end
+	end
+	local function captureStart(id)
+		return function(_)
+			matcher.captures[id][1] = matcher.str
+			matcher:nextFunc()
+		end
+	end
+	local function captureStop(id)
+		return function(_)
+			matcher.captures[id][2] = matcher.str - 1
+			matcher:nextFunc()
+		end
+	end
+
+	local function balancer(str)
+		local sum = 0
+		local bc, ec = utf8sub(str, 1, 1), utf8sub(str, 2, 2)
+		local skip = len(bc) + len(ec)
+		bc, ec = utf8unicode(bc), utf8unicode(ec)
+		return function(cC)
+			if cC == ec and sum > 0 then
+				sum = sum - 1
+				if sum == 0 then
+					matcher:nextFunc()
+				end
+				matcher:nextStr()
+			elseif cC == bc then
+				sum = sum + 1
+				matcher:nextStr()
+			else
+				if sum == 0 or cC == -1 then
+					sum = 0
+					matcher:reset()
+				else
+					matcher:nextStr()
+				end
+			end
+		end, skip
+	end
+
+	matcher.functions[1] = function(_)
+		matcher:fullResetOnNextStr()
+		matcher.seqStart = matcher.str
+		matcher:nextFunc()
+		if (matcher.str > matcher.startStr and matcher.fromStart) or matcher.str >= matcher.stringLen then
+			matcher.stop = true
+			matcher.seqStart = nil
+		end
+	end
+
+	local lastFunc
+	local ignore = false
+	local skip = nil
+	local it = (function()
+		local gen = utf8gensub(regex)
+		return function()
+			return gen(skip)
+		end
+	end)()
+	local cs = {}
+	for c, bs, be in it do
+		skip = nil
+		if plain then
+			table.insert(matcher.functions, simple(classMatchGenerator(c, plain)))
+		else
+			if ignore then
+				if find('123456789', c, 1, true) then
+					if lastFunc then
+						table.insert(matcher.functions, simple(lastFunc))
+						lastFunc = nil
+					end
+					table.insert(matcher.functions, capture(tonumber(c)))
+				elseif c == 'b' then
+					if lastFunc then
+						table.insert(matcher.functions, simple(lastFunc))
+						lastFunc = nil
+					end
+					local b
+					b, skip = balancer(sub(regex, be + 1, be + 9))
+					table.insert(matcher.functions, b)
+				else
+					lastFunc = classMatchGenerator('%' .. c)
+				end
+				ignore = false
+			else
+				if c == '*' then
+					if lastFunc then
+						table.insert(matcher.functions, star(lastFunc))
+						lastFunc = nil
+					else
+						error('invalid regex after ' .. sub(regex, 1, bs))
+					end
+				elseif c == '+' then
+					if lastFunc then
+						table.insert(matcher.functions, simple(lastFunc))
+						table.insert(matcher.functions, star(lastFunc))
+						lastFunc = nil
+					else
+						error('invalid regex after ' .. sub(regex, 1, bs))
+					end
+				elseif c == '-' then
+					if lastFunc then
+						table.insert(matcher.functions, minus(lastFunc))
+						lastFunc = nil
+					else
+						error('invalid regex after ' .. sub(regex, 1, bs))
+					end
+				elseif c == '?' then
+					if lastFunc then
+						table.insert(matcher.functions, question(lastFunc))
+						lastFunc = nil
+					else
+						error('invalid regex after ' .. sub(regex, 1, bs))
+					end
+				elseif c == '^' then
+					if bs == 1 then
+						matcher.fromStart = true
+					else
+						error('invalid regex after ' .. sub(regex, 1, bs))
+					end
+				elseif c == '$' then
+					if be == len(regex) then
+						matcher.toEnd = true
+					else
+						error('invalid regex after ' .. sub(regex, 1, bs))
+					end
+				elseif c == '[' then
+					if lastFunc then
+						table.insert(matcher.functions, simple(lastFunc))
+					end
+					lastFunc, skip = classMatchGenerator(sub(regex, be + 1))
+				elseif c == '(' then
+					if lastFunc then
+						table.insert(matcher.functions, simple(lastFunc))
+						lastFunc = nil
+					end
+					table.insert(matcher.captures, {})
+					table.insert(cs, #matcher.captures)
+					table.insert(matcher.functions, captureStart(cs[#cs]))
+					if sub(regex, be + 1, be + 1) == ')' then matcher.captures[#matcher.captures].empty = true end
+				elseif c == ')' then
+					if lastFunc then
+						table.insert(matcher.functions, simple(lastFunc))
+						lastFunc = nil
+					end
+					local cap = table.remove(cs)
+					if not cap then
+						error('invalid capture: "(" missing')
+					end
+					table.insert(matcher.functions, captureStop(cap))
+				elseif c == '.' then
+					if lastFunc then
+						table.insert(matcher.functions, simple(lastFunc))
+					end
+					lastFunc = function(cC) return cC ~= -1 end
+				elseif c == '%' then
+					ignore = true
+				else
+					if lastFunc then
+						table.insert(matcher.functions, simple(lastFunc))
+					end
+					lastFunc = classMatchGenerator(c)
+				end
+			end
+		end
+	end
+	if #cs > 0 then
+		error('invalid capture: ")" missing')
+	end
+	if lastFunc then
+		table.insert(matcher.functions, simple(lastFunc))
+	end
+
+	table.insert(matcher.functions, function()
+		if matcher.toEnd and matcher.str ~= matcher.stringLen then
+			matcher:reset()
+		else
+			matcher.stop = true
+		end
+	end)
+
+	matcher.nextFunc = function(self)
+		self.func = self.func + 1
+	end
+	matcher.nextStr = function(self)
+		self.str = self.str + 1
+	end
+	matcher.strReset = function(self)
+		local oldReset = self.reset
+		local str = self.str
+		self.reset = function(s)
+			s.str = str
+			s.reset = oldReset
+		end
+	end
+	matcher.fullResetOnNextFunc = function(self)
+		local oldReset = self.reset
+		local func = self.func +1
+		local str = self.str
+		self.reset = function(s)
+			s.func = func
+			s.str = str
+			s.reset = oldReset
+		end
+	end
+	matcher.fullResetOnNextStr = function(self)
+		local oldReset = self.reset
+		local str = self.str + 1
+		local func = self.func
+		self.reset = function(s)
+			s.func = func
+			s.str = str
+			s.reset = oldReset
+		end
+	end
+
+	matcher.process = function(self, str, start)
+
+		self.func = 1
+		start = start or 1
+		self.startStr = (start >= 0) and start or utf8len(str) + start + 1
+		self.seqStart = self.startStr
+		self.str = self.startStr
+		self.stringLen = utf8len(str) + 1
+		self.string = str
+		self.stop = false
+
+		self.reset = function(s)
+			s.func = 1
+		end
+
+		-- local lastPos = self.str
+		-- local lastByte
+		local ch
+		while not self.stop do
+			if self.str < self.stringLen then
+				--[[ if lastPos < self.str then
+					print('last byte', lastByte)
+					ch, lastByte = utf8subWithBytes(str, 1, self.str - lastPos - 1, lastByte)
+					ch, lastByte = utf8subWithBytes(str, 1, 1, lastByte)
+					lastByte = lastByte - 1
+				else
+					ch, lastByte = utf8subWithBytes(str, self.str, self.str)
+				end
+				lastPos = self.str ]]
+				ch = utf8sub(str, self.str,self.str)
+				--print('char', ch, utf8unicode(ch))
+				self.functions[self.func](utf8unicode(ch))
+			else
+				self.functions[self.func](-1)
+			end
+		end
+
+		if self.seqStart then
+			local captures = {}
+			for _,pair in pairs(self.captures) do
+				if pair.empty then
+					table.insert(captures, pair[1])
+				else
+					table.insert(captures, utf8sub(str, pair[1], pair[2]))
+				end
+			end
+			return self.seqStart, self.str - 1, unpack(captures)
+		end
+	end
+
+	return matcher
+end
+
+-- string.find
+local function utf8find(str, regex, init, plain)
+	local matcher = cache[regex] or matcherGenerator(regex, plain)
+	return matcher:process(str, init)
+end
+
+-- string.match
+local function utf8match(str, regex, init)
+	init = init or 1
+	local found = {utf8find(str, regex, init)}
+	if found[1] then
+		if found[3] then
+			return unpack(found, 3)
+		end
+		return utf8sub(str, found[1], found[2])
+	end
+end
+
+-- string.gmatch
+local function utf8gmatch(str, regex, all)
+	regex = (utf8sub(regex,1,1) ~= '^') and regex or '%' .. regex
+	local lastChar = 1
+	return function()
+		local found = {utf8find(str, regex, lastChar)}
+		if found[1] then
+			lastChar = found[2] + 1
+			if found[all and 1 or 3] then
+				return unpack(found, all and 1 or 3)
+			end
+			return utf8sub(str, found[1], found[2])
+		end
+	end
+end
+
+local function replace(repl, args)
+	local ret = ''
+	if type(repl) == 'string' then
+		local ignore = false
+		local num
+		for c in utf8gensub(repl) do
+			if not ignore then
+				if c == '%' then
+					ignore = true
+				else
+					ret = ret .. c
+				end
+			else
+				num = tonumber(c)
+				if num then
+					ret = ret .. args[num]
+				else
+					ret = ret .. c
+				end
+				ignore = false
+			end
+		end
+	elseif type(repl) == 'table' then
+		ret = repl[args[1] or args[0]] or ''
+	elseif type(repl) == 'function' then
+		if #args > 0 then
+			ret = repl(unpack(args, 1)) or ''
+		else
+			ret = repl(args[0]) or ''
+		end
+	end
+	return ret
+end
+-- string.gsub
+local function utf8gsub(str, regex, repl, limit)
+	limit = limit or -1
+	local ret = ''
+	local prevEnd = 1
+	local it = utf8gmatch(str, regex, true)
+	local found = {it()}
+	local n = 0
+	while #found > 0 and limit ~= n do
+		local args = {[0] = utf8sub(str, found[1], found[2]), unpack(found, 3)}
+		ret = ret .. utf8sub(str, prevEnd, found[1] - 1)
+		.. replace(repl, args)
+		prevEnd = found[2] + 1
+		n = n + 1
+		found = {it()}
+	end
+	return ret .. utf8sub(str, prevEnd), n
+end
+
+local utf8 = {}
+utf8.len = utf8len
+utf8.sub = utf8sub
+utf8.reverse = utf8reverse
+utf8.char = utf8char
+utf8.unicode = utf8unicode
+utf8.gensub = utf8gensub
+utf8.byte = utf8unicode
+utf8.find    = utf8find
+utf8.match   = utf8match
+utf8.gmatch  = utf8gmatch
+utf8.gsub    = utf8gsub
+utf8.dump    = dump
+utf8.format = format
+utf8.lower = lower
+utf8.upper = upper
+utf8.rep     = rep
+return utf8