Byterun:
A (C)Python interpreter in Python
Allison Kaptur
!
github.com/akaptur
akaptur.github.io
@akaptur
Byterun
Ned Batchelder
!
Based on
# pyvm2 by Paul Swartz (z3p)
from http://www.twistedmatrix.com/users/z3p/
Why would you do such a thing
>>> if a or b:
... do_stuff()
Some things we can do
out = ""
for i in range(5):
out = out + str(i)
print(out)
Some things we can do
def fn(a, b=17, c="Hello", d=[]):
d.append(99)
print(a, b, c, d)
!
fn(1)
fn(2, 3)
fn(3, c="Bye")
fn(4, d=["What?"])
fn(5, "b", "c")
Some things we can do
def verbose(func):
def _wrapper(*args, **kwargs):
return func(*args, **kwargs)
return _wrapper
!
@verbose
def add(x, y):
return x+y
!
add(7, 3)
Some things we can do
try:
raise ValueError("oops")
except ValueError as e:
print("Caught: %s" % e)
print("All done")
Some things we can do
class NullContext(object):
def __enter__(self):
l.append('i')
return self
!
def __exit__(self, exc_type, exc_val, exc_tb):
l.append('o')
return False
!
l = []
for i in range(3):
with NullContext():
l.append('w')
if i % 2:
break
l.append('z')
l.append('e')
!
l.append('r')
s = ''.join(l)
print("Look: %r" % s)
assert s == "iwzoeiwor"
Some things we can do
g = (x*x for x in range(3))
print(list(g))
A problem
g = (x*x for x in range(5))
h = (y+1 for y in g)
print(list(h))
The Python virtual machine:
!
A bytecode interpreter
Bytecode:
the internal representation of a python
program in the interpreter
Bytecode: it’s bytes!
>>> def mod(a, b):
... ans = a % b
... return ans
Bytecode: it’s bytes!
>>> def mod(a, b):
... ans = a % b
... return ans
>>> mod.func_code.co_code
Function Code
object
Bytecode
Bytecode: it’s bytes!
>>> def mod(a, b):
... ans = a % b
... return ans
>>> mod.func_code.co_code
'|\x00\x00|\x01\x00\x16}\x02\x00|\x02\x00S'
Bytecode: it’s bytes!
>>> def mod(a, b):
... ans = a % b
... return ans
>>> mod.func_code.co_code
'|\x00\x00|\x01\x00\x16}\x02\x00|\x02\x00S'
>>> [ord(b) for b in mod.func_code.co_code]
[124, 0, 0, 124, 1, 0, 22, 125, 2, 0, 124, 2, 0, 83]
dis, a bytecode disassembler
>>> import dis
>>> dis.dis(mod)
2 0 LOAD_FAST 0 (a)
3 LOAD_FAST 1 (b)
6 BINARY_MODULO
7 STORE_FAST 2 (ans)
!
3 10 LOAD_FAST 2 (ans)
13 RETURN_VALUE
dis, a bytecode disassembler
>>> import dis
>>> dis.dis(mod)
2 0 LOAD_FAST 0 (a)
3 LOAD_FAST 1 (b)
6 BINARY_MODULO
7 STORE_FAST 2 (ans)
!
3 10 LOAD_FAST 2 (ans)
13 RETURN_VALUE
Line
Number
Index in
bytecode
Instruction
name, for
humans
More bytes, the
argument to each
instruction
Hint about
arguments
whatever
some other thing
something
whatever
some other thing
something
a
b
whatever
some other thing
something
ans
Before
After
BINARY_MODULO
After
LOAD_FAST
Data stack on a frame
def foo():
x = 1
def bar(y):
z = y + 2 # <--- (3)
return z
return bar(x) # <--- (2)
foo() # <--- (1)
!
c ---------------------
a | bar Frame | -> blocks: []
l | (newest) | -> data: [1, 2]
l ---------------------
| foo Frame | -> blocks: []
s | | -> data: [<foo.<lcl>.bar>, 1]
t ---------------------
a | main (module) Frame | -> blocks: []
c | (oldest) | -> data: [<foo>]
k ---------------------
dis, a bytecode disassembler
>>> import dis
>>> dis.dis(mod)
2 0 LOAD_FAST 0 (a)
3 LOAD_FAST 1 (b)
6 BINARY_MODULO
7 STORE_FAST 2 (ans)
!
3 10 LOAD_FAST 2 (ans)
13 RETURN_VALUE
} /*switch*/
/* Main switch on opcode
*/
READ_TIMESTAMP(inst0);
!
switch (opcode) {
#ifdef CASE_TOO_BIG
default: switch (opcode) {
#endif
/* Turn this on if your compiler chokes on the big switch: */
/* #define CASE_TOO_BIG 1 */
Back to that bytecode
!
>>> dis.dis(mod)
2 0 LOAD_FAST 0 (a)
3 LOAD_FAST 1 (b)
6 BINARY_MODULO
7 STORE_FAST 2 (ans)
!
3 10 LOAD_FAST 2 (ans)
13 RETURN_VALUE
case LOAD_FAST:
x = GETLOCAL(oparg);
if (x != NULL) {
Py_INCREF(x);
PUSH(x);
goto fast_next_opcode;
}
format_exc_check_arg(PyExc_UnboundLocalError,
UNBOUNDLOCAL_ERROR_MSG,
PyTuple_GetItem(co->co_varnames, oparg));
break;
case BINARY_MODULO:
w = POP();
v = TOP();
if (PyString_CheckExact(v))
x = PyString_Format(v, w);
else
x = PyNumber_Remainder(v, w);
Py_DECREF(v);
Py_DECREF(w);
SET_TOP(x);
if (x != NULL) continue;
break;
It’s “dynamic”
>>> def mod(a, b):
... ans = a % b
... return ans
>>> mod(15, 4)
3
“Dynamic”
>>> def mod(a, b):
... ans = a % b
... return ans
>>> mod(15, 4)
3
>>> mod("%s %s", ("NYC", "Python"))
“Dynamic”
>>> def mod(a, b):
... ans = a % b
... return ans
>>> mod(15, 4)
3
>>> mod("%s %s", ("NYC", "Python"))
NYC Python
“Dynamic”
>>> def mod(a, b):
... ans = a % b
... return ans
>>> mod(15, 4)
3
>>> mod("%s %s", ("NYC", "Python"))
NYC Python
>>> print "%s %s" % ("NYC", "Python")
NYC Python
case BINARY_MODULO:
w = POP();
v = TOP();
if (PyString_CheckExact(v))
x = PyString_Format(v, w);
else
x = PyNumber_Remainder(v, w);
Py_DECREF(v);
Py_DECREF(w);
SET_TOP(x);
if (x != NULL) continue;
break;
>>> class Surprising(object):
... def __mod__(self, other):
... print "Surprise!"
!
>>> s = Surprising()
>>> t = Surprising()
>>> s % t
Surprise!
“In the general absence of type information, almost
every instruction must be treated as
INVOKE_ARBITRARY_METHOD.”
!
- Russell Power and Alex Rubinsteyn, “How Fast Can
We Make Interpreted Python?”
Back to our problem
g = (x*x for x in range(5))
h = (y+1 for y in g)
print(list(h))
def foo():
x = 1
def bar(y):
z = y + 2 # <--- (3)
return z
return bar(x) # <--- (2)
foo() # <--- (1)
!
c ---------------------
a | bar Frame | -> blocks: []
l | (newest) | -> data: [1, 2]
l ---------------------
| foo Frame | -> blocks: []
s | | -> data: [<foo.<lcl>.bar>, 1]
t ---------------------
a | main (module) Frame | -> blocks: []
c | (oldest) | -> data: [<foo>]
k ---------------------
def foo():
x = 1
def bar(y):
z = y + 2 # <--- (3)
return z
return bar(x) # <--- (2)
foo() # <--- (1)
!
!
!
l ---------------------
| foo Frame | -> blocks: []
s | | -> data: [3]
t ---------------------
a | main (module) Frame | -> blocks: []
c | (oldest) | -> data: [<foo>]
k ---------------------
def foo():
x = 1
def bar(y):
z = y + 2 # <--- (3)
return z
return bar(x) # <--- (2)
foo() # <--- (1)
!
!
s
t ---------------------
a | main (module) Frame | -> blocks: []
c | (oldest) | -> data: [3]
k ---------------------
Back to our problem
g = (x*x for x in range(5))
h = (y+1 for y in g)
print(list(h))
More
Great blogs
http://tech.blog.aknin.name/category/my-projects/pythons-innards/ by @aknin
http://eli.thegreenplace.net/ by Eli Bendersky
!
Contribute! Find bugs!
https://github.com/nedbat/byterun
!
Apply to Hacker School!
www.hackerschool.com/apply

Byterun, a Python bytecode interpreter - Allison Kaptur at NYCPython

  • 1.
    Byterun: A (C)Python interpreter in Python Allison Kaptur ! github.com/akaptur akaptur.github.io @akaptur
  • 2.
    Byterun Ned Batchelder ! Based on #pyvm2 by Paul Swartz (z3p) from http://www.twistedmatrix.com/users/z3p/
  • 3.
    Why would you do such a thing >>> if a or b: ... do_stuff()
  • 4.
    Some things wecan do out = "" for i in range(5): out = out + str(i) print(out)
  • 5.
    Some things wecan do def fn(a, b=17, c="Hello", d=[]): d.append(99) print(a, b, c, d) ! fn(1) fn(2, 3) fn(3, c="Bye") fn(4, d=["What?"]) fn(5, "b", "c")
  • 6.
    Some things wecan do def verbose(func): def _wrapper(*args, **kwargs): return func(*args, **kwargs) return _wrapper ! @verbose def add(x, y): return x+y ! add(7, 3)
  • 7.
    Some things wecan do try: raise ValueError("oops") except ValueError as e: print("Caught: %s" % e) print("All done")
  • 8.
    Some things wecan do class NullContext(object): def __enter__(self): l.append('i') return self ! def __exit__(self, exc_type, exc_val, exc_tb): l.append('o') return False ! l = [] for i in range(3): with NullContext(): l.append('w') if i % 2: break l.append('z') l.append('e') ! l.append('r') s = ''.join(l) print("Look: %r" % s) assert s == "iwzoeiwor"
  • 9.
    Some things wecan do g = (x*x for x in range(3)) print(list(g))
  • 10.
    A problem g =(x*x for x in range(5)) h = (y+1 for y in g) print(list(h))
  • 11.
    The Python virtual machine: ! A bytecode interpreter
  • 12.
    Bytecode: the internal representation of a python program in the interpreter
  • 13.
    Bytecode: it’s bytes! >>>def mod(a, b): ... ans = a % b ... return ans
  • 14.
    Bytecode: it’s bytes! >>>def mod(a, b): ... ans = a % b ... return ans >>> mod.func_code.co_code Function Code object Bytecode
  • 15.
    Bytecode: it’s bytes! >>> def mod(a, b): ... ans = a % b ... return ans >>> mod.func_code.co_code '|\x00\x00|\x01\x00\x16}\x02\x00|\x02\x00S'
  • 16.
    Bytecode: it’s bytes! >>> def mod(a, b): ... ans = a % b ... return ans >>> mod.func_code.co_code '|\x00\x00|\x01\x00\x16}\x02\x00|\x02\x00S' >>> [ord(b) for b in mod.func_code.co_code] [124, 0, 0, 124, 1, 0, 22, 125, 2, 0, 124, 2, 0, 83]
  • 17.
    dis, a bytecodedisassembler >>> import dis >>> dis.dis(mod) 2 0 LOAD_FAST 0 (a) 3 LOAD_FAST 1 (b) 6 BINARY_MODULO 7 STORE_FAST 2 (ans) ! 3 10 LOAD_FAST 2 (ans) 13 RETURN_VALUE
  • 18.
    dis, a bytecodedisassembler >>> import dis >>> dis.dis(mod) 2 0 LOAD_FAST 0 (a) 3 LOAD_FAST 1 (b) 6 BINARY_MODULO 7 STORE_FAST 2 (ans) ! 3 10 LOAD_FAST 2 (ans) 13 RETURN_VALUE Line Number Index in bytecode Instruction name, for humans More bytes, the argument to each instruction Hint about arguments
  • 19.
    whatever some other thing something whatever someother thing something a b whatever some other thing something ans Before After BINARY_MODULO After LOAD_FAST Data stack on a frame
  • 20.
    def foo(): x = 1 def bar(y): z = y + 2 # <--- (3) return z return bar(x) # <--- (2) foo() # <--- (1) ! c --------------------- a | bar Frame | -> blocks: [] l | (newest) | -> data: [1, 2] l --------------------- | foo Frame | -> blocks: [] s | | -> data: [<foo.<lcl>.bar>, 1] t --------------------- a | main (module) Frame | -> blocks: [] c | (oldest) | -> data: [<foo>] k ---------------------
  • 21.
    dis, a bytecodedisassembler >>> import dis >>> dis.dis(mod) 2 0 LOAD_FAST 0 (a) 3 LOAD_FAST 1 (b) 6 BINARY_MODULO 7 STORE_FAST 2 (ans) ! 3 10 LOAD_FAST 2 (ans) 13 RETURN_VALUE
  • 23.
    } /*switch*/ /* Mainswitch on opcode */ READ_TIMESTAMP(inst0); ! switch (opcode) {
  • 24.
    #ifdef CASE_TOO_BIG default: switch(opcode) { #endif /* Turn this on if your compiler chokes on the big switch: */ /* #define CASE_TOO_BIG 1 */
  • 25.
    Back to thatbytecode ! >>> dis.dis(mod) 2 0 LOAD_FAST 0 (a) 3 LOAD_FAST 1 (b) 6 BINARY_MODULO 7 STORE_FAST 2 (ans) ! 3 10 LOAD_FAST 2 (ans) 13 RETURN_VALUE
  • 26.
    case LOAD_FAST: x =GETLOCAL(oparg); if (x != NULL) { Py_INCREF(x); PUSH(x); goto fast_next_opcode; } format_exc_check_arg(PyExc_UnboundLocalError, UNBOUNDLOCAL_ERROR_MSG, PyTuple_GetItem(co->co_varnames, oparg)); break;
  • 27.
    case BINARY_MODULO: w =POP(); v = TOP(); if (PyString_CheckExact(v)) x = PyString_Format(v, w); else x = PyNumber_Remainder(v, w); Py_DECREF(v); Py_DECREF(w); SET_TOP(x); if (x != NULL) continue; break;
  • 28.
    It’s “dynamic” >>> defmod(a, b): ... ans = a % b ... return ans >>> mod(15, 4) 3
  • 29.
    “Dynamic” >>> def mod(a, b): ... ans = a % b ... return ans >>> mod(15, 4) 3 >>> mod("%s %s", ("NYC", "Python"))
  • 30.
    “Dynamic” >>> def mod(a,b): ... ans = a % b ... return ans >>> mod(15, 4) 3 >>> mod(“%s %s”, (“NYC”, “Python”)) NYC Python
  • 31.
    “Dynamic” >>> def mod(a,b): ... ans = a % b ... return ans >>> mod(15, 4) 3 >>> mod(“%s %s”, (“NYC”, “Python”)) NYC Python >>> print “%s %s” % (“NYC”, “Python”) NYC Python
  • 32.
    case BINARY_MODULO: w =POP(); v = TOP(); if (PyString_CheckExact(v)) x = PyString_Format(v, w); else x = PyNumber_Remainder(v, w); Py_DECREF(v); Py_DECREF(w); SET_TOP(x); if (x != NULL) continue; break;
  • 33.
    >>> class Surprising(object): ... def __mod__(self, other): ... print "Surprise!" ! >>> s = Surprising() >>> t = Surprising() >>> s % t Surprise!
  • 34.
    “In the general absence of type information, almost every instruction must be treated as INVOKE_ARBITRARY_METHOD.” ! - Russell Power and Alex Rubinsteyn, “How Fast Can We Make Interpreted Python?”
  • 35.
    Back to our problem g = (x*x for x in range(5)) h = (y+1 for y in g) print(list(h))
  • 36.
    def foo(): x = 1 def bar(y): z = y + 2 # <--- (3) return z return bar(x) # <--- (2) foo() # <--- (1) ! c --------------------- a | bar Frame | -> blocks: [] l | (newest) | -> data: [1, 2] l --------------------- | foo Frame | -> blocks: [] s | | -> data: [<foo.<lcl>.bar>, 1] t --------------------- a | main (module) Frame | -> blocks: [] c | (oldest) | -> data: [<foo>] k ---------------------
  • 37.
    def foo(): x =1 def bar(y): z = y + 2 # <--- (3) return z return bar(x) # <--- (2) foo() # <--- (1) ! ! ! l --------------------- | foo Frame | -> blocks: [] s | | -> data: [3] t --------------------- a | main (module) Frame | -> blocks: [] c | (oldest) | -> data: [<foo>] k ---------------------
  • 38.
    def foo(): x =1 def bar(y): z = y + 2 # <--- (3) return z return bar(x) # <--- (2) foo() # <--- (1) ! ! s t --------------------- a | main (module) Frame | -> blocks: [] c | (oldest) | -> data: [3] k ---------------------
  • 39.
    Back to our problem g = (x*x for x in range(5)) h = (y+1 for y in g) print(list(h))
  • 40.
    More Great blogs http://tech.blog.aknin.name/category/my-projects/pythons-innards/ by @aknin http://eli.thegreenplace.net/ by Eli Bendersky ! Contribute! Find bugs! https://github.com/nedbat/byterun ! Apply to Hacker School! www.hackerschool.com/apply