Source code for mmappickle.picklers.base

import pickle
import struct
import pickletools
import weakref

from ..utils import *


class BasePickler:
    """Common base of all picklers.

    Picklers are attempted in decreasing ``priority`` order.
    """

    priority = 0

    def __init__(self, parent_object):
        """Remember ``parent_object`` through a weak reference only."""
        self._parent_object = weakref.ref(parent_object)

    @property
    def _file(self):
        """The ``_file`` attribute of the (weakly referenced) parent object."""
        parent = self._parent_object()
        return parent._file

    @save_file_position
    def is_valid(self, offset, length):
        """Tell whether the object starting at ``offset`` in the file is valid.

        The base implementation always answers ``False``.
        The file position is kept.
        """
        return False

    def is_picklable(self, obj):
        """Tell whether ``obj`` can be pickled with this pickler.

        The base implementation always answers ``False``.
        """
        return False

    @save_file_position
    def read(self, offset, length):
        """Return the unpickled object read from ``offset`` and the length read.

        The file position is kept. Must be provided by subclasses.
        """
        raise NotImplementedError("Should be subclassed")

    @save_file_position
    def write(self, obj, offset, memo_start_idx=0):
        """Write the pickled ``obj`` to the file stream at ``offset``.

        The file position is kept. Returns a tuple
        ``(number of bytes, last memo index)``. Must be provided by subclasses.
        """
        raise NotImplementedError("Should be subclassed")

    def _pickle_load_fix(self, p):
        """Unpickle the bare opcode string ``p`` and return the object.

        ``p`` is expected without header and terminator: a protocol-4 PROTO
        opcode is put in front of it and a STOP opcode after it before loading.
        """
        # NOTE: pickle.loads must only ever be fed trusted data.
        header = pickle.PROTO + struct.pack('<B', 4)
        return pickle.loads(header + p + pickle.STOP)

    def _pickle_dump_fix(self, obj, memo_start_idx=0):
        """Pickle ``obj`` (protocol 4) and rewrite the resulting opcode string.

        MEMOIZE opcodes are turned into PUT, PUT/MEMOIZE opcodes whose memo
        entry is never fetched are removed, the remaining memo indices are
        renumbered starting at ``memo_start_idx`` (GET opcodes are fixed
        accordingly), and the PROTO, FRAME and STOP opcodes are dropped.

        Returns a tuple ``(pickle bytes, end memo index)``.
        """
        def memo_opcode(short_op, long_op, index):
            # One-byte argument while the index fits, four-byte otherwise.
            if index < 256:
                return short_op + struct.pack('<B', index)
            return long_op + struct.pack('<I', index)

        raw = pickle.dumps(obj, 4)

        parsed = list(pickletools.genops(raw))
        # An opcode ends where the following one starts; the last one ends
        # with the pickle string itself.
        stops = [following[2] for following in parsed[1:]]
        stops.append(len(raw))

        # Kinds of steps recorded by the first pass.
        kind_copy, kind_put, kind_get = range(3)

        put_ids = set()  # memo ids defined by the original pickle
        remap = {}       # original memo id -> new memo id (fetched ids only)
        steps = []

        # First pass: classify every opcode and find out which ids are fetched.
        for (opcode, arg, start), stop in zip(parsed, stops):
            name = opcode.name
            if name in ('FRAME', 'STOP'):
                # Dropped from the output.
                continue
            if name == 'PROTO':
                # Dropped as well, but it has to announce protocol 4.
                assert arg == 4, "Pickle version should be 4"
                continue
            if 'PUT' in name:
                put_ids.add(arg)
                steps.append((kind_put, arg, None))
            elif name == 'MEMOIZE':
                # MEMOIZE stores under the next free memo id.
                implicit_id = len(put_ids)
                put_ids.add(implicit_id)
                steps.append((kind_put, implicit_id, None))
            elif 'GET' in name:
                remap[arg] = None
                steps.append((kind_get, arg, None))
            else:
                # Anything else is copied verbatim from the original string.
                steps.append((kind_copy, start, stop))

        # Second pass: emit the rewritten opcode string.
        chunks = []
        next_idx = memo_start_idx
        for kind, first, second in steps:
            if kind == kind_put:
                if first not in remap:
                    # This memo entry is never fetched: no need to store it.
                    continue
                remap[first] = next_idx
                chunks.append(
                    memo_opcode(pickle.BINPUT, pickle.LONG_BINPUT, next_idx))
                next_idx += 1
            elif kind == kind_get:
                chunks.append(
                    memo_opcode(pickle.BINGET, pickle.LONG_BINGET, remap[first]))
            else:
                chunks.append(raw[first:second])

        return b''.join(chunks), next_idx
class GenericPickler(BasePickler):
    """Catch-all pickler: accepts every object and every stored entry.

    Its priority (-100) is below the ``BasePickler`` default of 0.
    """

    @property
    def priority(self):
        """Priority of this pickler (-100)."""
        return -100

    @save_file_position
    def is_valid(self, offset, length):
        """Always ``True``: this pickler is a catch-all."""
        return True

    def is_picklable(self, obj):
        """Always ``True``: this pickler is a catch-all."""
        return True

    @save_file_position
    def read(self, offset, length):
        """Unpickle ``length`` bytes found at ``offset``.

        Returns a tuple ``(object, length)``.
        """
        stream = self._file
        stream.seek(offset, io.SEEK_SET)
        payload = stream.read(length)
        return self._pickle_load_fix(payload), length

    @save_file_position
    def write(self, obj, offset, memo_start_idx=0):
        """Pickle ``obj`` and write it at ``offset``.

        Returns a tuple ``(value returned by the file's write(), end memo
        index)``.
        """
        stream = self._file
        stream.seek(offset, io.SEEK_SET)
        pickled, end_memo_idx = self._pickle_dump_fix(obj, memo_start_idx)
        return stream.write(pickled), end_memo_idx