okay fine

This commit is contained in:
pacnpal
2024-11-03 17:47:26 +00:00
parent 01c6004a79
commit 27eb239e97
10020 changed files with 1935769 additions and 2364 deletions

View File

@@ -0,0 +1,6 @@
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.
"""
Twisted Persisted: Utilities for managing persistence.
"""

View File

@@ -0,0 +1,150 @@
"""
FIXME:https://github.com/twisted/twisted/issues/3843
This can be removed once t.persisted.aot is removed.
New code should not make use of this.
Token constants.
vendored from https://github.[AWS-SECRET-REMOVED]41ca6f1647d83005c68199aa/Lib/token.py
Licence: https://docs.python.org/3/license.html
"""
# Auto-generated by Tools/scripts/generate_token.py
__all__ = ["tok_name", "ISTERMINAL", "ISNONTERMINAL", "ISEOF"]

# Terminal token type codes, mirroring CPython's token numbering.
# Do not renumber: tokenize.py and EXACT_TOKEN_TYPES rely on these values.
ENDMARKER = 0
NAME = 1
NUMBER = 2
STRING = 3
NEWLINE = 4
INDENT = 5
DEDENT = 6
LPAR = 7
RPAR = 8
LSQB = 9
RSQB = 10
COLON = 11
COMMA = 12
SEMI = 13
PLUS = 14
MINUS = 15
STAR = 16
SLASH = 17
VBAR = 18
AMPER = 19
LESS = 20
GREATER = 21
EQUAL = 22
DOT = 23
PERCENT = 24
LBRACE = 25
RBRACE = 26
EQEQUAL = 27
NOTEQUAL = 28
LESSEQUAL = 29
GREATEREQUAL = 30
TILDE = 31
CIRCUMFLEX = 32
LEFTSHIFT = 33
RIGHTSHIFT = 34
DOUBLESTAR = 35
PLUSEQUAL = 36
MINEQUAL = 37
STAREQUAL = 38
SLASHEQUAL = 39
PERCENTEQUAL = 40
AMPEREQUAL = 41
VBAREQUAL = 42
CIRCUMFLEXEQUAL = 43
LEFTSHIFTEQUAL = 44
RIGHTSHIFTEQUAL = 45
DOUBLESTAREQUAL = 46
DOUBLESLASH = 47
DOUBLESLASHEQUAL = 48
AT = 49
ATEQUAL = 50
RARROW = 51
ELLIPSIS = 52
COLONEQUAL = 53
OP = 54
AWAIT = 55
ASYNC = 56
TYPE_IGNORE = 57
TYPE_COMMENT = 58
SOFT_KEYWORD = 59
# These aren't used by the C tokenizer but are needed for tokenize.py
ERRORTOKEN = 60
COMMENT = 61
NL = 62
ENCODING = 63
N_TOKENS = 64
# Special definitions for cooperation with parser
NT_OFFSET = 256

# Map token code -> token name, derived from the integer constants above.
tok_name = {
    value: name
    for name, value in globals().items()
    if isinstance(value, int) and not name.startswith("_")
}
__all__.extend(tok_name.values())

# Map operator/delimiter source text -> its exact token type code.
EXACT_TOKEN_TYPES = {
    "!=": NOTEQUAL,
    "%": PERCENT,
    "%=": PERCENTEQUAL,
    "&": AMPER,
    "&=": AMPEREQUAL,
    "(": LPAR,
    ")": RPAR,
    "*": STAR,
    "**": DOUBLESTAR,
    "**=": DOUBLESTAREQUAL,
    "*=": STAREQUAL,
    "+": PLUS,
    "+=": PLUSEQUAL,
    ",": COMMA,
    "-": MINUS,
    "-=": MINEQUAL,
    "->": RARROW,
    ".": DOT,
    "...": ELLIPSIS,
    "/": SLASH,
    "//": DOUBLESLASH,
    "//=": DOUBLESLASHEQUAL,
    "/=": SLASHEQUAL,
    ":": COLON,
    ":=": COLONEQUAL,
    ";": SEMI,
    "<": LESS,
    "<<": LEFTSHIFT,
    "<<=": LEFTSHIFTEQUAL,
    "<=": LESSEQUAL,
    "=": EQUAL,
    "==": EQEQUAL,
    ">": GREATER,
    ">=": GREATEREQUAL,
    ">>": RIGHTSHIFT,
    ">>=": RIGHTSHIFTEQUAL,
    "@": AT,
    "@=": ATEQUAL,
    "[": LSQB,
    "]": RSQB,
    "^": CIRCUMFLEX,
    "^=": CIRCUMFLEXEQUAL,
    "{": LBRACE,
    "|": VBAR,
    "|=": VBAREQUAL,
    "}": RBRACE,
    "~": TILDE,
}


def ISTERMINAL(x):
    """Return True if *x* is a terminal token type (below NT_OFFSET)."""
    return x < NT_OFFSET


def ISNONTERMINAL(x):
    """Return True if *x* is a non-terminal (grammar) symbol code."""
    return x >= NT_OFFSET


def ISEOF(x):
    """Return True if *x* is the end-of-input marker."""
    return x == ENDMARKER

View File

@@ -0,0 +1,897 @@
"""
FIXME:https://github.com/twisted/twisted/issues/3843
This can be removed once t.persisted.aot is removed.
New code should not make use of this.
Tokenization help for Python programs.
vendored from https://github.[AWS-SECRET-REMOVED]41ca6f1647d83005c68199aa/Lib/tokenize.py
Licence: https://docs.python.org/3/license.html
tokenize(readline) is a generator that breaks a stream of bytes into
Python tokens. It decodes the bytes according to PEP-0263 for
determining source file encoding.
It accepts a readline-like method which is called repeatedly to get the
next line of input (or b"" for EOF). It generates 5-tuples with these
members:
the token type (see token.py)
the token (a string)
the starting (row, column) indices of the token (a 2-tuple of ints)
the ending (row, column) indices of the token (a 2-tuple of ints)
the original line (string)
It is designed to match the working of the Python tokenizer exactly, except
that it produces COMMENT tokens for comments and gives type OP for all
operators. Additionally, all token lists start with an ENCODING token
which tells you which encoding was used to decode the bytes stream.
"""
__author__ = "Ka-Ping Yee <ping@lfw.org>"
__credits__ = (
"GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, "
"Skip Montanaro, Raymond Hettinger, Trent Nelson, "
"Michael Foord"
)
import collections
import functools
import itertools as _itertools
import re
import sys
from builtins import open as _builtin_open
from codecs import BOM_UTF8, lookup
from io import TextIOWrapper
from ._token import (
AMPER,
AMPEREQUAL,
ASYNC,
AT,
ATEQUAL,
AWAIT,
CIRCUMFLEX,
CIRCUMFLEXEQUAL,
COLON,
COLONEQUAL,
COMMA,
COMMENT,
DEDENT,
DOT,
DOUBLESLASH,
DOUBLESLASHEQUAL,
DOUBLESTAR,
DOUBLESTAREQUAL,
ELLIPSIS,
ENCODING,
ENDMARKER,
EQEQUAL,
EQUAL,
ERRORTOKEN,
EXACT_TOKEN_TYPES,
GREATER,
GREATEREQUAL,
INDENT,
ISEOF,
ISNONTERMINAL,
ISTERMINAL,
LBRACE,
LEFTSHIFT,
LEFTSHIFTEQUAL,
LESS,
LESSEQUAL,
LPAR,
LSQB,
MINEQUAL,
MINUS,
N_TOKENS,
NAME,
NEWLINE,
NL,
NOTEQUAL,
NT_OFFSET,
NUMBER,
OP,
PERCENT,
PERCENTEQUAL,
PLUS,
PLUSEQUAL,
RARROW,
RBRACE,
RIGHTSHIFT,
RIGHTSHIFTEQUAL,
RPAR,
RSQB,
SEMI,
SLASH,
SLASHEQUAL,
SOFT_KEYWORD,
STAR,
STAREQUAL,
STRING,
TILDE,
TYPE_COMMENT,
TYPE_IGNORE,
VBAR,
VBAREQUAL,
tok_name,
)
__all__ = [
"tok_name",
"ISTERMINAL",
"ISNONTERMINAL",
"ISEOF",
"ENDMARKER",
"NAME",
"NUMBER",
"STRING",
"NEWLINE",
"INDENT",
"DEDENT",
"LPAR",
"RPAR",
"LSQB",
"RSQB",
"COLON",
"COMMA",
"SEMI",
"PLUS",
"MINUS",
"STAR",
"SLASH",
"VBAR",
"AMPER",
"LESS",
"GREATER",
"EQUAL",
"DOT",
"PERCENT",
"LBRACE",
"RBRACE",
"EQEQUAL",
"NOTEQUAL",
"LESSEQUAL",
"GREATEREQUAL",
"TILDE",
"CIRCUMFLEX",
"LEFTSHIFT",
"RIGHTSHIFT",
"DOUBLESTAR",
"PLUSEQUAL",
"MINEQUAL",
"STAREQUAL",
"SLASHEQUAL",
"PERCENTEQUAL",
"AMPEREQUAL",
"VBAREQUAL",
"CIRCUMFLEXEQUAL",
"LEFTSHIFTEQUAL",
"RIGHTSHIFTEQUAL",
"DOUBLESTAREQUAL",
"DOUBLESLASH",
"DOUBLESLASHEQUAL",
"AT",
"ATEQUAL",
"RARROW",
"ELLIPSIS",
"COLONEQUAL",
"OP",
"AWAIT",
"ASYNC",
"TYPE_IGNORE",
"TYPE_COMMENT",
"SOFT_KEYWORD",
"ERRORTOKEN",
"COMMENT",
"NL",
"ENCODING",
"N_TOKENS",
"NT_OFFSET",
"tokenize",
"generate_tokens",
"detect_encoding",
"untokenize",
"TokenInfo",
]
# PEP 263 encoding-declaration comment, e.g. "# -*- coding: utf-8 -*-".
cookie_re = re.compile(r"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)", re.ASCII)
# A line that is blank or comment-only (an encoding cookie may follow it).
blank_re = re.compile(rb"^[ \t\f]*(?:[#\r\n]|$)", re.ASCII)
class TokenInfo(collections.namedtuple("TokenInfo", "type string start end line")):
    """One token: (type, string, (srow, scol) start, (erow, ecol) end, line)."""

    def __repr__(self):
        # Show the numeric type alongside its symbolic name, e.g. "54 (OP)".
        annotated = "%d (%s)" % (self.type, tok_name[self.type])
        fields = self._replace(type=annotated)
        return "TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)" % fields

    @property
    def exact_type(self):
        """The specific operator type for OP tokens; otherwise ``type``."""
        if self.type != OP:
            return self.type
        return EXACT_TOKEN_TYPES.get(self.string, self.type)
def group(*choices):
    """Join regex alternatives into one parenthesized group."""
    return "({})".format("|".join(choices))


def any(*choices):
    """A group of alternatives matched zero or more times."""
    return group(*choices) + "*"


def maybe(*choices):
    """A group of alternatives matched at most once."""
    return group(*choices) + "?"
# Note: we use unicode matching for names ("\w") but ascii matching for
# number literals.
Whitespace = r"[ \f\t]*"
Comment = r"#[^\r\n]*"
Ignore = Whitespace + any(r"\\\r?\n" + Whitespace) + maybe(Comment)
Name = r"\w+"

# Integer literal forms; "_?" allows PEP 515 underscore digit separators.
Hexnumber = r"0[xX](?:_?[0-9a-fA-F])+"
Binnumber = r"0[bB](?:_?[01])+"
Octnumber = r"0[oO](?:_?[0-7])+"
Decnumber = r"(?:0(?:_?0)*|[1-9](?:_?[0-9])*)"
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
# Float and imaginary literal forms.
Exponent = r"[eE][-+]?[0-9](?:_?[0-9])*"
Pointfloat = group(
    r"[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?", r"\.[0-9](?:_?[0-9])*"
) + maybe(Exponent)
Expfloat = r"[0-9](?:_?[0-9])*" + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r"[0-9](?:_?[0-9])*[jJ]", Floatnumber + r"[jJ]")
Number = group(Imagnumber, Floatnumber, Intnumber)
# Return the empty string, plus all of the valid string prefixes.
def _all_string_prefixes():
# The valid string prefixes. Only contain the lower case versions,
# and don't contain any permutations (include 'fr', but not
# 'rf'). The various permutations will be generated.
_valid_string_prefixes = ["b", "r", "u", "f", "br", "fr"]
# if we add binary f-strings, add: ['fb', 'fbr']
result = {""}
for prefix in _valid_string_prefixes:
for t in _itertools.permutations(prefix):
# create a list with upper and lower versions of each
# character
for u in _itertools.product(*[(c, c.upper()) for c in t]):
result.add("".join(u))
return result
@functools.lru_cache(None)
def _compile(expr):
    """Compile *expr* as a Unicode regex, memoizing compiled patterns."""
    return re.compile(expr, re.UNICODE)
# Note that since _all_string_prefixes includes the empty string,
# StringPrefix can be the empty string (making it optional).
StringPrefix = group(*_all_string_prefixes())
# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
# Single-line ' or " string.
String = group(
    StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
    StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"',
)
# Sorting in reverse order puts the long operators before their prefixes.
# Otherwise if = came before ==, == would get recognized as two instances
# of =.
Special = group(*(re.escape(x) for x in sorted(EXACT_TOKEN_TYPES, reverse=True)))
Funny = group(r"\r?\n", Special)
PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken
# First (or only) line of ' or " string.
ContStr = group(
    StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r"\\\r?\n"),
    StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r"\\\r?\n"),
)
PseudoExtras = group(r"\\\r?\n|\Z", Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
# For a given string prefix plus quotes, endpats maps it to a regex
# to match the remainder of that string. _prefix can be empty, for
# a normal single or triple quoted string (with no prefix).
endpats = {}
for _prefix in _all_string_prefixes():
    endpats[_prefix + "'"] = Single
    endpats[_prefix + '"'] = Double
    endpats[_prefix + "'''"] = Single3
    endpats[_prefix + '"""'] = Double3
del _prefix
# A set of all of the single and triple quoted string prefixes,
# including the opening quotes.
single_quoted = set()
triple_quoted = set()
for t in _all_string_prefixes():
    for u in (t + '"', t + "'"):
        single_quoted.add(u)
    for u in (t + '"""', t + "'''"):
        triple_quoted.add(u)
del t, u

# Number of columns a tab advances to (next multiple of tabsize).
tabsize = 8


class TokenError(Exception):
    """Raised for incomplete input, e.g. EOF inside a string or statement."""

    pass


class StopTokenizing(Exception):
    """Internal signal used to abort tokenization early."""

    pass
class Untokenizer:
    """Rebuilds source text from a token stream; see module-level untokenize()."""

    def __init__(self):
        self.tokens = []  # accumulated output fragments
        self.prev_row = 1  # row where the previously emitted token ended
        self.prev_col = 0  # column where the previously emitted token ended
        self.encoding = None  # taken from an ENCODING token, if one is seen

    def add_whitespace(self, start):
        """Append filler so the next token begins at position *start*."""
        row, col = start
        if row < self.prev_row or row == self.prev_row and col < self.prev_col:
            raise ValueError(
                "start ({},{}) precedes previous end ({},{})".format(
                    row, col, self.prev_row, self.prev_col
                )
            )
        row_offset = row - self.prev_row
        if row_offset:
            # Bridge skipped rows with escaped newlines.
            self.tokens.append("\\\n" * row_offset)
            self.prev_col = 0
        col_offset = col - self.prev_col
        if col_offset:
            self.tokens.append(" " * col_offset)

    def untokenize(self, iterable):
        """Render full 5-tuples back to source text.

        Falls back to compat() as soon as a bare (type, string) pair is seen.
        """
        it = iter(iterable)
        indents = []
        startline = False
        for t in it:
            if len(t) == 2:
                # Position info is missing: switch to best-effort mode.
                self.compat(t, it)
                break
            tok_type, token, start, end, line = t
            if tok_type == ENCODING:
                self.encoding = token
                continue
            if tok_type == ENDMARKER:
                break
            if tok_type == INDENT:
                indents.append(token)
                continue
            elif tok_type == DEDENT:
                indents.pop()
                self.prev_row, self.prev_col = end
                continue
            elif tok_type in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                # First token on a fresh line: re-emit the current indent.
                indent = indents[-1]
                if start[1] >= len(indent):
                    self.tokens.append(indent)
                    self.prev_col = len(indent)
                startline = False
            self.add_whitespace(start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                self.prev_row += 1
                self.prev_col = 0
        return "".join(self.tokens)

    def compat(self, token, iterable):
        """Best-effort reconstruction from (type, string) pairs only."""
        indents = []
        toks_append = self.tokens.append
        startline = token[0] in (NEWLINE, NL)
        prevstring = False
        for tok in _itertools.chain([token], iterable):
            toknum, tokval = tok[:2]
            if toknum == ENCODING:
                self.encoding = tokval
                continue
            if toknum in (NAME, NUMBER):
                # Keep adjacent names/numbers from fusing together.
                tokval += " "
            # Insert a space between two consecutive strings
            if toknum == STRING:
                if prevstring:
                    tokval = " " + tokval
                prevstring = True
            else:
                prevstring = False
            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)
def untokenize(iterable):
    """Transform tokens back into Python source code.

    Returns a bytes object, encoded using the ENCODING token (the first
    token produced by tokenize()); if no ENCODING token was present, a str
    is returned instead. Each element of *iterable* must be a token
    sequence with at least two elements: a token number and a token value.
    With only two elements per token the output is poor but re-tokenizes to
    the same (type, string) pairs; with full 5-tuples the original source
    is reproduced exactly.
    """
    converter = Untokenizer()
    source = converter.untokenize(iterable)
    if converter.encoding is not None:
        return source.encode(converter.encoding)
    return source
def _get_normal_name(orig_enc):
"""Imitates get_normal_name in tokenizer.c."""
# Only care about the first 12 characters.
enc = orig_enc[:12].lower().replace("_", "-")
if enc == "utf-8" or enc.startswith("utf-8-"):
return "utf-8"
if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or enc.startswith(
("latin-1-", "iso-8859-1-", "iso-latin-1-")
):
return "iso-8859-1"
return orig_enc
def detect_encoding(readline):
    """
    The detect_encoding() function is used to detect the encoding that should
    be used to decode a Python source file. It requires one argument, readline,
    in the same way as the tokenize() generator.
    It will call readline a maximum of twice, and return the encoding used
    (as a string) and a list of any lines (left as bytes) it has read in.
    It detects the encoding from the presence of a utf-8 bom or an encoding
    cookie as specified in pep-0263. If both a bom and a cookie are present,
    but disagree, a SyntaxError will be raised. If the encoding cookie is an
    invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
    'utf-8-sig' is returned.
    If no encoding is specified, then the default of 'utf-8' will be returned.
    """
    try:
        # Used only to improve error messages below.
        filename = readline.__self__.name
    except AttributeError:
        filename = None
    bom_found = False
    encoding = None
    default = "utf-8"

    def read_or_stop():
        # Treat a StopIteration-raising readline as end of input.
        try:
            return readline()
        except StopIteration:
            return b""

    def find_cookie(line):
        """Return the cookie-declared encoding on *line*, or None."""
        try:
            # Decode as UTF-8. Either the line is an encoding declaration,
            # in which case it should be pure ASCII, or it must be UTF-8
            # per default encoding.
            line_string = line.decode("utf-8")
        except UnicodeDecodeError:
            msg = "invalid or missing encoding declaration"
            if filename is not None:
                msg = "{} for {!r}".format(msg, filename)
            raise SyntaxError(msg)
        match = cookie_re.match(line_string)
        if not match:
            return None
        encoding = _get_normal_name(match.group(1))
        try:
            # Validate that the declared codec actually exists.
            lookup(encoding)
        except LookupError:
            # This behaviour mimics the Python interpreter
            if filename is None:
                msg = "unknown encoding: " + encoding
            else:
                msg = "unknown encoding for {!r}: {}".format(filename, encoding)
            raise SyntaxError(msg)
        if bom_found:
            if encoding != "utf-8":
                # This behaviour mimics the Python interpreter
                if filename is None:
                    msg = "encoding problem: utf-8"
                else:
                    msg = "encoding problem for {!r}: utf-8".format(filename)
                raise SyntaxError(msg)
            encoding += "-sig"
        return encoding

    first = read_or_stop()
    if first.startswith(BOM_UTF8):
        bom_found = True
        first = first[3:]
        default = "utf-8-sig"
    if not first:
        return default, []
    encoding = find_cookie(first)
    if encoding:
        return encoding, [first]
    if not blank_re.match(first):
        # First line is real code; a cookie may only appear on lines 1-2.
        return default, [first]
    second = read_or_stop()
    if not second:
        return default, [first]
    encoding = find_cookie(second)
    if encoding:
        return encoding, [first, second]
    return default, [first, second]
def open(filename):
    """Open a file in read only mode using the encoding detected by
    detect_encoding().
    """
    stream = _builtin_open(filename, "rb")
    try:
        encoding, _ = detect_encoding(stream.readline)
        stream.seek(0)
        wrapper = TextIOWrapper(stream, encoding, line_buffering=True)
        wrapper.mode = "r"
        return wrapper
    except BaseException:
        # Don't leak the raw file handle if detection or wrapping fails.
        stream.close()
        raise
def tokenize(readline):
    """
    Break a stream of bytes into Python tokens.

    *readline* must behave like the readline() method of a binary file
    object, returning one line of input as bytes per call (or raising
    StopIteration / returning b"" at end of input), e.g.::

        readline = open(myfile, 'rb').__next__

    Yields 5-tuples: the token type; the token string; a 2-tuple
    (srow, scol) where the token begins; a 2-tuple (erow, ecol) where it
    ends; and the physical line it was found on. The very first token is
    always an ENCODING token naming the encoding used to decode the stream.
    """
    encoding, consumed = detect_encoding(readline)
    # Replay the line(s) detect_encoding consumed, then the remaining input;
    # pad with b"" forever so _tokenize sees a clean EOF.
    padded = _itertools.chain(consumed, iter(readline, b""), _itertools.repeat(b""))
    return _tokenize(padded.__next__, encoding)
def _tokenize(readline, encoding):
    """Generator doing the actual tokenization for tokenize()/generate_tokens().

    *readline* returns one line per call (bytes if *encoding* is not None,
    in which case an initial ENCODING token is emitted and each line is
    decoded; str otherwise). Yields TokenInfo 5-tuples.
    """
    strstart = None  # (row, col) where an unterminated string began
    endprog = None  # regex matching the tail of the open string
    lnum = parenlev = continued = 0
    numchars = "0123456789"
    contstr, needcont = "", 0  # accumulated text of an open string
    contline = None
    indents = [0]  # stack of active indentation column widths
    if encoding is not None:
        if encoding == "utf-8-sig":
            # BOM will already have been stripped.
            encoding = "utf-8"
        yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), "")
    last_line = b""
    line = b""
    while True:  # loop over lines in stream
        try:
            # We capture the value of the line variable here because
            # readline uses the empty string '' to signal end of input,
            # hence `line` itself will always be overwritten at the end
            # of this loop.
            last_line = line
            line = readline()
        except StopIteration:
            line = b""
        if encoding is not None:
            line = line.decode(encoding)
        lnum += 1
        pos, max = 0, len(line)
        if contstr:  # continued string
            if not line:
                raise TokenError("EOF in multi-line string", strstart)
            endmatch = endprog.match(line)
            if endmatch:
                pos = end = endmatch.end(0)
                yield TokenInfo(
                    STRING, contstr + line[:end], strstart, (lnum, end), contline + line
                )
                contstr, needcont = "", 0
                contline = None
            elif needcont and line[-2:] != "\\\n" and line[-3:] != "\\\r\n":
                # Single-quoted string whose line did not end in a
                # backslash continuation: emit it as an error token.
                yield TokenInfo(
                    ERRORTOKEN, contstr + line, strstart, (lnum, len(line)), contline
                )
                contstr = ""
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue
        elif parenlev == 0 and not continued:  # new statement
            if not line:
                break
            column = 0
            while pos < max:  # measure leading whitespace
                if line[pos] == " ":
                    column += 1
                elif line[pos] == "\t":
                    column = (column // tabsize + 1) * tabsize
                elif line[pos] == "\f":
                    column = 0
                else:
                    break
                pos += 1
            if pos == max:
                break
            if line[pos] in "#\r\n":  # skip comments or blank lines
                if line[pos] == "#":
                    comment_token = line[pos:].rstrip("\r\n")
                    yield TokenInfo(
                        COMMENT,
                        comment_token,
                        (lnum, pos),
                        (lnum, pos + len(comment_token)),
                        line,
                    )
                    pos += len(comment_token)
                yield TokenInfo(NL, line[pos:], (lnum, pos), (lnum, len(line)), line)
                continue
            if column > indents[-1]:  # count indents or dedents
                indents.append(column)
                yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line),
                    )
                indents = indents[:-1]
                yield TokenInfo(DEDENT, "", (lnum, pos), (lnum, pos), line)
        else:  # continued statement
            if not line:
                raise TokenError("EOF in multi-line statement", (lnum, 0))
            continued = 0
        while pos < max:
            pseudomatch = _compile(PseudoToken).match(line, pos)
            if pseudomatch:  # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                if start == end:
                    continue
                token, initial = line[start:end], line[start]
                if initial in numchars or (  # ordinary number
                    initial == "." and token != "." and token != "..."
                ):
                    yield TokenInfo(NUMBER, token, spos, epos, line)
                elif initial in "\r\n":
                    if parenlev > 0:
                        # Newlines inside brackets are non-logical (NL).
                        yield TokenInfo(NL, token, spos, epos, line)
                    else:
                        yield TokenInfo(NEWLINE, token, spos, epos, line)
                elif initial == "#":
                    assert not token.endswith("\n")
                    yield TokenInfo(COMMENT, token, spos, epos, line)
                elif token in triple_quoted:
                    endprog = _compile(endpats[token])
                    endmatch = endprog.match(line, pos)
                    if endmatch:  # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield TokenInfo(STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)  # multiple lines
                        contstr = line[start:]
                        contline = line
                        break
                # Check up to the first 3 chars of the token to see if
                # they're in the single_quoted set. If so, they start
                # a string.
                # We're using the first 3, because we're looking for
                # "rb'" (for example) at the start of the token. If
                # we switch to longer prefixes, this needs to be
                # adjusted.
                # Note that initial == token[:1].
                # Also note that single quote checking must come after
                # triple quote checking (above).
                elif (
                    initial in single_quoted
                    or token[:2] in single_quoted
                    or token[:3] in single_quoted
                ):
                    if token[-1] == "\n":  # continued string
                        strstart = (lnum, start)
                        # Again, using the first 3 chars of the
                        # token. This is looking for the matching end
                        # regex for the correct type of quote
                        # character. So it's really looking for
                        # endpats["'"] or endpats['"'], by trying to
                        # skip string prefix characters, if any.
                        endprog = _compile(
                            endpats.get(initial)
                            or endpats.get(token[1])
                            or endpats.get(token[2])
                        )
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:  # ordinary string
                        yield TokenInfo(STRING, token, spos, epos, line)
                elif initial.isidentifier():  # ordinary name
                    yield TokenInfo(NAME, token, spos, epos, line)
                elif initial == "\\":  # continued stmt
                    continued = 1
                else:
                    if initial in "([{":
                        parenlev += 1
                    elif initial in ")]}":
                        parenlev -= 1
                    yield TokenInfo(OP, token, spos, epos, line)
            else:
                yield TokenInfo(
                    ERRORTOKEN, line[pos], (lnum, pos), (lnum, pos + 1), line
                )
                pos += 1
    # Add an implicit NEWLINE if the input doesn't end in one
    if (
        last_line
        and last_line[-1] not in "\r\n"
        and not last_line.strip().startswith("#")
    ):
        yield TokenInfo(
            NEWLINE, "", (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), ""
        )
    for indent in indents[1:]:  # pop remaining indent levels
        yield TokenInfo(DEDENT, "", (lnum, 0), (lnum, 0), "")
    yield TokenInfo(ENDMARKER, "", (lnum, 0), (lnum, 0), "")
def generate_tokens(readline):
    """Tokenize source presented as text rather than bytes.

    Same API as tokenize(), except *readline* must return str lines; since
    no bytes decoding happens, no ENCODING token is produced.
    """
    # encoding=None tells _tokenize the input is already decoded text.
    return _tokenize(readline, None)
def main():
    """CLI entry point: tokenize a file (or stdin) and print each token."""
    import argparse

    # Helper error handling routines
    def perror(message):
        sys.stderr.write(message)
        sys.stderr.write("\n")

    def error(message, filename=None, location=None):
        # Print a compiler-style diagnostic and exit with status 1.
        if location:
            args = (filename,) + location + (message,)
            perror("%s:%d:%d: error: %s" % args)
        elif filename:
            perror("%s: error: %s" % (filename, message))
        else:
            perror("error: %s" % message)
        sys.exit(1)

    # Parse the arguments and options
    parser = argparse.ArgumentParser(prog="python -m tokenize")
    parser.add_argument(
        dest="filename",
        nargs="?",
        metavar="filename.py",
        help="the file to tokenize; defaults to stdin",
    )
    parser.add_argument(
        "-e",
        "--exact",
        dest="exact",
        action="store_true",
        help="display token names using the exact type",
    )
    args = parser.parse_args()
    try:
        # Tokenize the input
        if args.filename:
            filename = args.filename
            with _builtin_open(filename, "rb") as f:
                tokens = list(tokenize(f.readline))
        else:
            # stdin is text, so use the str-based tokenizer directly.
            filename = "<stdin>"
            tokens = _tokenize(sys.stdin.readline, None)
        # Output the tokenization
        for token in tokens:
            token_type = token.type
            if args.exact:
                token_type = token.exact_type
            token_range = "%d,%d-%d,%d:" % (token.start + token.end)
            print("%-20s%-15s%-15r" % (token_range, tok_name[token_type], token.string))
    except IndentationError as err:
        line, column = err.args[1][1:3]
        error(err.args[0], filename, (line, column))
    except TokenError as err:
        line, column = err.args[1]
        error(err.args[0], filename, (line, column))
    except SyntaxError as err:
        error(err, filename)
    except OSError as err:
        error(err)
    except KeyboardInterrupt:
        print("interrupted\n")
    except Exception as err:
        perror("unexpected error: %s" % err)
        raise


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,631 @@
# -*- test-case-name: twisted.test.test_persisted -*-
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.
"""
AOT: Abstract Object Trees
The source-code-marshallin'est abstract-object-serializin'est persister
this side of Marmalade!
"""
import copyreg as copy_reg
import re
import types
from twisted.persisted import crefutil
from twisted.python import log, reflect
from twisted.python.compat import _constructMethod
# tokenize from py3.11 is vendored to work around https://github.com/python/cpython/issues/105238
# on 3.12
from ._tokenize import generate_tokens as tokenize
###########################
# Abstract Object Classes #
###########################
# "\0" in a getSource means "insert variable-width indention here".
# see `indentify'.
class Named:
    """Base class for AOT nodes identified purely by a (dotted) name."""

    def __init__(self, name):
        self.name = name


class Class(Named):
    """AOT node standing in for a class, referenced by name."""

    def getSource(self):
        return f"Class({self.name!r})"


class Function(Named):
    """AOT node standing in for a function, referenced by name."""

    def getSource(self):
        return f"Function({self.name!r})"


class Module(Named):
    """AOT node standing in for a module, referenced by name."""

    def getSource(self):
        return f"Module({self.name!r})"
class InstanceMethod:
    """AOT node for a bound method: method name, class name, instance node."""

    def __init__(self, name, klass, inst):
        # The bound instance must itself be an AOT node.
        if not isinstance(inst, (Ref, Instance, Deref)):
            raise TypeError("%s isn't an Instance, Ref, or Deref!" % inst)
        self.name = name
        self.klass = klass
        self.instance = inst

    def getSource(self):
        # "\0" is the indentation placeholder consumed by indentify().
        return "InstanceMethod({!r}, {!r}, \n\0{})".format(
            self.name, self.klass, prettify(self.instance)
        )
class _NoStateObj:
    """Sentinel type marking that no explicit state object was supplied."""

    pass


# Singleton sentinel default for Instance.__init__'s __stateObj__ parameter.
NoStateObj = _NoStateObj()

# Builtin types that are serialized directly via repr() in prettify().
_SIMPLE_BUILTINS = [
    bool,
    bytes,
    str,
    int,
    float,
    complex,
    type(None),
    slice,
    type(Ellipsis),
]
class Instance:
    """AOT node for a class instance: class name plus its state."""

    def __init__(self, className, __stateObj__=NoStateObj, **state):
        if not isinstance(className, str):
            raise TypeError("%s isn't a string!" % className)
        self.klass = className
        if __stateObj__ is not NoStateObj:
            # Explicit state object (may itself be an AOT node, e.g. a Ref).
            self.state = __stateObj__
            self.stateIsDict = 0
        else:
            # State supplied as keyword arguments.
            self.state = state
            self.stateIsDict = 1

    def getSource(self):
        # XXX make state be foo=bar instead of a dict.
        if self.stateIsDict:
            stateDict = self.state
        elif isinstance(self.state, Ref) and isinstance(self.state.obj, dict):
            stateDict = self.state.obj
        else:
            stateDict = None
        if stateDict is not None:
            try:
                # Prefer keyword form when every key is a str identifier.
                return f"Instance({self.klass!r}, {dictToKW(stateDict)})"
            except NonFormattableDict:
                return f"Instance({self.klass!r}, {prettify(stateDict)})"
        return f"Instance({self.klass!r}, {prettify(self.state)})"
class Ref:
    """AOT node wrapping an object that may be referenced more than once."""

    def __init__(self, *args):
        # blargh, lame.
        if len(args) == 2:
            self.refnum = args[0]
            self.obj = args[1]
        elif not args:
            self.refnum = None
            self.obj = None

    def setRef(self, num):
        """Assign reference number *num*; refuses to overwrite one."""
        if self.refnum:
            raise ValueError(f"Error setting id {num}, I already have {self.refnum}")
        self.refnum = num

    def setObj(self, obj):
        """Attach the wrapped object; refuses to overwrite one."""
        if self.obj:
            raise ValueError(f"Error setting obj {obj}, I already have {self.obj}")
        self.obj = obj

    def getSource(self):
        if self.obj is None:
            raise RuntimeError(
                "Don't try to display me before setting an object on me!"
            )
        if self.refnum:
            # Only emit the Ref(...) wrapper when a Deref points at us.
            return "Ref(%d, \n\0%s)" % (self.refnum, prettify(self.obj))
        return prettify(self.obj)
class Deref:
    """AOT node pointing back at an earlier Ref by its reference number."""

    def __init__(self, num):
        # Number of the Ref node this dereferences.
        self.refnum = num

    def getSource(self):
        """Return the source form, e.g. ``Deref(3)``."""
        return "Deref(%d)" % (self.refnum,)

    __repr__ = getSource
class Copyreg:
    """AOT node for objects persisted via a copyreg-style load function."""

    def __init__(self, loadfunc, state):
        self.loadfunc = loadfunc
        self.state = state

    def getSource(self):
        return "Copyreg(%r, %s)" % (self.loadfunc, prettify(self.state))
###############
# Marshalling #
###############
def getSource(ao):
    """Render the AOT *ao* as nicely indented source assigning it to ``app``."""
    return indentify("app = %s" % prettify(ao))
class NonFormattableDict(Exception):
    """A dictionary was not formattable."""


# Matches dict keys that are usable as Python keyword-argument identifiers.
r = re.compile("[a-zA-Z_][a-zA-Z0-9_]*$")
def dictToKW(d):
    """Render dict *d* as keyword-argument source text.

    Raises NonFormattableDict when any key is not a string or not a valid
    identifier; the "\\0" prefix is the indentation placeholder consumed by
    indentify().
    """
    pieces = []
    for key, value in sorted(d.items()):
        if not isinstance(key, str):
            raise NonFormattableDict("%r ain't a string" % key)
        if not r.match(key):
            raise NonFormattableDict("%r ain't an identifier" % key)
        pieces.append(f"\n\0{key}={prettify(value)},")
    return "".join(pieces)
def prettify(obj):
    """Return AOT source text for *obj*.

    AOT nodes render themselves via getSource(); simple builtins use repr();
    dicts/lists/tuples recurse, with "\\0" indentation placeholders for
    indentify(). Anything else raises TypeError.
    """
    if hasattr(obj, "getSource"):
        return obj.getSource()
    # basic type
    kind = type(obj)
    if kind in _SIMPLE_BUILTINS:
        return repr(obj)
    if kind is dict:
        parts = ["{"]
        for key, value in obj.items():
            parts.append(f"\n\0{prettify(key)}: {prettify(value)},")
        parts.append("\n\0}" if obj else "}")
        return "".join(parts)
    if kind is list:
        parts = ["["]
        parts.extend("\n\0%s," % prettify(item) for item in obj)
        parts.append("\n\0]" if obj else "]")
        return "".join(parts)
    if kind is tuple:
        parts = ["("]
        parts.extend("\n\0%s," % prettify(item) for item in obj)
        parts.append("\n\0)" if obj else ")")
        return "".join(parts)
    raise TypeError(f"Unsupported type {kind} when trying to prettify {obj}.")
def indentify(s):
    """Re-indent generated source: each "\\0" marker becomes indentation
    proportional to the current bracket-nesting depth."""
    out = []
    stack = []
    # tokenize pops from the end of this list: first *s*, then "" for EOF.
    l = ["", s]
    for (
        tokenType,
        tokenString,
        (startRow, startColumn),
        (endRow, endColumn),
        logicalLine,
    ) in tokenize(l.pop):
        if tokenString in ["[", "(", "{"]:
            stack.append(tokenString)
        elif tokenString in ["]", ")", "}"]:
            stack.pop()
        if tokenString == "\0":
            # Replace the placeholder with depth-based indentation.
            out.append(" " * len(stack))
        else:
            out.append(tokenString)
    return "".join(out)
###########
# Unjelly #
###########
def unjellyFromAOT(aot):
    """Turn the Abstract Object Tree *aot* back into a live object."""
    unjellier = AOTUnjellier()
    return unjellier.unjelly(aot)
def unjellyFromSource(stringOrFile):
    """
    Pass me a string of code or a filename that defines an 'app' variable (in
    terms of Abstract Objects!), and I'll execute it and unjelly the resulting
    AOT for you, returning a newly unpersisted Application object!
    """
    # Names made available to the evaluated persistence source.
    ns = {
        "Instance": Instance,
        "InstanceMethod": InstanceMethod,
        "Class": Class,
        "Function": Function,
        "Module": Module,
        "Ref": Ref,
        "Deref": Deref,
        "Copyreg": Copyreg,
    }
    if hasattr(stringOrFile, "read"):
        source = stringOrFile.read()
    else:
        source = stringOrFile
    # SECURITY NOTE: this compiles and executes *source* with full
    # privileges; only ever feed it trusted, locally generated files.
    code = compile(source, "<source>", "exec")
    eval(code, ns, ns)
    if "app" in ns:
        return unjellyFromAOT(ns["app"])
    else:
        raise ValueError("%s needs to define an 'app', it didn't!" % stringOrFile)
class AOTUnjellier:
"""I handle the unjellying of an Abstract Object Tree.
See AOTUnjellier.unjellyAO
"""
    def __init__(self):
        # refnum -> resolved object (or a crefutil.NotKnown placeholder).
        self.references = {}
        # AOT nodes visited so far (appended by unjellyAO).
        self.stack = []
        # (callable, [arg]) pairs to invoke once unjellying completes.
        self.afterUnjelly = []
##
# unjelly helpers (copied pretty much directly from (now deleted) marmalade)
##
    def unjellyLater(self, node):
        """Unjelly a node, later."""
        # Returns a crefutil._Defer that will hold the result at index 0.
        d = crefutil._Defer()
        self.unjellyInto(d, 0, node)
        return d
    def unjellyInto(self, obj, loc, ao):
        """Utility method for unjellying one object into another.
        This automates the handling of backreferences.

        Stores the unjellied value of *ao* at obj[loc]; if the value is a
        not-yet-resolved placeholder, registers obj[loc] to be patched when
        it resolves. Returns the (possibly placeholder) value.
        """
        o = self.unjellyAO(ao)
        obj[loc] = o
        if isinstance(o, crefutil.NotKnown):
            o.addDependant(obj, loc)
        return o
    def callAfter(self, callable, result):
        """Schedule *callable* to be invoked with *result* after unjellying."""
        if isinstance(result, crefutil.NotKnown):
            # Not resolved yet: register the list to be patched on resolution.
            # NOTE(review): index 1 on a one-element list looks suspicious —
            # confirm against crefutil's dependant-resolution semantics.
            listResult = [None]
            result.addDependant(listResult, 1)
        else:
            listResult = [result]
        self.afterUnjelly.append((callable, listResult))
    def unjellyAttribute(self, instance, attrName, ao):
        # XXX this is unused????
        """Utility method for unjellying into instances of attributes.
        Use this rather than unjellyAO unless you like surprising bugs!
        Alternatively, you can use unjellyInto on your instance's __dict__.
        """
        self.unjellyInto(instance.__dict__, attrName, ao)
def unjellyAO(self, ao):
"""Unjelly an Abstract Object and everything it contains.
I return the real object.
"""
self.stack.append(ao)
t = type(ao)
if t in _SIMPLE_BUILTINS:
return ao
elif t is list:
l = []
for x in ao:
l.append(None)
self.unjellyInto(l, len(l) - 1, x)
return l
elif t is tuple:
l = []
tuple_ = tuple
for x in ao:
l.append(None)
if isinstance(self.unjellyInto(l, len(l) - 1, x), crefutil.NotKnown):
tuple_ = crefutil._Tuple
return tuple_(l)
elif t is dict:
d = {}
for k, v in ao.items():
kvd = crefutil._DictKeyAndValue(d)
self.unjellyInto(kvd, 0, k)
self.unjellyInto(kvd, 1, v)
return d
else:
# Abstract Objects
c = ao.__class__
if c is Module:
return reflect.namedModule(ao.name)
elif c in [Class, Function] or issubclass(c, type):
return reflect.namedObject(ao.name)
elif c is InstanceMethod:
im_name = ao.name
im_class = reflect.namedObject(ao.klass)
im_self = self.unjellyAO(ao.instance)
if im_name in im_class.__dict__:
if im_self is None:
return getattr(im_class, im_name)
elif isinstance(im_self, crefutil.NotKnown):
return crefutil._InstanceMethod(im_name, im_self, im_class)
else:
return _constructMethod(im_class, im_name, im_self)
else:
raise TypeError("instance method changed")
elif c is Instance:
klass = reflect.namedObject(ao.klass)
state = self.unjellyAO(ao.state)
inst = klass.__new__(klass)
if hasattr(klass, "__setstate__"):
self.callAfter(inst.__setstate__, state)
elif isinstance(state, dict):
inst.__dict__ = state
else:
inst.__dict__ = state.__getstate__()
return inst
elif c is Ref:
o = self.unjellyAO(ao.obj) # THIS IS CHANGING THE REF OMG
refkey = ao.refnum
ref = self.references.get(refkey)
if ref is None:
self.references[refkey] = o
elif isinstance(ref, crefutil.NotKnown):
ref.resolveDependants(o)
self.references[refkey] = o
elif refkey is None:
# This happens when you're unjellying from an AOT not read from source
pass
else:
raise ValueError(
"Multiple references with the same ID: %s, %s, %s!"
% (ref, refkey, ao)
)
return o
elif c is Deref:
num = ao.refnum
ref = self.references.get(num)
if ref is None:
der = crefutil._Dereference(num)
self.references[num] = der
return der
return ref
elif c is Copyreg:
loadfunc = reflect.namedObject(ao.loadfunc)
d = self.unjellyLater(ao.state).addCallback(
lambda result, _l: _l(*result), loadfunc
)
return d
else:
raise TypeError("Unsupported AOT type: %s" % t)
def unjelly(self, ao):
try:
l = [None]
self.unjellyInto(l, 0, ao)
for func, v in self.afterUnjelly:
func(v[0])
return l[0]
except BaseException:
log.msg("Error jellying object! Stacktrace follows::")
log.msg("\n".join(map(repr, self.stack)))
raise
#########
# Jelly #
#########
def jellyToAOT(obj):
    """Convert an object to an Abstract Object Tree."""
    jellier = AOTJellier()
    return jellier.jelly(obj)
def jellyToSource(obj, file=None):
    """
    Serialize C{obj} to AOT source form.

    @param obj: the object to serialize.
    @param file: an optional file object.  When given, the UTF-8 encoded
        source is written to it and C{None} is returned; otherwise the
        source text itself is returned.
    """
    aot = jellyToAOT(obj)
    if not file:
        return getSource(aot)
    file.write(getSource(aot).encode("utf-8"))
def _classOfMethod(methodObject):
"""
Get the associated class of the given method object.
@param methodObject: a bound method
@type methodObject: L{types.MethodType}
@return: a class
@rtype: L{type}
"""
return methodObject.__self__.__class__
def _funcOfMethod(methodObject):
"""
Get the associated function of the given method object.
@param methodObject: a bound method
@type methodObject: L{types.MethodType}
@return: the function implementing C{methodObject}
@rtype: L{types.FunctionType}
"""
return methodObject.__func__
def _selfOfMethod(methodObject):
"""
Get the object that a bound method is bound to.
@param methodObject: a bound method
@type methodObject: L{types.MethodType}
@return: the C{self} passed to C{methodObject}
@rtype: L{object}
"""
return methodObject.__self__
class AOTJellier:
    """
    Converts live objects into Abstract Object Trees (the inverse of
    L{AOTUnjellier}).
    """
    def __init__(self):
        # dict of {id(obj): AO reference}; populated by prepareForRef() so
        # repeat encounters of the same mutable object become Deref nodes.
        self.prepared = {}
        # Last reference number handed out for back-references.
        self._ref_id = 0
        # repr()s of objects currently being jellied, for error reports.
        self.stack = []
    def prepareForRef(self, aoref, object):
        """I prepare an object for later referencing, by storing its id()
        and its _AORef in a cache."""
        self.prepared[id(object)] = aoref
    def jellyToAO(self, obj):
        """I turn an object into an AOT and return it."""
        objType = type(obj)
        self.stack.append(repr(obj))
        # immutable: We don't care if these have multiple refs!
        if objType in _SIMPLE_BUILTINS:
            retval = obj
        elif issubclass(objType, types.MethodType):
            # TODO: make methods 'prefer' not to jelly the object internally,
            # so that the object will show up where it's referenced first NOT
            # by a method.
            retval = InstanceMethod(
                _funcOfMethod(obj).__name__,
                reflect.qual(_classOfMethod(obj)),
                self.jellyToAO(_selfOfMethod(obj)),
            )
        elif issubclass(objType, types.ModuleType):
            retval = Module(obj.__name__)
        elif issubclass(objType, type):
            retval = Class(reflect.qual(obj))
        elif objType is types.FunctionType:
            retval = Function(reflect.fullFuncName(obj))
        else:  # mutable! gotta watch for refs.
            # Marmalade had the nicety of being able to just stick a 'reference' attribute
            # on any Node object that was referenced, but in AOT, the referenced object
            # is *inside* of a Ref call (Ref(num, obj) instead of
            # <objtype ... reference="1">). The problem is, especially for built-in types,
            # I can't just assign some attribute to them to give them a refnum. So, I have
            # to "wrap" a Ref(..) around them later -- that's why I put *everything* that's
            # mutable inside one. The Ref() class will only print the "Ref(..)" around an
            # object if it has a Reference explicitly attached.
            if id(obj) in self.prepared:
                oldRef = self.prepared[id(obj)]
                if oldRef.refnum:
                    # it's been referenced already
                    key = oldRef.refnum
                else:
                    # it hasn't been referenced yet
                    self._ref_id = self._ref_id + 1
                    key = self._ref_id
                    oldRef.setRef(key)
                # NOTE(review): this early return skips the stack pop below,
                # leaving a stale repr on self.stack — confirm intended.
                return Deref(key)
            retval = Ref()
            def _stateFrom(state):
                # Wrap the object's class name and jellied state in an
                # Instance node.
                retval.setObj(
                    Instance(reflect.qual(obj.__class__), self.jellyToAO(state))
                )
            self.prepareForRef(retval, obj)
            if objType is list:
                retval.setObj([self.jellyToAO(o) for o in obj])  # hah!
            elif objType is tuple:
                retval.setObj(tuple(map(self.jellyToAO, obj)))
            elif objType is dict:
                d = {}
                for k, v in obj.items():
                    d[self.jellyToAO(k)] = self.jellyToAO(v)
                retval.setObj(d)
            elif objType in copy_reg.dispatch_table:
                # Types with a registered copyreg reducer get a Copyreg node.
                unpickleFunc, state = copy_reg.dispatch_table[objType](obj)
                retval.setObj(
                    Copyreg(reflect.fullFuncName(unpickleFunc), self.jellyToAO(state))
                )
            elif hasattr(obj, "__getstate__"):
                _stateFrom(obj.__getstate__())
            elif hasattr(obj, "__dict__"):
                _stateFrom(obj.__dict__)
            else:
                raise TypeError("Unsupported type: %s" % objType.__name__)
        del self.stack[-1]
        return retval
    def jelly(self, obj):
        """Jelly the root object C{obj}, dumping the work stack on failure."""
        try:
            ao = self.jellyToAO(obj)
            return ao
        except BaseException:
            log.msg("Error jellying object! Stacktrace follows::")
            log.msg("\n".join(self.stack))
            raise

View File

@@ -0,0 +1,160 @@
# -*- test-case-name: twisted.test.test_persisted -*-
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.
"""
Utility classes for dealing with circular references.
"""
from twisted.python import log, reflect
from twisted.python.compat import _constructMethod
class NotKnown:
    """
    Placeholder for an object that is not yet available during unjellying.

    Containers that need the object register themselves as dependants and
    are patched in place once the real object is resolved.
    """

    # The real object, once resolveDependants() has been called.
    resolvedObject = None

    def __init__(self):
        self.dependants = []
        self.resolved = 0

    def addDependant(self, mutableObject, key):
        """Record that C{mutableObject[key]} should receive the object."""
        assert not self.resolved
        self.dependants.append((mutableObject, key))

    def resolveDependants(self, newObject):
        """Patch every registered dependant slot with C{newObject}."""
        self.resolved = 1
        self.resolvedObject = newObject
        for dependant, slot in self.dependants:
            dependant[slot] = newObject

    def __hash__(self):
        assert 0, "I am not to be used as a dictionary key."
class _Container(NotKnown):
    """
    Helper class to resolve circular references on container objects.
    """

    def __init__(self, l, containerType):
        """
        @param l: The list of elements, some of which may still be
            unresolved L{NotKnown} placeholders.
        @param containerType: A type of container objects (e.g., C{tuple} or
            C{set}).
        """
        NotKnown.__init__(self)
        self.containerType = containerType
        self.l = l
        # Indices that still hold unresolved placeholders.
        self.locs = []
        for index, element in enumerate(l):
            if isinstance(element, NotKnown):
                self.locs.append(index)
                element.addDependant(self, index)
        if not self.locs:
            self.resolveDependants(self.containerType(self.l))

    def __setitem__(self, n, obj):
        """
        Store a now-resolved element; once every element is resolved, build
        the final container and resolve our own dependants.
        """
        self.l[n] = obj
        if not isinstance(obj, NotKnown):
            self.locs.remove(n)
        if not self.locs:
            self.resolveDependants(self.containerType(self.l))
class _Tuple(_Container):
    """
    Manage tuple containing circular references. Deprecated: use C{_Container}
    instead.
    """

    def __init__(self, l):
        """
        @param l: The list of elements, some of which may still be
            unresolved placeholders.
        """
        _Container.__init__(self, l, tuple)
class _InstanceMethod(NotKnown):
    """
    Placeholder for a bound method whose instance has not been resolved yet.
    """

    def __init__(self, im_name, im_self, im_class):
        NotKnown.__init__(self)
        self.my_class = im_class
        self.name = im_name
        # im_self _must_ be a NotKnown
        im_self.addDependant(self, 0)

    def __call__(self, *args, **kw):
        # Being invoked before resolution is always a bug: log loudly,
        # print where we were called from, and abort.
        import traceback

        log.msg(f"instance method {reflect.qual(self.my_class)}.{self.name}")
        log.msg(f"being called with {args!r} {kw!r}")
        traceback.print_stack(file=log.logfile)
        assert 0

    def __setitem__(self, n, obj):
        assert n == 0, "only zero index allowed"
        if not isinstance(obj, NotKnown):
            bound = _constructMethod(self.my_class, self.name, obj)
            self.resolveDependants(bound)
class _DictKeyAndValue:
def __init__(self, dict):
self.dict = dict
def __setitem__(self, n, obj):
if n not in (1, 0):
raise RuntimeError("DictKeyAndValue should only ever be called with 0 or 1")
if n: # value
self.value = obj
else:
self.key = obj
if hasattr(self, "key") and hasattr(self, "value"):
self.dict[self.key] = self.value
class _Dereference(NotKnown):
    """
    Placeholder for a back-reference (by id) that has not been seen yet.
    """

    def __init__(self, id):
        NotKnown.__init__(self)
        self.id = id
from twisted.internet.defer import Deferred
class _Defer(Deferred[object], NotKnown):
    """
    A Deferred that doubles as a NotKnown placeholder: it is paused until its
    value arrives via C{__setitem__}, then resolves its dependants once a
    dependant is attached.
    """
    def __init__(self):
        Deferred.__init__(self)
        NotKnown.__init__(self)
        # Hold callbacks until a dependant is attached (see addDependant).
        self.pause()
    # Guards against the value being delivered more than once.
    wasset = 0
    def __setitem__(self, n, obj):
        if self.wasset:
            raise RuntimeError(
                "setitem should only be called once, setting {!r} to {!r}".format(
                    n, obj
                )
            )
        else:
            self.wasset = 1
            self.callback(obj)
    def addDependant(self, dep, key):
        # by the time I'm adding a dependant, I'm *not* adding any more
        # callbacks
        NotKnown.addDependant(self, dep, key)
        self.unpause()
        # After unpausing, self.result holds the value passed to callback().
        resovd = self.result
        self.resolveDependants(resovd)

View File

@@ -0,0 +1,361 @@
# -*- test-case-name: twisted.test.test_dirdbm -*-
#
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.
"""
DBM-style interface to a directory.
Each key is stored as a single file. This is not expected to be very fast or
efficient, but it's good for easy debugging.
DirDBMs are *not* thread-safe, they should only be accessed by one thread at
a time.
No files should be placed in the working directory of a DirDBM save those
created by the DirDBM itself!
Maintainer: Itamar Shtull-Trauring
"""
import base64
import glob
import os
import pickle
from twisted.python.filepath import FilePath
try:
    # Keep a pre-existing ``_open`` if one was injected earlier; otherwise
    # fall back to the builtin open().  _readFile/_writeFile go through
    # ``_open`` so it can be swapped out — presumably a hook left over from
    # the old encrypted-dirdbm support; confirm before removing.
    _open  # type: ignore[has-type, used-before-def]
except NameError:
    _open = open
class DirDBM:
    """
    A directory with a DBM interface.
    This class presents a hash-like interface to a directory of small,
    flat files. It can only use strings as keys or values.
    """
    def __init__(self, name):
        """
        @type name: str
        @param name: Base path to use for the directory storage.
        """
        self.dname = os.path.abspath(name)
        self._dnamePath = FilePath(name)
        if not self._dnamePath.isdir():
            self._dnamePath.createDirectory()
        else:
            # Run recovery, in case we crashed. we delete all files ending
            # with ".new". Then we find all files who end with ".rpl". If a
            # corresponding file exists without ".rpl", we assume the write
            # failed and delete the ".rpl" file. If only a ".rpl" exist we
            # assume the program crashed right after deleting the old entry
            # but before renaming the replacement entry.
            #
            # NOTE: '.' is NOT in the base64 alphabet!
            for f in glob.glob(self._dnamePath.child("*.new").path):
                os.remove(f)
            replacements = glob.glob(self._dnamePath.child("*.rpl").path)
            for f in replacements:
                old = f[:-4]
                if os.path.exists(old):
                    os.remove(f)
                else:
                    os.rename(f, old)
    def _encode(self, k):
        """
        Encode a key so it can be used as a filename.
        """
        # NOTE: '_' is NOT in the base64 alphabet!
        # '\n' and '/' are replaced so the name is a single safe path segment.
        return base64.encodebytes(k).replace(b"\n", b"_").replace(b"/", b"-")
    def _decode(self, k):
        """
        Decode a filename to get the key.
        """
        return base64.decodebytes(k.replace(b"_", b"\n").replace(b"-", b"/"))
    def _readFile(self, path):
        """
        Read in the contents of a file.
        Override in subclasses to e.g. provide transparently encrypted dirdbm.
        """
        with _open(path.path, "rb") as f:
            s = f.read()
        return s
    def _writeFile(self, path, data):
        """
        Write data to a file.
        Override in subclasses to e.g. provide transparently encrypted dirdbm.
        """
        with _open(path.path, "wb") as f:
            f.write(data)
            f.flush()
    def __len__(self):
        """
        @return: The number of key/value pairs in this Shelf
        """
        return len(self._dnamePath.listdir())
    def __setitem__(self, k, v):
        """
        C{dirdbm[k] = v}
        Create or modify a textfile in this directory
        @type k: bytes
        @param k: key to set
        @type v: bytes
        @param v: value to associate with C{k}
        """
        if not type(k) == bytes:
            raise TypeError("DirDBM key must be bytes")
        if not type(v) == bytes:
            raise TypeError("DirDBM value must be bytes")
        k = self._encode(k)
        # We create a new file with extension .new, write the data to it, and
        # if the write succeeds delete the old file and rename the new one.
        old = self._dnamePath.child(k)
        if old.exists():
            new = old.siblingExtension(".rpl")  # Replacement entry
        else:
            new = old.siblingExtension(".new")  # New entry
        try:
            self._writeFile(new, v)
        except BaseException:
            # The temp write failed; remove the partial file and re-raise.
            new.remove()
            raise
        else:
            if old.exists():
                old.remove()
            new.moveTo(old)
    def __getitem__(self, k):
        """
        C{dirdbm[k]}
        Get the contents of a file in this directory as a string.
        @type k: bytes
        @param k: key to lookup
        @return: The value associated with C{k}
        @raise KeyError: Raised when there is no such key
        """
        if not type(k) == bytes:
            raise TypeError("DirDBM key must be bytes")
        path = self._dnamePath.child(self._encode(k))
        try:
            return self._readFile(path)
        except OSError:
            raise KeyError(k)
    def __delitem__(self, k):
        """
        C{del dirdbm[foo]}
        Delete a file in this directory.
        @type k: bytes
        @param k: key to delete
        @raise KeyError: Raised when there is no such key
        """
        if not type(k) == bytes:
            raise TypeError("DirDBM key must be bytes")
        k = self._encode(k)
        try:
            self._dnamePath.child(k).remove()
        except OSError:
            raise KeyError(self._decode(k))
    def keys(self):
        """
        @return: a L{list} of filenames (keys).
        """
        return list(map(self._decode, self._dnamePath.asBytesMode().listdir()))
    def values(self):
        """
        @return: a L{list} of file-contents (values).
        """
        vals = []
        keys = self.keys()
        for key in keys:
            vals.append(self[key])
        return vals
    def items(self):
        """
        @return: a L{list} of 2-tuples containing key/value pairs.
        """
        items = []
        keys = self.keys()
        for key in keys:
            items.append((key, self[key]))
        return items
    def has_key(self, key):
        """
        @type key: bytes
        @param key: The key to test
        @return: A true value if this dirdbm has the specified key, a false
            value otherwise.
        """
        if not type(key) == bytes:
            raise TypeError("DirDBM key must be bytes")
        key = self._encode(key)
        return self._dnamePath.child(key).isfile()
    def setdefault(self, key, value):
        """
        @type key: bytes
        @param key: The key to lookup
        @param value: The value to associate with key if key is not already
            associated with a value.
        """
        if key not in self:
            self[key] = value
            return value
        return self[key]
    def get(self, key, default=None):
        """
        @type key: bytes
        @param key: The key to lookup
        @param default: The value to return if the given key does not exist
        @return: The value associated with C{key} or C{default} if not
            L{DirDBM.has_key(key)}
        """
        if key in self:
            return self[key]
        else:
            return default
    def __contains__(self, key):
        """
        @see: L{DirDBM.has_key}
        """
        return self.has_key(key)
    def update(self, dict):
        """
        Add all the key/value pairs in L{dict} to this dirdbm. Any conflicting
        keys will be overwritten with the values from L{dict}.
        @type dict: mapping
        @param dict: A mapping of key/value pairs to add to this dirdbm.
        """
        for key, val in dict.items():
            self[key] = val
    def copyTo(self, path):
        """
        Copy the contents of this dirdbm to the dirdbm at C{path}.
        @type path: L{str}
        @param path: The path of the dirdbm to copy to. If a dirdbm
            exists at the destination path, it is cleared first.
        @rtype: C{DirDBM}
        @return: The dirdbm this dirdbm was copied to.
        """
        path = FilePath(path)
        assert path != self._dnamePath
        d = self.__class__(path.path)
        d.clear()
        for k in self.keys():
            d[k] = self[k]
        return d
    def clear(self):
        """
        Delete all key/value pairs in this dirdbm.
        """
        for k in self.keys():
            del self[k]
    def close(self):
        """
        Close this dbm: no-op, for dbm-style interface compliance.
        """
    def getModificationTime(self, key):
        """
        Returns modification time of an entry.
        @return: Last modification date (seconds since epoch) of entry C{key}
        @raise KeyError: Raised when there is no such key
        """
        if not type(key) == bytes:
            raise TypeError("DirDBM key must be bytes")
        path = self._dnamePath.child(self._encode(key))
        if path.isfile():
            return path.getModificationTime()
        else:
            raise KeyError(key)
class Shelf(DirDBM):
    """
    A directory with a DBM shelf interface.
    This class presents a hash-like interface to a directory of small,
    flat files. Keys must be strings, but values can be any given object.
    """

    def __setitem__(self, k, v):
        """
        C{shelf[foo] = bar}
        Pickle C{v} and store the result under C{k}.

        @type k: str
        @param k: The key to set
        @param v: The value to associate with C{key}
        """
        pickled = pickle.dumps(v)
        DirDBM.__setitem__(self, k, pickled)

    def __getitem__(self, k):
        """
        C{dirdbm[foo]}
        Fetch the stored bytes for C{k} and unpickle them.

        @type k: bytes
        @param k: The key to lookup
        @return: The value associated with the given key
        @raise KeyError: Raised if the given key does not exist
        """
        raw = DirDBM.__getitem__(self, k)
        return pickle.loads(raw)
def open(file, flag=None, mode=None):
    """
    This is for 'anydbm' compatibility.

    Create and return a L{DirDBM} rooted at C{file}; the dbm-style C{flag}
    and C{mode} arguments are accepted but ignored.

    @param file: The parameter to pass to the DirDBM constructor.
    @param flag: ignored
    @param mode: ignored
    """
    return DirDBM(file)
__all__ = ["open", "DirDBM", "Shelf"]

View File

@@ -0,0 +1,200 @@
# -*- test-case-name: twisted.test.test_sob -*-
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.
#
"""
Save and load Small OBjects to and from files, using various formats.
Maintainer: Moshe Zadka
"""
import os
import pickle
import sys
from zope.interface import Interface, implementer
from twisted.persisted import styles
from twisted.python import log, runtime
class IPersistable(Interface):
    """An object which can be saved in several formats to a file"""
    def setStyle(style):
        """Set desired format.
        @type style: string (one of 'pickle' or 'source')
        """
    def save(tag=None, filename=None, passphrase=None):
        """Save object to file.
        @type tag: string
        @type filename: string
        @type passphrase: string
        """
        # NOTE: the Persistent implementation rejects any non-None
        # passphrase with TypeError; the parameter is retained for
        # backwards compatibility of the signature only.
@implementer(IPersistable)
class Persistent:
    # Serialization format used by save(); one of 'pickle' or 'source'.
    style = "pickle"
    def __init__(self, original, name):
        """
        @param original: the object to persist.
        @param name: base name used to derive save filenames.
        """
        self.original = original
        self.name = name
    def setStyle(self, style):
        """Set desired format.
        @type style: string (one of 'pickle' or 'source')
        """
        self.style = style
    def _getFilename(self, filename, ext, tag):
        # Returns (finalname, tempname): data is first written to the temp
        # "-2" name, then renamed over the final name in save().
        if filename:
            finalname = filename
            filename = finalname + "-2"
        elif tag:
            filename = f"{self.name}-{tag}-2.{ext}"
            finalname = f"{self.name}-{tag}.{ext}"
        else:
            filename = f"{self.name}-2.{ext}"
            finalname = f"{self.name}.{ext}"
        return finalname, filename
    def _saveTemp(self, filename, dumpFunc):
        # Write the serialized form of self.original to the temporary file.
        with open(filename, "wb") as f:
            dumpFunc(self.original, f)
    def _getStyle(self):
        # Select the (extension, dump function) pair for the current style.
        if self.style == "source":
            from twisted.persisted.aot import jellyToSource as dumpFunc
            ext = "tas"
        else:
            def dumpFunc(obj, file=None):
                # Pickle protocol 2 — presumably kept for compatibility with
                # older readers; confirm before changing.
                pickle.dump(obj, file, 2)
            ext = "tap"
        return ext, dumpFunc
    def save(self, tag=None, filename=None, passphrase=None):
        """Save object to file.
        @type tag: string
        @type filename: string
        @type passphrase: string
        @raise TypeError: if a passphrase is supplied (no longer supported).
        """
        ext, dumpFunc = self._getStyle()
        if passphrase is not None:
            raise TypeError("passphrase must be None")
        finalname, filename = self._getFilename(filename, ext, tag)
        log.msg("Saving " + self.name + " application to " + finalname + "...")
        self._saveTemp(filename, dumpFunc)
        # os.rename cannot replace an existing file on Windows, so remove
        # the old file first there.
        if runtime.platformType == "win32" and os.path.isfile(finalname):
            os.remove(finalname)
        os.rename(filename, finalname)
        log.msg("Saved.")
# "Persistant" has been present since 1.0.7, so retain it for compatibility
Persistant = Persistent
class _EverythingEphemeral(styles.Ephemeral):
    """
    Stand-in for C{__main__} while loading: attribute lookups fall through
    to the real main module.  While C{initRun} is true, missing attributes
    raise; otherwise they are replaced with fresh Ephemerals (with a
    warning logged).
    """

    initRun = 0

    def __init__(self, mainMod):
        """
        @param mainMod: The '__main__' module that this class will proxy.
        """
        self.mainMod = mainMod

    def __getattr__(self, key):
        try:
            return getattr(self.mainMod, key)
        except AttributeError:
            if self.initRun:
                raise
            log.msg("Warning! Loading from __main__: %s" % key)
            return styles.Ephemeral()
def load(filename, style):
    """Load an object from a file.

    Deserialize an object from a file, upgrading any L{styles.Versioned}
    instances it contains and re-attaching its persistence style.
    @param filename: string
    @param style: string (one of 'pickle' or 'source')
    """
    mode = "r"
    if style == "source":
        from twisted.persisted.aot import unjellyFromSource as _load
    else:
        _load, mode = pickle.load, "rb"
    fp = open(filename, mode)
    # Proxy __main__ during the load so names pickled against it can still
    # be resolved (see _EverythingEphemeral).
    ee = _EverythingEphemeral(sys.modules["__main__"])
    sys.modules["__main__"] = ee
    ee.initRun = 1
    with fp:
        try:
            value = _load(fp)
        finally:
            # restore __main__ if an exception is raised.
            sys.modules["__main__"] = ee.mainMod
    styles.doUpgrade()
    ee.initRun = 0
    persistable = IPersistable(value, None)
    if persistable is not None:
        persistable.setStyle(style)
    return value
def loadValueFromFile(filename, variable):
    """Load the value of a variable in a Python file.

    Run the contents of the file in a fresh namespace and return whatever
    value is bound to the name C{variable} afterwards.

    @param filename: string
    @param variable: string
    @raise KeyError: if the executed file did not bind C{variable}.
    """
    with open(filename) as fileObj:
        source = fileObj.read()
    namespace = {"__file__": filename}
    # NOTE: this executes arbitrary Python source; only use on trusted files.
    exec(compile(source, filename, "exec"), namespace, namespace)
    return namespace[variable]
def guessType(filename):
    """
    Guess a persistence style from a filename extension.

    @param filename: the filename to inspect.
    @return: one of C{'python'}, C{'pickle'} or C{'source'}.
    @raise KeyError: if the extension is not recognized.
    """
    extensionToType = {
        ".tac": "python",
        ".etac": "python",
        ".py": "python",
        ".tap": "pickle",
        ".etap": "pickle",
        ".tas": "source",
        ".etas": "source",
    }
    extension = os.path.splitext(filename)[1]
    return extensionToType[extension]
__all__ = [
"loadValueFromFile",
"load",
"Persistent",
"Persistant",
"IPersistable",
"guessType",
]

View File

@@ -0,0 +1,391 @@
# -*- test-case-name: twisted.test.test_persisted -*-
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.
"""
Different styles of persisted objects.
"""
import copy
import copyreg as copy_reg
import inspect
import pickle
import types
from io import StringIO as _cStringIO
from typing import Dict
from twisted.python import log, reflect
from twisted.python.compat import _PYPY
oldModules: Dict[str, types.ModuleType] = {}
_UniversalPicklingError = pickle.PicklingError
def pickleMethod(method):
    """Support function for copy_reg to pickle method refs."""
    state = (method.__name__, method.__self__, method.__self__.__class__)
    return (unpickleMethod, state)
def _methodFunction(classObject, methodName):
"""
Retrieve the function object implementing a method name given the class
it's on and a method name.
@param classObject: A class to retrieve the method's function from.
@type classObject: L{type}
@param methodName: The name of the method whose function to retrieve.
@type methodName: native L{str}
@return: the function object corresponding to the given method name.
@rtype: L{types.FunctionType}
"""
methodObject = getattr(classObject, methodName)
return methodObject
def unpickleMethod(im_name, im_self, im_class):
    """
    Support function for copy_reg to unpickle method refs.

    @param im_name: The name of the method.
    @type im_name: native L{str}
    @param im_self: The instance that the method was present on.
    @type im_self: L{object}
    @param im_class: The class where the method was declared.
    @type im_class: L{type} or L{None}
    """
    if im_self is None:
        return getattr(im_class, im_name)
    try:
        methodFunction = _methodFunction(im_class, im_name)
    except AttributeError:
        log.msg("Method", im_name, "not on class", im_class)
        assert im_self is not None, "No recourse: no instance to guess from."
        # Attempt a last-ditch fix before giving up. If classes have changed
        # around since we pickled this method, we may still be able to get it
        # by looking on the instance's current class.
        if im_self.__class__ is im_class:
            raise
        return unpickleMethod(im_name, im_self, im_self.__class__)
    else:
        return types.MethodType(methodFunction, im_self)
copy_reg.pickle(types.MethodType, pickleMethod)
def _pickleFunction(f):
    """
    Reduce a function to a reference by fully qualified name so pickle can
    reconstruct it later via L{_unpickleFunction}.

    @param f: The function to reduce.
    @type f: L{types.FunctionType}
    @return: a 2-tuple of L{_unpickleFunction} and a 1-tuple holding the
        function's fully qualified name.
    @raise _UniversalPicklingError: for lambdas, which have no stable name.
    """
    if f.__name__ == "<lambda>":
        raise _UniversalPicklingError(f"Cannot pickle lambda function: {f}")
    qualifiedName = f"{f.__module__}.{f.__qualname__}"
    return (_unpickleFunction, (qualifiedName,))
def _unpickleFunction(fullyQualifiedName):
    """
    Import and return the function named by C{fullyQualifiedName}.

    This is a synonym for L{twisted.python.reflect.namedAny}, but imported
    locally to avoid circular imports, and also to provide a persistent name
    that can be stored (and deprecated) independently of C{namedAny}.

    @param fullyQualifiedName: The fully qualified name of a function.
    @type fullyQualifiedName: native C{str}
    @return: A function object imported from the given location.
    @rtype: L{types.FunctionType}
    """
    from twisted.python.reflect import namedAny

    return namedAny(fullyQualifiedName)
copy_reg.pickle(types.FunctionType, _pickleFunction)
def pickleModule(module):
    """Support function for copy_reg to pickle module refs."""
    moduleName = module.__name__
    return unpickleModule, (moduleName,)
def unpickleModule(name):
    """Support function for copy_reg to unpickle module refs."""
    if name in oldModules:
        # The module has been renamed; follow the forwarding entry.
        log.msg("Module has moved: %s" % name)
        name = oldModules[name]
        log.msg(name)
    return __import__(name, {}, {}, "x")
copy_reg.pickle(types.ModuleType, pickleModule)
def pickleStringO(stringo):
    """
    Reduce the given cStringO.

    This is only called on Python 2, because the cStringIO module only exists
    on Python 2.

    @param stringo: The string output to pickle.
    @type stringo: C{cStringIO.OutputType}
    """
    state = (stringo.getvalue(), stringo.tell())
    return unpickleStringO, state
def unpickleStringO(val, sek):
    """
    Rebuild a writable in-memory file from pickled content and position.

    @param val: The content of the file.
    @param sek: The seek position of the file.
    @type sek: L{int}
    @return: a file-like object which you can write to, positioned at
        C{sek} and containing C{val}.
    @rtype: L{io.StringIO} on Python 3.
    """
    stream = _cStringIO()
    stream.write(val)
    stream.seek(sek)
    return stream
def pickleStringI(stringi):
    """
    Reduce the given cStringI.

    This is only called on Python 2, because the cStringIO module only exists
    on Python 2.

    @param stringi: The string input to pickle.
    @type stringi: C{cStringIO.InputType}
    @return: a 2-tuple of (C{unpickleStringI}, (bytes, pointer))
    @rtype: 2-tuple of (function, (bytes, int))
    """
    state = (stringi.getvalue(), stringi.tell())
    return unpickleStringI, state
def unpickleStringI(val, sek):
    """
    Rebuild a readable in-memory file from pickled content and position.

    @param val: The content of the file.
    @param sek: The seek position of the file.
    @type sek: L{int}
    @return: a file-like object containing C{val}, positioned at C{sek}.
    @rtype: L{io.StringIO} on Python 3.
    """
    stream = _cStringIO(val)
    stream.seek(sek)
    return stream
class Ephemeral:
    """
    This type of object is never persisted; if possible, even references to it
    are eliminated.
    """

    def __reduce__(self):
        """
        Serialize any subclass of L{Ephemeral} in a way which replaces it with
        L{Ephemeral} itself.
        """
        return (Ephemeral, ())

    def __getstate__(self):
        log.msg("WARNING: serializing ephemeral %s" % self)
        if not _PYPY:
            import gc

            if getattr(gc, "get_referrers", None):
                # Help debugging by listing what still points at us.
                for referrer in gc.get_referrers(self):
                    log.msg(f"  referred to by {referrer}")
        return None

    def __setstate__(self, state):
        log.msg("WARNING: unserializing ephemeral %s" % self.__class__)
        self.__class__ = Ephemeral
versionedsToUpgrade: Dict[int, "Versioned"] = {}
upgraded = {}
def doUpgrade():
    """
    Upgrade every L{Versioned} instance registered during unpickling, then
    reset the module-level registries.
    """
    global versionedsToUpgrade, upgraded
    for versioned in list(versionedsToUpgrade.values()):
        requireUpgrade(versioned)
    versionedsToUpgrade = {}
    upgraded = {}
def requireUpgrade(obj):
    """Require that a Versioned instance be upgraded completely first."""
    objID = id(obj)
    # Upgrade each registered instance at most once per doUpgrade() cycle.
    if objID in versionedsToUpgrade and objID not in upgraded:
        upgraded[objID] = 1
        obj.versionUpgrade()
    return obj
def _aybabtu(c):
    """
    Get all of the parent classes of C{c}, not including C{c} itself, which
    are strict subclasses of L{Versioned}.

    @param c: a class
    @returns: list of classes
    """
    # These two classes must not appear in the result.
    excluded = [c, Versioned]
    found = []
    for candidate in inspect.getmro(c):
        if candidate in excluded or candidate in found:
            continue
        if issubclass(candidate, Versioned):
            found.append(candidate)
    return found
class Versioned:
    """
    This type of object is persisted with versioning information.
    I have a single class attribute, the int persistenceVersion. After I am
    unserialized (and styles.doUpgrade() is called), self.upgradeToVersionX()
    will be called for each version upgrade I must undergo.
    For example, if I serialize an instance of a Foo(Versioned) at version 4
    and then unserialize it when the code is at version 9, the calls::
        self.upgradeToVersion5()
        self.upgradeToVersion6()
        self.upgradeToVersion7()
        self.upgradeToVersion8()
        self.upgradeToVersion9()
    will be made. If any of these methods are undefined, a warning message
    will be printed.
    """
    persistenceVersion = 0
    # Attribute names dropped from the pickled state in __getstate__.
    persistenceForgets = ()
    def __setstate__(self, state):
        # Register for upgrade; the actual upgrade runs in doUpgrade().
        versionedsToUpgrade[id(self)] = self
        self.__dict__ = state
    def __getstate__(self, dict=None):
        """Get state, adding a version number to it on its way out."""
        dct = copy.copy(dict or self.__dict__)
        bases = _aybabtu(self.__class__)
        bases.reverse()
        bases.append(self.__class__)  # don't forget me!!
        for base in bases:
            if "persistenceForgets" in base.__dict__:
                for slot in base.persistenceForgets:
                    if slot in dct:
                        del dct[slot]
            if "persistenceVersion" in base.__dict__:
                # Record each base's version under a per-class key.
                dct[
                    f"{reflect.qual(base)}.persistenceVersion"
                ] = base.persistenceVersion
        return dct
    def versionUpgrade(self):
        """(internal) Do a version upgrade."""
        bases = _aybabtu(self.__class__)
        # put the bases in order so superclasses' persistenceVersion methods
        # will be called first.
        bases.reverse()
        bases.append(self.__class__)  # don't forget me!!
        # first let's look for old-skool versioned's
        if "persistenceVersion" in self.__dict__:
            # Hacky heuristic: if more than one class subclasses Versioned,
            # we'll assume that the higher version number wins for the older
            # class, so we'll consider the attribute the version of the older
            # class. There are obviously possibly times when this will
            # eventually be an incorrect assumption, but hopefully old-school
            # persistenceVersion stuff won't make it that far into multiple
            # classes inheriting from Versioned.
            pver = self.__dict__["persistenceVersion"]
            del self.__dict__["persistenceVersion"]
            highestVersion = 0
            highestBase = None
            for base in bases:
                if "persistenceVersion" not in base.__dict__:
                    continue
                if base.persistenceVersion > highestVersion:
                    highestBase = base
                    highestVersion = base.persistenceVersion
            if highestBase:
                self.__dict__[
                    "%s.persistenceVersion" % reflect.qual(highestBase)
                ] = pver
        for base in bases:
            # ugly hack, but it's what the user expects, really
            if (
                Versioned not in base.__bases__
                and "persistenceVersion" not in base.__dict__
            ):
                continue
            currentVers = base.persistenceVersion
            pverName = "%s.persistenceVersion" % reflect.qual(base)
            persistVers = self.__dict__.get(pverName) or 0
            if persistVers:
                del self.__dict__[pverName]
            assert persistVers <= currentVers, "Sorry, can't go backwards in time."
            # Step version-by-version, calling upgradeToVersionN if defined.
            while persistVers < currentVers:
                persistVers = persistVers + 1
                method = base.__dict__.get("upgradeToVersion%s" % persistVers, None)
                if method:
                    log.msg(
                        "Upgrading %s (of %s @ %s) to version %s"
                        % (
                            reflect.qual(base),
                            reflect.qual(self.__class__),
                            id(self),
                            persistVers,
                        )
                    )
                    method(self)
                else:
                    log.msg(
                        "Warning: cannot upgrade {} to version {}".format(
                            base, persistVers
                        )
                    )

View File

@@ -0,0 +1,6 @@
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.
"""
Tests for L{twisted.persisted}.
"""

View File

@@ -0,0 +1,128 @@
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.
"""
Tests for L{twisted.persisted.styles}.
"""
import copy
import pickle
from twisted.persisted.styles import _UniversalPicklingError, unpickleMethod
from twisted.trial import unittest
class Foo:
    """
    A helper with one attribute and one method that reads it.
    """

    def __init__(self) -> None:
        # Default value read back by method().
        self.instance_member = "test-value"

    def method(self):
        """
        Return the current value of C{instance_member}.
        """
        return self.instance_member
class Bar:
    """
    A helper class that deliberately defines no methods.
    """
def sampleFunction() -> None:
    """
    A module-level function fixture for the pickling tests.
    """


# Kept as a real lambda (not a def): the pickling tests rely on a
# function whose __name__ is "<lambda>".
lambdaExample = lambda value: value
class UniversalPicklingErrorTests(unittest.TestCase):
    """
    Tests for the L{_UniversalPicklingError} exception type.
    """

    def raise_UniversalPicklingError(self):
        """
        Unconditionally raise L{_UniversalPicklingError}.
        """
        raise _UniversalPicklingError

    def test_handledByPickleModule(self) -> None:
        """
        Raising L{_UniversalPicklingError} is caught by a handler
        for L{pickle.PicklingError}.
        """
        with self.assertRaises(pickle.PicklingError):
            self.raise_UniversalPicklingError()
class UnpickleMethodTests(unittest.TestCase):
    """
    Tests for the unpickleMethod function.
    """

    def test_instanceBuildingNamePresent(self) -> None:
        """
        When the named method exists on the supplied class,
        L{unpickleMethod} produces a bound method equal to, but distinct
        from, the instance's own bound method.
        """
        instance = Foo()
        rebuilt = unpickleMethod("method", instance, Foo)
        self.assertEqual(rebuilt, instance.method)
        self.assertIsNot(rebuilt, instance.method)

    def test_instanceCopyMethod(self) -> None:
        """
        Copying a bound method yields a distinct but equal method that
        still reads live instance state.
        """
        instance = Foo()
        duplicate = copy.copy(instance.method)
        self.assertEqual(duplicate, instance.method)
        self.assertIsNot(duplicate, instance.method)
        self.assertEqual("test-value", duplicate())
        instance.instance_member = "new-value"
        self.assertEqual("new-value", duplicate())

    def test_instanceBuildingNameNotPresent(self) -> None:
        """
        When the named method is absent from the supplied class,
        L{unpickleMethod} falls back to the instance's own class and
        binds the method found there.
        """
        instance = Foo()
        rebuilt = unpickleMethod("method", instance, Bar)
        self.assertEqual(rebuilt, instance.method)
        self.assertIsNot(rebuilt, instance.method)

    def test_copyFunction(self) -> None:
        """
        Copying a plain function yields an object equal to the original
        (no actual copy is made).
        """
        self.assertEqual(copy.copy(sampleFunction), sampleFunction)

    def test_primeDirective(self) -> None:
        """
        Pickling an ordinary module-level function is unaffected by
        Twisted's method-pickling support.
        """

        def expected(suffix):
            # Protocol-0 global reference: "c<module>\n<name>\np<N>\n."
            text = (
                "c" + __name__ + "\n" + sampleFunction.__name__
                + "\n" + "p" + suffix + "\n" + "."
            )
            return text.encode("ascii")

        self.assertEqual(pickle.dumps(sampleFunction, protocol=0), expected("0"))

    def test_lambdaRaisesPicklingError(self) -> None:
        """
        Pickling a C{lambda} function ought to raise a L{pickle.PicklingError}.
        """
        with self.assertRaises(pickle.PicklingError):
            pickle.dumps(lambdaExample)