# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # SaferScanner is just like re.Scanner, but it neuters any grouping in the lexicon # regular expressions and throws an error on group references, named groups, or # regex in-pattern flags. Any of those can break correct operation of Scanner. import re try: from sre_constants import BRANCH, SUBPATTERN, GROUPREF, GROUPREF_IGNORE, GROUPREF_EXISTS except ImportError: from re._constants import BRANCH, SUBPATTERN, GROUPREF, GROUPREF_IGNORE, GROUPREF_EXISTS from sys import version_info class SaferScannerBase(re.Scanner): @classmethod def subpat(cls, phrase, flags): return cls.scrub_sub(re.sre_parse.parse(phrase, flags), flags) @classmethod def scrub_sub(cls, sub, flags): scrubbedsub = [] seqtypes = (type(()), type([])) for op, arg in sub.data: if type(arg) in seqtypes: arg = [cls.scrub_sub(a, flags) if isinstance(a, re.sre_parse.SubPattern) else a for a in arg] if op in (BRANCH, SUBPATTERN): arg = [None] + arg[1:] if op in (GROUPREF, GROUPREF_IGNORE, GROUPREF_EXISTS): raise ValueError("Group references not allowed in SaferScanner lexicon") scrubbedsub.append((op, arg)) if sub.pattern.groupdict: raise ValueError("Named captures not allowed in SaferScanner lexicon") if sub.pattern.flags ^ flags: raise ValueError("RE flag setting not allowed in SaferScanner lexicon (%s)" % (bin(sub.pattern.flags),)) return re.sre_parse.SubPattern(sub.pattern, scrubbedsub) class Py36SaferScanner(SaferScannerBase): def __init__(self, lexicon, flags=0): self.lexicon = lexicon p = [] s = re.sre_parse.Pattern() s.flags = flags for phrase, action in lexicon: gid = s.opengroup() p.append(re.sre_parse.SubPattern(s, [(SUBPATTERN, (gid, 0, 0, re.sre_parse.parse(phrase, flags))), ])) s.closegroup(gid, p[-1]) p = re.sre_parse.SubPattern(s, [(BRANCH, (None, p))]) self.p = p self.scanner = re.sre_compile.compile(p) class Py38SaferScanner(SaferScannerBase): def __init__(self, lexicon, flags=0): self.lexicon = lexicon p = [] s = re.sre_parse.State() s.flags = flags for phrase, action in lexicon: gid = s.opengroup() p.append(re.sre_parse.SubPattern(s, [(SUBPATTERN, (gid, 0, 0, re.sre_parse.parse(phrase, flags))), ])) s.closegroup(gid, p[-1]) p = re.sre_parse.SubPattern(s, [(BRANCH, (None, p))]) self.p = p self.scanner = re.sre_compile.compile(p) class Py311SaferScanner(SaferScannerBase): def __init__(self, lexicon, flags=0): self.lexicon = lexicon p = [] s = re._parser.State() s.flags = flags for phrase, action in lexicon: gid = s.opengroup() p.append(re._parser.SubPattern(s, [(SUBPATTERN, (gid, 0, 0, re._parser.parse(phrase, flags))), ])) s.closegroup(gid, p[-1]) p = re._parser.SubPattern(s, [(BRANCH, (None, p))]) self.p = p self.scanner = re._compiler.compile(p) SaferScanner = Py36SaferScanner if version_info >= (3, 11): SaferScanner = Py311SaferScanner elif version_info >= (3, 8): SaferScanner = Py38SaferScanner