# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import calendar import datetime import math import os import re import sys from collections import defaultdict from cassandra.cqltypes import EMPTY from cassandra.util import datetime_from_timestamp from . import wcwidth from .displaying import colorme, get_str, FormattedValue, DEFAULT_VALUE_COLORS, NO_COLOR_MAP from .util import UTC unicode_controlchars_re = re.compile(r'[\x00-\x1f\x7f-\xa0]') controlchars_re = re.compile(r'[\x00-\x1f\x7f-\xff]') def _show_control_chars(match): txt = repr(match.group(0)) if txt.startswith('u'): txt = txt[2:-1] else: txt = txt[1:-1] return txt bits_to_turn_red_re = re.compile(r'\\([^uUx]|u[0-9a-fA-F]{4}|x[0-9a-fA-F]{2}|U[0-9a-fA-F]{8})') def _make_turn_bits_red_f(color1, color2): def _turn_bits_red(match): txt = match.group(0) if txt == '\\\\': return '\\' return color1 + txt + color2 return _turn_bits_red default_null_placeholder = 'null' default_float_precision = 3 default_colormap = DEFAULT_VALUE_COLORS empty_colormap = defaultdict(lambda: '') def format_by_type(val, cqltype, encoding, colormap=None, addcolor=False, nullval=None, date_time_format=None, float_precision=None, decimal_sep=None, thousands_sep=None, boolean_styles=None): if nullval is None: nullval = default_null_placeholder if val is None: return colorme(nullval, colormap, 'error') if addcolor is False: colormap = empty_colormap elif colormap is None: colormap = default_colormap if date_time_format is None: date_time_format = DateTimeFormat() if float_precision is None: float_precision = default_float_precision return format_value(val, cqltype=cqltype, encoding=encoding, colormap=colormap, date_time_format=date_time_format, float_precision=float_precision, nullval=nullval, decimal_sep=decimal_sep, thousands_sep=thousands_sep, boolean_styles=boolean_styles) def color_text(bval, colormap, displaywidth=None): # note that here, we render natural backslashes as just backslashes, # in the same color as surrounding text, when using color. When not # using color, we need to double up the backslashes so it's not # ambiguous. This introduces the unique difficulty of having different # display widths for the colored and non-colored versions. To avoid # adding the smarts to handle that in to FormattedValue, we just # make an explicit check to see if a null colormap is being used or # not. if displaywidth is None: displaywidth = len(bval) tbr = _make_turn_bits_red_f(colormap['blob'], colormap['text']) coloredval = colormap['text'] + bits_to_turn_red_re.sub(tbr, bval) + colormap['reset'] if colormap['text']: displaywidth -= bval.count(r'\\') return FormattedValue(bval, coloredval, displaywidth) DEFAULT_NANOTIME_FORMAT = '%H:%M:%S.%N' DEFAULT_DATE_FORMAT = '%Y-%m-%d' DEFAULT_TIMESTAMP_FORMAT = os.environ.get('CQLSH_DEFAULT_TIMESTAMP_FORMAT', '') if not DEFAULT_TIMESTAMP_FORMAT: DEFAULT_TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S.%f%z' class DateTimeFormat: def __init__(self, timestamp_format=DEFAULT_TIMESTAMP_FORMAT, date_format=DEFAULT_DATE_FORMAT, nanotime_format=DEFAULT_NANOTIME_FORMAT, timezone=None, milliseconds_only=False): self.timestamp_format = timestamp_format self.date_format = date_format self.nanotime_format = nanotime_format self.timezone = timezone self.milliseconds_only = milliseconds_only # the microseconds part, .NNNNNN, wil be rounded to .NNN class CqlType: """ A class for converting a string into a cql type name that can match a formatter and a list of its sub-types, if any. """ pattern = re.compile('^([^<]*)<(.*)>$') # *<*> def __init__(self, typestring, ksmeta=None): self.type_name, self.sub_types, self.formatter = self.parse(typestring, ksmeta) def __str__(self): return "%s%s" % (self.type_name, self.sub_types or '') __repr__ = __str__ def get_n_sub_types(self, num): """ Return the sub-types if the requested number matches the length of the sub-types (tuples) or the first sub-type times the number requested if the length of the sub-types is one (list, set), otherwise raise an exception """ if len(self.sub_types) == num: return self.sub_types elif len(self.sub_types) == 1: return [self.sub_types[0]] * num else: raise Exception("Unexpected number of subtypes %d - %s" % (num, self.sub_types)) def parse(self, typestring, ksmeta): """ Parse the typestring by looking at this pattern: *<*>. If there is no match then the type is either a simple type or a user type, otherwise it must be a composite type for which we need to look-up the sub-types. For user types the sub types can be extracted from the keyspace metadata. """ while True: m = self.pattern.match(typestring) if not m: # no match, either a simple or a user type name = typestring if ksmeta and name in ksmeta.user_types: # a user type, look at ks meta for sub types sub_types = [CqlType(t, ksmeta) for t in ksmeta.user_types[name].field_types] return name, sub_types, format_value_utype else: return name, [], self._get_formatter(name) else: if m.group(1) == 'frozen': # ignore frozen<> typestring = m.group(2) continue name = m.group(1) # a composite type, parse sub types return name, self.parse_sub_types(m.group(2), ksmeta), self._get_formatter(name) @staticmethod def _get_formatter(name): return _formatters.get(name.lower()) @staticmethod def parse_sub_types(val, ksmeta): """ Split val into sub-strings separated by commas but only if not within a <> pair Return a list of CqlType instances where each instance is initialized with the sub-strings that were found. """ last = 0 level = 0 ret = [] for i, c in enumerate(val): if c == '<': level += 1 elif c == '>': level -= 1 elif c == ',' and level == 0: ret.append(val[last:i].strip()) last = i + 1 if last < len(val) - 1: ret.append(val[last:].strip()) return [CqlType(r, ksmeta) for r in ret] def format_value_default(val, colormap, **_): val = str(val) escapedval = val.replace('\\', '\\\\') bval = controlchars_re.sub(_show_control_chars, escapedval) return bval if colormap is NO_COLOR_MAP else color_text(bval, colormap) # Mapping cql type base names ("int", "map", etc) to formatter functions, # making format_value a generic function _formatters = {} def format_value(val, cqltype, **kwargs): if val == EMPTY: return format_value_default('', **kwargs) formatter = get_formatter(val, cqltype) return formatter(val, cqltype=cqltype, **kwargs) def get_formatter(val, cqltype): if cqltype and cqltype.formatter: return cqltype.formatter return _formatters.get(type(val).__name__.lower(), format_value_default) def formatter_for(typname): def registrator(f): _formatters[typname.lower()] = f return f return registrator class BlobType: def __init__(self, val): self.val = val def __str__(self): return str(self.val) @formatter_for('BlobType') def format_value_blob(val, colormap, **_): bval = '0x' + val.hex() return colorme(bval, colormap, 'blob') formatter_for('bytearray')(format_value_blob) formatter_for('buffer')(format_value_blob) formatter_for('blob')(format_value_blob) def format_python_formatted_type(val, colormap, color, quote=False): bval = str(val) if quote: bval = "'%s'" % bval return colorme(bval, colormap, color) @formatter_for('Decimal') def format_value_decimal(val, float_precision, colormap, decimal_sep=None, thousands_sep=None, **_): if (decimal_sep and decimal_sep != '.') or thousands_sep: return format_floating_point_type(val, colormap, float_precision, decimal_sep, thousands_sep) return format_python_formatted_type(val, colormap, 'decimal') @formatter_for('UUID') def format_value_uuid(val, colormap, **_): return format_python_formatted_type(val, colormap, 'uuid') formatter_for('timeuuid')(format_value_uuid) @formatter_for('inet') def formatter_value_inet(val, colormap, quote=False, **_): return format_python_formatted_type(val, colormap, 'inet', quote=quote) @formatter_for('bool') def format_value_boolean(val, colormap, boolean_styles=None, **_): if boolean_styles: val = boolean_styles[0] if val else boolean_styles[1] return format_python_formatted_type(val, colormap, 'boolean') formatter_for('boolean')(format_value_boolean) def format_floating_point_type(val, colormap, float_precision, decimal_sep=None, thousands_sep=None, **_): if math.isnan(val): bval = 'NaN' elif math.isinf(val): bval = 'Infinity' if val > 0 else '-Infinity' else: if thousands_sep: dpart, ipart = math.modf(val) bval = format_integer_with_thousands_sep(ipart, thousands_sep) dpart_str = ('%.*f' % (float_precision, math.fabs(dpart)))[2:].rstrip('0') if dpart_str: bval += '%s%s' % ('.' if not decimal_sep else decimal_sep, dpart_str) else: exponent = int(math.log10(abs(val))) if abs(val) > sys.float_info.epsilon else -sys.maxsize - 1 if -4 <= exponent < float_precision: # when this is true %g will not use scientific notation, # increasing precision should not change this decision # so we increase the precision to take into account the # digits to the left of the decimal point float_precision = float_precision + exponent + 1 bval = '%.*g' % (float_precision, val) if decimal_sep: bval = bval.replace('.', decimal_sep) return colorme(bval, colormap, 'float') formatter_for('float')(format_floating_point_type) formatter_for('double')(format_floating_point_type) def format_integer_type(val, colormap, thousands_sep=None, **_): # base-10 only for now; support others? bval = format_integer_with_thousands_sep(val, thousands_sep) if thousands_sep else str(val) bval = str(bval) return colorme(bval, colormap, 'int') # We can get rid of this in cassandra-2.2 if sys.version_info >= (2, 7): def format_integer_with_thousands_sep(val, thousands_sep=','): return "{:,.0f}".format(val).replace(',', thousands_sep) else: def format_integer_with_thousands_sep(val, thousands_sep=','): if val < 0: return '-' + format_integer_with_thousands_sep(-val, thousands_sep) result = '' while val >= 1000: val, r = divmod(val, 1000) result = "%s%03d%s" % (thousands_sep, r, result) return "%d%s" % (val, result) formatter_for('long')(format_integer_type) formatter_for('int')(format_integer_type) formatter_for('bigint')(format_integer_type) formatter_for('varint')(format_integer_type) formatter_for('duration')(format_integer_type) @formatter_for('datetime') def format_value_timestamp(val, colormap, date_time_format, quote=False, **_): if isinstance(val, datetime.datetime): bval = strftime(date_time_format.timestamp_format, calendar.timegm(val.utctimetuple()), microseconds=val.microsecond, timezone=date_time_format.timezone) if date_time_format.milliseconds_only: bval = round_microseconds(bval) else: bval = str(val) if quote: bval = "'%s'" % bval return colorme(bval, colormap, 'timestamp') formatter_for('timestamp')(format_value_timestamp) def strftime(time_format, seconds, microseconds=0, timezone=None): ret_dt = datetime_from_timestamp(seconds) + datetime.timedelta(microseconds=microseconds) ret_dt = ret_dt.replace(tzinfo=UTC()) if timezone: ret_dt = ret_dt.astimezone(timezone) try: return ret_dt.strftime(time_format) except ValueError: # CASSANDRA-13185: if the date cannot be formatted as a string, return a string with the milliseconds # since the epoch. cqlsh does the exact same thing for values below datetime.MINYEAR (1) or above # datetime.MAXYEAR (9999). Some versions of strftime() also have problems for dates between MIN_YEAR and 1900. # cqlsh COPY assumes milliseconds from the epoch if it fails to parse a datetime string, and so it is # able to correctly import timestamps exported as milliseconds since the epoch. return '%d' % (seconds * 1000.0) microseconds_regex = re.compile(r"(.*)(?:\.(\d{1,6}))(.*)") def round_microseconds(val): """ For COPY TO, we need to round microsecond to milliseconds because server side TimestampSerializer.dateStringPatterns only parses milliseconds. If we keep microseconds, users may try to import with COPY FROM a file generated with COPY TO and have problems if prepared statements are disabled, see CASSANDRA-11631. """ m = microseconds_regex.match(val) if not m: return val milliseconds = int(m.group(2)) * pow(10, 3 - len(m.group(2))) return '%s.%03d%s' % (m.group(1), milliseconds, '' if not m.group(3) else m.group(3)) @formatter_for('Date') def format_value_date(val, colormap, **_): return format_python_formatted_type(val, colormap, 'date') @formatter_for('Time') def format_value_time(val, colormap, **_): return format_python_formatted_type(val, colormap, 'time') @formatter_for('Duration') def format_value_duration(val, colormap, **_): return format_python_formatted_type(duration_as_str(val.months, val.days, val.nanoseconds), colormap, 'duration') def duration_as_str(months, days, nanoseconds): builder = list() if months < 0 or days < 0 or nanoseconds < 0: builder.append('-') remainder = append(builder, abs(months), MONTHS_PER_YEAR, "y") append(builder, remainder, 1, "mo") append(builder, abs(days), 1, "d") if nanoseconds != 0: remainder = append(builder, abs(nanoseconds), NANOS_PER_HOUR, "h") remainder = append(builder, remainder, NANOS_PER_MINUTE, "m") remainder = append(builder, remainder, NANOS_PER_SECOND, "s") remainder = append(builder, remainder, NANOS_PER_MILLI, "ms") remainder = append(builder, remainder, NANOS_PER_MICRO, "us") append(builder, remainder, 1, "ns") return ''.join(builder) def append(builder, dividend, divisor, unit): if dividend == 0 or dividend < divisor: return dividend builder.append(str(dividend // divisor)) builder.append(unit) return dividend % divisor def decode_vint(buf): return decode_zig_zag_64(decode_unsigned_vint(buf)) def decode_unsigned_vint(buf): """ Cassandra vints are encoded differently than the varints used in protocol buffer. The Cassandra vints are encoded with the most significant group first. The most significant byte will contains the information about how many extra bytes need to be read as well as the most significant bits of the integer. The number extra bytes to read is encoded as 1 bits on the left side. For example, if we need to read 3 more bytes the first byte will start with 1110. """ first_byte = next(buf) if (first_byte >> 7) == 0: return first_byte size = number_of_extra_bytes_to_read(first_byte) retval = first_byte & (0xff >> size) for i in range(size): b = next(buf) retval <<= 8 retval |= b & 0xff return retval def number_of_extra_bytes_to_read(b): return 8 - (~b & 0xff).bit_length() def decode_zig_zag_64(n): return (n >> 1) ^ -(n & 1) @formatter_for('str') def format_value_text(val, encoding, colormap, quote=False, **_): escapedval = val.replace('\\', '\\\\') if quote: escapedval = escapedval.replace("'", "''") escapedval = unicode_controlchars_re.sub(_show_control_chars, escapedval) bval = escapedval if quote: bval = "'{}'".format(bval) return bval if colormap is NO_COLOR_MAP else color_text(bval, colormap, wcwidth.wcswidth(bval)) # name alias formatter_for('unicode')(format_value_text) formatter_for('text')(format_value_text) formatter_for('ascii')(format_value_text) def format_simple_collection(val, cqltype, lbracket, rbracket, encoding, colormap, date_time_format, float_precision, nullval, decimal_sep, thousands_sep, boolean_styles): subs = [format_value(sval, cqltype=stype, encoding=encoding, colormap=colormap, date_time_format=date_time_format, float_precision=float_precision, nullval=nullval, quote=True, decimal_sep=decimal_sep, thousands_sep=thousands_sep, boolean_styles=boolean_styles) for sval, stype in zip(val, cqltype.get_n_sub_types(len(val)))] bval = lbracket + ', '.join(get_str(sval) for sval in subs) + rbracket if colormap is NO_COLOR_MAP: return bval lb, sep, rb = [colormap['collection'] + s + colormap['reset'] for s in (lbracket, ', ', rbracket)] coloredval = lb + sep.join(sval.coloredval for sval in subs) + rb displaywidth = 2 * len(subs) + sum(sval.displaywidth for sval in subs) return FormattedValue(bval, coloredval, displaywidth) @formatter_for('list') def format_value_list(val, cqltype, encoding, colormap, date_time_format, float_precision, nullval, decimal_sep, thousands_sep, boolean_styles, **_): return format_simple_collection(val, cqltype, '[', ']', encoding, colormap, date_time_format, float_precision, nullval, decimal_sep, thousands_sep, boolean_styles) @formatter_for('tuple') def format_value_tuple(val, cqltype, encoding, colormap, date_time_format, float_precision, nullval, decimal_sep, thousands_sep, boolean_styles, **_): return format_simple_collection(val, cqltype, '(', ')', encoding, colormap, date_time_format, float_precision, nullval, decimal_sep, thousands_sep, boolean_styles) @formatter_for('set') def format_value_set(val, cqltype, encoding, colormap, date_time_format, float_precision, nullval, decimal_sep, thousands_sep, boolean_styles, **_): return format_simple_collection(val, cqltype, '{', '}', encoding, colormap, date_time_format, float_precision, nullval, decimal_sep, thousands_sep, boolean_styles) formatter_for('frozenset')(format_value_set) formatter_for('sortedset')(format_value_set) formatter_for('SortedSet')(format_value_set) @formatter_for('dict') def format_value_map(val, cqltype, encoding, colormap, date_time_format, float_precision, nullval, decimal_sep, thousands_sep, boolean_styles, **_): def subformat(v, t): return format_value(v, cqltype=t, encoding=encoding, colormap=colormap, date_time_format=date_time_format, float_precision=float_precision, nullval=nullval, quote=True, decimal_sep=decimal_sep, thousands_sep=thousands_sep, boolean_styles=boolean_styles) subs = [(subformat(k, cqltype.sub_types[0]), subformat(v, cqltype.sub_types[1])) for (k, v) in sorted(val.items())] bval = '{' + ', '.join(get_str(k) + ': ' + get_str(v) for (k, v) in subs) + '}' if colormap is NO_COLOR_MAP: return bval lb, comma, colon, rb = [colormap['collection'] + s + colormap['reset'] for s in ('{', ', ', ': ', '}')] coloredval = lb \ + comma.join(k.coloredval + colon + v.coloredval for (k, v) in subs) \ + rb displaywidth = 4 * len(subs) + sum(k.displaywidth + v.displaywidth for (k, v) in subs) return FormattedValue(bval, coloredval, displaywidth) formatter_for('OrderedDict')(format_value_map) formatter_for('OrderedMap')(format_value_map) formatter_for('OrderedMapSerializedKey')(format_value_map) formatter_for('map')(format_value_map) def format_value_utype(val, cqltype, encoding, colormap, date_time_format, float_precision, nullval, decimal_sep, thousands_sep, boolean_styles, **_): def format_field_value(v, t): if v is None: return colorme(nullval, colormap, 'error') return format_value(v, cqltype=t, encoding=encoding, colormap=colormap, date_time_format=date_time_format, float_precision=float_precision, nullval=nullval, quote=True, decimal_sep=decimal_sep, thousands_sep=thousands_sep, boolean_styles=boolean_styles) def format_field_name(name): return format_value_text(name, encoding=encoding, colormap=colormap, quote=False) subs = [(format_field_name(k), format_field_value(v, t)) for ((k, v), t) in zip(list(val._asdict().items()), cqltype.sub_types)] bval = '{' + ', '.join(get_str(k) + ': ' + get_str(v) for (k, v) in subs) + '}' if colormap is NO_COLOR_MAP: return bval lb, comma, colon, rb = [colormap['collection'] + s + colormap['reset'] for s in ('{', ', ', ': ', '}')] coloredval = lb \ + comma.join(k.coloredval + colon + v.coloredval for (k, v) in subs) \ + rb displaywidth = 4 * len(subs) + sum(k.displaywidth + v.displaywidth for (k, v) in subs) return FormattedValue(bval, coloredval, displaywidth) NANOS_PER_MICRO = 1000 NANOS_PER_MILLI = 1000 * NANOS_PER_MICRO NANOS_PER_SECOND = 1000 * NANOS_PER_MILLI NANOS_PER_MINUTE = 60 * NANOS_PER_SECOND NANOS_PER_HOUR = 60 * NANOS_PER_MINUTE MONTHS_PER_YEAR = 12