# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

import re
import urllib.parse

# Captures the CloudFront distribution ID from the log object's S3 key:
# 13-14 upper-case alphanumerics immediately followed by a dot. The ID is
# accepted either at the very start of the key (no S3 prefix in front of the
# file name) or right after a '/'.
RE_CLOUDFRONT_DIST_ID = re.compile(r'(?:^|/)([0-9A-Z]{13,14})\.')


def transform(logdata):
    """Enrich a log record in place with derived fields.

    Writes ``logdata['url']['full']``, ``logdata['http']['version']`` and
    ``logdata['distribution_id']``, and URL-decodes
    ``logdata['user_agent']['original']`` when that field is available.

    Args:
        logdata: Mapping of log fields. The function reads ``cs_uri_query``,
            ``cs_protocol``, ``x_host_header``, ``cs_uri_stem``,
            ``cs_protocol_version`` and ``@log_s3key``, and writes into the
            existing ``url`` and ``http`` sub-mappings.

    Returns:
        The same ``logdata`` object that was passed in, after mutation.

    Raises:
        KeyError: If one of the required fields or sub-mappings is missing.
        IndexError: If ``cs_protocol_version`` contains no ``/``.
    """
    # A query value of '-' is treated as "no query string".
    query = logdata['cs_uri_query']
    query_suffix = '' if query == '-' else '?' + query
    logdata['url']['full'] = (
        logdata['cs_protocol'] + '://' + logdata['x_host_header']
        + logdata['cs_uri_stem'] + query_suffix
    )

    # Keep only the part after the first slash, e.g. 'HTTP/1.1' -> '1.1'.
    logdata['http']['version'] = logdata['cs_protocol_version'].split('/')[1]

    # Decoding the user agent is best effort: a missing or non-string field
    # is left untouched rather than failing the whole record.
    try:
        user_agent = logdata['user_agent']
        user_agent['original'] = urllib.parse.unquote(user_agent['original'])
    except (KeyError, TypeError, AttributeError):
        pass

    match = RE_CLOUDFRONT_DIST_ID.search(logdata['@log_s3key'])
    logdata['distribution_id'] = match.group(1) if match else "unknown"

    return logdata