# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import math
import os
import posixpath
import re
from datetime import datetime
from pprint import pprint
from urllib.parse import urlparse

import urllib3
from isodate import parse_duration
from mpegdash.parser import MPEGDASHParser

http = urllib3.PoolManager()

# Supported Manifest
# Compact Time/Number with Timeline
#   - AdaptationSet contains:
#     - Segment Template
#     - One or more representations (without Segment Template)
#
# Full Time/Number with Timeline
#   - AdaptationSet contains:
#     - One or more representations
#     - Each representations contains Segment Template


class DashVodAsset:
    """Resources (manifest, init and media segments) that make up a DASH VOD asset.

    Constructing an instance immediately fetches and parses the manifest.

    Attributes set by parseDashVodAsset():
        masterManifestContentType: Content-Type header returned with the manifest.
        mediaSegmentList: absolute URLs of every init and media segment
            (may contain duplicates across periods).
        allResources: de-duplicated list of the segments plus the manifest URL.
        commonPrefix: longest common URL prefix of allResources, ending in '/'.
    """

    def __init__(self, masterManifest, authHeaders=None):
        """Store the manifest URL and optional request headers, then parse.

        Args:
            masterManifest: absolute URL of the MPD.
            authHeaders: optional dict of HTTP headers sent with the manifest request.
        """
        self.masterManifest = masterManifest
        self.masterManifestContentType = None
        self.mediaSegmentList = []
        self.commonPrefix = None
        self.allResources = None
        # Legacy, misspelt attribute name; kept (always None) so that code which
        # referenced it keeps working. Use 'allResources' instead.
        self.allResource = None
        self.authHeaders = authHeaders

        self.parseDashVodAsset()

    # Function will parse variant manifest and extract a list of all media and init segments
    # media and init segments will store absolute URLs for segments in mediaSegmentList
    def parseDashVodAsset(self):
        """Fetch the manifest and populate the segment lists and common prefix.

        Raises:
            IOError: if the manifest could not be retrieved.
        """
        mediaSegments = []

        # Retrieve Manifest
        (masterManifestBody, self.masterManifestContentType) = getManifest(
            self.masterManifest, self.authHeaders)
        if masterManifestBody is None:
            # getManifest has already logged the reason; fail with a clear error
            # rather than handing None to the MPD parser.
            raise IOError("Unable to retrieve DASH manifest: %s" % self.masterManifest)

        mpd = MPEGDASHParser.parse(masterManifestBody)
        # URLs always use '/' separators, so use posixpath regardless of platform
        mpdBaseUrl = posixpath.dirname(self.masterManifest)

        # loop over periods (child collections may be None when absent, hence 'or []')
        for periodCounter, period in enumerate(mpd.periods or [], start=1):
            print("Starting processing Period %d ... " % periodCounter)

            # loop over all the adaptation sets in the period
            for adaptationSetCounter, adaptationSet in enumerate(period.adaptation_sets or [], start=1):
                print("Starting processing AdaptationSet %d with MimeType '%s'"
                      % (adaptationSetCounter, adaptationSet.mime_type))
                listOfSegments = getAdaptationSetSegmentList(mpdBaseUrl, adaptationSet, period)
                mediaSegments.extend(listOfSegments)
                print("Finished processing AdaptationSet %d." % adaptationSetCounter)

            print("Finished processing Period %d." % periodCounter)

        # Identify common base URL for all resources
        allSegments = list(mediaSegments)
        allSegments.append(self.masterManifest)

        # Duplicates need to be removed from the list of all segments. Duplicate can occur when
        # processing multiperiod DASH streams where the init file does not change across period
        # boundaries. dict.fromkeys removes duplicates while keeping first-seen order, so the
        # resulting list is deterministic.
        uniqueSegments = list(dict.fromkeys(allSegments))

        # Set common prefix
        # The common prefix must end with '/' to indicate this is a path and does not include
        # the start of the name of the files. For example, if all the resources of an asset start
        # with 'asset1' and the content is stored in 'my/asset/path' the common prefix should be
        # 'my/asset/path/' not 'my/asset/path/asset1'
        commonPrefix = os.path.commonprefix(uniqueSegments)
        if not commonPrefix.endswith('/'):
            # Strip off anything to the right of the last '/' because this component represents
            # the common string at the start of the filenames. Plain slicing is used so that
            # characters which are special in regular expressions cannot break the trimming.
            commonPrefix = commonPrefix[:commonPrefix.rfind('/') + 1]

        self.commonPrefix = commonPrefix
        self.mediaSegmentList = mediaSegments
        self.allResources = uniqueSegments

        return


def getManifest(url, authHeaders):
    """Download a manifest and return it as text.

    Args:
        url: absolute URL of the manifest.
        authHeaders: dict of HTTP headers to send with the request, or None.

    Returns:
        Tuple (body, contentType). 'body' is the UTF-8 decoded payload, or None
        when the status is not 200 or the received length does not match the
        Content-Length header. 'contentType' is the Content-Type header value,
        or None when the status is not 200 or the header is absent.

    Raises:
        IOError or urllib3.exceptions.HTTPError: if the request itself fails;
            the error is logged and re-raised unchanged.
    """
    try:
        response = http.request("GET", url, headers=authHeaders)
    except (IOError, urllib3.exceptions.HTTPError) as urlErr:
        print("Exception occurred while attempting to get: %s" % url)
        print(repr(urlErr))
        raise

    if response.status != 200:
        print('http error', response.status, 'fetching', url)
        return (None, None)

    urlPayload = response.data
    contentType = response.headers.get('Content-Type')

    # Content-Length is absent for chunked responses, and describes the encoded
    # (e.g. gzip) size when Content-Encoding is set, whereas response.data holds
    # the decoded body. Only verify the length when the comparison is meaningful.
    contentLength = response.headers.get('Content-Length')
    if contentLength is not None and not response.headers.get('Content-Encoding'):
        expectedLen = int(contentLength)
        receivedLen = len(urlPayload)
        if receivedLen != expectedLen:
            print('DashVodAsset: ', url, 'expected', expectedLen, '; received', receivedLen)
            return (None, contentType)

    return (urlPayload.decode('utf-8'), contentType)


# Normalises url and removes additional '..' notations
def normaliseUrl(url):
    """Return 'scheme://netloc/path' with '.' and '..' path components collapsed.

    Only the scheme, network location and path are kept; any query string or
    fragment in 'url' is not included in the result.
    """
    o = urlparse(url)
    # posixpath keeps '/' separators on every platform
    absPath = posixpath.normpath(o.path)
    absUrl = "%s://%s%s" % (o.scheme, o.netloc, absPath)
    return absUrl


def getAdaptationSetSegmentList(mpdBaseUrl, adaptationSet, period):
    """List absolute URLs of all media and init segments in an adaptation set.

    Args:
        mpdBaseUrl: URL of the directory containing the manifest (no trailing '/').
        adaptationSet: parsed AdaptationSet object.
        period: parsed Period that contains the adaptation set; its duration is
            used when a representation has no explicit segment timeline.

    Returns:
        List of absolute segment URLs. For each representation the media
        segments come first, followed by the init segment when one is defined.

    Raises:
        SystemExit: with code 1 when no SegmentTemplate can be found for a
            representation, or code 2 when more than one is present.
    """
    mediaSegments = []

    for representation in adaptationSet.representations or []:
        print("Processing Representation %s:" % representation.id)

        # Get segment Template
        # Segment template may be defined in representation or at the Adaptation set level
        if representation.segment_templates:
            segmentTemplates = representation.segment_templates
        elif adaptationSet.segment_templates:
            segmentTemplates = adaptationSet.segment_templates
        else:
            print("Unable to find Segment Template for Representation %s" % representation.id)
            raise SystemExit(1)

        # Assumption there is only one segment template per adaptation set
        if len(segmentTemplates) > 1:
            print("Unsupported DASH Manifest format. Maximum of one segment template per adaptations set")
            raise SystemExit(2)

        ############################
        # Process Media Files
        ############################

        # Extract Media Segment template and fill in any required parameters (e.g. representation id)
        segmentTemplate = segmentTemplates[0]
        mediaSegmentTemplate = segmentTemplate.media.replace(
            "$RepresentationID$", str(representation.id))
        print("Media Segment Template: %s" % mediaSegmentTemplate)

        # Generate a list of media files to be downloaded
        # Segment Timelines do not exist for some renditions (e.g. 'image/jpeg')
        # For these renditions a segment timeline is inferred from the SegmentTemplate
        if segmentTemplate.segment_timelines:
            mediaSegmentTimes = getSegmentTimeline(segmentTemplate)
        else:
            mediaSegmentTimes = getInferredSegmentTimeline(
                segmentTemplate.start_number,
                segmentTemplate.timescale,
                segmentTemplate.duration,
                period.duration)

        mediaSegmentsForRepresentation = getMediaSegmentList(
            mediaSegmentTemplate, segmentTemplate.start_number, mediaSegmentTimes, mpdBaseUrl)

        ############################
        # Process Init File (it exists for this rendition)
        ############################
        if segmentTemplate.initialization:
            # Extract init segment template and fill in any required parameters (e.g. representation id)
            initSegmentTemplate = segmentTemplate.initialization.replace(
                "$RepresentationID$", str(representation.id))
            print("Init Segment Template: %s" % initSegmentTemplate)

            # Add init file to resource list
            absInitSegmentTemplate = normaliseUrl(mpdBaseUrl + '/' + initSegmentTemplate)

            # Append init files to list of media files to be downloaded
            mediaSegmentsForRepresentation.append(absInitSegmentTemplate)
        else:
            print("Skipping init file as there is no init for '%s' representation" % representation.id)

        # Append list of files to be downloaded as part of this adaptation set
        mediaSegments.extend(mediaSegmentsForRepresentation)

    return mediaSegments


# Generate list of media segments
def getMediaSegmentList(mediaSegmentTemplate, startNumber, mediaSegmentTimes, mpdBaseUrl):
    """Expand a media segment template into absolute segment URLs.

    Args:
        mediaSegmentTemplate: template containing either '$Time$' or '$Number$'.
        startNumber: number of the first segment for '$Number$' templates;
            None is treated as 1, the DASH default.
        mediaSegmentTimes: one entry per segment; the value is substituted for
            '$Time$', and only the entry count matters for '$Number$'.
        mpdBaseUrl: URL of the directory containing the manifest (no trailing '/').

    Returns:
        List of normalised absolute URLs, one per entry in mediaSegmentTimes.
    """
    if startNumber is None:
        startNumber = 1

    mediaSegments = []
    for t in mediaSegmentTimes:
        # Handle both Time with Timeline and Number with timeline mpd formats
        if "$Time$" in mediaSegmentTemplate:
            # Time with Timeline mpd
            resource = mediaSegmentTemplate.replace("$Time$", str(t))
        else:
            # Number with Timeline mpd
            resource = mediaSegmentTemplate.replace("$Number$", str(startNumber))
            startNumber = startNumber + 1

        mediaSegments.append(normaliseUrl(mpdBaseUrl + '/' + resource))

    return mediaSegments


# Uses the segment template to generate a list of segment times
def getSegmentTimeline(segmentTemplate):
    """Return the start time of every segment described by the first SegmentTimeline.

    Each timeline entry provides a start time 't' (optional), a duration 'd'
    and a repeat count 'r' (optional). When 't' is omitted the entry starts
    where the previous entry ended (0 for the first entry). A missing, zero or
    negative 'r' yields a single segment for that entry.

    Args:
        segmentTemplate: parsed SegmentTemplate with at least one segment timeline.

    Returns:
        List of segment start times in timescale units.
    """
    mediaSegmentTimes = []
    nextTime = 0  # start time implied for an entry that carries no explicit 't'

    for segmentTimelineComponent in segmentTemplate.segment_timelines[0].Ss or []:
        d = segmentTimelineComponent.d  # duration
        r = segmentTimelineComponent.r  # repeats
        t = segmentTimelineComponent.t  # time
        if t is None:
            t = nextTime

        repeats = r if r is not None and r > 0 else 0

        # first segment followed by any repeat segments
        mediaSegmentTimes.extend(t + x * d for x in range(repeats + 1))
        nextTime = t + (repeats + 1) * d

    return mediaSegmentTimes


# Infers a segment timeline if not explicitly defined
def getInferredSegmentTimeline(startNumber, timescale, segmentTemplateDuration, periodDuration):
    """Infer the list of segment numbers for a template without a SegmentTimeline.

    Args:
        startNumber: number of the first segment; None is treated as 1.
        timescale: units per second for segmentTemplateDuration; None is treated as 1.
        segmentTemplateDuration: segment duration in timescale units.
        periodDuration: ISO 8601 duration string for the period.

    Returns:
        List of consecutive segment numbers beginning at startNumber.
    """
    if startNumber is None:
        startNumber = 1
    if timescale is None:
        timescale = 1

    # Approach used here is to calculate segment size (i.e. segment template duration divided by
    # timescale). Dividing the duration of the period by the segment size gives the number of assets.

    # Calculate segment size in seconds
    segmentSize = float(segmentTemplateDuration) / float(timescale)

    # Get period duration in seconds
    periodSeconds = parse_duration(periodDuration).total_seconds()

    # Calculate number of segments in period. Round up so that a shorter final
    # segment is included; rounding the ratio first keeps floating point noise
    # from producing a spurious extra segment.
    numberSegments = int(math.ceil(round(periodSeconds / segmentSize, 6)))

    # Create array listing the segment numbers
    return list(range(startNumber, startNumber + numberSegments))