# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
.. _tutorial-micro-autotune:

Autotuning with microTVM
========================
**Authors**: `Andrew Reusch `_, `Mehrdad Hessar `_

This tutorial explains how to autotune a model using the C runtime.
"""

import numpy as np
import subprocess
import pathlib

import tvm
from tvm.relay.backend import Executor, Runtime

####################
# Defining the model
####################
#
# To begin with, define a model in Relay to be executed on-device. Then create an IRModule from the
# Relay model and fill the parameters with random numbers.
#

data_shape = (1, 3, 10, 10)
weight_shape = (6, 3, 5, 5)

data = tvm.relay.var("data", tvm.relay.TensorType(data_shape, "float32"))
weight = tvm.relay.var("weight", tvm.relay.TensorType(weight_shape, "float32"))

y = tvm.relay.nn.conv2d(
    data,
    weight,
    padding=(2, 2),
    kernel_size=(5, 5),
    kernel_layout="OIHW",
    out_dtype="float32",
)
f = tvm.relay.Function([data, weight], y)

relay_mod = tvm.IRModule.from_expr(f)
relay_mod = tvm.relay.transform.InferType()(relay_mod)

weight_sample = np.random.rand(
    weight_shape[0], weight_shape[1], weight_shape[2], weight_shape[3]
).astype("float32")
params = {"weight": weight_sample}

#######################
# Defining the target #
#######################
# Now we define the TVM target that describes the execution environment. This looks very similar
# to target definitions from other microTVM tutorials. Alongside this, we pick the C runtime to
# code-generate our model against.
#
# When running on physical hardware, choose a target and a board that describe the hardware. There
# are multiple hardware targets that can be selected from the PLATFORM list in this tutorial. You
# can choose the platform by passing the --platform argument when running this tutorial (see the
# sketch after the physical-hardware note below).
#

RUNTIME = Runtime("crt", {"system-lib": True})
TARGET = tvm.target.target.micro("host")

# Compiling for physical hardware
# --------------------------------------------------------------------------
# When running on physical hardware, choose a TARGET and a BOARD that describe the hardware. The
# STM32L4R5ZI Nucleo target and board are chosen in the example below.
#
#     TARGET = tvm.target.target.micro("stm32l4r5zi")
#     BOARD = "nucleo_l4r5zi"
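# The ``--platform`` handling mentioned above is not shown in this listing. Below is a minimal,
# illustrative sketch of how such a selection could look; the ``PLATFORM`` list, the argparse
# wiring, and the specific board chosen are assumptions made for illustration, not the tutorial's
# actual code.
#
#     import argparse
#
#     PLATFORM = ["host", "zephyr"]
#     parser = argparse.ArgumentParser()
#     parser.add_argument("--platform", choices=PLATFORM, default="host")
#     args = parser.parse_args()
#     if args.platform == "zephyr":
#         TARGET = tvm.target.target.micro("stm32l4r5zi")
#         BOARD = "nucleo_l4r5zi"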
#########################
# Extracting tuning tasks
#########################
# Not all operators in the Relay program defined above can be tuned. Some are so trivial that only
# a single implementation is defined; others don't make sense as tuning tasks. Using
# `extract_from_program`, you can produce a list of tunable tasks.
#
# Because task extraction involves running the compiler, we first configure the compiler's
# transformation passes; we'll apply the same configuration later on during autotuning.

pass_context = tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True})
with pass_context:
    tasks = tvm.autotvm.task.extract_from_program(relay_mod["main"], {}, TARGET)
assert len(tasks) > 0

######################
# Configuring microTVM
######################
# Before autotuning, we need to define a module loader and then pass that to a
# `tvm.autotvm.LocalBuilder`. Then we create a `tvm.autotvm.LocalRunner` and use both the builder
# and the runner to generate multiple measurements for the autotuner.
#
# In this tutorial, we have the option to use the x86 host as an example or to use different
# targets from Zephyr RTOS. If you pass `--platform=host` to this tutorial, it will use x86. You
# can choose other options from the `PLATFORM` list.
#

module_loader = tvm.micro.AutoTvmModuleLoader(
    template_project_dir=pathlib.Path(tvm.micro.get_microtvm_template_projects("crt")),
    project_options={"verbose": False},
)
builder = tvm.autotvm.LocalBuilder(
    n_parallel=1,
    build_kwargs={"build_option": {"tir.disable_vectorize": True}},
    do_fork=True,
    build_func=tvm.micro.autotvm_build_func,
    runtime=RUNTIME,
)
runner = tvm.autotvm.LocalRunner(number=1, repeat=1, timeout=100, module_loader=module_loader)

measure_option = tvm.autotvm.measure_option(builder=builder, runner=runner)

# Compiling for physical hardware
# --------------------------------------------------------------------------
#     module_loader = tvm.micro.AutoTvmModuleLoader(
#         template_project_dir=pathlib.Path(tvm.micro.get_microtvm_template_projects("zephyr")),
#         project_options={
#             "zephyr_board": BOARD,
#             "west_cmd": "west",
#             "verbose": False,
#             "project_type": "host_driven",
#         },
#     )
#     builder = tvm.autotvm.LocalBuilder(
#         n_parallel=1,
#         build_kwargs={"build_option": {"tir.disable_vectorize": True}},
#         do_fork=False,
#         build_func=tvm.micro.autotvm_build_func,
#     )
#     runner = tvm.autotvm.LocalRunner(number=1, repeat=1, timeout=100, module_loader=module_loader)
#
#     measure_option = tvm.autotvm.measure_option(builder=builder, runner=runner)

################
# Run Autotuning
################
# Now we can run autotuning separately on each extracted task.

num_trials = 10
for task in tasks:
    tuner = tvm.autotvm.tuner.GATuner(task)
    tuner.tune(
        n_trial=num_trials,
        measure_option=measure_option,
        callbacks=[
            tvm.autotvm.callback.log_to_file("microtvm_autotune.log.txt"),
            tvm.autotvm.callback.progress_bar(num_trials, si_prefix="M"),
        ],
        si_prefix="M",
    )
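# The GA tuner above is one of several interchangeable tuners; any class in ``tvm.autotvm.tuner``
# (for example ``XGBTuner`` or ``RandomTuner``) exposes the same ``tune`` interface. As an
# illustrative sketch that is not part of the original tutorial flow, a model-based tuner can be
# warm-started from the records already collected in the log file:
#
#     for task in tasks:
#         tuner = tvm.autotvm.tuner.XGBTuner(task)
#         tuner.load_history(list(tvm.autotvm.record.load_from_file("microtvm_autotune.log.txt")))
#         tuner.tune(n_trial=num_trials, measure_option=measure_option)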
############################
# Timing the untuned program
############################
# For comparison, let's compile and run the graph without imposing any autotuning schedules. TVM
# will select a randomly-tuned implementation for each operator, which should not perform as well
# as the tuned operator.

with pass_context:
    lowered = tvm.relay.build(relay_mod, target=TARGET, runtime=RUNTIME, params=params)

temp_dir = tvm.contrib.utils.tempdir()
project = tvm.micro.generate_project(
    str(tvm.micro.get_microtvm_template_projects("crt")),
    lowered,
    temp_dir / "project",
    {"verbose": False},
)

# Compiling for physical hardware
# --------------------------------------------------------------------------
#     project = tvm.micro.generate_project(
#         str(tvm.micro.get_microtvm_template_projects("zephyr")),
#         lowered,
#         temp_dir / "project",
#         {
#             "zephyr_board": BOARD,
#             "west_cmd": "west",
#             "verbose": False,
#             "project_type": "host_driven",
#         },
#     )

project.build()
project.flash()

with tvm.micro.Session(project.transport()) as session:
    debug_module = tvm.micro.create_local_debug_executor(
        lowered.get_graph_json(), session.get_system_lib(), session.device
    )
    debug_module.set_input(**lowered.get_params())
    print("########## Build without Autotuning ##########")
    debug_module.run()
    del debug_module

##########################
# Timing the tuned program
##########################
# Once autotuning completes, you can time execution of the entire program using the Debug Runtime:

with tvm.autotvm.apply_history_best("microtvm_autotune.log.txt"):
    with pass_context:
        lowered_tuned = tvm.relay.build(relay_mod, target=TARGET, runtime=RUNTIME, params=params)

temp_dir = tvm.contrib.utils.tempdir()
project = tvm.micro.generate_project(
    str(tvm.micro.get_microtvm_template_projects("crt")),
    lowered_tuned,
    temp_dir / "project",
    {"verbose": False},
)

# Compiling for physical hardware
# --------------------------------------------------------------------------
#     project = tvm.micro.generate_project(
#         str(tvm.micro.get_microtvm_template_projects("zephyr")),
#         lowered_tuned,
#         temp_dir / "project",
#         {
#             "zephyr_board": BOARD,
#             "west_cmd": "west",
#             "verbose": False,
#             "project_type": "host_driven",
#         },
#     )

project.build()
project.flash()

with tvm.micro.Session(project.transport()) as session:
    debug_module = tvm.micro.create_local_debug_executor(
        lowered_tuned.get_graph_json(), session.get_system_lib(), session.device
    )
    debug_module.set_input(**lowered_tuned.get_params())
    print("########## Build with Autotuning ##########")
    debug_module.run()
    del debug_module
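# As an optional last step (a sketch added for illustration, not part of the original tutorial),
# you can inspect the log written during tuning and report the best measured cost per task.
# ``tvm.autotvm.record.load_from_file`` yields ``(MeasureInput, MeasureResult)`` pairs and is a
# standard autotvm helper; the summary logic below is only an example.

best_costs = {}
for inp, res in tvm.autotvm.record.load_from_file("microtvm_autotune.log.txt"):
    if res.error_no != 0:
        continue  # skip failed measurements
    cost = float(np.mean(res.costs))
    name = inp.task.name
    if name not in best_costs or cost < best_costs[name]:
        best_costs[name] = cost

for name, cost in best_costs.items():
    print(f"{name}: best measured mean time {cost:.3e} s")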