/*
 * Copyright 2015 Databricks Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.databricks.spark.sql.perf

import com.databricks.spark.sql.perf.mllib.ReflectionUtils

/**
 * The performance results of all given queries for a single iteration.
 *
 * @param timestamp The timestamp indicates when the entire experiment is started.
 * @param iteration The index number of the current iteration.
 * @param tags Tags of this iteration (variations are stored at here).
 * @param configuration Configuration properties of this iteration.
 * @param results The performance results of queries for this iteration.
 */
case class ExperimentRun(
    timestamp: Long,
    iteration: Int,
    tags: Map[String, String],
    configuration: BenchmarkConfiguration,
    results: Seq[BenchmarkResult])

/**
 * The configuration used for an iteration of an experiment.
 *
 * @param sparkVersion The version of Spark. Defaults to the version of the Spark library
 *                     on the classpath (`org.apache.spark.SPARK_VERSION`).
 * @param sqlConf All configuration properties related to Spark SQL.
 * @param sparkConf All configuration properties of Spark.
 * @param defaultParallelism The default parallelism of the cluster.
 *                           Usually, it is the number of cores of the cluster.
 * @param buildInfo Key/value metadata about the build; the exact keys are chosen by
 *                  whoever constructs this configuration.
 */
case class BenchmarkConfiguration(
    sparkVersion: String = org.apache.spark.SPARK_VERSION,
    sqlConf: Map[String, String],
    sparkConf: Map[String, String],
    defaultParallelism: Int,
    buildInfo: Map[String, String])

/**
 * The result of a query.
 *
 * @param name The name of the query.
 * @param mode The ExecutionMode of this run.
 * @param parameters Additional parameters that describe this query.
 * @param joinTypes The type of join operations in the query.
 * @param tables The tables involved in the query.
 * @param parsingTime The time used to parse the query.
 * @param analysisTime The time used to analyze the query.
 * @param optimizationTime The time used to optimize the query.
 * @param planningTime The time used to plan the query.
 * @param executionTime The time used to execute the query.
 * @param result the result of this run. It is not necessarily the result of the query.
 *               For example, it can be the number of rows generated by this query or
 *               the sum of hash values of rows generated by this query.
 * @param breakDown The breakdown results of the query plan tree.
 * @param queryExecution The query execution plan.
 * @param failure The failure message.
 * @param mlResult The result metrics specific to MLlib. Note that this is an `Array`, so
 *                 the generated case-class `equals`/`hashCode` compare it by reference.
 * @param benchmarkId An optional ID to identify a series of benchmark runs.
 *                    In ML, this is generated based on the benchmark name and
 *                    the hash value of params.
 */
case class BenchmarkResult(
    name: String,
    mode: String,
    parameters: Map[String, String] = Map.empty[String, String],
    joinTypes: Seq[String] = Nil,
    tables: Seq[String] = Nil,
    parsingTime: Option[Double] = None,
    analysisTime: Option[Double] = None,
    optimizationTime: Option[Double] = None,
    planningTime: Option[Double] = None,
    executionTime: Option[Double] = None,
    result: Option[Long] = None,
    breakDown: Seq[BreakdownResult] = Nil,
    queryExecution: Option[String] = None,
    failure: Option[Failure] = None,
    mlResult: Option[Array[MLMetric]] = None,
    benchmarkId: Option[String] = None)

/**
 * The execution time of a subtree of the query plan tree of a specific query.
 *
 * @param nodeName The name of the top physical operator of the subtree.
 * @param nodeNameWithArgs The name and arguments of the top physical operator of the subtree.
 * @param index The index of the top physical operator of the subtree
 *              in the original query plan tree. The index starts from 0
 *              (0 represents the top physical operator of the original query plan tree).
 * @param children Presumably the indices (same numbering as `index`) of the direct children
 *                 of this operator -- TODO confirm against the code that builds breakdowns.
 * @param executionTime The execution time of the subtree.
 * @param delta Presumably the part of `executionTime` attributable to this operator alone
 *              -- TODO confirm against the code that builds breakdowns.
 */
case class BreakdownResult(
    nodeName: String,
    nodeNameWithArgs: String,
    index: Int,
    children: Seq[Int],
    executionTime: Double,
    delta: Double)

/**
 * Description of a failed run.
 *
 * @param className A class name associated with the failure.
 * @param message The failure message.
 */
case class Failure(className: String, message: String)

/**
 * Class wrapping parameters for ML tests.
 *
 * KEEP CONSTRUCTOR ARGUMENTS SORTED BY NAME.
 * It simplifies lookup when checking if a parameter is here already.
 *
 * NOTE(review): `numSynonymsToFind` and `rank` are currently not in alphabetical position.
 * They are intentionally left where they are, because moving them would silently change the
 * meaning of any positional constructor or `copy` call.
 *
 * Every parameter is optional; when adding one, mirror it in [[MLParams.copy]] as well.
 */
class MLParams(
    // *** Common to all algorithms ***
    val randomSeed: Option[Int] = Some(42),
    val numExamples: Option[Long] = None,
    val numTestExamples: Option[Long] = None,
    val numPartitions: Option[Int] = None,
    // *** Specialized and sorted by name ***
    val bucketizerNumBuckets: Option[Int] = None,
    val depth: Option[Int] = None,
    val docLength: Option[Int] = None,
    val elasticNetParam: Option[Double] = None,
    val family: Option[String] = None,
    val featureArity: Option[Int] = None,
    val itemSetSize: Option[Int] = None,
    val k: Option[Int] = None,
    val link: Option[String] = None,
    val maxIter: Option[Int] = None,
    val numClasses: Option[Int] = None,
    val numFeatures: Option[Int] = None,
    val numHashTables: Option[Int] = Some(1),
    val numSynonymsToFind: Option[Int] = None,
    val numInputCols: Option[Int] = None,
    val numItems: Option[Int] = None,
    val numUsers: Option[Int] = None,
    val optimizer: Option[String] = None,
    val regParam: Option[Double] = None,
    val relativeError: Option[Double] = Some(0.001),
    val rank: Option[Int] = None,
    val smoothing: Option[Double] = None,
    val tol: Option[Double] = None,
    val vocabSize: Option[Int] = None) {

  /**
   * Returns a map of param names to string representations of their values. Only params that
   * were defined (i.e., not equal to None) are included in the map.
   *
   * @return one entry per constructor argument whose value is a `Some`, keyed by the
   *         argument name and holding `toString` of the wrapped value.
   */
  def toMap: Map[String, String] = {
    // `collect` filters and transforms in one pass: entries whose value is not a `Some`
    // simply do not match the pattern and are left out.
    ReflectionUtils.getConstructorArgs(this).collect {
      case (key: String, Some(value: Any)) => key -> value.toString
    }
  }

  /**
   * Returns a copy of the current MLParams instance.
   *
   * Each argument defaults to the current value of the field with the same name, so callers
   * only need to pass the parameters they want to change.
   */
  def copy(
      // *** Common to all algorithms ***
      randomSeed: Option[Int] = randomSeed,
      numExamples: Option[Long] = numExamples,
      numTestExamples: Option[Long] = numTestExamples,
      numPartitions: Option[Int] = numPartitions,
      // *** Specialized and sorted by name ***
      bucketizerNumBuckets: Option[Int] = bucketizerNumBuckets,
      depth: Option[Int] = depth,
      docLength: Option[Int] = docLength,
      elasticNetParam: Option[Double] = elasticNetParam,
      family: Option[String] = family,
      featureArity: Option[Int] = featureArity,
      itemSetSize: Option[Int] = itemSetSize,
      k: Option[Int] = k,
      link: Option[String] = link,
      maxIter: Option[Int] = maxIter,
      numClasses: Option[Int] = numClasses,
      numFeatures: Option[Int] = numFeatures,
      numHashTables: Option[Int] = numHashTables,
      numSynonymsToFind: Option[Int] = numSynonymsToFind,
      numInputCols: Option[Int] = numInputCols,
      numItems: Option[Int] = numItems,
      numUsers: Option[Int] = numUsers,
      optimizer: Option[String] = optimizer,
      regParam: Option[Double] = regParam,
      relativeError: Option[Double] = relativeError,
      rank: Option[Int] = rank,
      smoothing: Option[Double] = smoothing,
      tol: Option[Double] = tol,
      vocabSize: Option[Int] = vocabSize): MLParams = {
    // Arguments are passed by name so the call stays correct regardless of declaration order.
    new MLParams(
      randomSeed = randomSeed,
      numExamples = numExamples,
      numTestExamples = numTestExamples,
      numPartitions = numPartitions,
      bucketizerNumBuckets = bucketizerNumBuckets,
      depth = depth,
      docLength = docLength,
      elasticNetParam = elasticNetParam,
      family = family,
      featureArity = featureArity,
      itemSetSize = itemSetSize,
      k = k,
      link = link,
      maxIter = maxIter,
      numClasses = numClasses,
      numFeatures = numFeatures,
      numHashTables = numHashTables,
      numSynonymsToFind = numSynonymsToFind,
      numInputCols = numInputCols,
      numItems = numItems,
      numUsers = numUsers,
      optimizer = optimizer,
      regParam = regParam,
      relativeError = relativeError,
      rank = rank,
      smoothing = smoothing,
      tol = tol,
      vocabSize = vocabSize)
  }
}

object MLParams {
  /** An instance built with every constructor default. */
  val empty: MLParams = new MLParams()
}

/**
 * Metrics specific to MLlib benchmark.
 *
 * @param metricName the name of the metric
 * @param metricValue the value of the metric
 * @param isLargerBetter the indicator showing whether larger metric value is better
 */
case class MLMetric(
    metricName: String,
    metricValue: Double,
    isLargerBetter: Boolean)

object MLMetric {
  /** Placeholder metric named "Invalid" with value 0.0 and `isLargerBetter = false`. */
  val Invalid: MLMetric = MLMetric("Invalid", 0.0, false)
}