syntax = "proto2";

package treelite_protobuf;

// A tree ensemble model: a list of decision trees plus model-wide settings.
message Model {
  // The decision trees that make up this model.
  repeated Tree trees = 1;

  // NOTE(review): undocumented in the schema; presumably the number of
  // features the model expects as input -- confirm against the consumer.
  optional int32 num_feature = 2;

  // Number of output groups:
  //   > 1 for multi-class classification;
  //   = 1 for everything else.
  optional int32 num_output_group = 3;

  // Ensemble kind:
  //   true:  random forest
  //   false: gradient boosted trees
  optional bool random_forest_flag = 4;

  // Extra parameters, stored as string key/value pairs.
  map<string, string> extra_params = 5;
}

// A single decision tree, reached through its head node.
message Tree {
  // Node at which the tree starts; descendants are reached through
  // Node.left_child / Node.right_child.
  // NOTE(review): presumably the root node -- confirm.
  optional Node head = 1;
}

// One node of a decision tree. A node is either an internal (split) node or
// a leaf; which fields are populated depends on the node kind, as described
// on each field below.
//
// NOTE(review): this message nests itself through left_child / right_child,
// so very deep trees may run into parser recursion-depth limits -- confirm
// that consumers bound the tree depth.
message Node {
  // Kind of feature that a split node tests.
  enum SplitFeatureType {
    NUMERICAL = 0;
    CATEGORICAL = 1;
  }

  // Left child. Missing if this node is a leaf.
  optional Node left_child = 1;

  // Right child. Missing if this node is a leaf.
  optional Node right_child = 2;

  // Default direction taken for missing values:
  //   true:  default to the left child
  //   false: default to the right child
  optional bool default_left = 3;

  // Index of the feature used for the split. Missing if this node is a leaf.
  optional int32 split_index = 4;

  // Type of the feature used for the split. Missing if this node is a leaf.
  optional SplitFeatureType split_type = 5;

  // Comparison operation (e.g. "<"), applied in the form
  //   [feature value] OP [threshold].
  // The left child is taken if the expression evaluates to true; the right
  // child is taken otherwise.
  // Missing if this node is a leaf or a categorical split.
  optional string op = 6;

  // Decision threshold.
  // Missing if this node is a leaf or a categorical split.
  optional double threshold = 7;

  // All categories that belong to the left child; every other category
  // belongs to the right child.
  // Missing if this node is a leaf or a numerical split.
  repeated uint32 left_categories = 8;

  // Leaf value. Missing if this node is not a leaf, and also missing when
  // leaf_vector is used instead.
  optional double leaf_value = 9;

  // Vector-valued leaf output. Usually missing; only used for random forests
  // with multi-class classification.
  repeated double leaf_vector = 10;

  // Number of data points whose traversal paths include this node.
  // May be omitted if unavailable.
  optional uint64 data_count = 11;

  // Sum of hessian values over all data points whose traversal paths include
  // this node. This value is generally positively correlated with the data
  // count. May be omitted if unavailable.
  optional double sum_hess = 12;

  // Change in loss attributed to this particular split.
  // May be omitted if unavailable.
  optional double gain = 13;

  // Whether to convert a missing value to zero. Only applicable when
  // split_type is set to CATEGORICAL. When this flag is set, it overrides the
  // behavior of default_left.
  optional bool missing_category_to_zero = 14;
}