""" Modified from https://github.com/PacktPublishing/Deep-Reinforcement-Learning-Hands-On/blob/master/Chapter08/lib/environ.py """ import enum import gym import gym.spaces import numpy as np from config import * from data import * from gym.utils import seeding class TradingEnv(gym.Env): """ An environment for stock trading, based on [Lapan 2018](https://www.packtpub.com/big-data-and-business-intelligence/deep-reinforcement-learning-hands). """ def __init__( self, bars_count=10, commission=0.1, reset_on_close=True, random_ofs_on_reset=True, reward_on_close=False, volumes=False, ): """ bars_count - count of bars that we pass in observation commission - percentage of stock price we have to pay to the broker on buying and selling the stock reset_on_close - if True, we stop the episode every time the agent asks us to close the position random_ofs_on_reset - if True, on every reset of our environment, a random offset in time series is chosen reward_on_close - if True, agent will receive reward only when a position is closed volumes - if True, switches on volumes in observations """ datafile = DATA_DIR prices = {"YNDX": load_relative(datafile)} assert isinstance(prices, dict) self._prices = prices self._state = State( bars_count, commission, reset_on_close, reward_on_close=reward_on_close, volumes=volumes ) self.action_space = gym.spaces.Discrete(n=len(Actions)) self.observation_space = gym.spaces.Box( low=-np.inf, high=np.inf, shape=self._state.shape, dtype=np.float32 ) self.random_ofs_on_reset = random_ofs_on_reset self.seed() def reset(self): # make selection of the instrument and it's offset. Then reset the state self._instrument = self.np_random.choice(list(self._prices.keys())) prices = self._prices[self._instrument] bars = self._state.bars_count if self.random_ofs_on_reset: offset = self.np_random.choice(prices.high.shape[0] - bars * 10) + bars else: offset = bars self._state.reset(prices, offset) return self._state.encode() def step(self, action_idx): action = Actions(action_idx) reward, done = self._state.step(action) obs = self._state.encode() info = {"instrument": self._instrument, "offset": self._state._offset} return obs, reward, done, info def render(self, mode="human", close=False): pass def close(self): pass def seed(self, seed=None): self.np_random, seed1 = seeding.np_random(seed) seed2 = seeding.hash_seed(seed1 + 1) % 2 ** 31 return [seed1, seed2] @classmethod def from_dir(cls, data_dir, **kwargs): prices = {file: load_relative(file) for file in price_files(data_dir)} return TradingEnv(prices, **kwargs) class Actions(enum.Enum): Skip = 0 Buy = 1 Close = 2 class State: def __init__( self, bars_count, commission_perc, reset_on_close, reward_on_close=True, volumes=True ): assert isinstance(bars_count, int) assert bars_count > 0 assert isinstance(commission_perc, float) assert commission_perc >= 0.0 assert isinstance(reset_on_close, bool) assert isinstance(reward_on_close, bool) self.bars_count = bars_count self.commission_perc = commission_perc self.reset_on_close = reset_on_close self.reward_on_close = reward_on_close self.volumes = volumes def reset(self, prices, offset): assert isinstance(prices, Prices) assert offset >= self.bars_count - 1 self.have_position = False self.open_price = 0.0 self._prices = prices self._offset = offset @property def shape(self): # [h, l, c] * bars + position_flag + rel_profit (since open) if self.volumes: return (4 * self.bars_count + 1 + 1,) else: return (3 * self.bars_count + 1 + 1,) def encode(self): """ Convert current state into numpy array. """ res = np.ndarray(shape=self.shape, dtype=np.float32) shift = 0 for bar_idx in range(-self.bars_count + 1, 1): res[shift] = self._prices.high[self._offset + bar_idx] shift += 1 res[shift] = self._prices.low[self._offset + bar_idx] shift += 1 res[shift] = self._prices.close[self._offset + bar_idx] shift += 1 if self.volumes: res[shift] = self._prices.volume[self._offset + bar_idx] shift += 1 res[shift] = float(self.have_position) shift += 1 if not self.have_position: res[shift] = 0.0 else: res[shift] = (self._cur_close() - self.open_price) / self.open_price return res def _cur_close(self): """ Calculate real close price for the current bar """ open = self._prices.open[self._offset] rel_close = self._prices.close[self._offset] return open * (1.0 + rel_close) def step(self, action): """ Perform one step in our price, adjust offset, check for the end of prices and handle position change :param action: :return: reward, done """ assert isinstance(action, Actions) reward = 0.0 done = False close = self._cur_close() if action == Actions.Buy and not self.have_position: self.have_position = True self.open_price = close reward -= self.commission_perc elif action == Actions.Close and self.have_position: reward -= self.commission_perc done |= self.reset_on_close if self.reward_on_close: reward += 100.0 * (close - self.open_price) / self.open_price self.have_position = False self.open_price = 0.0 self._offset += 1 prev_close = close close = self._cur_close() done |= self._offset >= self._prices.close.shape[0] - 1 if self.have_position and not self.reward_on_close: reward += 100.0 * (close - prev_close) / prev_close return reward, done