Controllers

Sinergym has a section where you can implement your own controllers. Currently, we have developed a random agent and a rule-based agent. You can find this code in sinergym/sinergym/utils/controllers.py. These controllers are very useful as a reference point when benchmarking DRL algorithms. The code is shown below:

"""Implementation of basic controllers."""
from datetime import datetime
from typing import Any, List, Optional, Sequence, Tuple

from numpy import arange

from ..utils.common import get_season_comfort_range, parse_variables


class RandomController:
    """Baseline agent that picks actions by sampling the action space."""

    def __init__(self, env: Any):
        """Store the environment whose action space will be sampled.

        Args:
            env (Any): Simulation environment. Its ``action_space``
                attribute must provide a ``sample()`` method.
        """
        self.env = env

    def act(self, observation: Optional[List[Any]] = None) -> Sequence[Any]:
        """Return whatever ``env.action_space.sample()`` produces.

        Args:
            observation (Optional[List[Any]], optional): Perceived
                observation. It is accepted for interface compatibility
                but is not used to choose the action. Defaults to None.

        Returns:
            Sequence[Any]: Action chosen.
        """
        action_space = self.env.action_space
        return action_space.sample()


class RuleBasedController(object):

    def __init__(self, env: Any) -> None:
        """Agent based on static rules.

        Args:
            env (Any): Simulation environment. Its ``variables_path``
                attribute is read and handed to ``parse_variables``.
        """

        self.env = env

        self.variables_path = self.env.variables_path
        self.variables = parse_variables(self.variables_path)
        # Name the trailing time fields so act() can look them up by key.
        # NOTE(review): assumes the environment appends day, month and hour
        # (in that order) after the configured observation variables.
        self.variables['observation'].extend(['day', 'month', 'hour'])

    def act(self, observation: List[Any]) -> Sequence[Any]:
        """Select action based on outdoor air drybulb temperature and daytime.

        If the outdoor temperature lies within the seasonal comfort range
        (bounds inclusive), the current thermostat setpoints read from the
        observation are kept. Otherwise a fixed (heating, cooling) setpoint
        pair is chosen according to the hour of the day.

        Args:
            observation (List[Any]): Perceived observation, ordered like
                ``self.variables['observation']``.

        Returns:
            Sequence[Any]: Action chosen, as a (heating setpoint,
            cooling setpoint) tuple.
        """
        obs_dict = dict(zip(self.variables['observation'], observation))

        out_temp = obs_dict['Site Outdoor Air Drybulb Temperature (Environment)']

        day = int(obs_dict['day'])
        month = int(obs_dict['month'])
        hour = int(obs_dict['hour'])

        season_comfort_range = get_season_comfort_range(month, day)
        comfort_low = season_comfort_range[0]
        comfort_high = season_comfort_range[1]

        # Compare against the interval bounds directly. Testing membership
        # in a float arange() only matches values lying exactly on the
        # 0.1-step grid, so real temperatures inside the range were
        # almost never recognised as comfortable.
        if comfort_low <= out_temp <= comfort_high:
            # maintain setpoints if comfort requirements are already met
            current_cool_setpoint = obs_dict[
                'Zone Thermostat Cooling Setpoint Temperature (SPACE1-1)']
            current_heat_setpoint = obs_dict[
                'Zone Thermostat Heating Setpoint Temperature (SPACE1-1)']
            return (current_heat_setpoint, current_cool_setpoint)

        if 6 <= hour < 18:  # day
            return (19.44, 25.0)
        if 18 <= hour < 22:  # evening
            return (20.0, 24.44)
        return (18.33, 23.33)  # night

The functionality is very simple: given an environment observation, these instances return an action to interact with the environment. You can develop your own controllers or modify the rules of RuleBasedController, for example. An example of how to use these controllers is shown below:

import gym
import numpy as np

from sinergym.utils.controllers import RuleBasedController

env = gym.make('Eplus-5Zone-mixed-continuous-v1')

# create rule-controlled agent
agent = RuleBasedController(env)

for i in range(1):
    # start a fresh episode
    obs = env.reset()
    rewards = []
    done = False
    current_month = 0
    # The rollout lives inside the episode loop so that every episode
    # is actually simulated, not just the last one that was reset.
    while not done:
        action = agent.act(obs)
        obs, reward, done, info = env.step(action)
        rewards.append(reward)
        if info['month'] != current_month:  # display results every month
            current_month = info['month']
            print('Reward: ', sum(rewards), info)
    # per-episode summary
    print(
        'Episode ',
        i,
        'Mean reward: ',
        np.mean(rewards),
        'Cumulative reward: ',
        sum(rewards))

env.close()