Source code for opcc

import os
import pickle

import d4rl
import gym
import torch

from .config import ASSETS_DIR
from .config import ENV_CONFIGS, ENV_PERFORMANCE_STATS, ADROIT_ENV_CONFIGS
from .config import MAX_PRE_TRAINED_LEVEL
from .config import MIN_PRE_TRAINED_LEVEL
from .model import ActorNetwork

__all__ = [
    "get_queries",
    "get_policy",
    "get_sequence_dataset",
    "get_qlearning_dataset",
    "get_dataset_names",
    "get_env_names",
]


def get_queries(env_name):
    """
    Retrieves queries for the environment.

    :param env_name: name of the environment
    :type env_name: str

    :return: A nested dictionary with the following structure:

        .. code-block:: python

            {
                (policy_a_args, policy_b_args): {
                    'obs_a': list,
                    'obs_b': list,
                    'action_a': list,
                    'action_b': list,
                    'target': list,
                    'horizon': list,
                }
            }

    :rtype: dict

    :example:
        >>> import opcc
        >>> opcc.get_queries('Hopper-v2')
    """
    if env_name not in ENV_CONFIGS:
        raise ValueError(
            f"`{env_name}` not found. "
            f"It should be among the following: {list(ENV_CONFIGS.keys())}"
        )

    env_dir = os.path.join(ASSETS_DIR, env_name)
    queries_path = os.path.join(env_dir, "queries.p")
    with open(queries_path, "rb") as queries_file:
        queries = pickle.load(queries_file)
    return queries
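
# Illustrative usage sketch (not part of the opcc source): iterating over the
# query structure documented in get_queries above. The key and field names come
# from the docstring; the loop body is an assumption about typical usage.
#
# >>> import opcc
# >>> queries = opcc.get_queries('Hopper-v2')
# >>> for (policy_a_args, policy_b_args), batch in queries.items():
# ...     obs_a, actions_a = batch['obs_a'], batch['action_a']
# ...     obs_b, actions_b = batch['obs_b'], batch['action_b']
# ...     targets, horizons = batch['target'], batch['horizon']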


def get_policy(env_name: str, pre_trained: int = 1):
    """
    Retrieves a policy for the environment at the given pre-trained quality
    marker.

    :param env_name: name of the environment
    :type env_name: str
    :param pre_trained: pre-trained level of the policy. It should be between
        1 and 4 (inclusive), where 1 indicates the best model and 4 the worst.
    :type pre_trained: int

    :return: A tuple of two objects: (1) the policy, and (2) a dictionary of
        performance stats of the policy for the given env_name
    :rtype: tuple of (ActorNetwork, dict)

    :example:
        >>> import opcc, torch
        >>> policy, policy_stats = opcc.get_policy('d4rl:maze2d-open-v0', pre_trained=1)
        >>> observation = torch.DoubleTensor([[0.5, 0.5, 0.5, 0.5]])
        >>> action = policy(observation)
        >>> action
        tensor([[0.9977, 0.9998]], dtype=torch.float64, grad_fn=<MulBackward0>)
    """
    if not (MIN_PRE_TRAINED_LEVEL <= pre_trained <= MAX_PRE_TRAINED_LEVEL):
        raise ValueError(
            f"pre_trained marker should be between"
            f" [{MIN_PRE_TRAINED_LEVEL},{MAX_PRE_TRAINED_LEVEL}],"
            f" where {MIN_PRE_TRAINED_LEVEL} indicates the best model "
            f"and {MAX_PRE_TRAINED_LEVEL} indicates the worst model"
        )

    if env_name not in ENV_CONFIGS:
        raise ValueError(
            f"`{env_name}` not found. "
            f"It should be among the following: {list(ENV_CONFIGS.keys())}"
        )

    # retrieve model
    model_dir = os.path.join(ASSETS_DIR, env_name, "models")
    model_path = os.path.join(model_dir, "model_{}.p".format(pre_trained))
    assert os.path.exists(model_path), f"model not found @ {model_path}"
    state_dict = torch.load(model_path, map_location=torch.device("cpu"))
    if env_name in ADROIT_ENV_CONFIGS.keys():
        actor_state_dict = {k: v for k, v in state_dict["model"]["actor"].items()}
    else:
        actor_state_dict = {
            k.replace("actor.", ""): v
            for k, v in state_dict.items()
            if "actor" in k
        }

    # create model
    model = ActorNetwork(**ENV_CONFIGS[env_name]["actor_kwargs"])
    model.load_state_dict(actor_state_dict)

    # Note: Gym returns observations as numpy float64 (double). If the model is
    # in float32, observations have to be down-cast to float32 before being fed
    # to the network. This down-casting leads to minuscule precision differences
    # across systems (processors). Though tiny, these differences propagate into
    # the predicted actions and, when fed back to the gym environment over long
    # horizons, cause small but significant differences in trajectories, which
    # show up in the Monte-Carlo return.
    # To prevent this, we simply up-cast the model to double.
    model = model.double()

    return model, ENV_PERFORMANCE_STATS[env_name][pre_trained]
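
# Illustrative usage sketch (not part of the opcc source): rolling out a
# pre-trained policy in its gym environment. The double-precision input mirrors
# the note in get_policy above; the rollout loop assumes the classic gym step
# API (4-tuple return), as used by d4rl-era environments.
#
# >>> import gym, torch, opcc
# >>> policy, stats = opcc.get_policy('Hopper-v2', pre_trained=1)
# >>> env = gym.make('Hopper-v2')
# >>> obs, done, score = env.reset(), False, 0.0
# >>> while not done:
# ...     with torch.no_grad():
# ...         action = policy(torch.tensor(obs, dtype=torch.float64).unsqueeze(0))
# ...     obs, reward, done, info = env.step(action[0].numpy())
# ...     score += reward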


def get_sequence_dataset(env_name, dataset_name):
    """
    Retrieves the episodic dataset for the given environment and dataset_name.

    :param env_name: name of the environment
    :type env_name: str
    :param dataset_name: name of the dataset
    :type dataset_name: str

    :return: A list of dictionaries. Each dictionary is an episode containing
        keys ['next_observations', 'observations', 'rewards', 'terminals',
        'timeouts']
    :rtype: list[dict]

    :example:
        >>> import opcc
        >>> dataset = opcc.get_sequence_dataset('Hopper-v2', 'medium')  # list of episode dictionaries
        >>> len(dataset)
        2186
        >>> dataset[0].keys()
        dict_keys(['actions', 'infos/action_log_probs', 'infos/qpos', 'infos/qvel', 'next_observations', 'observations', 'rewards', 'terminals', 'timeouts'])
        >>> len(dataset[0]['observations'])  # episode length
        470
    """
    if env_name not in ENV_CONFIGS:
        raise ValueError(
            f"`{env_name}` not found. "
            f"It should be among the following: {list(ENV_CONFIGS.keys())}"
        )

    if dataset_name not in ENV_CONFIGS[env_name]["datasets"]:
        raise ValueError(
            f"`{dataset_name}` not found. "
            f"It should be among the following: "
            f"{list(ENV_CONFIGS[env_name]['datasets'].keys())}"
        )

    dataset_env = ENV_CONFIGS[env_name]["datasets"][dataset_name]["name"]
    env = gym.make(dataset_env)
    dataset = env.get_dataset()

    # remove meta-data, as the sequence dataset does not work with it
    metadata_keys = [k for k in dataset.keys() if "meta" in k]
    for k in metadata_keys:
        dataset.pop(k)

    split = ENV_CONFIGS[env_name]["datasets"][dataset_name]["split"]
    if split is not None:
        dataset = {k: v[:split] for k, v in dataset.items()}

    dataset = list(d4rl.sequence_dataset(env, dataset))
    return dataset
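
# Illustrative usage sketch (not part of the opcc source): computing episode
# lengths and undiscounted returns from the episodic dataset above. Field names
# follow the docstring; the aggregation itself is a usage assumption.
#
# >>> import opcc
# >>> episodes = opcc.get_sequence_dataset('Hopper-v2', 'medium')
# >>> returns = [episode['rewards'].sum() for episode in episodes]
# >>> lengths = [len(episode['observations']) for episode in episodes]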


def get_qlearning_dataset(env_name, dataset_name):
    """
    Retrieves a dataset of flattened transitions for the given environment and
    dataset_name.

    :param env_name: name of the environment
    :type env_name: str
    :param dataset_name: name of the dataset
    :type dataset_name: str

    :return: A dictionary of transitions with keys ['observations', 'actions',
        'next_observations', 'rewards', 'terminals']
    :rtype: dict

    :example:
        >>> import opcc
        >>> dataset = opcc.get_qlearning_dataset('Hopper-v2', 'medium')  # dictionary of transitions
        >>> dataset.keys()
        dict_keys(['observations', 'actions', 'next_observations', 'rewards', 'terminals'])
        >>> len(dataset['observations'])  # number of transitions
        999998
    """
    if env_name not in ENV_CONFIGS:
        raise ValueError(
            f"`{env_name}` not found. "
            f"It should be among the following: {list(ENV_CONFIGS.keys())}"
        )

    if dataset_name not in ENV_CONFIGS[env_name]["datasets"]:
        raise ValueError(
            f"`{dataset_name}` not found. "
            f"It should be among the following: "
            f"{list(ENV_CONFIGS[env_name]['datasets'].keys())}"
        )

    dataset_env = ENV_CONFIGS[env_name]["datasets"][dataset_name]["name"]
    env = gym.make(dataset_env)
    dataset = d4rl.qlearning_dataset(env)

    split = ENV_CONFIGS[env_name]["datasets"][dataset_name]["split"]
    if split is not None:
        dataset = {k: v[:split] for k, v in dataset.items()}

    return dataset
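
# Illustrative usage sketch (not part of the opcc source): sampling a
# transition mini-batch from the flat q-learning dataset above, e.g. for
# offline training. The batch size and numpy indexing are assumptions.
#
# >>> import numpy as np
# >>> import opcc
# >>> dataset = opcc.get_qlearning_dataset('Hopper-v2', 'medium')
# >>> batch_idx = np.random.randint(0, len(dataset['observations']), size=256)
# >>> batch = {k: v[batch_idx] for k, v in dataset.items()}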


def get_dataset_names(env_name):
    """
    Retrieves the list of dataset names available for an environment.

    :param env_name: name of the environment
    :type env_name: str

    :return: A list of dataset names
    :rtype: list[str]

    :example:
        >>> import opcc
        >>> opcc.get_dataset_names('Hopper-v2')
        ['random', 'expert', 'medium', 'medium-replay', 'medium-expert']
    """
    if env_name not in ENV_CONFIGS:
        raise ValueError(
            f"`{env_name}` not found. "
            f"It should be among the following: {list(ENV_CONFIGS.keys())}"
        )

    return list(ENV_CONFIGS[env_name]["datasets"].keys())


def get_env_names():
    """
    Retrieves the list of environments for which queries are available.

    :return: A list of env-names
    :rtype: list[str]

    :example:
        >>> import opcc
        >>> opcc.get_env_names()
        ['HalfCheetah-v2', 'Hopper-v2', 'Walker2d-v2', 'd4rl:maze2d-large-v1', 'd4rl:maze2d-medium-v1', 'd4rl:maze2d-open-v0', 'd4rl:maze2d-umaze-v1']
    """
    return sorted(list(ENV_CONFIGS.keys()))
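
# Illustrative usage sketch (not part of the opcc source): enumerating every
# available (environment, dataset) pair with the two helpers above.
#
# >>> import opcc
# >>> pairs = [(env_name, dataset_name)
# ...          for env_name in opcc.get_env_names()
# ...          for dataset_name in opcc.get_dataset_names(env_name)]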