def get_queries(env_name):
    """
    Retrieves queries for the environment.

    :param env_name: name of the environment
    :type env_name: str

    :return: A nested dictionary with the following structure:

        .. code-block:: python

            {
                (policy_a_args, policy_b_args): {
                    'obs_a': list,
                    'obs_b': list,
                    'action_a': list,
                    'action_b': list,
                    'target': list,
                    'horizon': list,
                }
            }

    :rtype: dict

    :example:
        >>> import opcc
        >>> opcc.get_queries('Hopper-v2')
    """
    if env_name not in ENV_CONFIGS:
        raise ValueError(
            f"`{env_name}` not found. "
            f"It should be among following: {list(ENV_CONFIGS.keys())}"
        )

    env_dir = os.path.join(ASSETS_DIR, env_name)
    queries_path = os.path.join(env_dir, "queries.p")
    # Use a context manager so the pickle file handle is closed after loading.
    with open(queries_path, "rb") as queries_file:
        queries = pickle.load(queries_file)
    return queries
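# A minimal usage sketch (illustrative only, not part of the library API):
# iterate over the query batches returned by ``get_queries``. The batch
# fields follow the structure documented in the docstring above; the
# environment name simply mirrors the docstring example.
def _example_iterate_queries():
    queries = get_queries("Hopper-v2")
    for (policy_a_args, policy_b_args), batch in queries.items():
        # Each batch pairs starting observations and open-loop action
        # sequences for two policies, along with the per-query 'target'
        # labels and evaluation 'horizon' values.
        obs_a, action_a = batch["obs_a"], batch["action_a"]
        obs_b, action_b = batch["obs_b"], batch["action_b"]
        targets, horizons = batch["target"], batch["horizon"]
        print(policy_a_args, policy_b_args, len(obs_a), "queries")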
def get_policy(env_name: str, pre_trained: int = 1):
    """
    Retrieves a policy for the environment at the given pre-trained quality
    marker.

    :param env_name: name of the environment
    :type env_name: str
    :param pre_trained: pre-trained level of the policy. It should be between
        1 and 4 (inclusive), where 1 indicates the best model and 4 indicates
        the worst.
    :type pre_trained: int

    :return: A tuple containing two objects: 1) the policy, and 2) a
        dictionary of performance stats of the policy for the given env_name
    :rtype: tuple of (ActorNetwork, dict)

    :example:
        >>> import opcc, torch
        >>> policy, policy_stats = opcc.get_policy('d4rl:maze2d-open-v0', pre_trained=1)
        >>> observation = torch.DoubleTensor([[0.5, 0.5, 0.5, 0.5]])
        >>> action = policy(observation)
        >>> action
        tensor([[0.9977, 0.9998]], dtype=torch.float64, grad_fn=<MulBackward0>)
    """
    if not (MIN_PRE_TRAINED_LEVEL <= pre_trained <= MAX_PRE_TRAINED_LEVEL):
        raise ValueError(
            f"pre_trained marker should be between"
            f" [{MIN_PRE_TRAINED_LEVEL}, {MAX_PRE_TRAINED_LEVEL}],"
            f" where {MIN_PRE_TRAINED_LEVEL} indicates the best model "
            f"and {MAX_PRE_TRAINED_LEVEL} indicates the worst model"
        )

    if env_name not in ENV_CONFIGS:
        raise ValueError(
            f"`{env_name}` not found. "
            f"It should be among following: {list(ENV_CONFIGS.keys())}"
        )

    # retrieve model
    model_dir = os.path.join(ASSETS_DIR, env_name, "models")
    model_path = os.path.join(model_dir, "model_{}.p".format(pre_trained))
    assert os.path.exists(model_path), f"model not found @ {model_path}"
    state_dict = torch.load(model_path, map_location=torch.device("cpu"))
    if env_name in ADROIT_ENV_CONFIGS.keys():
        actor_state_dict = {k: v for k, v in state_dict["model"]["actor"].items()}
    else:
        actor_state_dict = {
            k.replace("actor.", ""): v
            for k, v in state_dict.items()
            if "actor" in k
        }

    # create model
    model = ActorNetwork(**ENV_CONFIGS[env_name]["actor_kwargs"])
    model.load_state_dict(actor_state_dict)

    # Note: Gym returns observations of numpy float64 (i.e. double) type. If
    # the model is in float32, the observations have to be downcast before
    # being fed to the network. This down-casting leads to minuscule
    # precision differences across systems (processors). Though minuscule,
    # these differences propagate to the predicted actions, which, when fed
    # back to the gym environment over longer horizons, lead to small but
    # significant differences in trajectories, as reflected in the
    # Monte-Carlo return.
    # In order to prevent the above scenario, we simply upcast the model to
    # double.
    model = model.double()

    return model, ENV_PERFORMANCE_STATS[env_name][pre_trained]
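# A minimal rollout sketch (illustrative only, not the package's own
# evaluation code): feed gym observations to a retrieved policy. It assumes
# the classic gym API, where ``reset()`` returns an observation and
# ``step()`` returns a 4-tuple, as used by the environments above.
def _example_policy_rollout():
    policy, stats = get_policy("d4rl:maze2d-open-v0", pre_trained=1)
    env = gym.make("d4rl:maze2d-open-v0")
    obs, done, episode_return = env.reset(), False, 0.0
    while not done:
        # The policy is kept in double precision (see the note above), so
        # gym's float64 observations can be fed in without downcasting.
        action = policy(torch.tensor(obs, dtype=torch.float64).unsqueeze(0))
        obs, reward, done, _ = env.step(action.squeeze(0).detach().numpy())
        episode_return += reward
    print("episode return:", episode_return, "reported stats:", stats)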
def get_sequence_dataset(env_name, dataset_name):
    """
    Retrieves the episodic dataset for the given environment and dataset name.

    :param env_name: name of the environment
    :type env_name: str
    :param dataset_name: name of the dataset
    :type dataset_name: str

    :return: A list of dictionaries. Each dictionary is an episode containing
        keys ['next_observations', 'observations', 'rewards', 'terminals',
        'timeouts']
    :rtype: list[dict]

    :example:
        >>> import opcc
        >>> dataset = opcc.get_sequence_dataset('Hopper-v2', 'medium')  # list of episode dictionaries
        >>> len(dataset)
        2186
        >>> dataset[0].keys()
        dict_keys(['actions', 'infos/action_log_probs', 'infos/qpos', 'infos/qvel', 'next_observations', 'observations', 'rewards', 'terminals', 'timeouts'])
        >>> len(dataset[0]['observations'])  # episode length
        470
    """
    if env_name not in ENV_CONFIGS:
        raise ValueError(
            f"`{env_name}` not found. "
            f"It should be among following: {list(ENV_CONFIGS.keys())}"
        )

    if dataset_name not in ENV_CONFIGS[env_name]["datasets"]:
        raise ValueError(
            f"`{dataset_name}` not found. "
            f"It should be among following: "
            f"{list(ENV_CONFIGS[env_name]['datasets'].keys())}"
        )

    dataset_env = ENV_CONFIGS[env_name]["datasets"][dataset_name]["name"]
    env = gym.make(dataset_env)
    dataset = env.get_dataset()

    # remove meta-data, as the sequence dataset doesn't work with it
    metadata_keys = [k for k in dataset.keys() if "meta" in k]
    for k in metadata_keys:
        dataset.pop(k)

    split = ENV_CONFIGS[env_name]["datasets"][dataset_name]["split"]
    if split is not None:
        dataset = {k: v[:split] for k, v in dataset.items()}

    dataset = [x for x in d4rl.sequence_dataset(env, dataset)]
    return dataset
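# A minimal sketch (illustrative only) of consuming the episodic structure,
# e.g. to compute undiscounted episode returns. The per-episode 'rewards'
# entries are assumed to be numpy arrays, as yielded by d4rl's
# ``sequence_dataset``.
def _example_episode_returns():
    dataset = get_sequence_dataset("Hopper-v2", "medium")
    # Each entry is one episode; summing its 'rewards' gives the episode's
    # undiscounted return.
    returns = [episode["rewards"].sum() for episode in dataset]
    print(len(returns), "episodes, mean return:", sum(returns) / len(returns))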
def get_qlearning_dataset(env_name, dataset_name):
    """
    Retrieves a dataset of individual transitions for the given environment
    and dataset name.

    :param env_name: name of the environment
    :type env_name: str
    :param dataset_name: name of the dataset
    :type dataset_name: str

    :return: A dictionary of flat transition arrays with keys
        ['observations', 'actions', 'next_observations', 'rewards',
        'terminals']
    :rtype: dict

    :example:
        >>> import opcc
        >>> dataset = opcc.get_qlearning_dataset('Hopper-v2', 'medium')  # dictionary of transition arrays
        >>> dataset.keys()
        dict_keys(['observations', 'actions', 'next_observations', 'rewards', 'terminals'])
        >>> len(dataset['observations'])  # length of dataset
        999998
    """
    if env_name not in ENV_CONFIGS:
        raise ValueError(
            f"`{env_name}` not found. "
            f"It should be among following: {list(ENV_CONFIGS.keys())}"
        )

    if dataset_name not in ENV_CONFIGS[env_name]["datasets"]:
        raise ValueError(
            f"`{dataset_name}` not found. "
            f"It should be among following: "
            f"{list(ENV_CONFIGS[env_name]['datasets'].keys())}"
        )

    dataset_env = ENV_CONFIGS[env_name]["datasets"][dataset_name]["name"]
    env = gym.make(dataset_env)
    dataset = d4rl.qlearning_dataset(env)

    split = ENV_CONFIGS[env_name]["datasets"][dataset_name]["split"]
    if split is not None:
        dataset = {k: v[:split] for k, v in dataset.items()}

    return dataset
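# A minimal sketch (illustrative only) for sampling a mini-batch of
# transitions from the flat arrays returned above; the batch size and the
# use of numpy indexing are just example choices.
def _example_sample_transitions(batch_size=256):
    import numpy as np

    dataset = get_qlearning_dataset("Hopper-v2", "medium")
    idx = np.random.randint(0, len(dataset["observations"]), size=batch_size)
    # Index every array with the same indices so the sampled observations,
    # actions, next_observations, rewards and terminals stay aligned.
    batch = {k: v[idx] for k, v in dataset.items()}
    return batch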
def get_dataset_names(env_name):
    """
    Retrieves the list of dataset names available for an environment.

    :param env_name: name of the environment
    :type env_name: str

    :return: A list of dataset names
    :rtype: list[str]

    :example:
        >>> import opcc
        >>> opcc.get_dataset_names('Hopper-v2')
        ['random', 'expert', 'medium', 'medium-replay', 'medium-expert']
    """
    if env_name not in ENV_CONFIGS:
        raise ValueError(
            f"`{env_name}` not found. "
            f"It should be among following: {list(ENV_CONFIGS.keys())}"
        )

    return list(ENV_CONFIGS[env_name]["datasets"].keys())
def get_env_names():
    """
    Retrieves the list of environments for which queries are available.

    :return: A list of env names
    :rtype: list[str]

    :example:
        >>> import opcc
        >>> opcc.get_env_names()
        ['HalfCheetah-v2', 'Hopper-v2', 'Walker2d-v2', 'd4rl:maze2d-large-v1', 'd4rl:maze2d-medium-v1', 'd4rl:maze2d-open-v0', 'd4rl:maze2d-umaze-v1']
    """
    return sorted(ENV_CONFIGS.keys())
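# A minimal sketch (illustrative only) combining the two listing helpers
# above to enumerate every (environment, dataset) pair shipped with the
# package.
def _example_list_benchmarks():
    for env_name in get_env_names():
        for dataset_name in get_dataset_names(env_name):
            print(env_name, "->", dataset_name)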