`gradf` are NaN while running saasbo_nehvi.ipynb
Hi, I'm implementing my own optimization on top of saasbo_nehvi.ipynb and hitting the following error in the BoTorch engine. It has already occurred a few times, though I assume it may be hard to reproduce given the stochastic nature of the MC sampling. Can you help?

Versions: Ax 0.2.2 and BoTorch 0.5.1.
```
A not p.d., added jitter of 1.0e-08 to the diagonal
/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/gpytorch/utils/cholesky.py:44: NumericalWarning:
A not p.d., added jitter of 1.0e-07 to the diagonal
/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/gpytorch/utils/cholesky.py:44: NumericalWarning:
A not p.d., added jitter of 1.0e-06 to the diagonal
/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/gpytorch/lazy/lazy_tensor.py:1635: NumericalWarning:
Runtime Error when computing Cholesky decomposition: Matrix not positive definite after repeatedly adding jitter up to 1.0e-06… Using symeig method.
```
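These warnings mean the GP posterior covariance is ill-conditioned: gpytorch keeps adding diagonal jitter until the Cholesky factorization succeeds, and eventually falls back to a symmetric eigendecomposition. One mitigation I may try (my own sketch, not a confirmed fix; the exact keyword arguments of `cholesky_jitter` vary across gpytorch versions) is to start from a larger jitter around the generation call, using `model` and `BATCH_SIZE` from my script below:

```python
import gpytorch

# Sketch: raise the initial diagonal jitter for double-precision tensors
# so the Cholesky factorization succeeds without repeated retries.
with gpytorch.settings.cholesky_jitter(double=1e-4):
    generator_run = model.gen(BATCH_SIZE)
```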
```
Traceback (most recent call last):
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/HMS/mobo_ANS_saasbo_nehvi.py", line 409, in <module>
    main()
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/HMS/mobo_ANS_saasbo_nehvi.py", line 269, in main
    generator_run = model.gen(BATCH_SIZE)
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/ax/modelbridge/multi_objective_torch.py", line 231, in gen
    return super().gen(
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/ax/modelbridge/base.py", line 669, in gen
    observation_features, weights, best_obsf, gen_metadata = self._gen(
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/ax/modelbridge/array.py", line 274, in _gen
    X, w, gen_metadata, candidate_metadata = self._model_gen(
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/ax/modelbridge/multi_objective_torch.py", line 165, in _model_gen
    X, w, gen_metadata, candidate_metadata = self.model.gen(
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/ax/models/torch/botorch_moo.py", line 364, in gen
    candidates, expected_acquisition_value = self.acqf_optimizer(
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/ax/models/torch/botorch_defaults.py", line 346, in scipy_optimizer
    X, expected_acquisition_value = optimize_acqf(
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/botorch/optim/optimize.py", line 113, in optimize_acqf
    candidate, acq_value = optimize_acqf(
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/botorch/optim/optimize.py", line 184, in optimize_acqf
    batch_candidates_curr, batch_acq_values_curr = gen_candidates_scipy(
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/botorch/generation/gen.py", line 166, in gen_candidates_scipy
    res = minimize(
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/scipy/optimize/_minimize.py", line 623, in minimize
    return _minimize_lbfgsb(fun, x0, args, jac, bounds,
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/scipy/optimize/lbfgsb.py", line 306, in _minimize_lbfgsb
    sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/scipy/optimize/optimize.py", line 261, in _prepare_scalar_function
    sf = ScalarFunction(fun, x0, args, grad, hess,
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/scipy/optimize/_differentiable_functions.py", line 140, in __init__
    self._update_fun()
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/scipy/optimize/_differentiable_functions.py", line 233, in _update_fun
    self._update_fun_impl()
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/scipy/optimize/_differentiable_functions.py", line 137, in update_fun
    self.f = fun_wrapped(self.x)
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/scipy/optimize/_differentiable_functions.py", line 134, in fun_wrapped
    return fun(np.copy(x), *args)
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/scipy/optimize/optimize.py", line 74, in __call__
    self._compute_if_needed(x, *args)
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/scipy/optimize/optimize.py", line 68, in _compute_if_needed
    fg = self.fun(x, *args)
  File "/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/ans_compression_opt/lib/python3.8/site-packages/botorch/generation/gen.py", line 162, in f
    raise RuntimeError(msg)
RuntimeError: 100 elements of the 100 element gradient array gradf are NaN. This often indicates numerical issues.
```
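Since the failure is stochastic and hard to reproduce, here is a minimal sketch of what I can do on my side to pin it down (hypothetical helper, not yet in my script): fix the seeds and retry candidate generation when the NaN-gradient RuntimeError fires. Note that pinning the `torch`/`numpy` seeds alone may not make the NUTS sampler fully deterministic.

```python
import numpy as np
import torch

# Pin seeds so reruns follow the same random path as far as possible.
torch.manual_seed(0)
np.random.seed(0)

def gen_with_retry(model, batch_size, n_retries=3):
    """Retry candidate generation when acquisition gradients go NaN."""
    last_err = None
    for attempt in range(n_retries):
        try:
            return model.gen(batch_size)
        except RuntimeError as err:  # e.g. "... gradient array gradf are NaN"
            last_err = err
            print(f"model.gen failed on attempt {attempt}: {err}")
    raise last_err
```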
My code:

```python
#!/usr/bin/env python
# coding: utf-8

# # Fully Bayesian Multi-Objective Optimization using qNEHVI + SAASBO
# ### This Tutorial

import pandas as pd
from ax import *
import torch
import numpy as np

from ax.metrics.noisy_function import GenericNoisyFunctionMetric
from ax.service.utils.report_utils import exp_to_df
from ax.runners.synthetic import SyntheticRunner

# Plotting imports and initialization
from ax.utils.notebook.plotting import render, init_notebook_plotting
from ax.plot.contour import plot_contour
from ax.plot.pareto_utils import compute_posterior_pareto_frontier
from ax.plot.pareto_frontier import plot_pareto_frontier

init_notebook_plotting()

# Model registry for creating multi-objective optimization models.
from ax.modelbridge.registry import Models

# Analysis utilities, including a method to evaluate hypervolumes.
from ax.modelbridge.modelbridge_utils import observed_hypervolume

from pylab import *
import sys
# sys.path.insert(1, '/home/hanoch/PycharmProjects/pythonProject/ans_compression_opt/laila/sefi/_skariel/mixed_quant/HMS')
import M
import torch as nn
import os
from math import sqrt, log
import time
from argparse import ArgumentParser
import matplotlib.pyplot as plt
import numpy as np
from bayesian_opt_utils import Cmax_compress_min_err_moo, print_arguments
from matplotlib.cm import ScalarMappable

def main():
    parser = ArgumentParser()
    # parser.add_argument('--random-seed', type=int, default=None, metavar='INT', help="random seed to set")
    parser.add_argument('--gpu-id', type=int, default=3, metavar='INT',
                        help="cuda device id")
    parser.add_argument('--model-path', type=str, default='ml_video_edit_art_generate.onnx', metavar='PATH',
                        help="model path to be optimized")
    parser.add_argument('--result-dir', type=str, default=None, metavar='PATH',
                        help="if given, all output of the training will be in this folder. "
                             "The exception is the tensorboard logs.")
    parser.add_argument('--n-batch', type=int, default=20, metavar='INT', help="")
    parser.add_argument('--batch-size', type=int, default=4, metavar='INT', help="")
    parser.add_argument('--lambda-scale-rel-error', type=float, default=1.0, metavar='FLOAT', help="")
    parser.add_argument('--apply-ref-point', action='store_true', help='')
    parser.add_argument('--ans-opt-kernel', type=str, default='python_imp', choices=['c_type', 'python_imp'],
                        metavar='STRING', help='')
    parser.add_argument('--verbose', action='store_true', help='')
    args = parser.parse_args()
    # In[2]:
    print_arguments(args)
    # if args.verbose:
    #     os.system("g++ -DFSE_VERBOSE -Ofast ../cimpl/main.cc -lm -shared -o ../HMS/ans_cmprs.so")  # no verbose
    start = time.time()
    print("SAASBO MOBO")
    clock = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start))
    print("Time/Date is {}".format(clock))

    if args.ans_opt_kernel == 'python_imp':
        python_imp = True
    elif args.ans_opt_kernel == 'c_type':
        python_imp = False
    else:
        raise ValueError("Wrong kernel type!!!!")

    negate = False  # True
    verbose = args.verbose
    compute_hv = False
    args.python_imp = python_imp
    args.negate = negate
    args.algo = "SAASBO"
    N_BATCH = args.n_batch
    BATCH_SIZE = args.batch_size

    unique_run_name = str(int(time.time()))
    print("unique run name : {}".format(unique_run_name))
    args.unique_run_name = unique_run_name
    if not os.path.exists(args.result_dir):
        os.makedirs(args.result_dir)

    M.path_to_error_eval_images = '/mnt/users/sefi/images/**/*.jpg'
    print("Model {}".format(args.model_path))
    reshape_input_image = not (os.path.basename(args.model_path) == 'ml_video_edit_art_generate.onnx' or
                               os.path.basename(args.model_path) == 'ml_video_edit_art_transfer.onnx')
    images = M.Images(for_judge=reshape_input_image)
    minsize = 10  # 1048572  # HK: only one FC layer of 1K
    model_path = args.model_path

    if os.path.basename(args.model_path) == 'ml_video_edit_art_transfer.onnx':
        # Run the generate model first to create inputs for the transfer model.
        ob = M.OnnxBridge(os.path.join(os.path.dirname(args.model_path), 'ml_video_edit_art_generate.onnx'))
        # images = M.Images()
        original_outs = [ob(i) for i in images]
        # The original outputs are needed by the transfer model, so we generate them here as well.
        ob = M.OnnxBridge(os.path.join(os.getcwd(), 'ml_video_edit_art_transfer.onnx'))
        original_transfer_outs = []
        for i, oo in zip(images, original_outs[-1:] + original_outs[:-1]):
            x = [i, oo[0], oo[1]]
            # original_transfer_outs.append(ob(x)[0])
            original_transfer_outs.append(x)  # creating inputs as a list
        images = original_transfer_outs

    if python_imp:
        print("python imp!!!")
    # ### Load our sample 2-objective problem
    # ## Define experiment configurations
    if args.gpu_id == -1:
        device = torch.device("cpu")  # force CPU
    else:
        device = torch.device("cuda:" + str(args.gpu_id) if torch.cuda.is_available() else "cpu")
    tkwargs = {
        "dtype": torch.double,
        "device": device,  # HK
        # "device": torch.device("cpu"),  # force CPU
    }

    # Negate the objectives: (-compression, rel error).  # HK
    objective_ans = Cmax_compress_min_err_moo(model_path=model_path, images=images,
                                              minsize=minsize, negate=negate,
                                              python_imp=python_imp, verbose=verbose,
                                              lambda_scl_rel_err=args.lambda_scale_rel_error,
                                              apply_ref_point=args.apply_ref_point).to(**tkwargs)
    args.objective_ans_cfg = objective_ans.__dict__.items()

    # ### Search Space
    upper = 0.1  # HK: was 0.005 for python 4.12; set to 0.1 for judge
    print("Upper bound of param range ", upper)
    search_space = SearchSpace(
        parameters=[RangeParameter(name=f"x{i}", lower=0, upper=upper, parameter_type=ParameterType.FLOAT)
                    for i in range(objective_ans.dim)],
    )
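
    # Hypothetical sanity check (a sketch I may add; not in my original run):
    # probe the objective at random points inside the search space and flag
    # non-finite outputs, since NaN/inf observations can later surface as NaN
    # acquisition gradients. Assumes objective_ans returns a tensor holding
    # both objective values.
    with torch.no_grad():
        for x_probe in torch.rand(32, objective_ans.dim, **tkwargs) * upper:
            y_probe = objective_ans(x_probe)
            if not torch.isfinite(torch.as_tensor(y_probe)).all():
                print("Non-finite objective at", x_probe, "->", y_probe)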

    # ### MultiObjectiveOptimizationConfig
    #
    # To optimize multiple objectives we must create a `MultiObjective` containing the metrics we'll optimize,
    # and a `MultiObjectiveOptimizationConfig` (which contains `ObjectiveThreshold`s), instead of the more
    # typical `Objective` and `OptimizationConfig`. Additional resources:
    # - To set up a custom metric for your problem, refer to the dedicated section of the Developer API
    #   tutorial: https://ax.dev/tutorials/gpei_hartmann_developer.html#8.-Defining-custom-metrics.
    # - To avoid needing to set up custom metrics, use multi-objective optimization via the Ax Service API:
    #   https://ax.dev/tutorials/multiobjective_optimization.html#Using-the-Service-API.
    #
    # We define `GenericNoisyFunctionMetric`s to wrap the objective's outputs.

    # In[6]:
    param_names = [f"x{i}" for i in range(objective_ans.dim)]

    # In[7]:
    def f1(x) -> float:
        x_sorted = [x[p_name] for p_name in param_names]
        return float(objective_ans(torch.tensor(x_sorted, **tkwargs))[0])

    def f2(x) -> float:
        x_sorted = [x[p_name] for p_name in param_names]
        return float(objective_ans(torch.tensor(x_sorted, **tkwargs))[1])

    metric_a = GenericNoisyFunctionMetric("a", f=f1, noise_sd=0.0, lower_is_better=False)
    metric_b = GenericNoisyFunctionMetric("b", f=f2, noise_sd=0.0, lower_is_better=False)
    # oc = OutcomeConstraint(metric=Metric(name="b"), op=ComparisonOp.LEQ, bound=0.005, relative=False)

    mo = MultiObjective(
        objectives=[Objective(metric=metric_a), Objective(metric=metric_b)],
    )
    objective_thresholds = [
        ObjectiveThreshold(metric=metric, bound=val, relative=False)
        for metric, val in zip(mo.metrics, objective_ans.ref_point)
    ]
    optimization_config = MultiObjectiveOptimizationConfig(
        objective=mo,
        objective_thresholds=objective_thresholds,
        # outcome_constraints=[oc],  # HK: added to focus ROI
    )

    # ## Define experiment creation utilities
    # These construct our experiment, then initialize it with Sobol points before we fit a Gaussian
    # process model to those initial points.
    N_INIT = 2 * (objective_ans.dim + 1)  # HK

    def build_experiment():
        experiment = Experiment(
            name="pareto_experiment",
            search_space=search_space,
            optimization_config=optimization_config,
            runner=SyntheticRunner(),
        )
        return experiment

    # Initialize with Sobol samples.
    def initialize_experiment(experiment):
        sobol = Models.SOBOL(search_space=experiment.search_space)
        experiment.new_batch_trial(sobol.gen(N_INIT)).run()
        return experiment.fetch_data()

    # ## qNEHVI + SAASBO
    # Noisy expected hypervolume improvement + fully Bayesian inference with SAAS priors.

    # In[17]:
    print("SOBOL random init HV can be 0")  # by FAIR
    experiment = build_experiment()
    data = initialize_experiment(experiment)

    # In[ ]:
    from botorch.utils.multi_objective.box_decompositions.dominated import DominatedPartitioning

    hv_list = []
    model = None
    for i in range(N_BATCH):
        model = Models.FULLYBAYESIANMOO(
            experiment=experiment,
            data=data,
            # Use fewer num_samples and warmup_steps to speed up this tutorial.
            num_samples=256,  # MCMC samples
            warmup_steps=512,
            torch_device=tkwargs["device"],
            verbose=verbose,  # set to True to print stats from MCMC; gp_kernel="rbf" by default
            disable_progbar=False,  # set to False to print a progress bar from MCMC
        )
        # print(model, data)
        generator_run = model.gen(BATCH_SIZE)
        trial = experiment.new_batch_trial(generator_run=generator_run)
        trial.run()
        data = Data.from_multiple_data([data, trial.fetch_data()])

        exp_df = exp_to_df(experiment)
        outcomes = torch.tensor(exp_df[['a', 'b']].values, **tkwargs)
        partitioning = DominatedPartitioning(ref_point=objective_ans.ref_point, Y=outcomes)
        if compute_hv:  # disabled to increase speed
            try:
                hv = partitioning.compute_hypervolume().item()
            except Exception:
                hv = 0
                print("Failed to compute hv")
            hv_list.append(hv)
            print(f"Iteration: {i}, HV: {hv}")

    df = exp_to_df(experiment).sort_values(by=["trial_index"])
    # Undo the scaling of the target.
    df['b'] = df['b'].apply(lambda x: x / args.lambda_scale_rel_error)
    outcomes = df[["a", "b"]].values

    fig, axes = plt.subplots(1, 1, figsize=(8, 6))
    algos = ["qNEHVI"]
    train_obj = outcomes
    cm = plt.cm.get_cmap('viridis')
    # n_results = N_BATCH * BATCH_SIZE + N_INIT
    batch_number = df.trial_index.values
    sc = axes.scatter(train_obj[:, 0], train_obj[:, 1], c=batch_number, alpha=0.8)
    axes.grid()
    axes.axhline(y=-0.05)
    axes.set_title(algos[0] + " w/ rel error scaling " + str(args.lambda_scale_rel_error)
                   + " trials " + str(N_BATCH) + "_x_" + str(BATCH_SIZE)
                   + " model: " + os.path.basename(args.model_path))
    axes.set_xlabel("Compression")
    axes.set_ylabel("-rel_error")
    norm = plt.Normalize(batch_number.min(), batch_number.max())
    sm = ScalarMappable(norm=norm, cmap=cm)
    sm.set_array([])
    fig.subplots_adjust(right=0.9)
    cbar_ax = fig.add_axes([0.93, 0.15, 0.01, 0.7])
    cbar = fig.colorbar(sm, cax=cbar_ax)
    cbar.ax.set_title("Iteration")

    file_name = 'SAASBO_qNEHVI' + str(unique_run_name)
    fig.savefig(os.path.join(args.result_dir, file_name + ".png"))
    df['-rel_error'] = df['b']
    df['compression_ratio'] = df['a']
    df.to_csv(os.path.join(args.result_dir, file_name + ".csv"))

    end = time.time()
    print("Elapsed time for processing SAASBO", end - start)
    print("unique run name : {}".format(unique_run_name))
    args.process_time = end - start
    df_result = pd.DataFrame.from_dict(list(vars(args).items()))
    df_result.to_csv(os.path.join(args.result_dir, file_name + "_settings.csv"))

    # # Hypervolume statistics
    # The hypervolume of the space dominated by points that dominate the reference point.
    # plt.show()


if __name__ == "__main__":
    main()
```
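For the hypervolume statistics mentioned at the end, this is a minimal sketch of the final computation I would enable; it mirrors the in-loop `DominatedPartitioning` call, and `experiment`, `objective_ans`, and `tkwargs` are the objects defined in the script above:

```python
# Hypervolume of the region dominated by the observed points, relative
# to the reference point (same convention as the in-loop computation).
Y_final = torch.tensor(exp_to_df(experiment)[["a", "b"]].values, **tkwargs)
bd = DominatedPartitioning(ref_point=objective_ans.ref_point, Y=Y_final)
print("Final hypervolume:", bd.compute_hypervolume().item())
```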
Top GitHub Comments

> Thanks for the update, keep me posted.

On Thu, Dec 16, 2021, 01:32, Lena Kashtelyan wrote:

> Not currently sure when we will be able to look into this, so I'll put this investigation onto our wishlist for now.