Skip to content

Commit f9256cc

Browse files
committed
update final state computation for solve_ocp on dtime systems
1 parent 33fafa1 commit f9256cc

File tree

3 files changed

+53
-17
lines changed

3 files changed

+53
-17
lines changed

control/optimal.py

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ class OptimalControlProblem():
6666
`(fun, lb, ub)`. The constraints will be applied at each time point
6767
along the trajectory.
6868
terminal_cost : callable, optional
69-
Function that returns the terminal cost given the current state
69+
Function that returns the terminal cost given the final state
7070
and input. Called as terminal_cost(x, u).
7171
trajectory_method : string, optional
7272
Method to use for carrying out the optimization. Currently supported
@@ -287,12 +287,18 @@ def __init__(
287287
# time point and we use a trapezoidal approximation to compute the
288288
# integral cost, then add on the terminal cost.
289289
#
290-
# For shooting methods, given the input U = [u[0], ... u[N]] we need to
290+
# For shooting methods, given the input U = [u[t_0], ... u[t_N]] we need to
291291
# compute the cost of the trajectory generated by that input. This
292292
# means we have to simulate the system to get the state trajectory X =
293-
# [x[0], ..., x[N]] and then compute the cost at each point:
293+
# [x[t_0], ..., x[t_N]] and then compute the cost at each point:
294294
#
295-
# cost = sum_k integral_cost(x[k], u[k]) + terminal_cost(x[N], u[N])
295+
# cost = sum_k integral_cost(x[t_k], u[t_k])
296+
# + terminal_cost(x[t_N], u[t_N])
297+
#
298+
# The actual calculation is a bit more complex. For continuous time
299+
# systems, we use a trapezoial approximation for the integral cost.
300+
# For discrete time systems, when computing the terminal cost u[t_N] is
301+
# set to zero.
296302
#
297303
# The initial state used for generating the simulation is stored in the
298304
# class parameter `x` prior to calling the optimization algorithm.
@@ -321,16 +327,25 @@ def _cost_function(self, coeffs):
321327
# Approximate the integral using trapezoidal rule
322328
cost += 0.5 * (costs[i] + costs[i+1]) * dt[i]
323329

330+
# Save the final state and input for terminal cost
331+
final_state = states[:, -1]
332+
final_input = inputs[:, -1]
333+
324334
else:
325335
# Sum the integral cost over the time (second) indices
326336
# cost += self.integral_cost(states[:,i], inputs[:,i])
327337
cost = sum(map(
328-
self.integral_cost, np.transpose(states[:, :-1]),
329-
np.transpose(inputs[:, :-1])))
338+
self.integral_cost, states.transpose(), inputs.transpose()))
339+
340+
# Save the final state and input for terminal cost
341+
final_time = self.timepts[-1] + (self.timepts[1] - self.timepts[0])
342+
final_state = self.system._rhs(
343+
final_time, states[:, -1], inputs[:, -1])
344+
final_input = np.zeros_like(inputs[:, -1])
330345

331346
# Terminal cost
332347
if self.terminal_cost is not None:
333-
cost += self.terminal_cost(states[:, -1], inputs[:, -1])
348+
cost += self.terminal_cost(final_state, final_input)
334349

335350
# Update statistics
336351
self.cost_evaluations += 1
@@ -954,7 +969,22 @@ def solve_ocp(
954969
transpose=None, return_states=True, print_summary=True, log=False,
955970
**kwargs):
956971

957-
"""Compute the solution to an optimal control problem
972+
"""Compute the solution to an optimal control problem.
973+
974+
The optimal trajectory (states and inputs) is computed so as to
975+
approximately mimimize a cost function of the following form (for
976+
continuous time systems):
977+
978+
J(x(.), u(.)) = \int_0^T L(x(t), u(t)) dt + V(x(T)),
979+
980+
where T is the time horizon.
981+
982+
Discrete time systems use a similar formulation, with the integral
983+
replaced by a sum:
984+
985+
J(x[.], u[.]) = \sum_0^{N-1} L(x_k, u_k) + V(x_N),
986+
987+
where N is the time horizon.
958988
959989
Parameters
960990
----------
@@ -968,7 +998,7 @@ def solve_ocp(
968998
Initial condition (default = 0).
969999
9701000
cost : callable
971-
Function that returns the integral cost given the current state
1001+
Function that returns the integral cost (L) given the current state
9721002
and input. Called as `cost(x, u)`.
9731003
9741004
trajectory_constraints : list of tuples, optional
@@ -990,8 +1020,10 @@ def solve_ocp(
9901020
The constraints are applied at each time point along the trajectory.
9911021
9921022
terminal_cost : callable, optional
993-
Function that returns the terminal cost given the current state
994-
and input. Called as terminal_cost(x, u).
1023+
Function that returns the terminal cost (V) given the final state
1024+
and input. Called as terminal_cost(x, u). (For compatibility with
1025+
the form of the cost function, u is passed even though it is often
1026+
not part of the terminal cost.)
9951027
9961028
terminal_constraints : list of tuples, optional
9971029
List of constraints that should hold at the end of the trajectory.
@@ -1116,7 +1148,7 @@ def create_mpc_iosystem(
11161148
See :func:`~control.optimal.solve_ocp` for more details.
11171149
11181150
terminal_cost : callable, optional
1119-
Function that returns the terminal cost given the current state
1151+
Function that returns the terminal cost given the final state
11201152
and input. Called as terminal_cost(x, u).
11211153
11221154
terminal_constraints : list of tuples, optional

control/tests/optimal_test.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,7 @@ def test_finite_horizon_simple(method):
7979
# Retrieve the full open-loop predictions
8080
res = opt.solve_ocp(
8181
sys, time, x0, cost, constraints, squeeze=True,
82-
trajectory_method=method,
83-
terminal_cost=cost) # include to match MPT3 formulation
82+
trajectory_method=method)
8483
t, u_openloop = res.time, res.inputs
8584
np.testing.assert_almost_equal(
8685
u_openloop, [-1, -1, 0.1393, 0.3361, -5.204e-16], decimal=4)
@@ -308,9 +307,7 @@ def test_constraint_specification(constraint_list):
308307

309308
# Create a model predictive controller system
310309
time = np.arange(0, 5, 1)
311-
optctrl = opt.OptimalControlProblem(
312-
sys, time, cost, constraints,
313-
terminal_cost=cost) # include to match MPT3 formulation
310+
optctrl = opt.OptimalControlProblem(sys, time, cost, constraints)
314311

315312
# Compute optimal control and compare against MPT3 solution
316313
x0 = [4, 0]

doc/optimal.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,13 @@ can be on the input, the state, or combinations of input and state,
6565
depending on the form of :math:`g_i`. Furthermore, these constraints are
6666
intended to hold at all instants in time along the trajectory.
6767

68+
For a discrete time system, the same basic formulation applies except
69+
that the cost function is given by
70+
71+
.. math::
72+
73+
J(x, u) = \sum_{k=0}^{N-1} L(x_k, u_k)\, dt + V \bigl( x_N \bigr).
74+
6875
A common use of optimization-based control techniques is the implementation
6976
of model predictive control (also called receding horizon control). In
7077
model predictive control, a finite horizon optimal control problem is solved,

0 commit comments

Comments
 (0)