@@ -66,7 +66,7 @@ class OptimalControlProblem():
6666 `(fun, lb, ub)`. The constraints will be applied at each time point
6767 along the trajectory.
6868 terminal_cost : callable, optional
69- Function that returns the terminal cost given the current state
69+ Function that returns the terminal cost given the final state
7070 and input. Called as terminal_cost(x, u).
7171 trajectory_method : string, optional
7272 Method to use for carrying out the optimization. Currently supported
@@ -287,12 +287,18 @@ def __init__(
287287 # time point and we use a trapezoidal approximation to compute the
288288 # integral cost, then add on the terminal cost.
289289 #
290- # For shooting methods, given the input U = [u[0 ], ... u[N ]] we need to
290+ # For shooting methods, given the input U = [u[t_0 ], ... u[t_N ]] we need to
291291 # compute the cost of the trajectory generated by that input. This
292292 # means we have to simulate the system to get the state trajectory X =
293- # [x[0 ], ..., x[N ]] and then compute the cost at each point:
293+ # [x[t_0 ], ..., x[t_N ]] and then compute the cost at each point:
294294 #
295- # cost = sum_k integral_cost(x[k], u[k]) + terminal_cost(x[N], u[N])
295+ # cost = sum_k integral_cost(x[t_k], u[t_k])
296+ # + terminal_cost(x[t_N], u[t_N])
297+ #
298+ # The actual calculation is a bit more complex. For continuous time
299+ # systems, we use a trapezoial approximation for the integral cost.
300+ # For discrete time systems, when computing the terminal cost u[t_N] is
301+ # set to zero.
296302 #
297303 # The initial state used for generating the simulation is stored in the
298304 # class parameter `x` prior to calling the optimization algorithm.
@@ -321,16 +327,25 @@ def _cost_function(self, coeffs):
321327 # Approximate the integral using trapezoidal rule
322328 cost += 0.5 * (costs [i ] + costs [i + 1 ]) * dt [i ]
323329
330+ # Save the final state and input for terminal cost
331+ final_state = states [:, - 1 ]
332+ final_input = inputs [:, - 1 ]
333+
324334 else :
325335 # Sum the integral cost over the time (second) indices
326336 # cost += self.integral_cost(states[:,i], inputs[:,i])
327337 cost = sum (map (
328- self .integral_cost , np .transpose (states [:, :- 1 ]),
329- np .transpose (inputs [:, :- 1 ])))
338+ self .integral_cost , states .transpose (), inputs .transpose ()))
339+
340+ # Save the final state and input for terminal cost
341+ final_time = self .timepts [- 1 ] + (self .timepts [1 ] - self .timepts [0 ])
342+ final_state = self .system ._rhs (
343+ final_time , states [:, - 1 ], inputs [:, - 1 ])
344+ final_input = np .zeros_like (inputs [:, - 1 ])
330345
331346 # Terminal cost
332347 if self .terminal_cost is not None :
333- cost += self .terminal_cost (states [:, - 1 ], inputs [:, - 1 ] )
348+ cost += self .terminal_cost (final_state , final_input )
334349
335350 # Update statistics
336351 self .cost_evaluations += 1
@@ -954,7 +969,22 @@ def solve_ocp(
954969 transpose = None , return_states = True , print_summary = True , log = False ,
955970 ** kwargs ):
956971
957- """Compute the solution to an optimal control problem
972+ """Compute the solution to an optimal control problem.
973+
974+ The optimal trajectory (states and inputs) is computed so as to
975+ approximately mimimize a cost function of the following form (for
976+ continuous time systems):
977+
978+ J(x(.), u(.)) = \int_0^T L(x(t), u(t)) dt + V(x(T)),
979+
980+ where T is the time horizon.
981+
982+ Discrete time systems use a similar formulation, with the integral
983+ replaced by a sum:
984+
985+ J(x[.], u[.]) = \sum_0^{N-1} L(x_k, u_k) + V(x_N),
986+
987+ where N is the time horizon.
958988
959989 Parameters
960990 ----------
@@ -968,7 +998,7 @@ def solve_ocp(
968998 Initial condition (default = 0).
969999
9701000 cost : callable
971- Function that returns the integral cost given the current state
1001+ Function that returns the integral cost (L) given the current state
9721002 and input. Called as `cost(x, u)`.
9731003
9741004 trajectory_constraints : list of tuples, optional
@@ -990,8 +1020,10 @@ def solve_ocp(
9901020 The constraints are applied at each time point along the trajectory.
9911021
9921022 terminal_cost : callable, optional
993- Function that returns the terminal cost given the current state
994- and input. Called as terminal_cost(x, u).
1023+ Function that returns the terminal cost (V) given the final state
1024+ and input. Called as terminal_cost(x, u). (For compatibility with
1025+ the form of the cost function, u is passed even though it is often
1026+ not part of the terminal cost.)
9951027
9961028 terminal_constraints : list of tuples, optional
9971029 List of constraints that should hold at the end of the trajectory.
@@ -1116,7 +1148,7 @@ def create_mpc_iosystem(
11161148 See :func:`~control.optimal.solve_ocp` for more details.
11171149
11181150 terminal_cost : callable, optional
1119- Function that returns the terminal cost given the current state
1151+ Function that returns the terminal cost given the final state
11201152 and input. Called as terminal_cost(x, u).
11211153
11221154 terminal_constraints : list of tuples, optional
0 commit comments