update final state computation for solve_ocp on dtime systems

murrayrm · murrayrm · commit f9256cc408b1 · 2023-09-14T22:04:37.000-07:00
diff --git a/control/optimal.py b/control/optimal.py
@@ -66,7 +66,7 @@ class OptimalControlProblem():
        `(fun, lb, ub)`.  The constraints will be applied at each time point
        along the trajectory.
     terminal_cost : callable, optional
-        Function that returns the terminal cost given the current state
+        Function that returns the terminal cost given the final state
         and input.  Called as terminal_cost(x, u).
     trajectory_method : string, optional
         Method to use for carrying out the optimization. Currently supported
@@ -287,12 +287,18 @@ def __init__(
     # time point and we use a trapezoidal approximation to compute the
     # integral cost, then add on the terminal cost.
     #
-    # For shooting methods, given the input U = [u[0], ... u[N]] we need to
+    # For shooting methods, given the input U = [u[t_0], ... u[t_N]] we need to
     # compute the cost of the trajectory generated by that input.  This
     # means we have to simulate the system to get the state trajectory X =
-    # [x[0], ..., x[N]] and then compute the cost at each point:
+    # [x[t_0], ..., x[t_N]] and then compute the cost at each point:
     #
-    #   cost = sum_k integral_cost(x[k], u[k]) + terminal_cost(x[N], u[N])
+    #   cost = sum_k integral_cost(x[t_k], u[t_k])
+    #          + terminal_cost(x[t_N], u[t_N])
+    #
+    # The actual calculation is a bit more complex.  For continuous time
+    # systems, we use a trapezoidal approximation for the integral cost.
+    # For discrete time systems, when computing the terminal cost, u[t_N]
+    # is set to zero.
     #
     # The initial state used for generating the simulation is stored in the
     # class parameter `x` prior to calling the optimization algorithm.
@@ -321,16 +327,25 @@ def _cost_function(self, coeffs):
                 # Approximate the integral using trapezoidal rule
                 cost += 0.5 * (costs[i] + costs[i+1]) * dt[i]
 
+            # Save the final state and input for terminal cost
+            final_state = states[:, -1]
+            final_input = inputs[:, -1]
+
         else:
             # Sum the integral cost over the time (second) indices
             # cost += self.integral_cost(states[:,i], inputs[:,i])
             cost = sum(map(
-                self.integral_cost, np.transpose(states[:, :-1]),
-                np.transpose(inputs[:, :-1])))
+                self.integral_cost, states.transpose(), inputs.transpose()))
+
+            # Save the final state and input for terminal cost
+            final_time = self.timepts[-1] + (self.timepts[1] - self.timepts[0])
+            final_state = self.system._rhs(
+                final_time, states[:, -1], inputs[:, -1])
+            final_input = np.zeros_like(inputs[:, -1])
 
         # Terminal cost
         if self.terminal_cost is not None:
-            cost += self.terminal_cost(states[:, -1], inputs[:, -1])
+            cost += self.terminal_cost(final_state, final_input)
 
         # Update statistics
         self.cost_evaluations += 1
@@ -954,7 +969,22 @@ def solve_ocp(
         transpose=None, return_states=True, print_summary=True, log=False,
         **kwargs):
 
-    """Compute the solution to an optimal control problem
+    """Compute the solution to an optimal control problem.
+
+    The optimal trajectory (states and inputs) is computed so as to
+    approximately minimize a cost function of the following form (for
+    continuous time systems):
+
+      J(x(.), u(.)) = \int_0^T L(x(t), u(t)) dt + V(x(T)),
+
+    where T is the time horizon.
+
+    Discrete time systems use a similar formulation, with the integral
+    replaced by a sum:
+
+      J(x[.], u[.]) = \sum_{k=0}^{N-1} L(x_k, u_k) + V(x_N),
+
+    where N is the time horizon.
 
     Parameters
     ----------
@@ -968,7 +998,7 @@ def solve_ocp(
         Initial condition (default = 0).
 
     cost : callable
-        Function that returns the integral cost given the current state
+        Function that returns the integral cost (L) given the current state
         and input.  Called as `cost(x, u)`.
 
     trajectory_constraints : list of tuples, optional
@@ -990,8 +1020,10 @@ def solve_ocp(
         The constraints are applied at each time point along the trajectory.
 
     terminal_cost : callable, optional
-        Function that returns the terminal cost given the current state
-        and input.  Called as terminal_cost(x, u).
+        Function that returns the terminal cost (V) given the final state
+        and input.  Called as terminal_cost(x, u).  (For compatibility with
+        the form of the cost function, u is passed even though it is often
+        not part of the terminal cost.)
 
     terminal_constraints : list of tuples, optional
         List of constraints that should hold at the end of the trajectory.
@@ -1116,7 +1148,7 @@ def create_mpc_iosystem(
         See :func:`~control.optimal.solve_ocp` for more details.
 
     terminal_cost : callable, optional
-        Function that returns the terminal cost given the current state
+        Function that returns the terminal cost given the final state
         and input.  Called as terminal_cost(x, u).
 
     terminal_constraints : list of tuples, optional
diff --git a/control/tests/optimal_test.py b/control/tests/optimal_test.py
@@ -79,8 +79,7 @@ def test_finite_horizon_simple(method):
     # Retrieve the full open-loop predictions
     res = opt.solve_ocp(
         sys, time, x0, cost, constraints, squeeze=True,
-        trajectory_method=method,
-        terminal_cost=cost)     # include to match MPT3 formulation
+        trajectory_method=method)
     t, u_openloop = res.time, res.inputs
     np.testing.assert_almost_equal(
         u_openloop, [-1, -1, 0.1393, 0.3361, -5.204e-16], decimal=4)
@@ -308,9 +307,7 @@ def test_constraint_specification(constraint_list):
 
     # Create a model predictive controller system
     time = np.arange(0, 5, 1)
-    optctrl = opt.OptimalControlProblem(
-        sys, time, cost, constraints,
-        terminal_cost=cost)     # include to match MPT3 formulation
+    optctrl = opt.OptimalControlProblem(sys, time, cost, constraints)
 
     # Compute optimal control and compare against MPT3 solution
     x0 = [4, 0]
diff --git a/doc/optimal.rst b/doc/optimal.rst
@@ -65,6 +65,13 @@ can be on the input, the state, or combinations of input and state,
 depending on the form of :math:`g_i`.  Furthermore, these constraints are
 intended to hold at all instants in time along the trajectory.
 
+For a discrete time system, the same basic formulation applies except
+that the cost function is given by
+
+.. math::
+
+  J(x, u) = \sum_{k=0}^{N-1} L(x_k, u_k) + V \bigl( x_N \bigr).
+
 A common use of optimization-based control techniques is the implementation
 of model predictive control (also called receding horizon control).  In
 model predictive control, a finite horizon optimal control problem is solved,