forked from tensorflow/serving
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathloader.h
More file actions
140 lines (124 loc) · 5.66 KB
/
Copy pathloader.h
File metadata and controls
140 lines (124 loc) · 5.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
/* Copyright 2016 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_SERVING_CORE_LOADER_H_
#define TENSORFLOW_SERVING_CORE_LOADER_H_
#include <memory>
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow_serving/core/source.h"
#include "tensorflow_serving/resources/resources.pb.h"
#include "tensorflow_serving/util/any_ptr.h"
namespace tensorflow {
namespace serving {
// A standardized abstraction for an object that manages the lifecycle of a
// servable, including loading and unloading it. Servables are arbitrary objects
// that serve algorithms or data that often, though not necessarily, use a
// machine-learned model.
//
// A Loader for a servable object represents one instance of a stream of
// servable versions, all sharing a common name (e.g. "my_servable") and
// increasing version numbers, typically representing updated model parameters
// learned from fresh training data.
//
// A Loader should start in an unloaded state, meaning that no work has been
// done to prepare to perform operations. A typical instance that has not yet
// been loaded contains merely a pointer to a location from which its data can
// be loaded (e.g. a file-system path or network location). Construction and
// destruction of instances should be fairly cheap. Expensive initialization
// operations should be done in Load().
//
// A subclass may optionally store a pointer to the Source that originated it,
// for accessing state shared across multiple servable objects in a given
// servable stream.
//
// Implementations need to ensure that the methods they expose are thread-safe,
// or carefully document and/or coordinate their thread-safety properties with
// their clients to ensure correctness.
// Loader implementations do not need to worry about concurrent execution of
// Load()/Unload(), as the caller will ensure that does not happen.
// Abstract interface: every operation below is pure virtual and must be
// provided by a concrete subclass.
class Loader {
public:
// The destructor will never be called on a Loader whose servable is currently
// loaded, i.e. between (successful) calls to Load() and Unload().
// Virtual so that a Loader can be destroyed through a base-class pointer.
virtual ~Loader() = default;
// Returns an estimate of the resources the servable will consume once loaded.
// If the servable has already been loaded, returns an estimate of the actual
// resource usage.
//
// `estimate` is the output parameter that receives the estimate; the returned
// Status reports whether producing it succeeded.
//
// IMPORTANT: This method's implementation must obey the following
// requirements, which enable the serving system to reason correctly about
// which servables can be loaded safely:
//   1. The estimate must represent an upper bound on the actual value.
//   2. Prior to load, the estimate may include resources that are not bound
//      to any specific device instance, e.g. RAM on one of the two GPUs.
//   3. While loaded, for any devices with multiple instances (e.g. two GPUs),
//      the estimate must specify the instance to which each resource is bound.
//   4. The estimate must be monotonically non-increasing, i.e. it cannot
//      increase over time.
virtual Status EstimateResources(ResourceAllocation* estimate) const = 0;
// Fetches any data that needs to be loaded before using the servable returned
// by servable(). May use no more resources than the estimate reported by
// EstimateResources().
//
// This is where expensive initialization belongs. The returned Status reports
// whether the load succeeded.
virtual Status Load() = 0;
// Frees any resources allocated during Load() (except perhaps for resources
// shared across servables that are still needed for other active ones).
// The loader does not need to return to the "new" state (i.e. Load() cannot
// be called after Unload()).
virtual void Unload() = 0;
// Returns an opaque interface to the underlying servable object.
// The caller should know the precise type of the interface in order to make
// actual use of it. For example:
//
// CustomLoader implementation:
//
//   class CustomLoader : public Loader {
//    public:
//     ...
//     Status Load() override {
//       servable_ = ...;
//       return Status::OK();
//     }
//
//     AnyPtr servable() override { return servable_; }
//
//    private:
//     CustomServable* servable_ = nullptr;
//   };
//
// Serving user request:
//
//   ServableHandle<CustomServable> handle = ...
//   CustomServable* servable = handle.get();
//   servable->...
//
// If servable() is called after successful Load() and before Unload(), it
// returns a valid, non-null AnyPtr object. If called before a successful
// Load() call or after Unload(), it returns null AnyPtr.
virtual AnyPtr servable() = 0;
};
// A Loader variant that performs no resource accounting. Its
// EstimateResources() always reports a cleared (empty) ResourceAllocation,
// which effectively disables the resource-based safety checks in the serving
// system.
//
// Loaders that are experimental, or that run in environments where those
// checks are not needed, can derive from ResourceUnsafeLoader instead of
// deriving from Loader directly.
class ResourceUnsafeLoader : public Loader {
 public:
  // Clears `*estimate` and reports success. Marked `final`, so subclasses
  // cannot override it with a different estimate.
  Status EstimateResources(ResourceAllocation* estimate) const final {
    ResourceAllocation& no_resources = *estimate;
    no_resources.Clear();
    return Status::OK();
  }
};
// A source that emits Loader unique pointers: shorthand for Source<T> with
// T = std::unique_ptr<Loader>. The unique_ptr element type means each emitted
// Loader has exactly one owner at a time.
using LoaderSource = Source<std::unique_ptr<Loader>>;
} // namespace serving
} // namespace tensorflow
#endif // TENSORFLOW_SERVING_CORE_LOADER_H_