forked from Tensor-Array/Tensor-Array
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathattention.hh
More file actions
54 lines (45 loc) · 1.71 KB
/
attention.hh
File metadata and controls
54 lines (45 loc) · 1.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
/*
Copyright 2024 TensorArray-Creators
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "linear.hh"
#include "sequential.hh"
#include "normalization.hh"
#include <initializer_list>
#include <typeinfo>
#include <utility>
#include <vector>
#pragma once
#ifdef _WIN32
#ifdef TENSOR_ARRAY_LAYERS_EXPORTS
#define TENSOR_ARRAY_API __declspec(dllexport)
#else
#define TENSOR_ARRAY_API __declspec(dllimport)
#endif
#else
#define TENSOR_ARRAY_API
#endif
namespace tensor_array
{
namespace layers
{
value::Tensor TENSOR_ARRAY_API scaled_dot_product_attention(const value::Tensor&, const value::Tensor&, const value::Tensor&, const value::Tensor& = value::Tensor());
class TENSOR_ARRAY_API MultiHeadAttentionImpl final :
public LayerImpl
{
private:
const unsigned int d_model, n_head;
Linear w_q, w_k, w_v, w_o;
public:
MultiHeadAttentionImpl(unsigned int, unsigned int);
void layer_init(std::vector<std::pair<std::initializer_list<unsigned int>, const std::type_info&>>&&) override;
value::Tensor calculate(const value::Tensor&, const value::Tensor&, const value::Tensor&, const value::Tensor& = value::Tensor());
};
using MultiHeadAttention = LayerHolder<MultiHeadAttentionImpl>;
}
}
#undef TENSOR_ARRAY_API