Transformer: Multi-Head Attention Mechanism (PyTorch)
2025-01-11 08:34:26

1. Schematic Diagram
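
(The original figure is not reproduced here.) In outline, the layer projects the input into queries, keys, and values, splits each projection into several heads, runs scaled dot-product attention in every head in parallel, concatenates the per-head outputs, and applies a final linear projection. In the standard notation (with d_k equal to head_dim), each head computes

$$\mathrm{Attention}(Q, K, V) = \mathrm{softmax}\!\left(\frac{QK^{\top}}{\sqrt{d_k}}\right)V$$

and the heads are combined as

$$\mathrm{MultiHead}(Q, K, V) = \mathrm{Concat}(\mathrm{head}_1, \dots, \mathrm{head}_h)\,W^{O}.$$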

2. Code

import torch
import torch.nn as nn


class Multi_Head_Self_Attention(nn.Module):
    def __init__(self, embed_size, heads):
        super(Multi_Head_Self_Attention, self).__init__()
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads
        assert self.head_dim * heads == embed_size, "embed_size must be divisible by heads"

        self.queries = nn.Linear(self.embed_size, self.embed_size, bias=False)
        self.keys = nn.Linear(self.embed_size, self.embed_size, bias=False)
        self.values = nn.Linear(self.embed_size, self.embed_size, bias=False)
        self.fc_out = nn.Linear(self.embed_size, self.embed_size, bias=False)

    def forward(self, queries, keys, values, mask):
        N = queries.shape[0]          # batch_size
        query_len = queries.shape[1]  # sequence_length
        key_len = keys.shape[1]       # sequence_length
        value_len = values.shape[1]   # sequence_length

        queries = self.queries(queries)
        keys = self.keys(keys)
        values = self.values(values)

        # Split the embedding into self.heads pieces:
        # batch_size, sequence_length, embed_size(512) -->
        # batch_size, sequence_length, heads(8), head_dim(64)
        queries = queries.reshape(N, query_len, self.heads, self.head_dim)
        keys = keys.reshape(N, key_len, self.heads, self.head_dim)
        values = values.reshape(N, value_len, self.heads, self.head_dim)

        # batch_size, sequence_length, heads(8), head_dim(64) -->
        # batch_size, heads(8), sequence_length, head_dim(64)
        queries = queries.transpose(1, 2)
        keys = keys.transpose(1, 2)
        values = values.transpose(1, 2)

        # Scaled dot-product attention
        score = torch.matmul(queries, keys.transpose(-2, -1)) / (self.head_dim ** (1 / 2))

        if mask is not None:
            score = score.masked_fill(mask == 0, float("-inf"))
        # batch_size, heads(8), sequence_length, sequence_length
        attention = torch.softmax(score, dim=-1)

        out = torch.matmul(attention, values)
        # batch_size, heads(8), sequence_length, head_dim(64) -->
        # batch_size, sequence_length, heads(8), head_dim(64) -->
        # batch_size, sequence_length, embed_size(512)
        # Merge the heads back so the output can be fed into the following layers
        out = out.transpose(1, 2).contiguous().reshape(N, query_len, self.embed_size)
        out = self.fc_out(out)

        return out


batch_size = 64
sequence_length = 10
embed_size = 512
heads = 8
mask = None

Q = torch.randn(batch_size, sequence_length, embed_size)
K = torch.randn(batch_size, sequence_length, embed_size)
V = torch.randn(batch_size, sequence_length, embed_size)

model = Multi_Head_Self_Attention(embed_size, heads)
output = model(Q, K, V, mask)
print(output.shape)  # torch.Size([64, 10, 512])
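
The demo above passes mask=None. As a minimal usage sketch (reusing the Multi_Head_Self_Attention class defined above), a causal (look-ahead) mask of shape (sequence_length, sequence_length) can be passed instead; inside masked_fill it broadcasts over the batch and head dimensions:

import torch

seq_len = 10
# Lower-triangular 0/1 matrix: 1 = may be attended to, 0 = masked out (future positions)
causal_mask = torch.tril(torch.ones(seq_len, seq_len))

x = torch.randn(2, seq_len, 512)                      # small batch for illustration
model = Multi_Head_Self_Attention(embed_size=512, heads=8)
out = model(x, x, x, causal_mask)                     # self-attention: Q = K = V = x
print(out.shape)                                      # torch.Size([2, 10, 512])

For reference, PyTorch's built-in nn.MultiheadAttention (with batch_first=True) accepts the same (batch, sequence, embed) layout. Its weights are initialized independently, so only the output shape, not the values, should match the custom module above:

mha = nn.MultiheadAttention(embed_dim=512, num_heads=8, batch_first=True)
attn_out, attn_weights = mha(Q, K, V)   # Q, K, V from the demo above
print(attn_out.shape)                   # torch.Size([64, 10, 512])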

 
