Transformer——多头注意力机制(PyTorch)
创始人
2025-01-11 08:34:26
0

1. 原理图

2. 代码

import torch
import torch.nn as nn


class Multi_Head_Self_Attention(nn.Module):
    """Multi-head scaled dot-product attention.

    The inputs are linearly projected, split into ``heads`` sub-spaces of
    width ``embed_size // heads``, attended independently per head, then
    concatenated and passed through a final linear projection.
    """

    def __init__(self, embed_size, heads):
        """Build the projection layers.

        Args:
            embed_size: Width of the input and output embeddings.
            heads: Number of attention heads; must be positive and divide
                ``embed_size`` evenly.

        Raises:
            ValueError: If ``heads`` is not positive or does not divide
                ``embed_size``.
        """
        super().__init__()
        # Fail early: with a non-divisible embed_size the floor division
        # below would truncate head_dim and the reshape in forward() would
        # fail later with a much less obvious error.
        if heads <= 0 or embed_size % heads != 0:
            raise ValueError(
                f"embed_size ({embed_size}) must be divisible by a positive "
                f"number of heads ({heads})"
            )

        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads

        self.queries = nn.Linear(self.embed_size, self.embed_size, bias=False)
        self.keys = nn.Linear(self.embed_size, self.embed_size, bias=False)
        self.values = nn.Linear(self.embed_size, self.embed_size, bias=False)
        self.fc_out = nn.Linear(self.embed_size, self.embed_size, bias=False)

    def forward(self, queries, keys, values, mask):
        """Apply multi-head attention.

        Args:
            queries: Tensor of shape (batch_size, query_len, embed_size).
            keys: Tensor of shape (batch_size, key_len, embed_size).
            values: Tensor of shape (batch_size, value_len, embed_size);
                value_len has to equal key_len for the final matmul.
            mask: ``None``, or a tensor broadcastable to
                (batch_size, heads, query_len, key_len). Positions where
                ``mask == 0`` are excluded from attention.

        Returns:
            Tensor of shape (batch_size, query_len, embed_size).
        """
        N = queries.shape[0]  # batch_size
        query_len = queries.shape[1]  # sequence_length
        key_len = keys.shape[1]  # sequence_length
        value_len = values.shape[1]  # sequence_length

        queries = self.queries(queries)
        keys = self.keys(keys)
        values = self.values(values)

        # Split the embedding into self.heads pieces:
        # (batch_size, sequence_length, embed_size) -->
        # (batch_size, sequence_length, heads, head_dim)
        queries = queries.reshape(N, query_len, self.heads, self.head_dim)
        keys = keys.reshape(N, key_len, self.heads, self.head_dim)
        values = values.reshape(N, value_len, self.heads, self.head_dim)

        # Move the head axis in front of the sequence axis:
        # (batch_size, sequence_length, heads, head_dim) -->
        # (batch_size, heads, sequence_length, head_dim)
        queries = queries.transpose(1, 2)
        keys = keys.transpose(1, 2)
        values = values.transpose(1, 2)

        # Scaled dot-product attention.
        score = torch.matmul(queries, keys.transpose(-2, -1)) / (self.head_dim ** 0.5)

        if mask is not None:
            # NOTE: a query row whose keys are all masked becomes all -inf,
            # and softmax over such a row yields NaN.
            score = score.masked_fill(mask == 0, float("-inf"))

        # (batch_size, heads, query_len, key_len)
        attention = torch.softmax(score, dim=-1)

        out = torch.matmul(attention, values)

        # Merge the heads back so the result can be fed to the next layer:
        # (batch_size, heads, sequence_length, head_dim) -->
        # (batch_size, sequence_length, heads, head_dim) -->
        # (batch_size, sequence_length, embed_size)
        out = out.transpose(1, 2).contiguous().reshape(N, query_len, self.embed_size)
        out = self.fc_out(out)

        return out


# Demo: run self-attention on random data and print the output shape.
batch_size = 64
sequence_length = 10
embed_size = 512
heads = 8
mask = None

Q = torch.randn(batch_size, sequence_length, embed_size)
K = torch.randn(batch_size, sequence_length, embed_size)
V = torch.randn(batch_size, sequence_length, embed_size)

model = Multi_Head_Self_Attention(embed_size, heads)
output = model(Q, K, V, mask)
print(output.shape)

 

相关内容

热门资讯

阶段辅助!超级三加一辅助工具安... 阶段辅助!超级三加一辅助工具安装(辅助挂)总是确实有辅助app(有挂透明挂)小薇(辅助器软件下载)致...
妙招辅助!花花生活圈怎么装开挂... 妙招辅助!花花生活圈怎么装开挂(辅助挂)切实存在有辅助插件(今日头条)一、花花生活圈怎么装开挂游戏安...
演示辅助!微信小程序哥哥打大a... 演示辅助!微信小程序哥哥打大a辅助器(辅助挂)好像存在有辅助脚本(讲解有挂)1、很好的工具软件,可以...
教程书辅助!微信小程序功夫川码... 教程书辅助!微信小程序功夫川码辅助(辅助挂)好像是有辅助脚本(有挂技巧)1、这是跨平台的微信小程序功...
策略辅助!老友麻将有没有挂(辅... 策略辅助!老友麻将有没有挂(辅助挂)真是真的是有辅助器(的确有挂)1、老友麻将有没有挂免费脚本咨询教...
要领辅助!福建十三时辅助(辅助... 要领辅助!福建十三时辅助(辅助挂)好像真的是有辅助器(有挂解惑)1、福建十三时辅助破解器简单,福建十...
诀窍辅助!雀神山庄麻将辅助器(... 诀窍辅助!雀神山庄麻将辅助器(辅助挂)果然是有辅助工具(有人有挂)在进入雀神山庄麻将辅助器软件靠谱后...
绝活儿辅助!方片十三张源码(辅... 绝活儿辅助!方片十三张源码(辅助挂)一贯是真的有辅助软件(真实有挂)运方片十三张源码辅助工具,进入游...
积累辅助!财神十三张脚本辅助(... 积累辅助!财神十三张脚本辅助(辅助挂)切实确实有辅助脚本(有挂猫腻)1、实时财神十三张脚本辅助透视辅...
阶段辅助!博雅西元红河辅助脚本... 您好,博雅西元红河辅助脚本这款游戏可以开挂的,确实是有挂的,需要了解加去威信【136704302】很...