Halve the number of similarity matmuls after encoding

Since (cAB)^T = c B^T A^T, logits_per_text is just the transpose of logits_per_image; computing it as a transpose instead of a second matrix product saves half of the similarity multiplications in CLIP's model.py after the visual/text encoding stages.
parent fa56f25251
commit a8aa7cb265
@@ -362,7 +362,7 @@ class CLIP(nn.Module):
         # cosine similarity as logits
         logit_scale = self.logit_scale.exp()
         logits_per_image = logit_scale * image_features @ text_features.t()
-        logits_per_text = logit_scale * text_features @ image_features.t()
+        logits_per_text = logits_per_image.t()
 
         # shape = [global_batch_size, global_batch_size]
         return logits_per_image, logits_per_text
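A minimal sketch checking the identity numerically; the batch size, embedding dim, and feature tensors below are hypothetical stand-ins rather than values from the repo, but the variable names mirror the patched code:

import torch

# Hypothetical shapes; the real values come from the CLIP config and batch.
batch, dim = 8, 512
image_features = torch.randn(batch, dim)
text_features = torch.randn(batch, dim)
logit_scale = torch.tensor(100.0)

# Before the patch: two full matrix products.
logits_per_image = logit_scale * image_features @ text_features.t()
logits_per_text_old = logit_scale * text_features @ image_features.t()

# After the patch: one product plus a transpose, using (cAB)^T = c B^T A^T.
logits_per_text_new = logits_per_image.t()

assert torch.allclose(logits_per_text_old, logits_per_text_new, atol=1e-5)

Note that .t() returns a transposed view without copying data, so the second logits matrix becomes effectively free.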