Halve the similarity matrix multiplications after encoding

(cAB)^T = c B^T A^T
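Because of this identity, logits_per_text is simply the transpose of logits_per_image, so it no longer needs its own matrix product. This saves half of the similarity products computed in CLIP's model.py after the visual/text encoding stages.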
This commit is contained in:
or-toledano 2021-08-13 13:06:00 +03:00 committed by GitHub
parent fa56f25251
commit a8aa7cb265
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -362,7 +362,7 @@ class CLIP(nn.Module):
# cosine similarity as logits
logit_scale = self.logit_scale.exp()
logits_per_image = logit_scale * image_features @ text_features.t()
-logits_per_text = logit_scale * text_features @ image_features.t()
+logits_per_text = logits_per_image.t()
# shape = [global_batch_size, global_batch_size]
return logits_per_image, logits_per_text