From a8aa7cb2657e51771e93d987075c441d57a2640e Mon Sep 17 00:00:00 2001
From: or-toledano <49984106+or-toledano@users.noreply.github.com>
Date: Fri, 13 Aug 2021 13:06:00 +0300
Subject: [PATCH] Halve the similarity muls after encoding: (cAB)^T = c B^T A^T

Saves half of the similarity products in CLIP's model.py after the
visual/text encoding stages.
---
 clip/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clip/model.py b/clip/model.py
index f2c95c4..f7958f1 100644
--- a/clip/model.py
+++ b/clip/model.py
@@ -362,7 +362,7 @@ class CLIP(nn.Module):
         # cosine similarity as logits
         logit_scale = self.logit_scale.exp()
         logits_per_image = logit_scale * image_features @ text_features.t()
-        logits_per_text = logit_scale * text_features @ image_features.t()
+        logits_per_text = logits_per_image.t()
 
         # shape = [global_batch_size, global_batch_size]
         return logits_per_image, logits_per_text
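
The identity in the subject line is what licenses the one-line change: with
A = image_features, B = text_features.t(), and c = logit_scale, we have
(cAB)^T = c B^T A^T, so transposing logits_per_image reproduces
logits_per_text exactly and the second [batch, batch] matmul is redundant.
Below is a minimal standalone check of that equivalence; the batch size and
embedding width (8 and 512) are illustrative assumptions, not values taken
from the patch.

import torch

torch.manual_seed(0)
batch_size, dim = 8, 512
image_features = torch.randn(batch_size, dim)
text_features = torch.randn(batch_size, dim)

# CLIP normalizes both feature sets before the similarity step.
image_features = image_features / image_features.norm(dim=1, keepdim=True)
text_features = text_features / text_features.norm(dim=1, keepdim=True)

logit_scale = torch.tensor(100.0)  # stand-in for self.logit_scale.exp()

# Original: two batch-by-batch matmuls.
logits_per_image = logit_scale * image_features @ text_features.t()
logits_per_text_old = logit_scale * text_features @ image_features.t()

# Patched: reuse the first product via the transpose identity.
logits_per_text_new = logits_per_image.t()

assert torch.allclose(logits_per_text_old, logits_per_text_new)

Since .t() on a 2-D tensor returns a view, the patched line costs no extra
memory or FLOPs; any difference from the old computation is at most
floating-point reduction-order noise, which allclose tolerates.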