From a8aa7cb2657e51771e93d987075c441d57a2640e Mon Sep 17 00:00:00 2001
From: or-toledano <49984106+or-toledano@users.noreply.github.com>
Date: Fri, 13 Aug 2021 13:06:00 +0300
Subject: [PATCH] Halve the similarity muls after encoding: (cAB)^T = c B^T A^T

Saves half of the similarity products in CLIP's model.py after the
visual/text encoding stages.
---
 clip/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clip/model.py b/clip/model.py
index f2c95c4..f7958f1 100644
--- a/clip/model.py
+++ b/clip/model.py
@@ -362,7 +362,7 @@ class CLIP(nn.Module):
         # cosine similarity as logits
         logit_scale = self.logit_scale.exp()
         logits_per_image = logit_scale * image_features @ text_features.t()
-        logits_per_text = logit_scale * text_features @ image_features.t()
+        logits_per_text = logits_per_image.t()
 
         # shape = [global_batch_size, global_batch_size]
         return logits_per_image, logits_per_text
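
The identity in the subject line is what licenses the one-line change: with
A = image_features, B = text_features.t(), and c = logit_scale, we have
(cAB)^T = c B^T A^T, so transposing logits_per_image reproduces
logits_per_text exactly and the second [batch, batch] matmul is redundant.
Below is a minimal standalone check of that equivalence; the batch size and
embedding width (8 and 512) are illustrative assumptions, not values taken
from the patch.

import torch

torch.manual_seed(0)
batch_size, dim = 8, 512
image_features = torch.randn(batch_size, dim)
text_features = torch.randn(batch_size, dim)

# CLIP normalizes both feature sets before the similarity step.
image_features = image_features / image_features.norm(dim=1, keepdim=True)
text_features = text_features / text_features.norm(dim=1, keepdim=True)

logit_scale = torch.tensor(100.0)  # stand-in for self.logit_scale.exp()

# Original: two batch-by-batch matmuls.
logits_per_image = logit_scale * image_features @ text_features.t()
logits_per_text_old = logit_scale * text_features @ image_features.t()

# Patched: reuse the first product via the transpose identity.
logits_per_text_new = logits_per_image.t()

assert torch.allclose(logits_per_text_old, logits_per_text_new)

Since .t() on a 2-D tensor returns a view, the patched line costs no extra
memory or FLOPs; any difference from the old computation is at most
floating-point reduction-order noise, which allclose tolerates.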