From 3b473b0e682c091a9e53623eebc1ca1657385717 Mon Sep 17 00:00:00 2001
From: or-toledano <49984106+or-toledano@users.noreply.github.com>
Date: Sun, 29 Aug 2021 12:15:03 +0300
Subject: [PATCH] Reduce half of similarity muls after encoding (#140)

(cAB)^T = c B^T A^T
Saves half of the similarity products in the CLIP model.py after the
visual/text encoding stages
---
 clip/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clip/model.py b/clip/model.py
index f2c95c4..f7958f1 100644
--- a/clip/model.py
+++ b/clip/model.py
@@ -362,7 +362,7 @@ class CLIP(nn.Module):
         # cosine similarity as logits
         logit_scale = self.logit_scale.exp()
         logits_per_image = logit_scale * image_features @ text_features.t()
-        logits_per_text = logit_scale * text_features @ image_features.t()
+        logits_per_text = logits_per_image.t()
 
         # shape = [global_batch_size, global_batch_size]
         return logits_per_image, logits_per_text
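
For context, a minimal standalone sketch of the identity the patch relies on,
(cAB)^T = c B^T A^T: transposing logits_per_image reproduces the second matmul
exactly. This snippet is not part of the patch; the batch size, embedding
dimension, and the fixed logit_scale value are illustrative stand-ins for the
real forward pass.

import torch

batch, dim = 8, 512
image_features = torch.randn(batch, dim)
text_features = torch.randn(batch, dim)
logit_scale = torch.tensor(100.0)  # stand-in for self.logit_scale.exp()

logits_per_image = logit_scale * image_features @ text_features.t()

# Old computation: a second [batch, dim] x [dim, batch] matmul.
logits_per_text_old = logit_scale * text_features @ image_features.t()
# New computation: a transpose, which is just a view in PyTorch.
logits_per_text_new = logits_per_image.t()

assert torch.allclose(logits_per_text_old, logits_per_text_new)

Since .t() returns a view rather than launching another matmul, the change
drops half of the post-encoding similarity products, as the subject line says.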