Halve the number of similarity matmuls after encoding

Since (cAB)^T = c B^T A^T, logits_per_text is just the transpose of logits_per_image; computing it as a transpose instead of a second matrix product saves half of the similarity multiplications in CLIP's model.py after the visual/text encoding stages.
parent fa56f25251
commit a8aa7cb265
@@ -362,7 +362,7 @@ class CLIP(nn.Module):
         # cosine similarity as logits
         logit_scale = self.logit_scale.exp()
         logits_per_image = logit_scale * image_features @ text_features.t()
-        logits_per_text = logit_scale * text_features @ image_features.t()
+        logits_per_text = logits_per_image.t()
 
         # shape = [global_batch_size, global_batch_size]
         return logits_per_image, logits_per_text
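A minimal sketch checking the identity numerically; the batch size, embedding dim, and feature tensors below are hypothetical stand-ins rather than values from the repo, but the variable names mirror the patched code:

import torch

# Hypothetical shapes; the real values come from the CLIP config and batch.
batch, dim = 8, 512
image_features = torch.randn(batch, dim)
text_features = torch.randn(batch, dim)
logit_scale = torch.tensor(100.0)

# Before the patch: two full matrix products.
logits_per_image = logit_scale * image_features @ text_features.t()
logits_per_text_old = logit_scale * text_features @ image_features.t()

# After the patch: one product plus a transpose, using (cAB)^T = c B^T A^T.
logits_per_text_new = logits_per_image.t()

assert torch.allclose(logits_per_text_old, logits_per_text_new, atol=1e-5)

Note that .t() returns a transposed view without copying data, so the second logits matrix becomes effectively free.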