Remove half of the similarity matrix multiplications after encoding (#140)

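(cAB)^T = c B^T A^T, so with c = logit_scale, A = image_features and B = text_features^T,
logits_per_text = c * text_features @ image_features^T is exactly the transpose of logits_per_image.
This saves half of the similarity matrix products in CLIP's model.py after the visual/text encoding stages.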
This commit is contained in:
or-toledano 2021-08-29 12:15:03 +03:00 committed by GitHub
parent 539cdcbd48
commit 3b473b0e68
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 1 additions and 1 deletions

View File

@@ -362,7 +362,7 @@ class CLIP(nn.Module):
# cosine similarity as logits
logit_scale = self.logit_scale.exp()
logits_per_image = logit_scale * image_features @ text_features.t()
-logits_per_text = logit_scale * text_features @ image_features.t()
+logits_per_text = logits_per_image.t()
# shape = [global_batch_size, global_batch_size]
return logits_per_image, logits_per_text