Halve the similarity multiplications after encoding
Since (cAB)^T = c B^T A^T, logits_per_text is just the transpose of logits_per_image; computing it that way saves half of the similarity products in the CLIP model.py after the visual/text encoding stages.
@@ -362,7 +362,7 @@ class CLIP(nn.Module):
         # cosine similarity as logits
         logit_scale = self.logit_scale.exp()
         logits_per_image = logit_scale * image_features @ text_features.t()
-        logits_per_text = logit_scale * text_features @ image_features.t()
+        logits_per_text = logits_per_image.t()
 
         # shape = [global_batch_size, global_batch_size]
         return logits_per_image, logits_per_text
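For context, a minimal sketch verifying the identity behind this change; it is not part of the commit, and the shapes and logit_scale value are made-up placeholders standing in for the real encoder outputs:

import torch

# Hypothetical stand-ins for the CLIP encoder outputs; in model.py the real
# features come from the visual/text encoders followed by normalization.
batch, dim = 8, 512
image_features = torch.randn(batch, dim)
text_features = torch.randn(batch, dim)
logit_scale = torch.tensor(100.0)

# Before: two batch x batch similarity products.
logits_per_image = logit_scale * image_features @ text_features.t()
logits_per_text_old = logit_scale * text_features @ image_features.t()

# After: one product plus a transpose, by (cAB)^T = c B^T A^T.
logits_per_text_new = logits_per_image.t()

assert torch.allclose(logits_per_text_old, logits_per_text_new)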