moved the notebook to subfolder

This commit is contained in:
Jong Wook Kim 2021-01-30 00:22:03 +09:00
parent 6bc0bd8873
commit 578a1d3e2e
3 changed files with 3 additions and 3 deletions

View File

@@ -1,6 +1,6 @@
# CLIP
[[Blog]](https://openai.com/blog/clip/) [[Paper]](https://cdn.openai.com/papers/Learning_Transferable_Visual_Models_From_Natural_Language_Supervision.pdf) [[Model Card]](model-card.md) [[Colab]](https://colab.research.google.com/github/openai/clip/blob/master/Interacting_with_CLIP.ipynb)
[[Blog]](https://openai.com/blog/clip/) [[Paper]](https://cdn.openai.com/papers/Learning_Transferable_Visual_Models_From_Natural_Language_Supervision.pdf) [[Model Card]](model-card.md) [[Colab]](https://colab.research.google.com/github/openai/clip/blob/master/notebooks/Interacting_with_CLIP.ipynb)
CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on a variety of (image, text) pairs. It can be instructed in natural language to predict the most relevant text snippet, given an image, without directly optimizing for the task, similarly to the zero-shot capabilities of GPT-2 and 3. We found CLIP matches the performance of the original ResNet50 on ImageNet “zero-shot” without using any of the original 1.28M labeled examples, overcoming several major challenges in computer vision.

View File

@@ -329,11 +329,11 @@ class CLIP(nn.Module):
# cosine similarity as logits
logit_scale = self.logit_scale.exp()
logits_per_iamge = logit_scale * image_features @ text_features.t()
logits_per_image = logit_scale * image_features @ text_features.t()
logits_per_text = logit_scale * text_features @ image_features.t()
# shape = [global_batch_size, global_batch_size]
return logits_per_iamge, logits_per_text
return logits_per_image, logits_per_text
def convert_weights(model: nn.Module):