From beba48f35392a73c6c47ae67ddffced81ad1916d Mon Sep 17 00:00:00 2001
From: Smittyvb
Date: Sun, 7 Mar 2021 21:58:54 -0500
Subject: [PATCH] Update models URIs in notebook (#52)

---
 notebooks/Interacting_with_CLIP.ipynb | 49 ++++++++++++++-------------
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/notebooks/Interacting_with_CLIP.ipynb b/notebooks/Interacting_with_CLIP.ipynb
index b9c32b1..c728569 100644
--- a/notebooks/Interacting_with_CLIP.ipynb
+++ b/notebooks/Interacting_with_CLIP.ipynb
@@ -60,7 +60,7 @@
         "else:\n",
         "    torch_version_suffix = \"+cu110\""
       ],
-      "execution_count": 1,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -83,7 +83,7 @@
       "source": [
         "! pip install torch==1.7.1{torch_version_suffix} torchvision==0.8.2{torch_version_suffix} -f https://download.pytorch.org/whl/torch_stable.html ftfy regex"
       ],
-      "execution_count": 2,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -118,7 +118,7 @@
         "\n",
         "print(\"Torch version:\", torch.__version__)"
       ],
-      "execution_count": 3,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -147,10 +147,13 @@
       },
       "source": [
         "MODELS = {\n",
-        "    \"ViT-B/32\": \"https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt\",\n",
+        "    \"RN50\": \"https://openaipublic.azureedge.net/clip/models/afeb0e10f9e5a86da6080e35cf09123aca3b358a0c3e3b6c78a7b63bc04b6762/RN50.pt\",\n",
+        "    \"RN101\": \"https://openaipublic.azureedge.net/clip/models/8fa8567bab74a42d41c5915025a8e4538c3bdbe8804a470a72f30b0d94fab599/RN101.pt\",\n",
+        "    \"RN50x4\": \"https://openaipublic.azureedge.net/clip/models/7e526bd135e493cef0776de27d5f42653e6b4c8bf9e0f653bb11773263205fdd/RN50x4.pt\",\n",
+        "    \"ViT-B/32\": \"https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt\", \n",
         "}"
       ],
-      "execution_count": 4,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -165,7 +168,7 @@
       "source": [
         "! wget {MODELS[\"ViT-B/32\"]} -O model.pt"
       ],
-      "execution_count": 5,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -206,7 +209,7 @@
         "print(\"Context length:\", context_length)\n",
         "print(\"Vocab size:\", vocab_size)"
       ],
-      "execution_count": 6,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -250,7 +253,7 @@
         "image_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).cuda()\n",
         "image_std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).cuda()"
       ],
-      "execution_count": 7,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -277,7 +280,7 @@
         "! pip install ftfy regex\n",
         "! wget https://openaipublic.azureedge.net/clip/bpe_simple_vocab_16e6.txt.gz -O bpe_simple_vocab_16e6.txt.gz"
       ],
-      "execution_count": 8,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -438,7 +441,7 @@
         "        text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors=\"replace\").replace('</w>', ' ')\n",
         "        return text\n"
       ],
-      "execution_count": 9,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -485,7 +488,7 @@
         "    \"coffee\": \"a cup of coffee on a saucer\"\n",
         "}"
       ],
-      "execution_count": 10,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -520,7 +523,7 @@
         "\n",
         "plt.tight_layout()\n"
       ],
-      "execution_count": 11,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "display_data",
@@ -561,7 +564,7 @@
         "image_input -= image_mean[:, None, None]\n",
         "image_input /= image_std[:, None, None]"
       ],
-      "execution_count": 12,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -573,7 +576,7 @@
         "tokenizer = SimpleTokenizer()\n",
         "text_tokens = [tokenizer.encode(\"This is \" + desc) for desc in texts]"
       ],
-      "execution_count": 13,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -592,7 +595,7 @@
         "\n",
         "text_input = text_input.cuda()"
       ],
-      "execution_count": 14,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -605,7 +608,7 @@
         "    image_features = model.encode_image(image_input).float()\n",
         "    text_features = model.encode_text(text_input).float()"
       ],
-      "execution_count": 15,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -629,7 +632,7 @@
         "text_features /= text_features.norm(dim=-1, keepdim=True)\n",
         "similarity = text_features.cpu().numpy() @ image_features.cpu().numpy().T"
       ],
-      "execution_count": 16,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -664,7 +667,7 @@
         "\n",
         "plt.title(\"Cosine similarity between text and image features\", size=20)"
       ],
-      "execution_count": 17,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -722,7 +725,7 @@
         "\n",
         "cifar100 = CIFAR100(os.path.expanduser(\"~/.cache\"), transform=preprocess, download=True)"
       ],
-      "execution_count": 18,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -753,7 +756,7 @@
         "text_input = text_input.cuda()\n",
         "text_input.shape"
       ],
-      "execution_count": 19,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -782,7 +785,7 @@
         "text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)\n",
         "top_probs, top_labels = text_probs.cpu().topk(5, dim=-1)"
       ],
-      "execution_count": 20,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -815,7 +818,7 @@
         "plt.subplots_adjust(wspace=0.5)\n",
         "plt.show()"
       ],
-      "execution_count": 21,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "display_data",
@@ -844,7 +847,7 @@
       "source": [
         ""
       ],
-      "execution_count": 21,
+      "execution_count": null,
       "outputs": []
     }
   ]
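For reference, a minimal usage sketch of the expanded MODELS mapping that this patch introduces (the sketch itself is not part of the commit). It assumes torch is installed, a CUDA device is available, and the CDN URLs are reachable; the notebook's own flow is `! wget {MODELS["ViT-B/32"]} -O model.pt` followed by torch.jit.load, and any of the new keys (RN50, RN101, RN50x4) can be substituted for "ViT-B/32".

# Illustrative sketch only -- not part of the patch above.
# Assumes: torch installed, CUDA device available, network access to the CDN.
import urllib.request

import torch

MODELS = {
    "RN50": "https://openaipublic.azureedge.net/clip/models/afeb0e10f9e5a86da6080e35cf09123aca3b358a0c3e3b6c78a7b63bc04b6762/RN50.pt",
    "RN101": "https://openaipublic.azureedge.net/clip/models/8fa8567bab74a42d41c5915025a8e4538c3bdbe8804a470a72f30b0d94fab599/RN101.pt",
    "RN50x4": "https://openaipublic.azureedge.net/clip/models/7e526bd135e493cef0776de27d5f42653e6b4c8bf9e0f653bb11773263205fdd/RN50x4.pt",
    "ViT-B/32": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt",
}

# Fetch one checkpoint; the notebook uses wget, urlretrieve keeps the sketch pure Python.
urllib.request.urlretrieve(MODELS["ViT-B/32"], "model.pt")

# The published checkpoints are TorchScript archives, so they load with torch.jit.load.
model = torch.jit.load("model.pt").cuda().eval()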