Update models URIs in notebook (#52)
This commit is contained in:
parent
fd6c1443c2
commit
beba48f353
|
@ -60,7 +60,7 @@
|
||||||
"else:\n",
|
"else:\n",
|
||||||
" torch_version_suffix = \"+cu110\""
|
" torch_version_suffix = \"+cu110\""
|
||||||
],
|
],
|
||||||
"execution_count": 1,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
|
@ -83,7 +83,7 @@
|
||||||
"source": [
|
"source": [
|
||||||
"! pip install torch==1.7.1{torch_version_suffix} torchvision==0.8.2{torch_version_suffix} -f https://download.pytorch.org/whl/torch_stable.html ftfy regex"
|
"! pip install torch==1.7.1{torch_version_suffix} torchvision==0.8.2{torch_version_suffix} -f https://download.pytorch.org/whl/torch_stable.html ftfy regex"
|
||||||
],
|
],
|
||||||
"execution_count": 2,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
|
@ -118,7 +118,7 @@
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"Torch version:\", torch.__version__)"
|
"print(\"Torch version:\", torch.__version__)"
|
||||||
],
|
],
|
||||||
"execution_count": 3,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
|
@ -147,10 +147,13 @@
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"MODELS = {\n",
|
"MODELS = {\n",
|
||||||
" \"ViT-B/32\": \"https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt\",\n",
|
" \"RN50\": \"https://openaipublic.azureedge.net/clip/models/afeb0e10f9e5a86da6080e35cf09123aca3b358a0c3e3b6c78a7b63bc04b6762/RN50.pt\",\n",
|
||||||
|
" \"RN101\": \"https://openaipublic.azureedge.net/clip/models/8fa8567bab74a42d41c5915025a8e4538c3bdbe8804a470a72f30b0d94fab599/RN101.pt\",\n",
|
||||||
|
" \"RN50x4\": \"https://openaipublic.azureedge.net/clip/models/7e526bd135e493cef0776de27d5f42653e6b4c8bf9e0f653bb11773263205fdd/RN50x4.pt\",\n",
|
||||||
|
" \"ViT-B/32\": \"https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt\", \n",
|
||||||
"}"
|
"}"
|
||||||
],
|
],
|
||||||
"execution_count": 4,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -165,7 +168,7 @@
|
||||||
"source": [
|
"source": [
|
||||||
"! wget {MODELS[\"ViT-B/32\"]} -O model.pt"
|
"! wget {MODELS[\"ViT-B/32\"]} -O model.pt"
|
||||||
],
|
],
|
||||||
"execution_count": 5,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
|
@ -206,7 +209,7 @@
|
||||||
"print(\"Context length:\", context_length)\n",
|
"print(\"Context length:\", context_length)\n",
|
||||||
"print(\"Vocab size:\", vocab_size)"
|
"print(\"Vocab size:\", vocab_size)"
|
||||||
],
|
],
|
||||||
"execution_count": 6,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
|
@ -250,7 +253,7 @@
|
||||||
"image_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).cuda()\n",
|
"image_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).cuda()\n",
|
||||||
"image_std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).cuda()"
|
"image_std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).cuda()"
|
||||||
],
|
],
|
||||||
"execution_count": 7,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -277,7 +280,7 @@
|
||||||
"! pip install ftfy regex\n",
|
"! pip install ftfy regex\n",
|
||||||
"! wget https://openaipublic.azureedge.net/clip/bpe_simple_vocab_16e6.txt.gz -O bpe_simple_vocab_16e6.txt.gz"
|
"! wget https://openaipublic.azureedge.net/clip/bpe_simple_vocab_16e6.txt.gz -O bpe_simple_vocab_16e6.txt.gz"
|
||||||
],
|
],
|
||||||
"execution_count": 8,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
|
@ -438,7 +441,7 @@
|
||||||
" text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors=\"replace\").replace('</w>', ' ')\n",
|
" text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors=\"replace\").replace('</w>', ' ')\n",
|
||||||
" return text\n"
|
" return text\n"
|
||||||
],
|
],
|
||||||
"execution_count": 9,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -485,7 +488,7 @@
|
||||||
" \"coffee\": \"a cup of coffee on a saucer\"\n",
|
" \"coffee\": \"a cup of coffee on a saucer\"\n",
|
||||||
"}"
|
"}"
|
||||||
],
|
],
|
||||||
"execution_count": 10,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -520,7 +523,7 @@
|
||||||
"\n",
|
"\n",
|
||||||
"plt.tight_layout()\n"
|
"plt.tight_layout()\n"
|
||||||
],
|
],
|
||||||
"execution_count": 11,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "display_data",
|
"output_type": "display_data",
|
||||||
|
@ -561,7 +564,7 @@
|
||||||
"image_input -= image_mean[:, None, None]\n",
|
"image_input -= image_mean[:, None, None]\n",
|
||||||
"image_input /= image_std[:, None, None]"
|
"image_input /= image_std[:, None, None]"
|
||||||
],
|
],
|
||||||
"execution_count": 12,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -573,7 +576,7 @@
|
||||||
"tokenizer = SimpleTokenizer()\n",
|
"tokenizer = SimpleTokenizer()\n",
|
||||||
"text_tokens = [tokenizer.encode(\"This is \" + desc) for desc in texts]"
|
"text_tokens = [tokenizer.encode(\"This is \" + desc) for desc in texts]"
|
||||||
],
|
],
|
||||||
"execution_count": 13,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -592,7 +595,7 @@
|
||||||
"\n",
|
"\n",
|
||||||
"text_input = text_input.cuda()"
|
"text_input = text_input.cuda()"
|
||||||
],
|
],
|
||||||
"execution_count": 14,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -605,7 +608,7 @@
|
||||||
" image_features = model.encode_image(image_input).float()\n",
|
" image_features = model.encode_image(image_input).float()\n",
|
||||||
" text_features = model.encode_text(text_input).float()"
|
" text_features = model.encode_text(text_input).float()"
|
||||||
],
|
],
|
||||||
"execution_count": 15,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -629,7 +632,7 @@
|
||||||
"text_features /= text_features.norm(dim=-1, keepdim=True)\n",
|
"text_features /= text_features.norm(dim=-1, keepdim=True)\n",
|
||||||
"similarity = text_features.cpu().numpy() @ image_features.cpu().numpy().T"
|
"similarity = text_features.cpu().numpy() @ image_features.cpu().numpy().T"
|
||||||
],
|
],
|
||||||
"execution_count": 16,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -664,7 +667,7 @@
|
||||||
"\n",
|
"\n",
|
||||||
"plt.title(\"Cosine similarity between text and image features\", size=20)"
|
"plt.title(\"Cosine similarity between text and image features\", size=20)"
|
||||||
],
|
],
|
||||||
"execution_count": 17,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "execute_result",
|
"output_type": "execute_result",
|
||||||
|
@ -722,7 +725,7 @@
|
||||||
"\n",
|
"\n",
|
||||||
"cifar100 = CIFAR100(os.path.expanduser(\"~/.cache\"), transform=preprocess, download=True)"
|
"cifar100 = CIFAR100(os.path.expanduser(\"~/.cache\"), transform=preprocess, download=True)"
|
||||||
],
|
],
|
||||||
"execution_count": 18,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
|
@ -753,7 +756,7 @@
|
||||||
"text_input = text_input.cuda()\n",
|
"text_input = text_input.cuda()\n",
|
||||||
"text_input.shape"
|
"text_input.shape"
|
||||||
],
|
],
|
||||||
"execution_count": 19,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "execute_result",
|
"output_type": "execute_result",
|
||||||
|
@ -782,7 +785,7 @@
|
||||||
"text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)\n",
|
"text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)\n",
|
||||||
"top_probs, top_labels = text_probs.cpu().topk(5, dim=-1)"
|
"top_probs, top_labels = text_probs.cpu().topk(5, dim=-1)"
|
||||||
],
|
],
|
||||||
"execution_count": 20,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -815,7 +818,7 @@
|
||||||
"plt.subplots_adjust(wspace=0.5)\n",
|
"plt.subplots_adjust(wspace=0.5)\n",
|
||||||
"plt.show()"
|
"plt.show()"
|
||||||
],
|
],
|
||||||
"execution_count": 21,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "display_data",
|
"output_type": "display_data",
|
||||||
|
@ -844,7 +847,7 @@
|
||||||
"source": [
|
"source": [
|
||||||
""
|
""
|
||||||
],
|
],
|
||||||
"execution_count": 21,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
Loading…
Reference in New Issue