From 3bee28119e6b28e75b82b811b87b56935314e6a5 Mon Sep 17 00:00:00 2001
From: boba_and_beer
Date: Sat, 30 Jan 2021 05:05:01 +1100
Subject: [PATCH] Make the repo installable as a package (#26)

---
 .gitignore                       | 10 +++++++
 MANIFEST.in                      |  1 +
 clip/__init__.py                 |  1 +
 .../bpe_simple_vocab_16e6.txt.gz | Bin
 clip.py => clip/clip.py          | 12 +++++----
 model.py => clip/model.py        |  0
 .../simple_tokenizer.py          |  0
 requirements.txt                 |  5 ++++
 setup.py                         | 21 +++++++++++++++
 tests/test_consistency.py        | 25 ++++++++++++++++++
 10 files changed, 70 insertions(+), 5 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 MANIFEST.in
 create mode 100644 clip/__init__.py
 rename bpe_simple_vocab_16e6.txt.gz => clip/bpe_simple_vocab_16e6.txt.gz (100%)
 rename clip.py => clip/clip.py (96%)
 rename model.py => clip/model.py (100%)
 rename simple_tokenizer.py => clip/simple_tokenizer.py (100%)
 create mode 100644 requirements.txt
 create mode 100644 setup.py
 create mode 100644 tests/test_consistency.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..321f181
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,10 @@
+__pycache__/
+*.py[cod]
+*$py.class
+*.egg-info
+.pytest_cache
+.ipynb_checkpoints
+
+thumbs.db
+.DS_Store
+.idea
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..effd8d9
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+include clip/bpe_simple_vocab_16e6.txt.gz
diff --git a/clip/__init__.py b/clip/__init__.py
new file mode 100644
index 0000000..dcc5619
--- /dev/null
+++ b/clip/__init__.py
@@ -0,0 +1 @@
+from .clip import *
diff --git a/bpe_simple_vocab_16e6.txt.gz b/clip/bpe_simple_vocab_16e6.txt.gz
similarity index 100%
rename from bpe_simple_vocab_16e6.txt.gz
rename to clip/bpe_simple_vocab_16e6.txt.gz
diff --git a/clip.py b/clip/clip.py
similarity index 96%
rename from clip.py
rename to clip/clip.py
index 87a70c9..8f37ba8 100644
--- a/clip.py
+++ b/clip/clip.py
@@ -9,8 +9,8 @@ from PIL import Image
 from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
 from tqdm import tqdm
 
-from model import build_model
-from simple_tokenizer import SimpleTokenizer as _Tokenizer
+from .model import build_model
+from .simple_tokenizer import SimpleTokenizer as _Tokenizer
 
 __all__ = ["available_models", "load", "tokenize"]
 _tokenizer = _Tokenizer()
@@ -24,7 +24,7 @@ _MODELS = {
 def _download(url: str, root: str = os.path.expanduser("~/.cache/clip")):
     os.makedirs(root, exist_ok=True)
     filename = os.path.basename(url)
-    
+
     expected_sha256 = url.split("/")[-2]
     download_target = os.path.join(root, filename)
 
@@ -38,7 +38,7 @@ def _download(url: str, root: str = os.path.expanduser("~/.cache/clip")):
             warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file")
 
     with urllib.request.urlopen(url) as source, open(download_target, "wb") as output:
-        with tqdm(total=int(source.info().get("Content-Length")), ncols=80) as loop:
+        with tqdm(total=int(source.info().get("Content-Length")), ncols=80) as loop:
             while True:
                 buffer = source.read(8192)
                 if not buffer:
@@ -75,6 +75,8 @@ def load(name: str, device: Union[str, torch.device] = "cuda" if torch.cuda.is_a
 
     if not jit:
         model = build_model(model.state_dict()).to(device)
+        if str(device) == "cpu":
+            model.float()
         return model, transform
 
     # patch the device names
@@ -96,7 +98,7 @@ def load(name: str, device: Union[str, torch.device] = "cuda" if torch.cuda.is_a
         patch_device(model.encode_text)
 
     # patch dtype to float32 on CPU
-    if device == "cpu":
+    if str(device) == "cpu":
         float_holder = torch.jit.trace(lambda: torch.ones([]).float(), example_inputs=[])
         float_input = list(float_holder.graph.findNode("aten::to").inputs())[1]
         float_node = float_input.node()
diff --git a/model.py b/clip/model.py
similarity index 100%
rename from model.py
rename to clip/model.py
diff --git a/simple_tokenizer.py b/clip/simple_tokenizer.py
similarity index 100%
rename from simple_tokenizer.py
rename to clip/simple_tokenizer.py
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..289ca08
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+ftfy
+regex
+tqdm
+torch>=1.7.1,<1.7.2
+torchvision==0.8.2
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..c9ea7d0
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,21 @@
+import os
+
+import pkg_resources
+from setuptools import setup, find_packages
+
+setup(
+    name="clip",
+    py_modules=["clip"],
+    version="1.0",
+    description="",
+    author="OpenAI",
+    packages=find_packages(exclude=["tests*"]),
+    install_requires=[
+        str(r)
+        for r in pkg_resources.parse_requirements(
+            open(os.path.join(os.path.dirname(__file__), "requirements.txt"))
+        )
+    ],
+    include_package_data=True,
+    extras_require={'dev': ['pytest']},
+)
diff --git a/tests/test_consistency.py b/tests/test_consistency.py
new file mode 100644
index 0000000..29d343d
--- /dev/null
+++ b/tests/test_consistency.py
@@ -0,0 +1,25 @@
+import numpy as np
+import pytest
+import torch
+from PIL import Image
+
+import clip
+
+
+@pytest.mark.parametrize('model_name', clip.available_models())
+def test_consistency(model_name):
+    device = "cpu"
+    jit_model, transform = clip.load(model_name, device=device)
+    py_model, _ = clip.load(model_name, device=device, jit=False)
+
+    image = transform(Image.open("CLIP.png")).unsqueeze(0).to(device)
+    text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device)
+
+    with torch.no_grad():
+        logits_per_image, _ = jit_model(image, text)
+        jit_probs = logits_per_image.softmax(dim=-1).cpu().numpy()
+
+        logits_per_image, _ = py_model(image, text)
+        py_probs = logits_per_image.softmax(dim=-1).cpu().numpy()
+
+    assert np.allclose(jit_probs, py_probs, atol=0.01, rtol=0.1)
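
Usage sketch (not part of the patch above): with setup.py and the clip/ package layout in place, the repository installs with pip and imports as a regular package, which is what tests/test_consistency.py relies on. The snippet below is an illustration only; it assumes the repo checkout is the working directory and that "ViT-B/32" is among the names returned by clip.available_models().

    # From the repo root (shell):
    #   pip install .           # installs the clip package plus the requirements.txt pins
    #   pip install ".[dev]"    # optionally pulls in pytest (the 'dev' extra in setup.py)
    #   pytest tests/           # runs test_consistency.py against every available model

    import torch
    import clip

    # After installation the package is imported from site-packages, not via sys.path hacks.
    print(clip.available_models())

    # jit=False returns the eager PyTorch model; the patch casts it to float32 on CPU.
    model, preprocess = clip.load("ViT-B/32", device="cpu", jit=False)
    text = clip.tokenize(["a diagram", "a dog", "a cat"])
    with torch.no_grad():
        text_features = model.encode_text(text)  # one embedding per tokenized prompt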