added build and install instructions

Hendrik Schutter 2021-12-15 22:13:53 +01:00
parent 573315e83f
commit a05aeb7ef9
5 changed files with 215 additions and 41 deletions

README.md

@@ -1,51 +1,90 @@
# CLIP
# CLIP using nVidia Tesla K20Xm
To use the old but cheap Tesla K20Xm (compute capability 3.5) with OpenAI CLIP, you need to downgrade torch and torchvision to versions that still support this hardware. Unfortunately, that means building both from source.
## Prerequisites
- Ubuntu 20.04
- miniconda from https://docs.conda.io/en/latest/miniconda.html
- `sudo apt -y install gcc-8 g++-8 git build-essential`
### CUDA
- nVidia driver `NVIDIA-Linux-x86_64-470.82.01.run`
- `sudo apt install nvidia-settings`
- Build `check_cuda.c` with `nvcc -o check_cuda check_cuda.c -lcuda`
- Verify that the output of `check_cuda` contains `Compute Capability: 3.5`, as in the sketch below.
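On a K20Xm, the first lines of the output should look roughly like this (memory and clock figures further down will vary per system; 14 multiprocessors and 2688 cores are the K20Xm's Kepler specs):
```bash
$ nvcc -o check_cuda check_cuda.c -lcuda
$ ./check_cuda
Found 1 device(s).
Device: 0
 Name: Tesla K20Xm
 Compute Capability: 3.5
 Multiprocessors: 14
 CUDA Cores: 2688
```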
## Installing
### Creating a Conda env
`conda create --name clip_Tesla`
`conda activate clip_Tesla`
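Optionally, a Python version can be pinned when creating the env; 3.8 below is an assumption, not something this guide was verified against:
```bash
conda create --name clip_Tesla python=3.8
```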
### Building PyTorch
Switch to gcc 8:
`sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 8`
`sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 8`
Verify the active compiler version:
`gcc --version`
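If several compiler versions are registered with `update-alternatives`, you can switch between them interactively at any time:
```bash
sudo update-alternatives --config gcc
sudo update-alternatives --config g++
gcc --version   # should report version 8.x before building
```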
`conda install astunparse numpy ninja pyyaml mkl mkl-include setuptools cmake cffi typing_extensions future six requests dataclasses`
`conda install -c pytorch magma-cuda101`
Change to a temporary directory:
`git clone --recursive https://github.com/pytorch/pytorch`
`cd pytorch`
Downgrade to 1.7.1:
`git checkout 57bffc3a8e4fee0cce31e1ff1f662ccf7b16db57`
`git submodule sync`
`git submodule update --init --recursive --jobs 0 `
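As a sanity check, confirm that the checkout really landed on the pinned commit:
```bash
git rev-parse HEAD   # should print 57bffc3a8e4fee0cce31e1ff1f662ccf7b16db57
```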
Build PyTorch and install it:
`clear && TORCH_CUDA_ARCH_LIST="3.5 6.1" python setup.py install`
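If the build runs out of memory with the default parallelism, PyTorch's `setup.py` honors the `MAX_JOBS` variable; the value 4 below is only an example:
```bash
MAX_JOBS=4 TORCH_CUDA_ARCH_LIST="3.5 6.1" python setup.py install
```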
Run `torch_test.py` to verify that CUDA is available and to check the installed version; expected output:
`True`
`1.7.0a0+57bffc3`
### Building TorchVision
`git clone https://github.com/pytorch/vision.git`
`cd vision/`
`git checkout 2f40a483d73018ae6e1488a484c5927f2b309969`
`conda install -c conda-forge ffmpeg`
`python setup.py install`
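A quick import check confirms that torchvision was built against the local PyTorch (the exact version string will differ):
```bash
python -c "import torchvision; print(torchvision.__version__)"
```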
### Install CLIP
Change back to this repository's root directory, then:
`python setup.py install`
Run the test:
`python clip_test.py`
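With everything in place, the script should classify `CLIP.png` as "a diagram" with high confidence; the exact probabilities may differ slightly:
```bash
$ python clip_test.py
Label probs: [[0.9927937 0.00421068 0.00299572]]
```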
## CLIP
[[Blog]](https://openai.com/blog/clip/) [[Paper]](https://arxiv.org/abs/2103.00020) [[Model Card]](model-card.md) [[Colab]](https://colab.research.google.com/github/openai/clip/blob/master/notebooks/Interacting_with_CLIP.ipynb)
CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on a variety of (image, text) pairs. It can be instructed in natural language to predict the most relevant text snippet, given an image, without directly optimizing for the task, similarly to the zero-shot capabilities of GPT-2 and 3. We found CLIP matches the performance of the original ResNet50 on ImageNet “zero-shot” without using any of the original 1.28M labeled examples, overcoming several major challenges in computer vision.
## Approach
![CLIP](CLIP.png)
## Usage
First, [install PyTorch 1.7.1](https://pytorch.org/get-started/locally/) and torchvision, as well as small additional dependencies, and then install this repo as a Python package. On a CUDA GPU machine, the following will do the trick:
```bash
$ conda install --yes -c pytorch pytorch=1.7.1 torchvision cudatoolkit=11.0
$ pip install ftfy regex tqdm
$ pip install git+https://github.com/openai/CLIP.git
```
Replace `cudatoolkit=11.0` above with the appropriate CUDA version on your machine or `cpuonly` when installing on a machine without a GPU.
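For example, the CPU-only install would be:
```bash
$ conda install --yes -c pytorch pytorch=1.7.1 torchvision cpuonly
```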
```python
import torch
import clip
from PIL import Image
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)
image = preprocess(Image.open("CLIP.png")).unsqueeze(0).to(device)
text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device)
with torch.no_grad():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)

    logits_per_image, logits_per_text = model(image, text)
    probs = logits_per_image.softmax(dim=-1).cpu().numpy()

print("Label probs:", probs)  # prints: [[0.9927937 0.00421068 0.00299572]]
```
## API
The CLIP module `clip` provides the following methods:

check_cuda.c Normal file

@@ -0,0 +1,109 @@
#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime_api.h>

/* Outputs some information on CUDA-enabled devices on your computer,
 * including compute capability and current memory usage.
 *
 * On Linux, compile with: nvcc -o check_cuda check_cuda.c -lcuda
 * On Windows, compile with: nvcc -o check_cuda.exe check_cuda.c -lcuda
 *
 * Authors: Thomas Unterthiner, Jan Schlüter
 */

int ConvertSMVer2Cores(int major, int minor)
{
    // Returns the number of CUDA cores per multiprocessor for a given
    // Compute Capability version. There is no way to retrieve that via
    // the API, so it needs to be hard-coded.
    // See _ConvertSMVer2Cores in helper_cuda.h in NVIDIA's CUDA Samples.
    switch ((major << 4) + minor) {
        case 0x10: return 8;    // Tesla
        case 0x11: return 8;
        case 0x12: return 8;
        case 0x13: return 8;
        case 0x20: return 32;   // Fermi
        case 0x21: return 48;
        case 0x30: return 192;  // Kepler
        case 0x32: return 192;
        case 0x35: return 192;  // the K20Xm falls in here
        case 0x37: return 192;
        case 0x50: return 128;  // Maxwell
        case 0x52: return 128;
        case 0x53: return 128;
        case 0x60: return 64;   // Pascal
        case 0x61: return 128;
        case 0x62: return 128;
        case 0x70: return 64;   // Volta
        case 0x72: return 64;   // Xavier
        case 0x75: return 64;   // Turing
        default:   return 0;
    }
}

int main()
{
    int nGpus;
    int i;
    char name[100];
    int cc_major, cc_minor, cores, cuda_cores, threads_per_core, clockrate;
    size_t freeMem;
    size_t totalMem;
    const char *errstr;
    CUresult result;
    CUdevice device;
    CUcontext context;

    result = cuInit(0);
    if (result != CUDA_SUCCESS) {
        // cudaGetErrorString() expects a runtime cudaError_t; for driver-API
        // CUresult codes, cuGetErrorString() returns the matching message.
        cuGetErrorString(result, &errstr);
        printf("cuInit failed with error code %d: %s\n", result, errstr);
        return 1;
    }
    result = cuDeviceGetCount(&nGpus);
    if (result != CUDA_SUCCESS) {
        cuGetErrorString(result, &errstr);
        printf("cuDeviceGetCount failed with error code %d: %s\n", result, errstr);
        return 1;
    }
    printf("Found %d device(s).\n", nGpus);
    for (i = 0; i < nGpus; i++) {
        cuDeviceGet(&device, i);
        printf("Device: %d\n", i);
        if (cuDeviceGetName(&name[0], sizeof(name), device) == CUDA_SUCCESS) {
            printf(" Name: %s\n", &name[0]);
        }
        if (cuDeviceComputeCapability(&cc_major, &cc_minor, device) == CUDA_SUCCESS) {
            printf(" Compute Capability: %d.%d\n", cc_major, cc_minor);
        }
        if (cuDeviceGetAttribute(&cores, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device) == CUDA_SUCCESS) {
            printf(" Multiprocessors: %d\n", cores);
            if (cuDeviceComputeCapability(&cc_major, &cc_minor, device) == CUDA_SUCCESS) {
                cuda_cores = cores * ConvertSMVer2Cores(cc_major, cc_minor);
                if (cuda_cores > 0) {
                    printf(" CUDA Cores: %d\n", cuda_cores);
                }
                else {
                    printf(" CUDA Cores: unknown\n");
                }
            }
            if (cuDeviceGetAttribute(&threads_per_core, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, device) == CUDA_SUCCESS) {
                printf(" Concurrent threads: %d\n", cores * threads_per_core);
            }
        }
        if (cuDeviceGetAttribute(&clockrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device) == CUDA_SUCCESS) {
            printf(" GPU clock: %g MHz\n", clockrate / 1000.);
        }
        if (cuDeviceGetAttribute(&clockrate, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, device) == CUDA_SUCCESS) {
            printf(" Memory clock: %g MHz\n", clockrate / 1000.);
        }
        cuCtxCreate(&context, 0, device);
        result = cuMemGetInfo(&freeMem, &totalMem);
        if (result == CUDA_SUCCESS) {
            /* %zu is the correct conversion for size_t */
            printf(" Total Memory: %zu MiB\n Free Memory: %zu MiB\n", totalMem / (1024 * 1024), freeMem / (1024 * 1024));
        } else {
            cuGetErrorString(result, &errstr);
            printf(" cuMemGetInfo failed with error code %d: %s\n", result, errstr);
        }
        cuCtxDetach(context); /* deprecated in newer CUDA; cuCtxDestroy() is the modern equivalent */
    }
    return 0;
}

clip_test.py Normal file

@@ -0,0 +1,19 @@
import torch
import clip
from PIL import Image
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)
image = preprocess(Image.open("CLIP.png")).unsqueeze(0).to(device)
text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device)
with torch.no_grad():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)

    logits_per_image, logits_per_text = model(image, text)
    probs = logits_per_image.softmax(dim=-1).cpu().numpy()

print("Label probs:", probs)  # prints: [[0.9927937 0.00421068 0.00299572]]

requirements.txt

@@ -1,5 +1,5 @@
ftfy
regex
tqdm
torch
torchvision
#torch
#torchvision

torch_test.py Normal file

@@ -0,0 +1,7 @@
import torch
print(torch.cuda.is_available())
print(torch.__version__)
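Running it from the activated environment should reproduce the values quoted in the README above:
```bash
$ python torch_test.py
True
1.7.0a0+57bffc3
```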