1+ {
2+ "nbformat" : 4 ,
3+ "nbformat_minor" : 0 ,
4+ "metadata" : {
5+ "colab" : {
6+ "name" : " Hummingbird-ML.ipynb" ,
7+ "provenance" : [],
8+ "collapsed_sections" : []
9+ },
10+ "kernelspec" : {
11+ "name" : " python3" ,
12+ "display_name" : " Python 3"
13+ },
14+ "accelerator" : " GPU"
15+ },
16+ "cells" : [
17+ {
18+ "cell_type" : " markdown" ,
19+ "metadata" : {
20+ "id" : " IpOdlr3WAPHJ" ,
21+ "colab_type" : " text"
22+ },
23+ "source" : [
24+ " # **Hummingbird-ML**\n " ,
25+ " \n " ,
26+ " [How to Harness GPU to Speed Up Machine Learning with Hummingbird-ML](https://www.youtube.com/watch?v=qN8jcUmo8TI)\n " ,
27+ " \n " ,
28+ " Adapted from: https://github.com/microsoft/hummingbird"
29+ ]
30+ },
31+ {
32+ "cell_type" : " markdown" ,
33+ "metadata" : {
34+ "id" : " ir3DZd5-_jiu" ,
35+ "colab_type" : " text"
36+ },
37+ "source" : [
38+ " # Install Hummingbird-ML"
39+ ]
40+ },
41+ {
42+ "cell_type" : " code" ,
43+ "metadata" : {
44+ "id" : " ra3JEgWN_bfp" ,
45+ "colab_type" : " code" ,
46+ "colab" : {
47+ "base_uri" : " https://localhost:8080/" ,
48+ "height" : 408
49+ },
50+ "outputId" : " 4fae39de-26f0-4939-846d-039fb876725a"
51+ },
52+ "source" : [
53+ " ! pip install hummingbird-ml[extra]"
54+ ],
55+ "execution_count" : 1 ,
56+ "outputs" : [
57+ {
58+ "output_type" : " stream" ,
59+ "text" : [
60+ " Collecting hummingbird-ml[extra]\n " ,
61+ " \u001b [?25l Downloading https://files.pythonhosted.org/packages/ed/3b/cf1b8c1e7531377adead8de29e29b00b5aed380544ad0def4c0188b50d80/hummingbird_ml-0.0.5-py2.py3-none-any.whl (60kB)\n " ,
62+ " \r \u001b [K |█████▌ | 10kB 16.6MB/s eta 0:00:01\r \u001b [K |███████████ | 20kB 1.8MB/s eta 0:00:01\r \u001b [K |████████████████▍ | 30kB 2.2MB/s eta 0:00:01\r \u001b [K |█████████████████████▉ | 40kB 2.5MB/s eta 0:00:01\r \u001b [K |███████████████████████████▎ | 51kB 2.0MB/s eta 0:00:01\r \u001b [K |████████████████████████████████| 61kB 1.8MB/s \n " ,
63+ " \u001b [?25hRequirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.6/dist-packages (from hummingbird-ml[extra]) (1.18.5)\n " ,
64+ " Requirement already satisfied: torch>=1.4.* in /usr/local/lib/python3.6/dist-packages (from hummingbird-ml[extra]) (1.6.0+cu101)\n " ,
65+ " Collecting onnxconverter-common>=1.6.0\n " ,
66+ " \u001b [?25l Downloading https://files.pythonhosted.org/packages/fe/7a/7e30c643cd7d2ad87689188ef34ce93e657bd14da3605f87bcdbc19cd5b1/onnxconverter_common-1.7.0-py2.py3-none-any.whl (64kB)\n " ,
67+ " \u001b [K |████████████████████████████████| 71kB 3.7MB/s \n " ,
68+ " \u001b [?25hRequirement already satisfied: scikit-learn>=0.22.1 in /usr/local/lib/python3.6/dist-packages (from hummingbird-ml[extra]) (0.22.2.post1)\n " ,
69+ " Requirement already satisfied: xgboost==0.90; extra == \" extra\" in /usr/local/lib/python3.6/dist-packages (from hummingbird-ml[extra]) (0.90)\n " ,
70+ " Requirement already satisfied: lightgbm>=2.2; extra == \" extra\" in /usr/local/lib/python3.6/dist-packages (from hummingbird-ml[extra]) (2.2.3)\n " ,
71+ " Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from torch>=1.4.*->hummingbird-ml[extra]) (0.16.0)\n " ,
72+ " Collecting onnx\n " ,
73+ " \u001b [?25l Downloading https://files.pythonhosted.org/packages/36/ee/bc7bc88fc8449266add978627e90c363069211584b937fd867b0ccc59f09/onnx-1.7.0-cp36-cp36m-manylinux1_x86_64.whl (7.4MB)\n " ,
74+ " \u001b [K |████████████████████████████████| 7.4MB 16.0MB/s \n " ,
75+ " \u001b [?25hRequirement already satisfied: protobuf in /usr/local/lib/python3.6/dist-packages (from onnxconverter-common>=1.6.0->hummingbird-ml[extra]) (3.12.4)\n " ,
76+ " Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn>=0.22.1->hummingbird-ml[extra]) (0.16.0)\n " ,
77+ " Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.6/dist-packages (from scikit-learn>=0.22.1->hummingbird-ml[extra]) (1.4.1)\n " ,
78+ " Requirement already satisfied: typing-extensions>=3.6.2.1 in /usr/local/lib/python3.6/dist-packages (from onnx->onnxconverter-common>=1.6.0->hummingbird-ml[extra]) (3.7.4.3)\n " ,
79+ " Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from onnx->onnxconverter-common>=1.6.0->hummingbird-ml[extra]) (1.15.0)\n " ,
80+ " Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf->onnxconverter-common>=1.6.0->hummingbird-ml[extra]) (49.6.0)\n " ,
81+ " Installing collected packages: onnx, onnxconverter-common, hummingbird-ml\n " ,
82+ " Successfully installed hummingbird-ml-0.0.5 onnx-1.7.0 onnxconverter-common-1.7.0\n "
83+ ],
84+ "name" : " stdout"
85+ }
86+ ]
87+ },
88+ {
89+ "cell_type" : " markdown" ,
90+ "metadata" : {
91+ "id" : " YnA-PmeA_q70" ,
92+ "colab_type" : " text"
93+ },
94+ "source" : [
95+ " # Import libraries"
96+ ]
97+ },
98+ {
99+ "cell_type" : " code" ,
100+ "metadata" : {
101+ "id" : " lkIThThi_puf" ,
102+ "colab_type" : " code" ,
103+ "colab" : {}
104+ },
105+ "source" : [
106+ " import numpy as np\n " ,
107+ " from sklearn.ensemble import RandomForestClassifier\n " ,
108+ " from hummingbird.ml import convert"
109+ ],
110+ "execution_count" : 2 ,
111+ "outputs" : []
112+ },
113+ {
114+ "cell_type" : " markdown" ,
115+ "metadata" : {
116+ "id" : " rFw_4cGa_-tF" ,
117+ "colab_type" : " text"
118+ },
119+ "source" : [
120+ " # Create some random data for binary classification"
121+ ]
122+ },
123+ {
124+ "cell_type" : " code" ,
125+ "metadata" : {
126+ "id" : " hGGngPPp__mx" ,
127+ "colab_type" : " code" ,
128+ "colab" : {}
129+ },
130+ "source" : [
131+ " num_classes = 2\n " ,
132+ " X = np.random.rand(100000, 28)\n " ,
133+ " y = np.random.randint(num_classes, size=100000)"
134+ ],
135+ "execution_count" : 3 ,
136+ "outputs" : []
137+ },
138+ {
139+ "cell_type" : " markdown" ,
140+ "metadata" : {
141+ "id" : " WusxNKH4AHII" ,
142+ "colab_type" : " text"
143+ },
144+ "source" : [
145+ " # Create and train a model (scikit-learn RandomForestClassifier)"
146+ ]
147+ },
148+ {
149+ "cell_type" : " code" ,
150+ "metadata" : {
151+ "id" : " GMRJRuBwAGeV" ,
152+ "colab_type" : " code" ,
153+ "colab" : {}
154+ },
155+ "source" : [
156+ " skl_model = RandomForestClassifier(n_estimators=10, max_depth=10)"
157+ ],
158+ "execution_count" : 4 ,
159+ "outputs" : []
160+ },
161+ {
162+ "cell_type" : " code" ,
163+ "metadata" : {
164+ "id" : " M_kGo80yAYTn" ,
165+ "colab_type" : " code" ,
166+ "colab" : {
167+ "base_uri" : " https://localhost:8080/" ,
168+ "height" : 34
169+ },
170+ "outputId" : " aa863652-02f8-4578-8fb7-e3b028685cd7"
171+ },
172+ "source" : [
173+ " %%timeit\n " ,
174+ " skl_model.fit(X, y)"
175+ ],
176+ "execution_count" : 5 ,
177+ "outputs" : [
178+ {
179+ "output_type" : " stream" ,
180+ "text" : [
181+ " 1 loop, best of 3: 4.78 s per loop\n "
182+ ],
183+ "name" : " stdout"
184+ }
185+ ]
186+ },
187+ {
188+ "cell_type" : " code" ,
189+ "metadata" : {
190+ "id" : " Hp4a8I0tAbBl" ,
191+ "colab_type" : " code" ,
192+ "colab" : {
193+ "base_uri" : " https://localhost:8080/" ,
194+ "height" : 34
195+ },
196+ "outputId" : " 4e083fd5-981f-4238-9158-3f4500585560"
197+ },
198+ "source" : [
199+ " %%timeit\n " ,
200+ " skl_model.predict(X)"
201+ ],
202+ "execution_count" : 6 ,
203+ "outputs" : [
204+ {
205+ "output_type" : " stream" ,
206+ "text" : [
207+ " 10 loops, best of 3: 85.6 ms per loop\n "
208+ ],
209+ "name" : " stdout"
210+ }
211+ ]
212+ },
213+ {
214+ "cell_type" : " markdown" ,
215+ "metadata" : {
216+ "id" : " mNiBvy9BA7wR" ,
217+ "colab_type" : " text"
218+ },
219+ "source" : [
220+ " # Use Hummingbird to convert the model to PyTorch"
221+ ]
222+ },
223+ {
224+ "cell_type" : " code" ,
225+ "metadata" : {
226+ "id" : " vcAOpuxxAzPc" ,
227+ "colab_type" : " code" ,
228+ "colab" : {}
229+ },
230+ "source" : [
231+ " model = convert(skl_model, 'pytorch')"
232+ ],
233+ "execution_count" : 7 ,
234+ "outputs" : []
235+ },
236+ {
237+ "cell_type" : " markdown" ,
238+ "metadata" : {
239+ "id" : " dpt6_4l8BF7e" ,
240+ "colab_type" : " text"
241+ },
242+ "source" : [
243+ " # Run predictions on CPU"
244+ ]
245+ },
246+ {
247+ "cell_type" : " code" ,
248+ "metadata" : {
249+ "id" : " _BiU63hNBDu-" ,
250+ "colab_type" : " code" ,
251+ "colab" : {
252+ "base_uri" : " https://localhost:8080/" ,
253+ "height" : 34
254+ },
255+ "outputId" : " 1bd8b158-a62b-4fe0-be09-ca382c817247"
256+ },
257+ "source" : [
258+ " %%timeit\n " ,
259+ " model.predict(X)"
260+ ],
261+ "execution_count" : 8 ,
262+ "outputs" : [
263+ {
264+ "output_type" : " stream" ,
265+ "text" : [
266+ " 1 loop, best of 3: 174 ms per loop\n "
267+ ],
268+ "name" : " stdout"
269+ }
270+ ]
271+ },
272+ {
273+ "cell_type" : " markdown" ,
274+ "metadata" : {
275+ "id" : " F10tJEMKBPZG" ,
276+ "colab_type" : " text"
277+ },
278+ "source" : [
279+ " # Run predictions on GPU"
280+ ]
281+ },
282+ {
283+ "cell_type" : " code" ,
284+ "metadata" : {
285+ "id" : " l2PUbqoHBJBX" ,
286+ "colab_type" : " code" ,
287+ "colab" : {}
288+ },
289+ "source" : [
290+ " model.to('cuda')"
291+ ],
292+ "execution_count" : 9 ,
293+ "outputs" : []
294+ },
295+ {
296+ "cell_type" : " code" ,
297+ "metadata" : {
298+ "id" : " -AB23_VTBRMP" ,
299+ "colab_type" : " code" ,
300+ "colab" : {
301+ "base_uri" : " https://localhost:8080/" ,
302+ "height" : 51
303+ },
304+ "outputId" : " b9efea7d-913c-4326-c14a-6b6ca0e9c063"
305+ },
306+ "source" : [
307+ " %%timeit\n " ,
308+ " model.predict(X)"
309+ ],
310+ "execution_count" : 10 ,
311+ "outputs" : [
312+ {
313+ "output_type" : " stream" ,
314+ "text" : [
315+ " The slowest run took 5.22 times longer than the fastest. This could mean that an intermediate result is being cached.\n " ,
316+ " 100 loops, best of 3: 14.8 ms per loop\n "
317+ ],
318+ "name" : " stdout"
319+ }
320+ ]
321+ },
322+ {
323+ "cell_type" : " markdown" ,
324+ "metadata" : {
325+ "id" : " dbkQU69JDt7T" ,
326+ "colab_type" : " text"
327+ },
328+ "source" : [
329+ " # Prediction Time Comparison"
330+ ]
331+ },
332+ {
333+ "cell_type" : " markdown" ,
334+ "metadata" : {
335+ "id" : " Hr1R_9nwDwpc" ,
336+ "colab_type" : " text"
337+ },
338+ "source" : [
339+ " Method | Prediction time (100,000 rows) | Relative performance\n " ,
340+ " --|--|--\n " ,
341+ " scikit-learn | 85.6 ms | Baseline\n " ,
342+ " PyTorch (CPU) | 174 ms | About 2x slower than scikit-learn\n " ,
343+ " PyTorch (GPU) | 14.8 ms | About 5.8x faster than scikit-learn; about 11.8x faster than PyTorch (CPU)"
344+ ]
345+ },
346+ {
347+ "cell_type" : " code" ,
348+ "metadata" : {
349+ "id" : " 9lmR3LHoEzhl" ,
350+ "colab_type" : " code" ,
351+ "colab" : {}
352+ },
353+ "source" : [
354+ " "
355+ ],
356+ "execution_count" : null ,
357+ "outputs" : []
358+ }
359+ ]
360+ }
0 commit comments