<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href="http://www.blogger.com/styles/atom.css" type="text/css"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' xmlns:georss='http://www.georss.org/georss' xmlns:gd='http://schemas.google.com/g/2005' xmlns:thr='http://purl.org/syndication/thread/1.0'><id>tag:blogger.com,1999:blog-7815401213250921645</id><updated>2012-02-01T23:47:22.918-06:00</updated><category term='GPU'/><category term='simulation'/><category term='molecule'/><category term='web'/><category term='cross-compilation'/><category term='xor128'/><category term='Smoldyn'/><category term='DSMC'/><category term='Java'/><category term='GUDA'/><category term='Apple'/><category term='django'/><category term='random number'/><category term='reaction'/><category term='Boston'/><category term='chemical'/><category term='iPhone'/><category term='SpringSim'/><category term='San Francisco'/><category term='journal'/><category term='Mac'/><category term='CUDA'/><category term='VNC'/><category term='google'/><category term='MersenneTwister'/><title type='text'>GPU Geek's blog</title><subtitle type='html'>A blog about GPU computing and computer graphics.</subtitle><link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://gpugeek.blogspot.com/feeds/posts/default'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default?max-results=100'/><link rel='alternate' type='text/html' href='http://gpugeek.blogspot.com/'/><link rel='hub' href='http://pubsubhubbub.appspot.com/'/><author><name>_den</name><uri>http://www.blogger.com/profile/14479668554149216392</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='27' height='32' src='http://4.bp.blogspot.com/_3C8mfvf3C3w/Sh6Kq4oYx5I/AAAAAAAAAKw/6PWx7j2vC7s/S220/NES_2853_2.jpg'/></author><generator version='7.00' 
uri='http://www.blogger.com'>Blogger</generator><openSearch:totalResults>11</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>100</openSearch:itemsPerPage><entry><id>tag:blogger.com,1999:blog-7815401213250921645.post-4671690845720815001</id><published>2012-01-24T23:09:00.000-06:00</published><updated>2012-01-24T23:09:01.193-06:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='journal'/><category scheme='http://www.blogger.com/atom/ns#' term='DSMC'/><category scheme='http://www.blogger.com/atom/ns#' term='simulation'/><category scheme='http://www.blogger.com/atom/ns#' term='GUDA'/><category scheme='http://www.blogger.com/atom/ns#' term='GPU'/><title type='text'>CUDA DSMC paper has been published</title><content type='html'>&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;My CUDA DSMC paper has finally been published in SIMULATION journal. You can find it &lt;a href="http://sim.sagepub.com/content/early/2011/09/23/0037549711418787.abstract"&gt;here&lt;/a&gt;.&lt;div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Thanks!&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7815401213250921645-4671690845720815001?l=gpugeek.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://gpugeek.blogspot.com/feeds/4671690845720815001/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=7815401213250921645&amp;postID=4671690845720815001' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/4671690845720815001'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/4671690845720815001'/><link rel='alternate' type='text/html' 
href='http://gpugeek.blogspot.com/2012/01/cuda-dsmc-paper-has-been-published.html' title='CUDA DSMC paper has been published'/><author><name>_den</name><uri>http://www.blogger.com/profile/14479668554149216392</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='27' height='32' src='http://4.bp.blogspot.com/_3C8mfvf3C3w/Sh6Kq4oYx5I/AAAAAAAAAKw/6PWx7j2vC7s/S220/NES_2853_2.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7815401213250921645.post-180181855679706987</id><published>2011-11-09T14:00:00.000-06:00</published><updated>2011-11-09T14:00:04.222-06:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='San Francisco'/><title type='text'>Dropped out</title><content type='html'>&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;I dropped out from the school and working at a start up company in San Francisco now. I'll&amp;nbsp;continue&amp;nbsp;blogging soon...&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7815401213250921645-180181855679706987?l=gpugeek.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://gpugeek.blogspot.com/feeds/180181855679706987/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=7815401213250921645&amp;postID=180181855679706987' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/180181855679706987'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/180181855679706987'/><link rel='alternate' type='text/html' href='http://gpugeek.blogspot.com/2011/11/dropped-out.html' title='Dropped 
out'/><author><name>_den</name><uri>http://www.blogger.com/profile/14479668554149216392</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='27' height='32' src='http://4.bp.blogspot.com/_3C8mfvf3C3w/Sh6Kq4oYx5I/AAAAAAAAAKw/6PWx7j2vC7s/S220/NES_2853_2.jpg'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7815401213250921645.post-809044382326271014</id><published>2011-05-11T15:55:00.000-05:00</published><updated>2011-05-13T15:26:18.999-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='call'/><category scheme='http://www.blogger.com/atom/ns#' term='Zingaya'/><title type='text'>Call me from the blog!</title><content type='html'>&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;Thanks to my friends from &lt;a href="http://www.zingaya.com/"&gt;Zingaya, Ltd&lt;/a&gt;&amp;nbsp;you now can call me from this blog by clicking the button on the right side (&lt;b&gt;Call me!&lt;/b&gt;). To do so, you just need to have a Flash player and microphone installed. 
You can call from anywhere and the call will be redirected to my cell phone.&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Amazing technology indeed!&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7815401213250921645-809044382326271014?l=gpugeek.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://gpugeek.blogspot.com/feeds/809044382326271014/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=7815401213250921645&amp;postID=809044382326271014' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/809044382326271014'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/809044382326271014'/><link rel='alternate' type='text/html' href='http://gpugeek.blogspot.com/2011/05/call-me-from-blog.html' title='Call me from the blog!'/><author><name>_den</name><uri>http://www.blogger.com/profile/14479668554149216392</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='27' height='32' src='http://4.bp.blogspot.com/_3C8mfvf3C3w/Sh6Kq4oYx5I/AAAAAAAAAKw/6PWx7j2vC7s/S220/NES_2853_2.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7815401213250921645.post-8787496279482270886</id><published>2011-04-08T13:10:00.000-05:00</published><updated>2011-04-08T13:10:08.849-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Smoldyn'/><category scheme='http://www.blogger.com/atom/ns#' term='SpringSim'/><category scheme='http://www.blogger.com/atom/ns#' term='Boston'/><title type='text'>SpringSim 2011</title><content type='html'>&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;I went to Boston for SpringSim 2011 this week 
for presenting GPU Smoldyn. It was fun. I liked the city a lot. I also met lots of interesting people there. You can check out the Smoldyn paper I was presenting and the presentation itself at the following links:&lt;br /&gt;&lt;br /&gt;&lt;a href="http://cssl.ceas.uwm.edu/data/smoldyn_paper.pdf"&gt;Paper&lt;/a&gt;&lt;br /&gt;&lt;a href="http://cssl.ceas.uwm.edu/data/smoldyn_slides.pdf"&gt;Slides&lt;/a&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7815401213250921645-8787496279482270886?l=gpugeek.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://gpugeek.blogspot.com/feeds/8787496279482270886/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=7815401213250921645&amp;postID=8787496279482270886' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/8787496279482270886'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/8787496279482270886'/><link rel='alternate' type='text/html' href='http://gpugeek.blogspot.com/2011/04/springsim-2011.html' title='SpringSim 2011'/><author><name>_den</name><uri>http://www.blogger.com/profile/14479668554149216392</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='27' height='32' src='http://4.bp.blogspot.com/_3C8mfvf3C3w/Sh6Kq4oYx5I/AAAAAAAAAKw/6PWx7j2vC7s/S220/NES_2853_2.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7815401213250921645.post-3080781281509937006</id><published>2011-03-03T17:04:00.000-06:00</published><updated>2011-03-03T17:04:27.325-06:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='MersenneTwister'/><category scheme='http://www.blogger.com/atom/ns#' 
term='CUDA'/><category scheme='http://www.blogger.com/atom/ns#' term='random number'/><category scheme='http://www.blogger.com/atom/ns#' term='xor128'/><category scheme='http://www.blogger.com/atom/ns#' term='GPU'/><title type='text'>Random number generators on GPU</title><content type='html'>&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;Almost every simulation requires random number generation (RNG) for achieving accurate results. In my simulations I used 2 RNGs. &lt;b&gt;MersenneTwister&lt;/b&gt; - has a very long period and can be used in&amp;nbsp;simulations&amp;nbsp;that are supposed to be running for a long time, and &lt;b&gt;XOR128&lt;/b&gt; - an extremely fast RNG, however its period is not as large as MersenneTwister's. I created a small library that contains 3 RNGs: MersenneTwister, XOR128 and Tausworthe generator.&lt;br /&gt;&lt;br /&gt;&lt;a name='more'&gt;&lt;/a&gt;For implementing MersenneTwister I deeply modified NVIDIA's sample. Each thread in a grid runs its own RNG. States are kept in global memory. Each thread is supposed to read a state at the beginning of execution and save it at the end.&amp;nbsp;I have helper classes that do it.&lt;br /&gt;The library is&amp;nbsp;available&amp;nbsp;at &lt;a href="http://cssl.ceas.uwm.edu/data/rng_lib.zip"&gt;&lt;span id="goog_41020443"&gt;&lt;/span&gt;link&lt;span id="goog_41020444"&gt;&lt;/span&gt;&lt;/a&gt;. 
And it contains an example file, however I'm going to put a small tutorial here.&lt;br /&gt;&lt;br /&gt;The library consists of 2 parts - device and host.&lt;br /&gt;&lt;br /&gt;Device code for RNGs is defined in files:&lt;br /&gt;&lt;br /&gt;&lt;b&gt;mtwister.cuh&lt;/b&gt; - for MersenneTwister&lt;br /&gt;&lt;b&gt;rndfast.cuh &lt;/b&gt;&amp;nbsp; &amp;nbsp;- for XOR128 and&amp;nbsp;Tausworthe&lt;br /&gt;&lt;br /&gt;So if you want to use these generators in your projects you just need to include one of these files in your CU file. Then in kernels you can use generators like this (MersenneTwister example):&lt;br /&gt;&lt;pre class="brush: js"&gt;const int maxValue = 1024;&lt;br /&gt;const int tid = blockDim.x*blockIdx.x + &lt;br /&gt;                 threadIdx.x;&lt;br /&gt;&lt;br /&gt;MTGenerator gen(tid);&lt;br /&gt;&lt;br /&gt;int   intNum   = gen.GetInt(maxValue);&lt;br /&gt;float floatNum = gen.GetFloat();&lt;br /&gt;&lt;/pre&gt;If you want to use other generators just use classes&amp;nbsp;&lt;b&gt;TausGenerator&lt;/b&gt; for&amp;nbsp;Tausworthe and&amp;nbsp;&lt;b&gt;Xor128Generator&lt;/b&gt; for XOR128.&lt;br /&gt;&lt;br /&gt;You will also need to initialize seeds. These functions are defined in files:&lt;br /&gt;&lt;br /&gt;&lt;b&gt;mtwister.h&lt;/b&gt; - for MersenneTwister&lt;br /&gt;&lt;b&gt;seedgen.h&lt;/b&gt; &amp;nbsp; - for&amp;nbsp;XOR128 and&amp;nbsp;Tausworthe&lt;br /&gt;&lt;br /&gt;Simply call one of these functions for&amp;nbsp;initialization:&lt;br /&gt;&lt;pre class="brush: js"&gt;void InitGPUTwisters(const char* fname, &lt;br /&gt;                     int numGen, int seed);&lt;br /&gt;void InitFastRngs(int numRngs);&lt;br /&gt;&lt;/pre&gt;And one of these functions for deinitialization:&lt;br /&gt;&lt;pre class="brush: js"&gt;void DeinitGPUTwisters();&lt;br /&gt;void DeinitFastRngs();&lt;br /&gt;&lt;/pre&gt;&lt;div&gt;&lt;br /&gt;Again, there is a good example in the package, so feel free to download and use it. 
If you have any questions, please send me an email or comment. &lt;br /&gt;&lt;br /&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7815401213250921645-3080781281509937006?l=gpugeek.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://gpugeek.blogspot.com/feeds/3080781281509937006/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=7815401213250921645&amp;postID=3080781281509937006' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/3080781281509937006'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/3080781281509937006'/><link rel='alternate' type='text/html' href='http://gpugeek.blogspot.com/2011/03/random-number-generators-on-gpu.html' title='Random number generators on GPU'/><author><name>_den</name><uri>http://www.blogger.com/profile/14479668554149216392</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='27' height='32' src='http://4.bp.blogspot.com/_3C8mfvf3C3w/Sh6Kq4oYx5I/AAAAAAAAAKw/6PWx7j2vC7s/S220/NES_2853_2.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7815401213250921645.post-2941994450600064966</id><published>2011-03-01T23:42:00.000-06:00</published><updated>2011-03-01T23:42:09.209-06:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='CUDA'/><category scheme='http://www.blogger.com/atom/ns#' term='django'/><category scheme='http://www.blogger.com/atom/ns#' term='Smoldyn'/><category scheme='http://www.blogger.com/atom/ns#' term='web'/><category scheme='http://www.blogger.com/atom/ns#' term='GPU'/><title type='text'>Web access to GPU Smoldyn</title><content 
type='html'>&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;In my previous post I described a GPU implementation of Smoldyn. In this post I'm going to describe a web service that provides internet access to it. Using this service a user can start a simulation just by uploading a configuration file describing a model. The service executes this file and uploads screenshots and other output files to the user's profile, so the user can access them. This allows users who don't have access to high-performance GPUs to enjoy all the benefits of the GPU&amp;nbsp;implementation&amp;nbsp;(200x speedup for instance).&lt;br /&gt;&lt;br /&gt;&lt;a name='more'&gt;&lt;/a&gt;The service consists of 2 parts. The first part is a web interface that manages the user's profile and provides access to information; it is implemented using the &lt;a href="http://www.djangoproject.com/"&gt;django&lt;/a&gt; framework. The database it uses is a MySQL database. The second part is an application that executes the simulations themselves. Currently it is a Python script that fetches data from the database, executes simulations using this data (the script executes an original GPU Smoldyn application written in CUDA C++ using OS services). Then the script uploads the output information to the database. 
Current architecture is presented on the image below.&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="https://lh4.googleusercontent.com/-Toy3FVIYlwE/TW3YlDtC3vI/AAAAAAAABAM/ZH3iV-jL9zA/s1600/arch1.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="250" src="https://lh4.googleusercontent.com/-Toy3FVIYlwE/TW3YlDtC3vI/AAAAAAAABAM/ZH3iV-jL9zA/s400/arch1.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;In the&amp;nbsp;nearest&amp;nbsp;future I'm going to improve this service by adding Amazon Simple Storage (SSS) support for storage user files like output files and screenshots. Also I'm going to add a new level - dispatcher level that would be providing jobs for execution nodes, achieving better scalability. The future architecture is presented on the picture below.&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="https://lh5.googleusercontent.com/--DUGfLQ7LCs/TW3X5xeYT4I/AAAAAAAABAI/zIwy-cVKAeU/s1600/arch2.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="245" src="https://lh5.googleusercontent.com/--DUGfLQ7LCs/TW3X5xeYT4I/AAAAAAAABAI/zIwy-cVKAeU/s400/arch2.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;You can try the service by clicking the &lt;a href="http://cssl.ceas.uwm.edu/websim"&gt;link&lt;/a&gt;. 
You would need to register, but it is free and there is no any obligations.&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7815401213250921645-2941994450600064966?l=gpugeek.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://gpugeek.blogspot.com/feeds/2941994450600064966/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=7815401213250921645&amp;postID=2941994450600064966' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/2941994450600064966'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/2941994450600064966'/><link rel='alternate' type='text/html' href='http://gpugeek.blogspot.com/2011/03/web-access-to-gpu-smoldyn.html' title='Web access to GPU Smoldyn'/><author><name>_den</name><uri>http://www.blogger.com/profile/14479668554149216392</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='27' height='32' src='http://4.bp.blogspot.com/_3C8mfvf3C3w/Sh6Kq4oYx5I/AAAAAAAAAKw/6PWx7j2vC7s/S220/NES_2853_2.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='https://lh4.googleusercontent.com/-Toy3FVIYlwE/TW3YlDtC3vI/AAAAAAAABAM/ZH3iV-jL9zA/s72-c/arch1.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7815401213250921645.post-6318391729293665587</id><published>2011-02-23T18:19:00.006-06:00</published><updated>2011-03-03T14:20:13.865-06:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='CUDA'/><category scheme='http://www.blogger.com/atom/ns#' term='reaction'/><category scheme='http://www.blogger.com/atom/ns#' term='simulation'/><category 
scheme='http://www.blogger.com/atom/ns#' term='molecule'/><category scheme='http://www.blogger.com/atom/ns#' term='GPU'/><category scheme='http://www.blogger.com/atom/ns#' term='chemical'/><title type='text'>Biochemical simulations on GPU</title><content type='html'>&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;&lt;div&gt;My second project is a CUDA implementation of &lt;a href="http://www.smoldyn.org/"&gt;Smoldyn&lt;/a&gt; simulator. Smoldyn is cell-scale biochemical simulator which simulates each molecule of interest individually to capture natural stochasticity and for nanometer-scale spatial resolution. This is a particle-based method that allows simulation of chemical molecules with Brownian motion and different reaction types.&lt;/div&gt;&lt;div&gt;&lt;a name='more'&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;The current version of GPU Smoldyn supports the following features:&lt;/div&gt;&lt;div&gt;&lt;ul&gt;&lt;li&gt;Isotropic diffusion;&lt;/li&gt;&lt;li&gt;Zero/First/Second order reactions;&lt;/li&gt;&lt;li&gt;User commands;&lt;/li&gt;&lt;li&gt;Surfaces interactions;&lt;/li&gt;&lt;/ul&gt;&lt;/div&gt;&lt;div&gt;So, using this functionality different molecule to surface interactions can be simulated. Reaction-diffusion systems, like a molecular mutation (A-&amp;gt;B), synthesis of molecules, or even bi-molecular reactions (A+B-&amp;gt;C) can be simulated as well.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Implementation details&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;This simulation is implemented for GPU using CUDA. It is implemented for Linux, so OpenGL was used for visualization.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;I widely used &lt;a href="http://code.google.com/p/thrust/"&gt;Thrust&lt;/a&gt; in this simulation, especially for operations like stream compaction, reduction and scan. For second order reaction processing I needed sorting for spatial hashing. 
At first I used thrust::sort; however, it turned out that NVIDIA's radix sort kernel is much faster than thrust::sort, because in NVIDIA's implementation you can specify the maximum number of sorting bits, which is very convenient for cases when the maximum value for a sorting key is known. In my case, it is the number of cells - 1.&lt;/div&gt;&lt;br /&gt;&lt;div&gt;The tricky aspect of this simulation is dynamic memory allocation, since molecules can die and respawn during the simulation. Every molecule has a type field. In fact, a molecule is described as:&lt;/div&gt;&lt;br /&gt;struct molecule_t&lt;br /&gt;{&lt;br /&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; float3 pos;&lt;br /&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; int &amp;nbsp; &amp;nbsp; &amp;nbsp;type;&lt;br /&gt;};&lt;br /&gt;&lt;br /&gt;&lt;div&gt;A type equal to -1 represents a dead molecule. I allocate enough memory to handle the creation of new particles during the simulation. At the initialization stage the molecule array is filled with a desired number of molecules, and the rest of it is initialized with dead molecules (with type == -1). A pointer to the last alive molecule is maintained in global memory. If during the simulation I need to allocate a molecule, the pointer is advanced using atomicAdd and a new molecule is generated (new molecules are spawned at the end of the array, maintaining proper order of alive molecules). If I need to deallocate a molecule, I just set its type to -1 and then do compaction after the kernel finishes. I use the thrust::remove_if operation to move dead molecules to the end of the array.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;I achieved an average speedup of 200x for this implementation over the original Smoldyn, and this implementation is statistically correct. 
I wrote a paper about this implementation and I'll go to &lt;a href="http://hosting.cs.vt.edu/hpc2011/"&gt;SpringSim 2011&lt;/a&gt; to present it, so if you're interested, you can catch me there (or send me an email). I'll post the paper here after it is published in the proceedings.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Some screenshots&lt;/div&gt;&lt;div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-iAQt5G_ST4A/TWgRLUc92ZI/AAAAAAAAA_c/5WS315VQB9U/s1600/screen2.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="240" src="http://1.bp.blogspot.com/-iAQt5G_ST4A/TWgRLUc92ZI/AAAAAAAAA_c/5WS315VQB9U/s320/screen2.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-ImYjJJYqQNA/TWbY3j2IKjI/AAAAAAAAA-M/SpMsdsKd0XY/s1600/screen3.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="240" src="http://2.bp.blogspot.com/-ImYjJJYqQNA/TWbY3j2IKjI/AAAAAAAAA-M/SpMsdsKd0XY/s320/screen3.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-UZRtuWYGj1Y/TWbY32XHseI/AAAAAAAAA-U/BYvpxvK1ikM/s1600/screen5.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="240" src="http://4.bp.blogspot.com/-UZRtuWYGj1Y/TWbY32XHseI/AAAAAAAAA-U/BYvpxvK1ikM/s320/screen5.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: 
center;"&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7815401213250921645-6318391729293665587?l=gpugeek.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://gpugeek.blogspot.com/feeds/6318391729293665587/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=7815401213250921645&amp;postID=6318391729293665587' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/6318391729293665587'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/6318391729293665587'/><link rel='alternate' type='text/html' href='http://gpugeek.blogspot.com/2011/02/biochemical-simulations-on-gpu.html' title='Biochemical simulations on GPU'/><author><name>_den</name><uri>http://www.blogger.com/profile/14479668554149216392</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='27' height='32' src='http://4.bp.blogspot.com/_3C8mfvf3C3w/Sh6Kq4oYx5I/AAAAAAAAAKw/6PWx7j2vC7s/S220/NES_2853_2.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-iAQt5G_ST4A/TWgRLUc92ZI/AAAAAAAAA_c/5WS315VQB9U/s72-c/screen2.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7815401213250921645.post-5423873426063774556</id><published>2011-02-21T21:56:00.002-06:00</published><updated>2011-03-08T14:28:02.834-06:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='CUDA'/><category scheme='http://www.blogger.com/atom/ns#' term='DSMC'/><category scheme='http://www.blogger.com/atom/ns#' term='GPU'/><title type='text'>GPU-based Direct Simulation Monte Carlo</title><content type='html'>&lt;div 
dir="ltr" style="text-align: left;" trbidi="on"&gt;It's been a while since my last post. Lots of things have changed. Now I'm a PhD student at UW-Milwaukee and a research assistant at the Complex Systems Simulation Lab. In this post I will describe my first project which is a GPU (CUDA) implementation of DSMC method for particle simulations.&lt;br /&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;a href="http://en.wikipedia.org/wiki/Direct_simulation_Monte_Carlo"&gt;DSMC&lt;/a&gt; is a computational method for fluid mechanics simulation. Simply put, it can be used for simulation of interaction between gas molecules and a solid body. For example, the case, when a space ship enters the atmosphere can be simulated. The method is relatively simple, but it requires simulation of huge number of molecules for an accurate result, so it is a good candidate for parallelization.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;a name='more'&gt;&lt;/a&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Implementation details&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Since it is a Monte Carlo-based method, it requires a random number generator. The Mersenne Twister is one of the best RNGs at the moment, however it has a large state which doesn't make it very suitable for GPU, however this RNG was used in the simulation. I modified the NVIDIA's &lt;a href="http://developer.download.nvidia.com/compute/cuda/2_2/sdk/website/projects/MersenneTwister/doc/MersenneTwister.pdf"&gt;sample&lt;/a&gt;, so each thread in the simulation can run its own twister. Also &lt;a href="http://www.jstatsoft.org/v08/i14/paper"&gt;XOR128&lt;/a&gt; was implemented as well and is used for very large simulations.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;For inter-particle collisions I used &lt;a href="http://gab.com.au/"&gt;Bird's&lt;/a&gt; collision method, which is currently the most accurate one. 
Since it is a particle-based simulation, I adopted particles' hashing method from NVIDIA's particles sample (using radixsort).&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;During the simulation particles can collide with an obstacle (polygonal object). For more efficient processing this object is subdivided using a regular grid. For AABB to triangle testing I used very fast and accurate &lt;a href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.2.4803&amp;amp;rep=rep1&amp;amp;type=pdf"&gt;method&lt;/a&gt; by Moller. There is also very nice ray to triangle testing method by the same author, I used this &lt;a href="http://www.cs.virginia.edu/~gfx/Courses/2003/ImageSynthesis/papers/Acceleration/Fast%20MinimumStorage%20RayTriangle%20Intersection.pdf"&gt;method &lt;/a&gt;as well. The simulation can easily run in a real time with systems up to 16 mln particles and with a complex polygonal object in it.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;As a result, I achieved a speedup of 65x over a CPU implementation and got some nice screenshots:&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;a href="https://lh6.googleusercontent.com/_3C8mfvf3C3w/TWK1heCmOPI/AAAAAAAAA8k/XJjtN1TNzys/s800/DSMC_running.png" onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}"&gt;&lt;img alt="" border="0" src="https://lh6.googleusercontent.com/_3C8mfvf3C3w/TWK1heCmOPI/AAAAAAAAA8k/XJjtN1TNzys/s800/DSMC_running.png" style="cursor: hand; cursor: pointer; float: left; height: 235px; margin: 0 10px 10px 0; width: 400px;" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="https://lh5.googleusercontent.com/_3C8mfvf3C3w/TWK1h8PPlKI/AAAAAAAAA8o/IFlBtL8g748/s800/DSMC_running2.png" onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}"&gt;&lt;img alt="" border="0" src="https://lh5.googleusercontent.com/_3C8mfvf3C3w/TWK1h8PPlKI/AAAAAAAAA8o/IFlBtL8g748/s800/DSMC_running2.png" 
style="cursor: hand; cursor: pointer; float: left; height: 235px; margin: 0 10px 10px 0; width: 400px;" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;/div&gt;&lt;div&gt;&lt;div&gt;In these screenshots a small window in a top right corner shows per-cell concentration. Red is high, blue is low, green is in between. The blue cells in the simulation show the current grid slice that is presented in the concentration window.&lt;/div&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;I also submitted a paper to SIMULATION journal about this method. It got accepted, so I'll post it here when the journal is released. I will also describe particular aspects of this implementation in more details in my further posts, since it will take lots of space.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;This was my first project, but I have some more to describe, so stay tuned :)&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7815401213250921645-5423873426063774556?l=gpugeek.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://gpugeek.blogspot.com/feeds/5423873426063774556/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=7815401213250921645&amp;postID=5423873426063774556' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/5423873426063774556'/><link rel='self' type='application/atom+xml' 
href='http://www.blogger.com/feeds/7815401213250921645/posts/default/5423873426063774556'/><link rel='alternate' type='text/html' href='http://gpugeek.blogspot.com/2011/02/gpu-based-direct-simulation-monte-carlo.html' title='GPU-based Direct Simulation Monte Carlo'/><author><name>_den</name><uri>http://www.blogger.com/profile/14479668554149216392</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='27' height='32' src='http://4.bp.blogspot.com/_3C8mfvf3C3w/Sh6Kq4oYx5I/AAAAAAAAAKw/6PWx7j2vC7s/S220/NES_2853_2.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='https://lh6.googleusercontent.com/_3C8mfvf3C3w/TWK1heCmOPI/AAAAAAAAA8k/XJjtN1TNzys/s72-c/DSMC_running.png' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7815401213250921645.post-2791797468526045151</id><published>2009-05-25T07:04:00.000-05:00</published><updated>2011-02-21T21:26:55.383-06:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='VNC'/><category scheme='http://www.blogger.com/atom/ns#' term='Mac'/><category scheme='http://www.blogger.com/atom/ns#' term='Apple'/><title type='text'>VNC for Mac</title><content type='html'>Set up remote access to my Mac. Server is VineVNC for Mac OS X. Client is TightVNC for WinXP. Works well. 
Now I can develop iPhone applications from around the world :) (even from iPhone with Mocha VNC).&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7815401213250921645-2791797468526045151?l=gpugeek.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://gpugeek.blogspot.com/feeds/2791797468526045151/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=7815401213250921645&amp;postID=2791797468526045151' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/2791797468526045151'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/2791797468526045151'/><link rel='alternate' type='text/html' href='http://gpugeek.blogspot.com/2009/05/vnc-for-mac.html' title='VNC for Mac'/><author><name>_den</name><uri>http://www.blogger.com/profile/14479668554149216392</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='27' height='32' src='http://4.bp.blogspot.com/_3C8mfvf3C3w/Sh6Kq4oYx5I/AAAAAAAAAKw/6PWx7j2vC7s/S220/NES_2853_2.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7815401213250921645.post-5381882882595136503</id><published>2009-05-24T06:01:00.000-05:00</published><updated>2011-02-21T21:24:38.907-06:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='cross-compilation'/><category scheme='http://www.blogger.com/atom/ns#' term='Java'/><category scheme='http://www.blogger.com/atom/ns#' term='iPhone'/><category scheme='http://www.blogger.com/atom/ns#' term='google'/><title type='text'>Java bytecode cross-compilation for iPhone</title><content type='html'>Just have seen a pretty interesting lecture about Java 
bytecode cross compilation for iPhone on Google Tech Talks. I assume this may let iPhone developers not to study ObjC (which pretty nice though). I guess it would be interesting to try load such cross-compiled application to real device or even to AppStore. Maybe I will try to do that.&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;a href="http://www.youtube.com/watch?v=s8nMpi5-P-I"&gt;Lecture&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;a href="http://plum.sfsu.edu/xml11-external/java4iphone-google.pdf"&gt;Slides&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Check it out later :)&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7815401213250921645-5381882882595136503?l=gpugeek.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://gpugeek.blogspot.com/feeds/5381882882595136503/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=7815401213250921645&amp;postID=5381882882595136503' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/5381882882595136503'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/5381882882595136503'/><link rel='alternate' type='text/html' href='http://gpugeek.blogspot.com/2009/05/java-code-cross-compilation-for-iphone.html' title='Java bytecode cross-compilation for iPhone'/><author><name>_den</name><uri>http://www.blogger.com/profile/14479668554149216392</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='27' height='32' 
src='http://4.bp.blogspot.com/_3C8mfvf3C3w/Sh6Kq4oYx5I/AAAAAAAAAKw/6PWx7j2vC7s/S220/NES_2853_2.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7815401213250921645.post-3252855100254849523</id><published>2008-10-08T05:15:00.000-05:00</published><updated>2008-10-08T05:17:14.017-05:00</updated><title type='text'>Hello World!</title><content type='html'>Hi, here is a first message in my shiny new blog :)&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Check it out later!&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7815401213250921645-3252855100254849523?l=gpugeek.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://gpugeek.blogspot.com/feeds/3252855100254849523/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=7815401213250921645&amp;postID=3252855100254849523' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/3252855100254849523'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7815401213250921645/posts/default/3252855100254849523'/><link rel='alternate' type='text/html' href='http://gpugeek.blogspot.com/2008/10/hello-world.html' title='Hello World!'/><author><name>_den</name><uri>http://www.blogger.com/profile/14479668554149216392</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='27' height='32' src='http://4.bp.blogspot.com/_3C8mfvf3C3w/Sh6Kq4oYx5I/AAAAAAAAAKw/6PWx7j2vC7s/S220/NES_2853_2.jpg'/></author><thr:total>2</thr:total></entry></feed>
