programming the gpu on cg szirmay-kalos lászló email: [email protected] web: szirmay
TRANSCRIPT
Programming the GPU on Cg
Szirmay-Kalos László, email: [email protected]
Web: http://www.iit.bme.hu/~szirmay
Hardware
GPU Framebuffer
display
CPU
Memory
I/O
Graphics card
Program
OpenGL API
OpenGL API
glLightfv(GL_LIGHT0, GL_DIFFUSE, I); glMaterialfv(GL_FRONT, GL_DIFFUSE, kd); glViewport(0, 0, width, height); gluLookAt(ex, ey, ez, lax, lay, laz, upx, upy, upz); glScalef(sx, sy, sz); glTranslatef(px, py, pz); glRotatef(ang, axisx, axisy, axisz);
glBegin(GL_TRIANGLES); glNormal3f(nx1, ny1, nz1); glColor3f(r1, g1, b1); glTexCoord2f(u1, v1);
glVertex3f(x1,y1,z1);…
glEnd( );
CPU GPU
StateUniform variables
GeometryVertex properties
Vertices
PA
SS
Rendering Pipeline
Virtual world; Camera space, illumination
Perspective transformation + Clipping + Homogeneous div.
1.2.
Viewport transf+Rasterization+interpolationdisplay
color depth
MODELVIEW PROJECTION
Texture mapping
(u1, v1)
(u2, v2)
(u3, v3)
x1,y1,z1
x2,y2,z2
x3,y3,z3
color
Texturing hardware
(u3, v3)
(u1, v1)
(u2, v2)
Linear interpolation: (u, v)
Texture object in GPU memory
Why is linear interpolation our friend?
X
Y
I I(X,Y) = aX + bY + c
I(X,Y)
I(X+1,Y) = I(X,Y) + a
(X1,Y1,I1)
(X2,Y2,I2)
(X3,Y3,I3)
I(X,Y)
X counter I register
a
X
CLK
GPU hardware architecture
Interface
Transform+Illumination
Clipping + Hom.division+ Viewport transform
Projection + Rasterization + Linear interpolation
Texturing
Compositing (Z-buffer, transparency)
Texturememory
Early Z-cull
vertices
triangles
fragments
VertexShader
FragmentShader
Why is it fast? Stream processing
Proc 2
Proc 1
Proc 1
Proc 21
Proc 22
Pipelining
Parallelism
Elements are processed INDEPENDENTLY
• No internal storage
• Parallel execution without synchronization
Vertex shader and its neighborhood
Clipping: -w<X<w, -w<Y<w, -w<Z<w, 0<color<1
State: Transforms, Light sources
Materials
POSITION, NORMAL, COLOR0, TEXCOORD0,…
glVertex glNormal glColor glTexCoord
glBegin(GL_TRIANGLES)
glEnd( )
POSITION, COLOR0, TEXCOORD0, … for triangle vertices
Homogeneous division: x=X/w, y=Y/w, z=Z/w
POSITION, COLOR0, TEXCOORD0,… for triangle vertices
*MVP
*MV *MVIT
IlluminationVertex shader
Viewport transform: xv = center.x + viewsize.x * x / 2
CPU
GPU
Standard vertex shader (Cg)
struct ins {
    float4 position : POSITION;   // glVertex
    float3 normal   : NORMAL;     // glNormal
    float4 color    : COLOR0;     // glColor
    float2 texcoord : TEXCOORD0;  // glTexCoord
};
struct outs {
    float4 hposition : POSITION;
    float4 color     : COLOR0;
    float2 texcoord  : TEXCOORD0;
};
outs main( ins IN, uniform float4x4 MVP : state.matrix.mvp ) {
    outs OUT;
    OUT.hposition = mul(MVP, IN.position);
    OUT.texcoord = IN.texcoord;
    OUT.color = IN.color;
    return OUT;
}
glDisable(GL_LIGHTING );
Positional light source
outs main( ins IN,
           uniform float4x4 MV, uniform float4x4 MVIT, uniform float4x4 MVP, uniform float3 lightpos,
           uniform float4 Idiff, Iamb, Ispec, uniform float4 em, ka, kd, ks,
           uniform float shininess ) {
    outs OUT;
    OUT.hposition = mul(MVP, IN.position);
    float3 N = mul(MVIT, IN.normal).xyz;
    N = normalize(N); // glEnable(GL_NORMALIZE)
    float3 cpos = mul(MV, IN.position).xyz;
    float3 L = normalize(lightpos - cpos);
    float3 V = normalize(-cpos); // the eye is at the origin of camera space
    float3 H = normalize(L + V);
    OUT.color = em + Iamb * ka + Idiff * kd * saturate(dot(N, L)) + Ispec * ks * pow(saturate(dot(N, H)), shininess);
    return OUT;
}
glEnable(GL_LIGHTING );
NL
V
Fragment shader and its neighborhood
State: Texture id,
texturing environment
POSITION, COLOR0, TEXCOORD0,… for triangle vertices
POSITION, COLOR
Compositing: blending, z-buffering
Projection, Rasterization and linear interpolation
Fragment shader
Texturing:
tex2D(u,v)*color0
POSITION, COLOR0, TEXCOORD0 for fragments
Texture memory
Frame buffer
Z-cull
Standard fragment shader
glDisable(GL_TEXTURE_2D);
glEnable(GL_TEXTURE_2D); with GL_REPLACE mode
float4 main(in float3 color : COLOR0) : COLOR
{return color;
}
float4 main(in float2 texcoord : TEXCOORD0, in float3 color : COLOR0, uniform sampler2D texture_map ) : COLOR
{ return tex2D(texture_map, texcoord); }
What can we do with it?
Vertex shader:
– General BRDF models
– Spec. transformations, smooth binding
– Waving, procedural animation
Fragment shader:
– Phong shading, shadows
– bump/parallax/displacement/reflection mapping
Both:
– General purpose computation
Example 1: Phong shading instead of Gouraud shading
ambientdiffuse
specular
Gouraud versus Phong shading
Gouraud Phong
Phong
Gouraud
Gouraud shading
CPU program
Vertex shader
Pixelshader
PositionNormal
TransformationsMaterialsLights
TransformedpositionColor
Rasterization Interpolation
Illumination
Interpolated color
Phong shading
CPU program
Vertex shader
Pixelshader
PositionNormal
TransformationsLight position
Transf.positionTransf.normalViewLight
Rasterization Interpolation
Illumination
Interpolated NormalViewLight
MaterialsLight intensity
Programs
.cpp CPU program:
– Capability query of the GPU (profile)
– Definition of the Shader environment
– Vertex/fragment program load from file and compile: CREATE
– Vertex/fragment program upload to the GPU: LOAD
– Selection of the current Vertex/fragment program: BIND
– Uniform vertex/fragment variable definition
– Uniform vertex/fragment variable setting
– Non-uniform variables set (glVertex, glColor, glTexCoord…)
.cg vertex program
– Fragment program’s non-uniform variables + homogeneous position
.cg fragment program
– Color output
Initialization
Display
CPU program - Initialization
#include <Cg/cgGL.h> // cg functions
CGparameter Lightpos, Shine, Ks, Kd; // uniform pars
main( ) { CGprofile vertexProf, fragmentProf; // profiles vertexProf = cgGLGetLatestProfile(CG_GL_VERTEX); fragmentProf = cgGLGetLatestProfile(CG_GL_FRAGMENT);
cgGLEnableProfile(vertexProf); cgGLEnableProfile(fragmentProf);
CGcontext shaderContext = cgCreateContext();
Vertex program loading
CGprogram vertexProgram = cgCreateProgramFromFile(
shaderContext, CG_SOURCE,
"vertex.cg", vertexProf,
NULL, NULL);
cgGLLoadProgram(vertexProgram); // upload to the GPU
cgGLBindProgram(vertexProgram); // this program is to run
// vertex program uniform parameters
Lightpos = DefineCGParameter(vertexProgram, "lightcam");
Fragment program loading
CGprogram fragmentProgram = cgCreateProgramFromFile(
shaderContext, CG_SOURCE,
"fragment.cg", fragmentProf,
NULL, NULL);
cgGLLoadProgram(fragmentProgram); // upload to the GPU
cgGLBindProgram(fragmentProgram); // this program is to run
// fragment program uniform parameters
Shine = DefineCGParameter(fragmentProgram, "shininess");
Kd = DefineCGParameter(fragmentProgram, "kd");
Ks = DefineCGParameter(fragmentProgram, "ks");
… OpenGL initialization
CPU program - OpenGL display
void Display( ) {
// state (uniform) parameter setting glLoadIdentity(); gluLookAt(0, 0, -10, 0, 0, 0, 0, 1, 0); glRotatef(angle, 0, 1, 0);
// uniform parameter setting cgGLSetParameter3f(Lightpos, 10, 20, 30); cgGLSetParameter1f(Shine, 40); cgGLSetParameter3f(Kd, 1, 0.8, 0.2); cgGLSetParameter3f(Ks, 2, 2, 2);
// non uniform parameters glBegin( GL_TRIANGLES ); for( … ) {
glNormal3f(nx, ny, nz); // NORMAL registerglVertex3f(x, y, z); // POSITION register
} glEnd();}
Phong shading: vertex shader
struct outs {
    float4 hposition : POSITION;
    float3 normal : TEXCOORD0;
    float3 view : TEXCOORD1;
    float3 light : TEXCOORD2;
};
outs main(
    in float4 position : POSITION,
    in float4 normal : NORMAL,
    uniform float4x4 MVP : state.matrix.mvp,
    uniform float4x4 MV : state.matrix.modelview,
    uniform float4x4 MVIT : state.matrix.modelview.invtrans,
    uniform float3 lightcam
) {
    outs OUT;
    OUT.hposition = mul(MVP, position);
    float3 poscam = mul(MV, position).xyz;
    OUT.normal = mul(MVIT, normal).xyz;
    OUT.light = lightcam - poscam;
    OUT.view = -poscam;
    return OUT;
}
VertexShader
NL
V
Phong shading: fragment shader
float3 main( in float3 normal : TEXCOORD0,
in float3 view : TEXCOORD1,in float3 light : TEXCOORD2,uniform float shininess,uniform float3 kd,uniform float3 ks ) : COLOR
{normal = normalize(normal);view = normalize(view);light = normalize(light);
float3 half = normalize(view + light);float3 color = kd * saturate(dot(normal, light)) +
ks * pow( saturate(dot(normal, half)), shininess );return color;
}
fragmentshader
Example 2: Refraction
Example 2: Refraction
Result
Refraction computation
CPU program
Vertex shader
Pixelshader
PositionNormal
TransformsIndex of refraction
Transf. posRefraction direction
Environment map id
RasterizationInterpolation
Env.Maptexels
Interpolated Refractiondirection
Env.map lookup
Refraction: vertex shader
struct outs { float4 hPosition : POSITION;
float3 refractdir : TEXCOORD0;};
outs main( in float4 position : POSITION, in float4 normal : NORMAL, uniform float4x4 MVP, uniform float4x4 MV,
uniform float4x4 MVIT,uniform float n
) {
outs OUT; OUT.hPosition = mul(MVP, position);
float3 view = normalize( mul(MV, position).xyz ); float3 normcam = normalize( mul(MVIT, normal).xyz );
OUT.refractdir = refract(view, normcam, n); return OUT;}
VertexShader
Refraction: fragment shader
float3 main( in float3 refractdir : TEXCOORD0, uniform samplerCUBE envMap ) : COLOR
{ return texCUBE(envMap, refractdir).rgb; }
fragmentshader
Pixelcolor
Keyframe character animation
Mesh morphing:
t= 0
t= 1
Two enclosing keys
Time: t
vertices
Linear interpolation of
the vertices
Mesh deformation
Bone animation
Complete animation
Example 3: Bone animation, rigid and smooth binding
Rigid Smooth
Smooth binding: vertex shader
outs main(in float4 pos : POSITION,
          in float4 indices : COLOR0, in float4 weights : NORMAL,
          uniform float4x4 MVP, uniform float3x4 bones[30] ) {
    outs OUT;
    float4 tpos = float4(0, 0, 0, 0);
    for (float i = 0; i < 4; i++) {
        tpos += weights.x * mul(bones[indices.x], pos);
        indices = indices.yzwx;
        weights = weights.yzwx;
    }
    OUT.hPosition = mul(MVP, tpos);
    return OUT;
}
Stream processing
Proc. 1 Proc. 2
Elements are processed INDEPENDENTLY
– Pipelining
– Parallelization
– No internal storage
Stream processor types
Map
Amplify
Reduce
Sum
GPGPU stream programming
Clipping
Triangle setup + rasterization+
Linear interpolation
Compositing
Texturememory
VertexShader
PixelShader
Mapping:Change of stream element data
Mapping
Framebuffer
CPUVertices + properties:Input stream of elements 13 x 4 floats
Conditional reduction
Amplification
Sum + min + reduction
Input/Output and coupling
Input
– stream of vertices and properties
– Texture memory
Output
– Frame buffer
– Texture memory
feedback
Mapping algorithms onto the GPU
Problem 1
Globals globals;
for(int i = 0; i < N; i++) { oarray[i] = Computation( iarray[i], globals ); }
2D array (texture) is available:
u = (float)(i / M) / M; v = (float)(i % M) / M; oarray[u][v] = Computation( iarray[u][v], globals );
Globals are uniform parameters
Output array goes to a texture or to the frame buffer
Input array is either a texture or vertex data
Solution 1: Input array is vertex data
Globals globals;
for(int i = 0; i < N; i++) { oarray[i] = Computation( iarray[i], globals ); }
CPU program:
GlobalPar = DefineCGParameter(vertexProg, "globals");
cgGLSetParameter4f(GlobalPar, 10, 20, 30, 40);
glViewport(0, 0, M, M);
glBegin(GL_POINTS);
for(int i = 0; i < N; i++) { // M * M > N
    float x = (float)(i / M) / M * 2 - 1; // -1..1
    float y = (float)(i % M) / M * 2 - 1; // -1..1
    glColor4fv( &iarray[i] );
    glVertex2f(x, y); // POSITION
}
glEnd( );
void main( in float2 index : POSITION, in float4 iarray : COLOR0, out float4 hpos : POSITION, out float4 oarray : TEXCOORD0, uniform float4 globals ) {
    hpos = float4(index, 0, 1);
    oarray = Computation( iarray, globals );
}
Solution 1: Vertex shader computing
Globals globals;
for(int i = 0; i < N; i++) { oarray[i] = Computation( iarray[i], globals ); }
Vertex shader
float4 main( in float4 oarray : TEXCOORD0 ) : COLOR { return oarray; }
Fragment shader
Solution 2: Fragment shader computing
Globals globals;
for(int i = 0; i < N; i++) { oarray[i] = Computation( iarray[i], globals ); }
Vertex shader
float4 main( in float4 iarray : TEXCOORD0, uniform float4 globals ) : COLOR { return Computation( iarray, globals ); }
Fragment shader
void main( in float2 index : POSITION, in float4 iarray : COLOR0, out float4 hpos : POSITION, out float4 array : TEXCOORD0) { hpos = float4(index, 0, 1); array = iarray; }
Solution 3: Input array is in texture
Globals globals;
for(int i = 0; i < N; i++) { oarray[i] = Computation( iarray[i], globals ); }
CPU program:
glViewport(0, 0, M, M);cgGLSetParameter4f(GlobalPar, 10, 20, 30, 40);
glBegin(GL_QUADS); glTexCoord2f(0, 0); glVertex2f(-1, -1); glTexCoord2f(0, 1); glVertex2f(-1, 1); glTexCoord2f(1, 1); glVertex2f( 1, 1); glTexCoord2f(1, 0); glVertex2f( 1, -1);glEnd( );
Solution 3: Input array is in texture
Globals globals;
for(int i = 0; i < N; i++) { oarray[i] = Computation( iarray[i], globals ); }
Vertex shader
float4 main( in float4 iindex : TEXCOORD0, uniform float4 globals, uniform sampler2D iarraytex ) : COLOR { float4 iarray = tex2D(iarraytex, iindex); return Computation( iarray, globals ); }
Fragment shader
void main( in float2 oindex : POSITION, in float2 iindex : TEXCOORD0, out float4 hpos : POSITION, out float2 index : TEXCOORD0 ) { hpos = float4(oindex, 0, 1); index = iindex; }
Problem 2
Globals globals;
for(int i = 0; i < N; i++) { int j = IarrayIdx( iarray, i, globals); oarray[i] = Computation( iarray[j], globals ); }
Vertex shader
float4 main( in float4 iindex : TEXCOORD0, uniform float4 globals, uniform sampler2D iarraytex ) : COLOR { float2 j = IarrayIdx(iarraytex, iindex, globals); float4 iarray = tex2D(iarraytex, j); return Computation( iarray, globals ); }
Fragment shader
void main( in float2 oindex : POSITION, in float2 iindex : TEXCOORD0, out float4 hpos : POSITION, out float2 index : TEXCOORD0 ) { hpos = float4(oindex, 0, 1); index = iindex; }
Problem 3
Globals globals1, globals2;
for(int i = 0; i < N; i++) { int j = OarrayIdx(i, globals1); oarray[j] = Computation( iarray[i], globals2 ); }
Vertex shader
float4 main( in float4 iindex : TEXCOORD0, uniform float4 globals2, uniform sampler2D iarraytex ) : COLOR { float4 iarray = tex2D(iarraytex, iindex); return Computation( iarray, globals2 ); }
Fragment shader
void main( in float2 oindex : POSITION, in float2 iindex : TEXCOORD0, out float4 hpos : POSITION, out float2 index : TEXCOORD0, uniform float4 globals1 ) { float2 newoindex = OarrayIdx(iindex, globals1); hpos = float4(newoindex * 2 - float2(1,1), 0, 1); index = iindex; }
Other problems
Globals globals;
float sum = 0;
for(int i = 0; i < N; i++) { sum += Computation( iarray[i] ); }
Globals globals;
float min = MAX;
for(int i = 0; i < N; i++) { c = Computation( iarray[i] ); if (min > c) min = c; }
Ray tracing on the GPU
Ray tracing:
for each ray do t = infinity for each triangle do
tnew = Intersect(triangle, ray)if (tnew < t) t = tnew
endfor hit[ray] = ray.o + ray.dir * tendfor
Problems: • two loops - all elements with all elements• t is a global variable
Input stream of geometry
Texturesz-buffer
Ray engine
Input texture: rays
Combination: a triangle with each ray
pixels
A) a triangle is a point, and pixel shader loopsB) a triangle is a full screen quad, pixel shader intersects a triangle with a ray
Input stream: triangles
Output texture: hits
Ray engine
CPU program
Vertex shader
Pixelshader
“Triangles”as full screenquads
“Triangles”as full screenquads
Ray texture ids
RasterizationInterpolation
Rays in Texture
maps
Intersectionbetween onetriangle anda ray
Trianglesas many timesas pixelsthe quad has
CPU: triangles as full screen quads
Triangle triang[ntriangles];
void Display( ) {
    ...
    glBegin( GL_QUADS );
    for(int i = 0; i < ntriangles; i++) {
        glMultiTexCoord3fARB(GL_TEXTURE1_ARB, // TEXCOORD1
            triang[i].v1.x, triang[i].v1.y, triang[i].v1.z);
        glMultiTexCoord3fARB(GL_TEXTURE2_ARB, // TEXCOORD2
            triang[i].v2.x, triang[i].v2.y, triang[i].v2.z);
        glMultiTexCoord3fARB(GL_TEXTURE3_ARB, // TEXCOORD3
            triang[i].v3.x, triang[i].v3.y, triang[i].v3.z);
        glTexCoord2f(0,0); glVertex3f(-1,-1,0); // TEXCOORD0,POSITION
        glTexCoord2f(0,1); glVertex3f(-1, 1,0); // TEXCOORD0,POSITION
        glTexCoord2f(1,1); glVertex3f( 1, 1,0); // TEXCOORD0,POSITION
        glTexCoord2f(1,0); glVertex3f( 1,-1,0); // TEXCOORD0,POSITION
    }
    glEnd();
}
Vertex shader does “nothing”
struct outs {
    float4 hposition : POSITION;
    float2 rayuv : TEXCOORD0;
    float3 r1 : TEXCOORD1;
    float3 r2 : TEXCOORD2;
    float3 r3 : TEXCOORD3;
};
outs main( in float3 position : POSITION, in float2 rayuv : TEXCOORD0, in float3 r1 : TEXCOORD1, in float3 r2 : TEXCOORD2, in float3 r3 : TEXCOORD3 ) {
    outs OUT;
    OUT.r1 = r1;
    OUT.r2 = r2;
    OUT.r3 = r3;
    OUT.rayuv = rayuv;
    OUT.hposition = float4(position, 1);
    return OUT;
}
Triangle-ray intersection
1. Plane intersection: $p = \mathrm{ray}_o + \mathrm{ray}_{dir} \cdot t$, $t > 0$, $(p - r_1) \cdot n = 0$,
normal: $n = (r_2 - r_1) \times (r_3 - r_1)$
2. Is the intersection inside the triangle? $((r_2 - r_1) \times (p - r_1)) \cdot n > 0$, $((r_3 - r_2) \times (p - r_2)) \cdot n > 0$, $((r_1 - r_3) \times (p - r_3)) \cdot n > 0$
r1 r1 r2
p
r3
$t = \dfrac{(r_1 - \mathrm{ray}_o) \cdot n}{\mathrm{ray}_{dir} \cdot n}$
Pixel shader: ray-triangle intersection
void main(in float2 rayuv : TEXCOORD0, // ray index
          in float3 r1 : TEXCOORD1,    // vertex 1
          in float3 r2 : TEXCOORD2,    // vertex 2
          in float3 r3 : TEXCOORD3,    // vertex 3 of triang
          out float3 p : COLOR,        // hit point to texture
          out float t : DEPTH,         // z buffer finds the min
          uniform sampler2D rayorgs,   // array of rays
          uniform sampler2D raydirs,
          uniform float maxdepth) {
    float3 rayo = tex2D(rayorgs, rayuv); // ray pars
    float3 raydir = tex2D(raydirs, rayuv);
    float3 normal = cross(r2 - r1, r3 - r1);
    t = dot(r1 - rayo, normal) / dot(raydir, normal);
    p = rayo + raydir * t;
    if (dot(cross(r2-r1, p-r1), normal) < 0 ||
        dot(cross(r3-r2, p-r2), normal) < 0 ||
        dot(cross(r1-r3, p-r3), normal) < 0) t = 2; // ignore
    else t /= maxdepth;
}