diff -a -x *.exe -x *CVS* -x *.o -u -r -N ode/Makefile ode.step/Makefile
--- ode/Makefile	Fri Jul  4 23:05:56 2003
+++ ode.step/Makefile	Thu Jul 10 17:36:54 2003
@@ -55,6 +55,7 @@
 	ode/src/mass.cpp \
 	ode/src/ode.cpp \
 	ode/src/step.cpp \
+	ode/src/stepfast.cpp \
 	ode/src/lcp.cpp \
 	ode/src/joint.cpp \
 	ode/src/timer.cpp \
@@ -111,6 +112,7 @@
 	ode/test/test_collision.cpp \
 	ode/test/test_boxstack.cpp \
 	ode/test/test_buggy.cpp \
+	ode/test/test_crash.cpp \
 	ode/test/test_joints.cpp \
 	ode/test/test_space.cpp \
 	ode/test/test_I.cpp \
diff -a -x *.exe -x *CVS* -x *.o -u -r -N ode/Makefile.deps ode.step/Makefile.deps
--- ode/Makefile.deps	Mon Nov 11 00:43:20 2002
+++ ode.step/Makefile.deps	Thu Jul 10 17:36:54 2003
@@ -97,6 +97,28 @@
   include/ode/timer.h \
   include/ode/matrix.h \
   ode/src/lcp.h
+ode/src/stepfast.o: \
+  ode/src/stepfast.cpp \
+  ode/src/objects.h \
+  include/ode/common.h \
+  include/ode/config.h \
+  include/ode/error.h \
+  include/ode/memory.h \
+  include/ode/mass.h \
+  ode/src/array.h \
+  ode/src/joint.h \
+  include/ode/contact.h \
+  ode/src/obstack.h \
+  include/ode/odemath.h \
+  include/ode/rotation.h \
+  include/ode/timer.h \
+  include/ode/matrix.h \
+  include/ode/misc.h \
+  include/ode/odecpp.h \
+  include/ode/collision_space.h \
+  include/ode/collision.h \
+  include/ode/odecpp_collision.h \
+  ode/src/lcp.h
 ode/src/lcp.o: \
   ode/src/lcp.cpp \
   include/ode/common.h \
@@ -430,6 +452,27 @@
   include/drawstuff/version.h
 ode/test/test_buggy.o: \
   ode/test/test_buggy.cpp \
+  include/ode/ode.h \
+  include/ode/config.h \
+  include/ode/common.h \
+  include/ode/error.h \
+  include/ode/contact.h \
+  include/ode/memory.h \
+  include/ode/odemath.h \
+  include/ode/matrix.h \
+  include/ode/timer.h \
+  include/ode/rotation.h \
+  include/ode/mass.h \
+  include/ode/misc.h \
+  include/ode/objects.h \
+  include/ode/odecpp.h \
+  include/ode/collision_space.h \
+  include/ode/collision.h \
+  include/ode/odecpp_collision.h \
+  include/drawstuff/drawstuff.h \
+  include/drawstuff/version.h
+ode/test/test_crash.o: \
+  ode/test/test_crash.cpp \
   include/ode/ode.h \
   include/ode/config.h \
   include/ode/common.h \
diff -a -x *.exe -x *CVS* -x *.o -u -r -N ode/include/ode/objects.h ode.step/include/ode/objects.h
--- ode/include/ode/objects.h	Tue Jul  1 12:52:08 2003
+++ ode.step/include/ode/objects.h	Thu Jul 10 17:37:05 2003
@@ -43,6 +43,9 @@
 void dWorldSetCFM (dWorldID, dReal cfm);
 dReal dWorldGetCFM (dWorldID);
 void dWorldStep (dWorldID, dReal stepsize);
+void dWorldStepFast (dWorldID, dReal stepsize, int iterations);
+void dWorldSetAutoEnableDepth(dWorldID world, int autodepth);
+int dWorldGetAutoEnableDepth(dWorldID world);
 void dWorldImpulseToForce (dWorldID, dReal stepsize,
 			   dReal ix, dReal iy, dReal iz, dVector3 force);
 
diff -a -x *.exe -x *CVS* -x *.o -u -r -N ode/ode/doc/stepfast/graph.gif ode.step/ode/doc/stepfast/graph.gif
--- ode/ode/doc/stepfast/graph.gif	Thu Jan  1 01:00:00 1970
+++ ode.step/ode/doc/stepfast/graph.gif	Thu Jul 10 17:36:54 2003
@@ -0,0 +1,17 @@
+GIF89a      f  #  !   ,     I8ͻ`(dihlp,tmx|pH,Ȥrl:ШtJZجvzxL.z& wENp |u{,;=wZ~!t{Jsqz%5_pv͙ÐȲ^ԂێI߳y&ގ1Wҡ4uznKyҗھl:rU$h0z%r[VA2X%UM0RƎ8;FwsdEf1OLgQ6(jbͣ,5
+էbُCVVԷR{z5Vԧn%֩hM񬻕gFFKڳTޱ2n<ذGcqy_GykД	.WP=j.
+]E%_^Oޣ
+k㈡U[Zs"K?nL6_<ƿ͗Oy>qow*;L>RGZWRq[|]_]8I- \՟yQ7 k`8]r-H_8m|h/"zXeG#[iamici8^&؜fb\#>jtG#t7a4U'V]Eۖ'ЗQ$V%Ri2SViDX~4dtܒb_%U'~=*)FPj|^~؆A\ kG"1kKí*9ګ8fk6F+Vkfv+k覫k l'0
+7۫,Gl[qw, (<,,0\'l5߬93D;tHkq4L?RO]tЉXUos^ߌ
+`sfÌv,lp-)mk߀.cG.Wng9`38NN㛗nk޹?+~0 nήΤ@ o|ns(?W/yλ<	gG/*o?B~OCnD }׿fm@-t &'=qv`#LJX?Ѕ`36Ln`*7?MЇ&aQ(AQ|PK[u@X	@"WL"ɢ0dtv[4/B<Tb&J{h.Q`tXHqac`aF&dCC=F_,FLfr![H'?S_T~	1W/>b+3e!3[f|9zA3r2c'ȯ'A99v$<!YMzyܜ7}ML(3ρ7 ZTCʈfpH/P{rGO4i QRi@K+:Nt$%G47wS)B2JFIUj?uzⱁ	uIEЧVfkdX+\D('ӵZ:`%])Wx^ش~tlEc+-KH\2t,?#ٹ"eɯfvc*_j>|Ey"ѵ@Ma?btE.\)	QN7Y]Ѷ}Ms\'B70^_^iz׽-/\:߄}={'`':٬&L70kAwV	@ c3Ɖw^c1]|`$[T8:n1a9oJ(%y+kTMxoOrޤ\##\$T+7Yya<*h\xfѳ%;+ZXMJa3ϖSs(97UƄ4fnRݲIN{)&Lj-ޱ,]!t^4YOa#lAjyG[|]IvӍ<3 O(V>g7uqiW_l67/%fz_T{cl.hv^[wo<qPx5ND>r>9ʙj*x O!o:9}~;/sԏN)L8uT_uT8unqawȮۜN'ucWݭH]nnOm|ʬs']6;Kc: yvUyJI9Vi75Y>WmiGz_mc_>1K^rOvQXw^>`kO]>rە}yYN4(}9X7pA't_l/qg4sؗ2> Dqq8o5A1l'w%8T}8:$g-h/1X;yǁ5hoS 7JLx2woC%re4<f3^UWH{ZP. hA473X@mȗjx6䅺ekh?Ӂcwz.g9燆28\H`:#CtWȉ6㉟qsf_xM68!5Fr5+88X^hW؅-ȇ@yH8HX8m؈@ʓ5ƋX󊃨ӨȊxs#ȍ##9HgevCpȌ		{#Hٍ
+HcHdv80)8eXh16kFg's&l+I<ِ):+s5D741)I8/6#LNO	t(H?;Ii*CY7I;I\/Wה6o	(,=yY	D)SS65y|([ɒވJ_B&'cٖLG6iY{səi@QYrdO=Ә^T"N}InٙE;Y	O5c>Spkiq0#Rĉ;)VӜi4؇=-V'Ȗ"#i{W2;)ÞjSgYzuK#L6ɛQ
+	wY^2Ta2jZ؝7e[w)
+Ie[MDY0X9V
+Z@0:*z4*z8A<Cz=R%Z6':"Z-Z/IZYSNgĴ:Ue$y7ԡh[d[oJ`qN09qwd:2fVħ&kǊ:M1u8Ts)(^Ħa*:0j@<ڒl63J/:p:,z3z/F+d:7)0JCڬ,&6@ꕥں
+R9z3fSd
+0ڗ *0j63*Jr2۰p6ኰ	kj1۰Qt4
+<kʓ{gs/$[Efd1+kJD)!+2:S@
+3*HH27˲ô*;ziT9c܊whayj{ZG[zkCtuxx4	'1t#:z{˷3
+0۬03{\C,U;;P겤d1{/{ʠ(3I[[72H,68k;+;2ΫI໺¼󺽊ڽ+n۾+Ebܻ/G8mT`ſ̺QX0uj|u;j/ɴ<i':lJxA.	7[9 ÚKh=%L@<MXF|HJLNĂVrM4XZ|]_\lYblg\Ih,jolnv,w	ml+{y
+e~,ȃp/ȊȌȎȐɒ<cҲ'L
+Y-9-,}ɖʜ,<C&+|-R#b:ܲe˙̓%2zØ˦+ʰC2ڼ<\|<\|,4#d(ۨ'PJ22,h a(-(%Z̙0mЬ<9H.G")8(06q!z}!x&o]||d6!z@)-}C}8Bӌ(0!=Ѽh"jwb(MUVk]1~Lo},y*m&<A*S\2zA!r!iq֞|ьt-)Ё͒P]}ښ=	=52O$Zq֙ڵI=&dV)v"%8M֥׫M*=.m-/Ͳm]} ;
\ No newline at end of file
diff -a -x *.exe -x *CVS* -x *.o -u -r -N ode/ode/doc/stepfast/graph2.gif ode.step/ode/doc/stepfast/graph2.gif
--- ode/ode/doc/stepfast/graph2.gif	Thu Jan  1 01:00:00 1970
+++ ode.step/ode/doc/stepfast/graph2.gif	Thu Jul 10 17:36:54 2003
@@ -0,0 +1,17 @@
+GIF89a      f# !   ,     80I8ͻ`(dihlp,tmx|pH,Ȥrl:ШtJZجvz` 07b2`j>-=,w2hRstmjpzAvk
+lny-Vw7x6ġSoɳdq4~ԸƆu5NۮڔzۏF I_Aw,ߞy׎jP&ѕjc4Pʊйo
+&J<+W"yrbNQ)$ w@T։Ԭ`ZRV&Eh&1Ks{uߏ )+V.w^c3ߍq]o"n8utláCfKl]ycٳd}RȩzKr2fĶA43b=ggr۹ܓ-Dvc)xaV|	57 w aw725ݧ&mGyhZ핸{P\1GWn:Ȟw(s"z#r\bƕU7ӂ/6l-GGWFR=ԥ}{Mfܖ*.DfPY~t9BQĐ5t=t؛CBT&d֤c)n:CpjZj$BѪJkEέ+k&6F+Vkfv+$e覫A+kk/  .lz 4G,Wl[L0w,r#lr
+p,074<s8|s<r	+7@Dm='}J7M/NGM$Gg5PK]s^+v%_-/#|3-pнuc-rz7k2VvMwwONȖ_pËrs8␗׍ܯG޸~.#s~6㬛x㯫~:/츗y:C߱?t#x]w?/l~R_2c?n=Z;cb=vSUyW%P#eحy_F̅ GU1-o7Ps2a!,ƶ,~B&-BHŬ9QzW`(z`q7i]6M>Ψ4lt&||\Ȼ=}$Y"mA$IIol^&OGZ/d%GɼEkC/HR,ٺ0n~-ef}r+de+?ٰW
+@-</6~rz 1cNlu8'~!	IUTd 4H:̛f:~L:^Sj\BFza\`&Z/lfLH?sEPI,L N泜-3y҅ͦ"iI8S>e*И4iH}>~&EU	SK&ɱm $%3sӤ5lo!YjϞu7k5˩~]GWvl`:X.nM\=fa4`XuX~L5df1YγEVֺV--</.C/ĭu[Mymoe;ڽvm=8[%.:`pblMSnkX֦ѵvk^zsz&4ڜ{[ߞ7ol(y&WMf`Bx,`^8&W\L~x:, ox/$[c5pˁ86&1xicNH]Syb&9e׊\#1< XK$㐕B2<+C/ؼ7X3qa\M?9|CNGi:zפ)b,yV4+KG^IKS0sYmJWkU5\5yM
+`[غpfP;ʮɌkaÑ؇0vamkzf$4lBۭthȠv^7pff7>Q;r70o{7up_{୦x}i3ߖG܂<2yrK*rb#5k>P:m:"^·sA_r;	y}>OkC"s1gSJ}e"AvΞ=mmOsuϭ+;z
+^pŞh0q7/G>t.C^~Gs~xz]SVwgU=z?Ð=^sJIGZ{Z>)/Xid+\5ʟÌۋu?_oC{~Zw~7t|w'}Dwjw~7]'ESGY{Ȁ'5GWn!y{׀Qs({hF/?E$nug_7-X4sz	Vs4@vMs&8UGXJ{#煀cHh|c8e׃vk(Om8	+wxQv%W>MuhM)+ȂViiݐ'0hEg0hXaWz5h~7G][hxSg/X.)*vtGZhkj7XD؈kјB<HsH؋xRxR/)(8kTɇ׈aW収)WF7xďӗʨ錾7wtȐ@H&#)i?)To/i1DLv8yA
+)w֨`ف8'j5	bEiy g>e@IXK)UYSW죓ƒ&=Șaɓ#וCIflysAxDIWoi{jCsؗ;y7`)qgIFE~_Dag%pgFdD3pEٙjhk֚&chP!4yp邒micLpG+)&#ٝIm)y虞[	ѷYy`e9{Ke`YiA )fnY}٠kǜ{Y29(s*¡$OJ;T.ʝ5K+a/z
+(ꞝP8>ʆ8ri磵4Jz'jh
+9/ibNJZ8\:X=ye*٘Fk٦
+*aM*mtʡI4&~z*AZڨ:Z8)ˠک: :*ZZʪ
+*Ejꪹ)sZk䫼*
+:xDʬ`J@.ؚںڭ*r^':*2b"ך \y] Z뚮yvRpXZ^X ?;
+뮌)6!K2	+ʯ1*' Ky5$K#@;D[F{HJL۴NPR;T[V{XZ\۵^`b;d[L*K  .[){"W1_Am0} :s{*)  vzf ԠEAQ&p۷k*񷏠(K۪ \
+6}߱ˍ!ʺ&+K,
+k%7s{c(竽*
+z+˲ƛ;{)ʻ?R;'[ŻxKe{< \辝+ #ёۿ#̼=&Ӂ)g!Lҋ4rR(m{;/^1?HܰM|PQ\V|XZ\^`	  ;
\ No newline at end of file
diff -a -x *.exe -x *CVS* -x *.o -u -r -N ode/ode/doc/stepfast/stepfast-doc.html ode.step/ode/doc/stepfast/stepfast-doc.html
--- ode/ode/doc/stepfast/stepfast-doc.html	Thu Jan  1 01:00:00 1970
+++ ode.step/ode/doc/stepfast/stepfast-doc.html	Thu Jul 10 17:36:54 2003
@@ -0,0 +1,106 @@
+<html>
+<head>
+<title>ODE StepFast Documentation</title>
+</head>
+<body>
+<h2>5.1. StepFast1</h2>
+<pre><font color="#0000ff">void dWorldStepFast1(dWorldID, dReal stepsize, int maxiterations);</font></pre>
+The StepFast1 algorithm is designed to sacrifice some accuracy for a big gain in speed and memory.  The chart below illustrates
+this speed advantage over the standard dWorldStep algorithm.<br><br>
+<center><img src="graph.gif"></center><br><br>
+The graph relates the number of Degrees Of Freedom (DOFs) removed from a system to the running time of the step.  You may be able
+to tell that the dWorldStep algorithm's running time is proportional to the cube of the number of DOF's removed.  The StepFast1
+algorithm, however, is roughly linear.  So as islands increase in size (for example, when there is a large pile-up of cars, a
+pile of "ragdoll corpses", or a wall of bricks) the StepFast1 algorithm scales better than dWorldStep.  All this means that your
+application is more likely to keep a steady framerate, even in the worst case scenario.
+<br><br>
+The graph of DOFs removed to memory looks quite similar.<br><br>
+<center><img src="graph2.gif"></center><br><br>
+dWorldStep requires memory proportional only to the square of the number of DOF's removed.  StepFast1, though, is still linear, but
+it has nothing to do with the number of iterations per step.  So this means the dreaded "There was a big pile-up and ODE crashed
+without an error message" problems (usually stack overflows) won't happen with StepFast1.  Or at least that you'll be rendering at
+a minute per frame or slower before they do.
+
+<h3>5.1.1.  When to use StepFast1</h3>
+As shown above, StepFast1 is quite good when it comes to speed and memory usage.  All this power doesn't come for free, though; all
+optimizations are a trade-off of one kind or another.  I've already mentioned that StepFast1 trades off accuracy for it's speed and
+memory advantages.  You actually get to choose just how much accuracy you give away though, at the cost of speed, by adjusting the
+number of iterations per step.  Though you may never reach the accuracy of dWorldStep (or you may surpass it, depending on the type of
+inaccuracy), you can be almost certain that a larger number of iterations will give you more accurate results (more slowly).  So
+StepFast1 can be used in a good variety of situations.
+<br><br>
+The general answer to this question then, is: use StepFast1 when you don't mind having a few more parameters to play with to get the
+system stable, and you want to take advantage of it's speed or memory advantages.  If you find yourself running into situations in
+your simulation where large numbers of bodies come in contact, and dWorldStep becomes too slow, try switching to StepFast1.  Many
+systems will work just fine with nothing more than changing the dWorldStep function call to dWorldStepFast1.  Others will require a
+little tweaking to get them to work well with StepFast1, usually in the masses of the bodies.  When a joint connects two bodies with
+a large mass ratio (i.e. one body has several times the mass of the other body) StepFast1 may have trouble solving it.
+<br><br>
+Another prospect for StepFast1 is designing for it from the ground up.  If you know you are going to build large worlds with many
+physically based objects in them, then go ahead and plan to use StepFast1.  Noting the mass ratio problem above, you might want to
+consider making the mass of every body in your system equal to 1.0.  Or in a very small range, for example between 0.5 and 1.5.  Most
+of the other suggestions for speed and stability apply to StepFast1, except that the object is no longer to remove as many joints as
+possible from the equation.  It can likely be shown that you will get a better performance to stability ratio by spreading out
+mass among several bodies connected by fixed joints rather than trying to implement it as one massive body, especially if that one
+massive body means you have to switch back to dWorldStep to keep things stable.
+<br><br>
+A final prospect for StepFast1 is to use it only when you need to.  Since StepFast1 uses the body and world structures in exactly
+the same way as dWorldStep, you can actually switch back and forth between the two solvers at will.  A good heuristic for when to
+make this switch is to simply count contact joints while you are running the collision detection.  Since collision detection is
+normally called before the step, using this method will ensure that the large island that would slow you down is never sent to
+the dWorldStep solver (as opposed to waiting until after you've already taken a step at 1 fps...).  The only better solution would
+be a hybrid island creation function, that sends small islands to dWorldStep, and large islands to dWorldStepFast1.  This may make
+it in the source at some point in the future.
+
+<h3>5.1.2.  When NOT to use StepFast1</h3>
+Though there are several specific situations when it as advisable not to use StepFast1, I believe they can all be summed up in a
+single statement:  Don't use StepFast1 when accuracy is more important than speed or memory to you.  You may still want to evaluate
+it in this case and see if the inaccuracies are even noticeable, perhaps with a relatively large number of iterations (20+).
+
+<h3>5.1.3.  How it works</h3>
+For any interested parties out there, here's a quick rundown of how the StepFast1 algorithm works.  The general problem that ODE
+tries to solve is a system of linear (in)equalities in (m = constraints) unknowns, where one constraint constrains 1 Degree of
+Freedom in one joint.  For large islands of bodies with many joints between them, this can take a rather large O(m^2) array, which
+takes O(m^3) time to solve.  StepFast1 completely avoids creating the large matrix by making an assumption: at relatively small
+timesteps, the effect of any given joint is so localized that it can be calculated without respect to any other joint in the system,
+and any conflicting joints will cancel each other out before the body actually moves.  So StepFast1 uses the same solution method
+(LCP) to solve the same problem, only localized to a single joint (where m &lt;= 6).  It gets away with this by sub-dividing the timestep
+and repeating the process over really small timesteps (i = maxiterations) times.  So the running time of StepFast1 is "roughly" O(m*i).
+It's really closer to O(j*i*(m/j)^3) = O(m*i*(m/j)^2), where j = joints, but (m/j) is never > 6, so (m/j)^2 is factored out as a constant.
+
+<h3>5.1.3.  Experimental Utilities included with StepFast1</h3>
+Several experimental functions have been added to ODE as part of the StepFast1 flow of code, at least until they are validated.  Most
+have to do with the automatic disabling and enabling of bodies as yet another bit of optimization.  Here's the general idea:
+<ul>
+	<li>The body is considered a candidate for disabling when it falls below a certain speed (linear and angular), called the
+	AutoDisableThreshold.  In the interest of speedy execution, the actual speed measured is the square of the speed of the body.  So
+	you may need to set a lower value than you expected.  0.003 works well in test_crash, and is the default.</li>
+	<li>When the body has remained a disable candidate for a certain number of steps (AutoDisableSteps), it is disabled.  This is almost
+	completely for boxes, which like to land and bounce up on two points, and teeter motionless for a few steps before falling back down.
+	Round items generally need a much lower (like 1) AutoDisableSteps than boxes do (10+), 10 is the default.</li>
+	<li>AutoDisabling is disabled by default, use dBodySetAutoDisableSF1(body, true) to enable it.</li>
+	<li>A body is automatically re-enabled when it comes in contact with another enabled body.</li>
+	<li>Enabled bodies only enable bodies within (AutoEnableDepth) bodies of them each step.  This, in conjunction with AutoDisabling,
+	causes a rim of bodies that are enabled and disabled each step to form, containing the enabled bodies to the smallest area allowed
+	by the AutoDisable parameters.  Setting AutoEnableDepth to a really large number will retain the current functionality.  Setting it
+	to 0 will give you a new functionality: disabled bodies will never be automatically re-enabled, acting like geoms only.  3 seems to
+	be a good value for the wall in test_crash, but 1000 is the default to retain standard functionality.</li>
+</ul>
+
+<pre><font color="#0000ff">
+void dWorldSetAutoEnableDepthSF1(dWorldID, int autoEnableDepth);
+int dWorldGetAutoEnableDepthSF1(dWorldID);
+
+void dBodySetAutoDisableThresholdSF1(dBodyID, dReal autoDisableThreshold);
+dReal dBodyGetAutoDisableThresholdSF1(dBodyID);
+
+void dBodySetAutoDisableStepsSF1(dBodyID, int AutoDisableSteps);
+int dBodyGetAutoDisableStepsSF1(dBodyID);
+
+void dBodySetAutoDisableSF1(dBodyID, bool doAutoDisable);
+bool dBodyGetAutoDisableSF1(dBodyID);
+</font></pre>
+<br><br>
+
+</body>
+</html>
diff -a -x *.exe -x *CVS* -x *.o -u -r -N ode/ode/src/stepfast.cpp ode.step/ode/src/stepfast.cpp
--- ode/ode/src/stepfast.cpp	Thu Jan  1 01:00:00 1970
+++ ode.step/ode/src/stepfast.cpp	Thu Jul 10 17:36:54 2003
@@ -0,0 +1,1139 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Fast iterative solver, David Whittaker. Email: david@csworkbench.com  *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include "objects.h"
+#include "joint.h"
+#include <ode/config.h>
+#include <ode/odemath.h>
+#include <ode/rotation.h>
+#include <ode/timer.h>
+#include <ode/error.h>
+#include <ode/matrix.h>
+#include "lcp.h"
+#include "step.h"
+
+
+
+// misc defines
+#define ALLOCA dALLOCA16
+
+#define RANDOM_JOINT_ORDER
+
+//#define FAST_FACTOR	//use a factorization approximation to the LCP solver (fast, theoretically less accurate)
+#define SLOW_LCP      //use the old LCP solver
+
+//#define NO_ISLANDS    //does not perform island creation code (3~4% of simulation time), body disabling doesn't work
+
+//#define TIMING
+
+static int autoEnableDepth = 2;
+
+extern "C" void dWorldSetAutoEnableDepth(dxWorld *world, int autodepth)
+{
+	if (autodepth > 0)
+		autoEnableDepth = autodepth;
+	else
+		autoEnableDepth = 0;
+}
+
+extern "C" int dWorldGetAutoEnableDepth(dxWorld *world)
+{
+	return autoEnableDepth;
+}
+
+//little bit of math.... the _sym_ functions assume the return matrix will be symmetric
+static void
+Multiply2_sym_p8p (dReal * A, dReal * B, dReal * C, int p, int Askip)
+{
+	int i, j;
+	dReal sum, *aa, *ad, *bb, *cc;
+	dIASSERT (p > 0 && A && B && C);
+	bb = B;
+	for (i = 0; i < p; i++)
+	{
+		//aa is going accross the matrix, ad down
+		aa = ad = A;
+		cc = C;
+		for (j = i; j < p; j++)
+		{
+			sum = bb[0] * cc[0];
+			sum += bb[1] * cc[1];
+			sum += bb[2] * cc[2];
+			sum += bb[4] * cc[4];
+			sum += bb[5] * cc[5];
+			sum += bb[6] * cc[6];
+			*(aa++) = *ad = sum;
+			ad += Askip;
+			cc += 8;
+		}
+		bb += 8;
+		A += Askip + 1;
+		C += 8;
+	}
+}
+
+static void
+MultiplyAdd2_sym_p8p (dReal * A, dReal * B, dReal * C, int p, int Askip)
+{
+	int i, j;
+	dReal sum, *aa, *ad, *bb, *cc;
+	dIASSERT (p > 0 && A && B && C);
+	bb = B;
+	for (i = 0; i < p; i++)
+	{
+		//aa is going accross the matrix, ad down
+		aa = ad = A;
+		cc = C;
+		for (j = i; j < p; j++)
+		{
+			sum = bb[0] * cc[0];
+			sum += bb[1] * cc[1];
+			sum += bb[2] * cc[2];
+			sum += bb[4] * cc[4];
+			sum += bb[5] * cc[5];
+			sum += bb[6] * cc[6];
+			*(aa++) += sum;
+			*ad += sum;
+			ad += Askip;
+			cc += 8;
+		}
+		bb += 8;
+		A += Askip + 1;
+		C += 8;
+	}
+}
+
+
+// this assumes the 4th and 8th rows of B are zero.
+
+static void
+Multiply0_p81 (dReal * A, dReal * B, dReal * C, int p)
+{
+	int i;
+	dIASSERT (p > 0 && A && B && C);
+	dReal sum;
+	for (i = p; i; i--)
+	{
+		sum = B[0] * C[0];
+		sum += B[1] * C[1];
+		sum += B[2] * C[2];
+		sum += B[4] * C[4];
+		sum += B[5] * C[5];
+		sum += B[6] * C[6];
+		*(A++) = sum;
+		B += 8;
+	}
+}
+
+
+// this assumes the 4th and 8th rows of B are zero.
+
+static void
+MultiplyAdd0_p81 (dReal * A, dReal * B, dReal * C, int p)
+{
+	int i;
+	dIASSERT (p > 0 && A && B && C);
+	dReal sum;
+	for (i = p; i; i--)
+	{
+		sum = B[0] * C[0];
+		sum += B[1] * C[1];
+		sum += B[2] * C[2];
+		sum += B[4] * C[4];
+		sum += B[5] * C[5];
+		sum += B[6] * C[6];
+		*(A++) += sum;
+		B += 8;
+	}
+}
+
+
+// this assumes the 4th and 8th rows of B are zero.
+
+static void
+MultiplyAdd1_8q1 (dReal * A, dReal * B, dReal * C, int q)
+{
+	int k;
+	dReal sum;
+	dIASSERT (q > 0 && A && B && C);
+	sum = 0;
+	for (k = 0; k < q; k++)
+		sum += B[k * 8] * C[k];
+	A[0] += sum;
+	sum = 0;
+	for (k = 0; k < q; k++)
+		sum += B[1 + k * 8] * C[k];
+	A[1] += sum;
+	sum = 0;
+	for (k = 0; k < q; k++)
+		sum += B[2 + k * 8] * C[k];
+	A[2] += sum;
+	sum = 0;
+	for (k = 0; k < q; k++)
+		sum += B[4 + k * 8] * C[k];
+	A[4] += sum;
+	sum = 0;
+	for (k = 0; k < q; k++)
+		sum += B[5 + k * 8] * C[k];
+	A[5] += sum;
+	sum = 0;
+	for (k = 0; k < q; k++)
+		sum += B[6 + k * 8] * C[k];
+	A[6] += sum;
+}
+
+
+// this assumes the 4th and 8th rows of B are zero.
+
+static void
+Multiply1_8q1 (dReal * A, dReal * B, dReal * C, int q)
+{
+	int k;
+	dReal sum;
+	dIASSERT (q > 0 && A && B && C);
+	sum = 0;
+	for (k = 0; k < q; k++)
+		sum += B[k * 8] * C[k];
+	A[0] = sum;
+	sum = 0;
+	for (k = 0; k < q; k++)
+		sum += B[1 + k * 8] * C[k];
+	A[1] = sum;
+	sum = 0;
+	for (k = 0; k < q; k++)
+		sum += B[2 + k * 8] * C[k];
+	A[2] = sum;
+	sum = 0;
+	for (k = 0; k < q; k++)
+		sum += B[4 + k * 8] * C[k];
+	A[4] = sum;
+	sum = 0;
+	for (k = 0; k < q; k++)
+		sum += B[5 + k * 8] * C[k];
+	A[5] = sum;
+	sum = 0;
+	for (k = 0; k < q; k++)
+		sum += B[6 + k * 8] * C[k];
+	A[6] = sum;
+}
+
+//****************************************************************************
+// body rotation
+
+// return sin(x)/x. this has a singularity at 0 so special handling is needed
+// for small arguments.
+
+static inline dReal
+sinc (dReal x)
+{
+	// if |x| < 1e-4 then use a taylor series expansion. this two term expansion
+	// is actually accurate to one LS bit within this range if double precision
+	// is being used - so don't worry!
+	if (dFabs (x) < 1.0e-4)
+		return REAL (1.0) - x * x * REAL (0.166666666666666666667);
+	else
+		return dSin (x) / x;
+}
+
+
+// given a body b, apply its linear and angular rotation over the time
+// interval h, thereby adjusting its position and orientation.
+
+static inline void
+moveAndRotateBody (dxBody * b, dReal h)
+{
+	int j;
+
+	// handle linear velocity
+	for (j = 0; j < 3; j++)
+		b->pos[j] += h * b->lvel[j];
+
+	if (b->flags & dxBodyFlagFiniteRotation)
+	{
+		dVector3 irv;			// infitesimal rotation vector
+		dQuaternion q;			// quaternion for finite rotation
+
+		if (b->flags & dxBodyFlagFiniteRotationAxis)
+		{
+			// split the angular velocity vector into a component along the finite
+			// rotation axis, and a component orthogonal to it.
+			dVector3 frv, irv;	// finite rotation vector
+			dReal k = dDOT (b->finite_rot_axis, b->avel);
+			frv[0] = b->finite_rot_axis[0] * k;
+			frv[1] = b->finite_rot_axis[1] * k;
+			frv[2] = b->finite_rot_axis[2] * k;
+			irv[0] = b->avel[0] - frv[0];
+			irv[1] = b->avel[1] - frv[1];
+			irv[2] = b->avel[2] - frv[2];
+
+			// make a rotation quaternion q that corresponds to frv * h.
+			// compare this with the full-finite-rotation case below.
+			h *= REAL (0.5);
+			dReal theta = k * h;
+			q[0] = dCos (theta);
+			dReal s = sinc (theta) * h;
+			q[1] = frv[0] * s;
+			q[2] = frv[1] * s;
+			q[3] = frv[2] * s;
+		}
+		else
+		{
+			// make a rotation quaternion q that corresponds to w * h
+			dReal wlen = dSqrt (b->avel[0] * b->avel[0] + b->avel[1] * b->avel[1] + b->avel[2] * b->avel[2]);
+			h *= REAL (0.5);
+			dReal theta = wlen * h;
+			q[0] = dCos (theta);
+			dReal s = sinc (theta) * h;
+			q[1] = b->avel[0] * s;
+			q[2] = b->avel[1] * s;
+			q[3] = b->avel[2] * s;
+		}
+
+		// do the finite rotation
+		dQuaternion q2;
+		dQMultiply0 (q2, q, b->q);
+		for (j = 0; j < 4; j++)
+			b->q[j] = q2[j];
+
+		// do the infitesimal rotation if required
+		if (b->flags & dxBodyFlagFiniteRotationAxis)
+		{
+			dReal dq[4];
+			dWtoDQ (irv, b->q, dq);
+			for (j = 0; j < 4; j++)
+				b->q[j] += h * dq[j];
+		}
+	}
+	else
+	{
+		// the normal way - do an infitesimal rotation
+		dReal dq[4];
+		dWtoDQ (b->avel, b->q, dq);
+		for (j = 0; j < 4; j++)
+			b->q[j] += h * dq[j];
+	}
+
+	// normalize the quaternion and convert it to a rotation matrix
+	dNormalize4 (b->q);
+	dQtoR (b->q, b->R);
+
+	// notify all attached geoms that this body has moved
+	for (dxGeom * geom = b->geom; geom; geom = dGeomGetBodyNext (geom))
+		dGeomMoved (geom);
+}
+
+//****************************************************************************
+//This is an implementation of the iterated/relaxation algorithm.
+//Here is a quick overview of the algorithm per Sergi Valverde's posts to the
+//mailing list:
+//
+//  for i=0..N-1 do
+//      for c = 0..C-1 do
+//          Solve constraint c-th
+//          Apply forces to constraint bodies
+//      next
+//  next
+//  Integrate bodies
+
+void
+dInternalStepFast (dxWorld * world, dxBody * body[2], dReal * GI[2], dReal * GinvI[2], dxJoint * joint, dxJoint::Info1 info, dxJoint::Info2 Jinfo, dReal stepsize)
+{
+	int i, j, k;
+# ifdef TIMING
+	dTimerNow ("constraint preprocessing");
+# endif
+
+	dReal stepsize1 = dRecip (stepsize);
+
+	int m = info.m;
+	// nothing to do if no constraints.
+	if (m <= 0)
+		return;
+
+	int nub = 0;
+	if (info.nub == info.m)
+		nub = m;
+
+	// compute A = J*invM*J'. first compute JinvM = J*invM. this has the same
+	// format as J so we just go through the constraints in J multiplying by
+	// the appropriate scalars and matrices.
+#   ifdef TIMING
+	dTimerNow ("compute A");
+#   endif
+	dReal JinvM[2 * 6 * 8];
+	dSetZero (JinvM, 2 * m * 8);
+
+	dReal *Jsrc = Jinfo.J1l;
+	dReal *Jdst = JinvM;
+	if (body[0])
+	{
+		for (j = m - 1; j >= 0; j--)
+		{
+			for (k = 0; k < 3; k++)
+				Jdst[k] = Jsrc[k] * body[0]->invMass;
+			dMULTIPLY0_133 (Jdst + 4, Jsrc + 4, GinvI[0]);
+			Jsrc += 8;
+			Jdst += 8;
+		}
+	}
+	if (body[1])
+	{
+		Jsrc = Jinfo.J2l;
+		Jdst = JinvM + 8 * m;
+		for (j = m - 1; j >= 0; j--)
+		{
+			for (k = 0; k < 3; k++)
+				Jdst[k] = Jsrc[k] * body[1]->invMass;
+			dMULTIPLY0_133 (Jdst + 4, Jsrc + 4, GinvI[1]);
+			Jsrc += 8;
+			Jdst += 8;
+		}
+	}
+
+
+	// now compute A = JinvM * J'.
+	int mskip = dPAD (m);
+	dReal A[6 * 8];
+	dSetZero (A, 6 * 8);
+
+	if (body[0])
+		Multiply2_sym_p8p (A, JinvM, Jinfo.J1l, m, mskip);
+	if (body[1])
+		MultiplyAdd2_sym_p8p (A, JinvM + 8 * m, Jinfo.J2l, m, mskip);
+
+	// add cfm to the diagonal of A
+	for (i = 0; i < m; i++)
+		A[i * mskip + i] += Jinfo.cfm[i] * stepsize1;
+
+	// compute the right hand side `rhs'
+#   ifdef TIMING
+	dTimerNow ("compute rhs");
+#   endif
+	dReal tmp1[16];
+	dSetZero (tmp1, 16);
+	// put v/h + invM*fe into tmp1
+	for (i = 0; i < 2; i++)
+	{
+		if (!body[i])
+			continue;
+		for (j = 0; j < 3; j++)
+			tmp1[i * 8 + j] = body[i]->facc[j] * body[i]->invMass + body[i]->lvel[j] * stepsize1;
+		dMULTIPLY0_331 (tmp1 + i * 8 + 4, GinvI[i], body[i]->tacc);
+		for (j = 0; j < 3; j++)
+			tmp1[i * 8 + 4 + j] += body[i]->avel[j] * stepsize1;
+	}
+	// put J*tmp1 into rhs
+	dReal rhs[6];
+	dSetZero (rhs, 6);
+
+	if (body[0])
+		Multiply0_p81 (rhs, Jinfo.J1l, tmp1, m);
+	if (body[1])
+		MultiplyAdd0_p81 (rhs, Jinfo.J2l, tmp1 + 8, m);
+
+	// complete rhs
+	for (i = 0; i < m; i++)
+		rhs[i] = Jinfo.c[i] * stepsize1 - rhs[i];
+
+#ifdef SLOW_LCP
+	// solve the LCP problem and get lambda.
+	// this will destroy A but that's okay
+#	ifdef TIMING
+	dTimerNow ("solving LCP problem");
+#	endif
+	dReal *lambda = (dReal *) ALLOCA (m * sizeof (dReal));
+	dReal *residual = (dReal *) ALLOCA (m * sizeof (dReal));
+	dReal lo[6], hi[6];
+	memcpy (lo, Jinfo.lo, m * sizeof (dReal));
+	memcpy (hi, Jinfo.hi, m * sizeof (dReal));
+	dSolveLCP (m, A, lambda, rhs, residual, nub, lo, hi, Jinfo.findex);
+#endif
+
+	// LCP Solver replacement:
+	// This algorithm goes like this:
+	// Do a straightforward LDLT factorization of the matrix A, solving for
+	// A*x = rhs
+	// For each x[i] that is outside of the bounds of lo[i] and hi[i],
+	//    clamp x[i] into that range.
+	//    Substitute into A the now known x's
+	//    subtract the residual away from the rhs.
+	//    Remove row and column i from L, updating the factorization
+	//    place the known x's at the end of the array, keeping up with location in p
+	// Repeat until all constraints have been clamped or all are within bounds
+	//
+	// This is probably only faster in the single joint case where only one repeat is
+	// the norm.
+
+#ifdef FAST_FACTOR
+	// factorize A (L*D*L'=A)
+#	ifdef TIMING
+	dTimerNow ("factorize A");
+#	endif
+	dReal d[6];
+	dReal L[6 * 8];
+	memcpy (L, A, m * mskip * sizeof (dReal));
+	dFactorLDLT (L, d, m, mskip);
+
+	// compute lambda
+#	ifdef TIMING
+	dTimerNow ("compute lambda");
+#	endif
+
+	int left = m;				//constraints left to solve.
+	bool remove[6];
+	dReal lambda[6];
+	dReal x[6];
+	int p[6];
+	for (i = 0; i < 6; i++)
+		p[i] = i;
+	while (true)
+	{
+		memcpy (x, rhs, left * sizeof (dReal));
+		dSolveLDLT (L, d, x, left, mskip);
+
+		int fixed = 0;
+		for (i = 0; i < left; i++)
+		{
+			j = p[i];
+			remove[i] = false;
+			// This isn't the exact same use of findex as dSolveLCP.... since x[findex]
+			// may change after I've already clamped x[i], but it should be close
+			if (Jinfo.findex[j] > -1)
+			{
+				dReal f = fabs (Jinfo.hi[j] * x[p[Jinfo.findex[j]]]);
+				if (x[i] > f)
+					x[i] = f;
+				else if (x[i] < -f)
+					x[i] = -f;
+				else
+					continue;
+			}
+			else
+			{
+				if (x[i] > Jinfo.hi[j])
+					x[i] = Jinfo.hi[j];
+				else if (x[i] < Jinfo.lo[j])
+					x[i] = Jinfo.lo[j];
+				else
+					continue;
+			}
+			remove[i] = true;
+			fixed++;
+		}
+		if (fixed == 0 || fixed == left)	//no change or all constraints solved
+			break;
+
+		for (i = 0; i < left; i++)	//sub in to right hand side.
+			if (remove[i])
+				for (j = 0; j < left; j++)
+					if (!remove[j])
+						rhs[j] -= A[j * mskip + i] * x[i];
+
+		for (int r = left - 1; r >= 0; r--)	//eliminate row/col for fixed variables
+		{
+			if (remove[r])
+			{
+				//dRemoveLDLT adapted for use without row pointers.
+				if (r == left - 1)
+				{
+					left--;
+					continue;	// deleting last row/col is easy
+				}
+				else if (r == 0)
+				{
+					dReal a[6];
+					for (i = 0; i < left; i++)
+						a[i] = -A[i * mskip];
+					a[0] += REAL (1.0);
+					dLDLTAddTL (L, d, a, left, mskip);
+				}
+				else
+				{
+					dReal t[6];
+					dReal a[6];
+					for (i = 0; i < r; i++)
+						t[i] = L[r * mskip + i] / d[i];
+					for (i = 0; i < left - r; i++)
+						a[i] = dDot (L + (r + i) * mskip, t, r) - A[(r + i) * mskip + r];
+					a[0] += REAL (1.0);
+					dLDLTAddTL (L + r * mskip + r, d + r, a, left - r, mskip);
+				}
+
+				dRemoveRowCol (L, left, mskip, r);
+				//end dRemoveLDLT
+
+				left--;
+				if (r < (left - 1))
+				{
+					dReal tx = x[r];
+					memmove (d + r, d + r + 1, (left - r) * sizeof (dReal));
+					memmove (rhs + r, rhs + r + 1, (left - r) * sizeof (dReal));
+					//x will get written over by rhs anyway, no need to move it around
+					//just store the fixed value we just discovered in it.
+					x[left] = tx;
+					for (i = 0; i < m; i++)
+						if (p[i] > r && p[i] <= left)
+							p[i]--;
+					p[r] = left;
+				}
+			}
+		}
+	}
+
+	for (i = 0; i < m; i++)
+		lambda[i] = x[p[i]];
+#	endif
+	// compute the constraint force `cforce'
+#	ifdef TIMING
+	dTimerNow ("compute constraint force");
+#endif
+
+	// compute cforce = J'*lambda
+	dJointFeedback *fb = joint->feedback;
+	dReal cforce[16];
+	dSetZero (cforce, 16);
+
+	if (fb)
+	{
+		// the user has requested feedback on the amount of force that this
+		// joint is applying to the bodies. we use a slightly slower
+		// computation that splits out the force components and puts them
+		// in the feedback structure.
+		dReal data1[8], data2[8];
+		if (body[0])
+		{
+			Multiply1_8q1 (data1, Jinfo.J1l, lambda, m);
+			dReal *cf1 = cforce;
+			cf1[0] = (fb->f1[0] = data1[0]);
+			cf1[1] = (fb->f1[1] = data1[1]);
+			cf1[2] = (fb->f1[2] = data1[2]);
+			cf1[4] = (fb->t1[0] = data1[4]);
+			cf1[5] = (fb->t1[1] = data1[5]);
+			cf1[6] = (fb->t1[2] = data1[6]);
+		}
+		if (body[1])
+		{
+			Multiply1_8q1 (data2, Jinfo.J2l, lambda, m);
+			dReal *cf2 = cforce + 8;
+			cf2[0] = (fb->f2[0] = data2[0]);
+			cf2[1] = (fb->f2[1] = data2[1]);
+			cf2[2] = (fb->f2[2] = data2[2]);
+			cf2[4] = (fb->t2[0] = data2[4]);
+			cf2[5] = (fb->t2[1] = data2[5]);
+			cf2[6] = (fb->t2[2] = data2[6]);
+		}
+	}
+	else
+	{
+		// no feedback is required, let's compute cforce the faster way
+		if (body[0])
+			Multiply1_8q1 (cforce, Jinfo.J1l, lambda, m);
+		if (body[1])
+			Multiply1_8q1 (cforce + 8, Jinfo.J2l, lambda, m);
+	}
+
+	for (i = 0; i < 2; i++)
+	{
+		if (!body[i])
+			continue;
+		for (j = 0; j < 3; j++)
+		{
+			body[i]->facc[j] += cforce[i * 8 + j];
+			body[i]->tacc[j] += cforce[i * 8 + 4 + j];
+		}
+	}
+}
+
+void
+dInternalStepIslandFast (dxWorld * world, dxBody * const *bodies, int nb, dxJoint * const *_joints, int nj, dReal stepsize, int maxiterations)
+{
+#   ifdef TIMING
+	dTimerNow ("preprocessing");
+#   endif
+	dxBody *bodyPair[2], *body;
+	dReal *GIPair[2], *GinvIPair[2];
+	dxJoint *joint;
+	int iter, b, j, i;
+	dReal ministep = stepsize / maxiterations;
+
+	// make a local copy of the joint array, because we might want to modify it.
+	// (the "dxJoint *const*" declaration says we're allowed to modify the joints
+	// but not the joint array, because the caller might need it unchanged).
+	dxJoint **joints = (dxJoint **) ALLOCA (nj * sizeof (dxJoint *));
+	memcpy (joints, _joints, nj * sizeof (dxJoint *));
+
+	// get m = total constraint dimension, nub = number of unbounded variables.
+	// create constraint offset array and number-of-rows array for all joints.
+	// the constraints are re-ordered as follows: the purely unbounded
+	// constraints, the mixed unbounded + LCP constraints, and last the purely
+	// LCP constraints. this assists the LCP solver to put all unbounded
+	// variables at the start for a quick factorization.
+	//
+	// joints with m=0 are inactive and are removed from the joints array
+	// entirely, so that the code that follows does not consider them.
+	// also number all active joints in the joint list (set their tag values).
+	// inactive joints receive a tag value of -1.
+
+	int m = 0;
+	dxJoint::Info1 * info = (dxJoint::Info1 *) ALLOCA (nj * sizeof (dxJoint::Info1));
+	int *ofs = (int *) ALLOCA (nj * sizeof (int));
+	for (i = 0, j = 0; j < nj; j++)
+	{	// i=dest, j=src
+		joints[j]->vtable->getInfo1 (joints[j], info + i);
+		dIASSERT (info[i].m >= 0 && info[i].m <= 6 && info[i].nub >= 0 && info[i].nub <= info[i].m);
+		if (info[i].m > 0)
+		{
+			joints[i] = joints[j];
+			joints[i]->tag = i;
+			i++;
+		}
+		else
+		{
+			joints[j]->tag = -1;
+		}
+	}
+	nj = i;
+
+	// the purely unbounded constraints
+	for (i = 0; i < nj; i++)
+	{
+		ofs[i] = m;
+		m += info[i].m;
+	}
+
+	// create a constraint equation right hand side vector `c', a constraint
+	// force mixing vector `cfm', and LCP low and high bound vectors, and an
+	// 'findex' vector.
+	dReal *c = (dReal *) ALLOCA (m * sizeof (dReal));
+	dReal *cfm = (dReal *) ALLOCA (m * sizeof (dReal));
+	dReal *lo = (dReal *) ALLOCA (m * sizeof (dReal));
+	dReal *hi = (dReal *) ALLOCA (m * sizeof (dReal));
+	int *findex = (int *) ALLOCA (m * sizeof (int));
+	dSetZero (c, m);
+	dSetValue (cfm, m, world->global_cfm);
+	dSetValue (lo, m, -dInfinity);
+	dSetValue (hi, m, dInfinity);
+	for (i = 0; i < m; i++)
+		findex[i] = -1;
+
+	// get jacobian data from constraints. a (2*m)x8 matrix will be created
+	// to store the two jacobian blocks from each constraint. it has this
+	// format:
+	//
+	//   l l l 0 a a a 0  \    .
+	//   l l l 0 a a a 0   }-- jacobian body 1 block for joint 0 (3 rows)
+	//   l l l 0 a a a 0  /
+	//   l l l 0 a a a 0  \    .
+	//   l l l 0 a a a 0   }-- jacobian body 2 block for joint 0 (3 rows)
+	//   l l l 0 a a a 0  /
+	//   l l l 0 a a a 0  }--- jacobian body 1 block for joint 1 (1 row)
+	//   l l l 0 a a a 0  }--- jacobian body 2 block for joint 1 (1 row)
+	//   etc...
+	//
+	//   (lll) = linear jacobian data
+	//   (aaa) = angular jacobian data
+	//
+#   ifdef TIMING
+	dTimerNow ("create J");
+#   endif
+	dReal *J = (dReal *) ALLOCA (2 * m * 8 * sizeof (dReal));
+	dSetZero (J, 2 * m * 8);
+	dxJoint::Info2 * Jinfo = (dxJoint::Info2 *) ALLOCA (nj * sizeof (dxJoint::Info2));
+	for (i = 0; i < nj; i++)
+	{
+		Jinfo[i].rowskip = 8;
+		Jinfo[i].fps = dRecip (stepsize);
+		Jinfo[i].erp = world->global_erp;
+		Jinfo[i].J1l = J + 2 * 8 * ofs[i];
+		Jinfo[i].J1a = Jinfo[i].J1l + 4;
+		Jinfo[i].J2l = Jinfo[i].J1l + 8 * info[i].m;
+		Jinfo[i].J2a = Jinfo[i].J2l + 4;
+		Jinfo[i].c = c + ofs[i];
+		Jinfo[i].cfm = cfm + ofs[i];
+		Jinfo[i].lo = lo + ofs[i];
+		Jinfo[i].hi = hi + ofs[i];
+		Jinfo[i].findex = findex + ofs[i];
+		//joints[i]->vtable->getInfo2 (joints[i], Jinfo+i);
+	}
+
+	dReal *saveFacc = (dReal *) ALLOCA (nb * 4 * sizeof (dReal));
+	dReal *saveTacc = (dReal *) ALLOCA (nb * 4 * sizeof (dReal));
+	dReal *globalI = (dReal *) ALLOCA (nb * 12 * sizeof (dReal));
+	dReal *globalInvI = (dReal *) ALLOCA (nb * 12 * sizeof (dReal));
+	for (b = 0; b < nb; b++)
+	{
+		for (i = 0; i < 4; i++)
+		{
+			saveFacc[b * 4 + i] = bodies[b]->facc[i];
+			saveTacc[b * 4 + i] = bodies[b]->tacc[i];
+			bodies[b]->tag = b;
+		}
+	}
+
+	for (iter = 0; iter < maxiterations; iter++)
+	{
+#	ifdef TIMING
+		dTimerNow ("applying inertia and gravity");
+#	endif
+		dReal tmp[12] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+		for (b = 0; b < nb; b++)
+		{
+			body = bodies[b];
+
+			// for all bodies, compute the inertia tensor and its inverse in the global
+			// frame, and compute the rotational force and add it to the torque
+			// accumulator. I and invI are vertically stacked 3x4 matrices, one per body.
+			// @@@ check computation of rotational force.
+
+			// compute inertia tensor in global frame
+			dMULTIPLY2_333 (tmp, body->mass.I, body->R);
+			dMULTIPLY0_333 (globalI + b * 12, body->R, tmp);
+			// compute inverse inertia tensor in global frame
+			dMULTIPLY2_333 (tmp, body->invI, body->R);
+			dMULTIPLY0_333 (globalInvI + b * 12, body->R, tmp);
+
+			for (i = 0; i < 4; i++)
+				body->tacc[i] = saveTacc[b * 4 + i];
+			// compute rotational force
+			dMULTIPLY0_331 (tmp, globalI + b * 12, body->avel);
+			dCROSS (body->tacc, -=, body->avel, tmp);
+
+			// add the gravity force to all bodies
+			if ((body->flags & dxBodyNoGravity) == 0)
+			{
+				body->facc[0] = saveFacc[b * 4 + 0] + body->mass.mass * world->gravity[0];
+				body->facc[1] = saveFacc[b * 4 + 1] + body->mass.mass * world->gravity[1];
+				body->facc[2] = saveFacc[b * 4 + 2] + body->mass.mass * world->gravity[2];
+				body->facc[3] = 0;
+			}
+
+		}
+
+#ifdef RANDOM_JOINT_ORDER
+#ifdef TIMING
+		dTimerNow ("randomizing joint order");
+#endif
+		//randomize the order of the joints by looping through the array
+		//and swapping the current joint pointer with a random one before it.
+		for (j = 0; j < nj; j++)
+		{
+			joint = joints[j];
+			dxJoint::Info1 i1 = info[j];
+			dxJoint::Info2 i2 = Jinfo[j];
+			int r = rand () % (j + 1);
+			joints[j] = joints[r];
+			info[j] = info[r];
+			Jinfo[j] = Jinfo[r];
+			joints[r] = joint;
+			info[r] = i1;
+			Jinfo[r] = i2;
+		}
+#endif
+
+		//now iterate through the random ordered joint array we created.
+		for (j = 0; j < nj; j++)
+		{
+#ifdef TIMING
+			dTimerNow ("setting up joint");
+#endif
+			joint = joints[j];
+			bodyPair[0] = joint->node[0].body;
+			bodyPair[1] = joint->node[1].body;
+
+			if (bodyPair[0] && (bodyPair[0]->flags & dxBodyDisabled))
+				bodyPair[0] = 0;
+			if (bodyPair[1] && (bodyPair[1]->flags & dxBodyDisabled))
+				bodyPair[1] = 0;
+			
+			//if this joint is not connected to any enabled bodies, skip it.
+			if (!bodyPair[0] && !bodyPair[1])
+				continue;
+			
+			if (bodyPair[0])
+			{
+				GIPair[0] = globalI + bodyPair[0]->tag * 12;
+				GinvIPair[0] = globalInvI + bodyPair[0]->tag * 12;
+			}
+			if (bodyPair[1])
+			{
+				GIPair[1] = globalI + bodyPair[1]->tag * 12;
+				GinvIPair[1] = globalInvI + bodyPair[1]->tag * 12;
+			}
+
+			joints[j]->vtable->getInfo2 (joints[j], Jinfo + j);
+
+			//dInternalStepIslandFast is an exact copy of the old routine with one
+			//modification: the calculated forces are added back to the facc and tacc
+			//vectors instead of applying them to the bodies and moving them.
+			dInternalStepFast (world, bodyPair, GIPair, GinvIPair, joint, info[j], Jinfo[j], ministep);
+
+		}
+		//  }
+#	ifdef TIMING
+		dTimerNow ("moving bodies");
+#	endif
+		//Now we can simulate all the free floating bodies, and move them.
+		for (b = 0; b < nb; b++)
+		{
+			body = bodies[b];
+
+			for (i = 0; i < 4; i++)
+			{
+				body->facc[i] *= ministep;
+				body->tacc[i] *= ministep;
+			}
+
+			//apply torque
+			dMULTIPLYADD0_331 (body->avel, globalInvI + b * 12, body->tacc);
+
+			//apply force
+			for (i = 0; i < 3; i++)
+				body->lvel[i] += body->invMass * body->facc[i];
+
+			//move It!
+			moveAndRotateBody (body, ministep);
+		}
+	}
+	for (b = 0; b < nb; b++)
+		for (j = 0; j < 4; j++)
+			bodies[b]->facc[j] = bodies[b]->tacc[j] = 0;
+}
+
+
+#ifdef NO_ISLANDS
+
+// Since the iterative algorithm doesn't care about islands of bodies, this is a
+// faster algorithm that just sends it all the joints and bodies in one array.
+// It's downfall is it's inability to handle disabled bodies as well as the old one.
+static void
+processIslandsFast (dxWorld * world, dReal stepsize, int maxiterations)
+{
+	// nothing to do if no bodies
+	if (world->nb <= 0)
+		return;
+
+#	ifdef TIMING
+	dTimerStart ("creating joint and body arrays");
+#	endif
+	dxBody **bodies, *body;
+	dxJoint **joints, *joint;
+	joints = (dxJoint **) ALLOCA (world->nj * sizeof (dxJoint *));
+	bodies = (dxBody **) ALLOCA (world->nb * sizeof (dxBody *));
+
+	int nj = 0;
+	for (joint = world->firstjoint; joint; joint = (dxJoint *) joint->next)
+		joints[nj++] = joint;
+
+	int nb = 0;
+	for (body = world->firstbody; body; body = (dxBody *) body->next)
+		bodies[nb++] = body;
+
+	dInternalStepIslandFast (world, bodies, nb, joints, nj, stepsize, maxiterations);
+#	ifdef TIMING
+	dTimerEnd ();
+	dTimerReport (stdout, 1);
+#	endif
+}
+
+#else
+
+//****************************************************************************
+// island processing
+
+// this groups all joints and bodies in a world into islands. all objects
+// in an island are reachable by going through connected bodies and joints.
+// each island can be simulated separately.
+// note that joints that are not attached to anything will not be included
+// in any island, an so they do not affect the simulation.
+//
+// this function starts new island from unvisited bodies. however, it will
+// never start a new islands from a disabled body. thus islands of disabled
+// bodies will not be included in the simulation. disabled bodies are
+// re-enabled if they are found to be part of an active island.
+
+static void
+processIslandsFast (dxWorld * world, dReal stepsize, int maxiterations)
+{
+#ifdef TIMING
+	dTimerStart ("Island Setup");
+#endif
+	dxBody *b, *bb, **body;
+	dxJoint *j, **joint;
+
+	// nothing to do if no bodies
+	if (world->nb <= 0)
+		return;
+
+	// make arrays for body and joint lists (for a single island) to go into
+	body = (dxBody **) ALLOCA (world->nb * sizeof (dxBody *));
+	joint = (dxJoint **) ALLOCA (world->nj * sizeof (dxJoint *));
+	int bcount = 0;				// number of bodies in `body'
+	int jcount = 0;				// number of joints in `joint'
+	int tbcount = 0;
+	int tjcount = 0;
+	
+	// set all body/joint tags to 0
+	for (b = world->firstbody; b; b = (dxBody *) b->next)
+		b->tag = 0;
+	for (j = world->firstjoint; j; j = (dxJoint *) j->next)
+		j->tag = 0;
+
+	// allocate a stack of unvisited bodies in the island. the maximum size of
+	// the stack can be the lesser of the number of bodies or joints, because
+	// new bodies are only ever added to the stack by going through untagged
+	// joints. all the bodies in the stack must be tagged!
+	int stackalloc = (world->nj < world->nb) ? world->nj : world->nb;
+	dxBody **stack = (dxBody **) ALLOCA (stackalloc * sizeof (dxBody *));
+	int *autostack = (int *) ALLOCA (stackalloc * sizeof (int));
+
+	for (bb = world->firstbody; bb; bb = (dxBody *) bb->next)
+	{
+#ifdef TIMING
+		dTimerNow ("Island Processing");
+#endif
+		// get bb = the next enabled, untagged body, and tag it
+		if (bb->tag || (bb->flags & dxBodyDisabled))
+			continue;
+		bb->tag = 1;
+
+		// tag all bodies and joints starting from bb.
+		int stacksize = 0;
+		int autoDepth = autoEnableDepth;
+		b = bb;
+		body[0] = bb;
+		bcount = 1;
+		jcount = 0;
+		goto quickstart;
+		while (stacksize > 0)
+		{
+			b = stack[--stacksize];	// pop body off stack
+			autoDepth = autostack[stacksize];
+			body[bcount++] = b;	// put body on body list
+		  quickstart:
+
+			// traverse and tag all body's joints, add untagged connected bodies
+			// to stack
+			for (dxJointNode * n = b->firstjoint; n; n = n->next)
+			{
+				if (!n->joint->tag)
+				{
+					int thisDepth = autoEnableDepth;
+					n->joint->tag = 1;
+					joint[jcount++] = n->joint;
+					if (n->body && !n->body->tag)
+					{
+						if (n->body->flags & dxBodyDisabled)
+							thisDepth = autoDepth - 1;
+						if (thisDepth < 0)
+							continue;
+						n->body->flags &= ~dxBodyDisabled;
+						n->body->tag = 1;
+						autostack[stacksize] = thisDepth;
+						stack[stacksize++] = n->body;
+					}
+				}
+			}
+			dIASSERT (stacksize <= world->nb);
+			dIASSERT (stacksize <= world->nj);
+		}
+
+		// now do something with body and joint lists
+		dInternalStepIslandFast (world, body, bcount, joint, jcount, stepsize, maxiterations);
+
+		// what we've just done may have altered the body/joint tag values.
+		// we must make sure that these tags are nonzero.
+		// also make sure all bodies are in the enabled state.
+		int i;
+		for (i = 0; i < bcount; i++)
+		{
+			body[i]->tag = 1;
+			body[i]->flags &= ~dxBodyDisabled;
+		}
+		for (i = 0; i < jcount; i++)
+			joint[i]->tag = 1;
+		
+		tbcount += bcount;
+		tjcount += jcount;
+	}
+	
+#ifdef TIMING
+	dMessage(0, "Total joints processed: %i, bodies: %i", tjcount, tbcount);
+#endif
+
+	// if debugging, check that all objects (except for disabled bodies,
+	// unconnected joints, and joints that are connected to disabled bodies)
+	// were tagged.
+# ifndef dNODEBUG
+	for (b = world->firstbody; b; b = (dxBody *) b->next)
+	{
+		if (b->flags & dxBodyDisabled)
+		{
+			if (b->tag)
+				dDebug (0, "disabled body tagged");
+		}
+		else
+		{
+			if (!b->tag)
+				dDebug (0, "enabled body not tagged");
+		}
+	}
+	for (j = world->firstjoint; j; j = (dxJoint *) j->next)
+	{
+		if ((j->node[0].body && (j->node[0].body->flags & dxBodyDisabled) == 0) || (j->node[1].body && (j->node[1].body->flags & dxBodyDisabled) == 0))
+		{
+			if (!j->tag)
+				dDebug (0, "attached enabled joint not tagged");
+		}
+		else
+		{
+			if (j->tag)
+				dDebug (0, "unattached or disabled joint tagged");
+		}
+	}
+# endif
+
+#	ifdef TIMING
+	dTimerEnd ();
+	dTimerReport (stdout, 1);
+#	endif
+}
+
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+	void dWorldStepFast (dWorldID w, dReal stepsize, int maxiterations)
+	{
+		dUASSERT (w, "bad world argument");
+		dUASSERT (stepsize > 0, "stepsize must be > 0");
+		processIslandsFast (w, stepsize, maxiterations);
+	}
+
+#ifdef __cplusplus
+}
+#endif
diff -a -x *.exe -x *CVS* -x *.o -u -r -N ode/ode/src/stepfast.h ode.step/ode/src/stepfast.h
--- ode/ode/src/stepfast.h	Thu Jan  1 01:00:00 1970
+++ ode.step/ode/src/stepfast.h	Thu Jul 10 17:36:54 2003
@@ -0,0 +1,35 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Fast iterative solver, David Whittaker. Email: david@csworkbench.com  *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_STEP_H_
+#define _ODE_STEP_H_
+
+#include <ode/common.h>
+
+void dWorldStepFast(dxWorld *world, dReal stepsize, int iterations);
+
+void dWorldSetAutoEnableDepth(dxWorld *world, int autodepth);
+int dWorldGetAutoEnableDepth(dxWorld *world);
+
+#endif
diff -a -x *.exe -x *CVS* -x *.o -u -r -N ode/ode/test/test_crash.cpp ode.step/ode/test/test_crash.cpp
--- ode/ode/test/test_crash.cpp	Thu Jan  1 01:00:00 1970
+++ ode.step/ode/test/test_crash.cpp	Thu Jul 10 17:36:54 2003
@@ -0,0 +1,566 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/ode.h>
+#include <drawstuff/drawstuff.h>
+
+#ifdef MSVC
+#pragma warning(disable:4244 4305)  // for VC++, no precision loss complaints
+#endif
+
+// select correct drawing functions
+
+#ifdef dDOUBLE
+#define dsDrawBox dsDrawBoxD
+#define dsDrawSphere dsDrawSphereD
+#define dsDrawCylinder dsDrawCylinderD
+#define dsDrawCappedCylinder dsDrawCappedCylinderD
+#endif
+
+
+// some constants
+
+#define LENGTH 3.5		// chassis length
+#define WIDTH 2.5		// chassis width
+#define HEIGHT 1.0		// chassis height
+#define RADIUS 0.5		// wheel radius
+#define STARTZ 1.0		// starting height of chassis
+#define CMASS 1			// chassis mass
+#define WMASS 1			// wheel mass
+#define COMOFFSET -5	// center of mass offset
+#define WALLMASS 1		// wall box mass
+#define BALLMASS 1		// ball mass
+#define FMAX 25			// car engine fmax
+#define ROWS 1			// rows of cars
+#define COLS 1			// columns of cars
+#define ITERS 10		// number of iterations
+#define WBOXSIZE 1.0	// size of wall boxes
+#define WALLWIDTH 20	//width of wall
+#define WALLHEIGHT 10	//height of wall
+#define DISABLE_THRESHOLD 0.008	//maximum velocity (squared) a body can have and be disabled
+#define DISABLE_STEPS 10		//number of steps a box has to have been disable-able before it will be disabled
+
+//#define BOX
+#define CARS
+#define WALL
+//#define BALLS
+//#define BALLSTACK
+//#define ONEBALL
+//#define CENTIPEDE
+
+// dynamics and collision objects (chassis, 3 wheels, environment)
+
+static dWorldID world;
+static dSpaceID space;
+static dBodyID body[10000];
+static int bodies;
+static dJointID joint[100000];
+static int joints;
+static dJointGroupID contactgroup;
+static dGeomID ground;
+static dGeomID box[10000];
+static int boxes;
+static dGeomID sphere[10000];
+static int spheres;
+static dGeomID ground_box;
+static dGeomID wall_boxes[10000];
+static dBodyID wall_bodies[10000];
+static int wb_stepsdis[10000];
+static int wb;
+static bool doFast;
+static dBodyID b;
+static dMass m;
+
+
+// things that the user controls
+
+static dReal turn = 0, speed = 0;	// user commands
+
+
+
+// this is called by dSpaceCollide when two objects in space are
+// potentially colliding.
+
+static void nearCallback (void *data, dGeomID o1, dGeomID o2)
+{
+	int i,n;
+	
+	dBodyID b1 = dGeomGetBody(o1);
+	dBodyID b2 = dGeomGetBody(o2);
+	if (b1 && b2 && dAreConnected(b1, b2))
+		return;
+	
+	const int N = 10;
+	dContact contact[N];
+	n = dCollide (o1,o2,N,&contact[0].geom,sizeof(dContact));
+	if (n > 0) {
+		for (i=0; i<n; i++) {
+			contact[i].surface.mode = dContactSlip1 | dContactSlip2 | dContactSoftERP | dContactSoftCFM | dContactApprox1;
+			if (dGeomGetClass(o1) == dSphereClass || dGeomGetClass(o2) == dSphereClass)
+				contact[i].surface.mu = 20;
+			else
+				contact[i].surface.mu = 0.5;
+			contact[i].surface.slip1 = 0.0;
+			contact[i].surface.slip2 = 0.0;
+			contact[i].surface.soft_erp = 0.8;
+			contact[i].surface.soft_cfm = 0.01;
+			dJointID c = dJointCreateContact (world,contactgroup,contact+i);
+			dJointAttach (c,dGeomGetBody(o1),dGeomGetBody(o2));
+		}
+	}
+}
+
+
+// start simulation - set viewpoint
+
+static void start()
+{
+	static float xyz[3] = {10.0f,0.0f,2.000f};
+	static float hpr[3] = {180.0f,0.0f,0.25f};
+	dsSetViewpoint (xyz,hpr);
+	printf ("Press:\t'a' to increase speed.\n"
+			"\t'z' to decrease speed.\n"
+			"\t',' to steer left.\n"
+			"\t'.' to steer right.\n"
+			"\t' ' to reset speed and steering.\n"
+			"\t'f' to toggle fast step mode.\n"
+			"\t'+' to increase AutoEnableDepth.\n"
+			"\t'-' to decrease AutoEnableDepth.\n"
+			"\t'r' to reset simulation.\n");
+}
+
+
+void makeCar(dReal x, dReal y, int &bodyI, int &jointI, int &boxI, int &sphereI)
+{
+	int i;
+	dMass m;
+	
+	// chassis body
+	body[bodyI] = dBodyCreate (world);
+	dBodySetPosition (body[bodyI],x,y,STARTZ);
+	dMassSetBox (&m,1,LENGTH,WIDTH,HEIGHT);
+	dMassAdjust (&m,CMASS/2.0);
+	dBodySetMass (body[bodyI],&m);
+	box[boxI] = dCreateBox (space,LENGTH,WIDTH,HEIGHT);
+	dGeomSetBody (box[boxI],body[bodyI]);
+	
+	// wheel bodies
+	for (i=1; i<=4; i++) {
+		body[bodyI+i] = dBodyCreate (world);
+		dQuaternion q;
+		dQFromAxisAndAngle (q,1,0,0,M_PI*0.5);
+		dBodySetQuaternion (body[bodyI+i],q);
+		dMassSetSphere (&m,1,RADIUS);
+		dMassAdjust (&m,WMASS);
+		dBodySetMass (body[bodyI+i],&m);
+		sphere[sphereI+i-1] = dCreateSphere (space,RADIUS);
+		dGeomSetBody (sphere[sphereI+i-1],body[bodyI+i]);
+	}
+	dBodySetPosition (body[bodyI+1],x+0.4*LENGTH-0.5*RADIUS,y+WIDTH*0.5,STARTZ-HEIGHT*0.5);
+	dBodySetPosition (body[bodyI+2],x+0.4*LENGTH-0.5*RADIUS,y-WIDTH*0.5,STARTZ-HEIGHT*0.5);
+	dBodySetPosition (body[bodyI+3],x-0.4*LENGTH+0.5*RADIUS,y+WIDTH*0.5,STARTZ-HEIGHT*0.5);
+	dBodySetPosition (body[bodyI+4],x-0.4*LENGTH+0.5*RADIUS,y-WIDTH*0.5,STARTZ-HEIGHT*0.5);
+	
+	// front and back wheel hinges
+	for (i=0; i<4; i++) {
+		joint[jointI+i] = dJointCreateHinge2 (world,0);
+		dJointAttach (joint[jointI+i],body[bodyI],body[bodyI+i+1]);
+		const dReal *a = dBodyGetPosition (body[bodyI+i+1]);
+		dJointSetHinge2Anchor (joint[jointI+i],a[0],a[1],a[2]);
+		dJointSetHinge2Axis1 (joint[jointI+i],0,0,(i<2 ? 1 : -1));
+		dJointSetHinge2Axis2 (joint[jointI+i],0,1,0);
+		dJointSetHinge2Param (joint[jointI+i],dParamSuspensionERP,0.8);
+		dJointSetHinge2Param (joint[jointI+i],dParamSuspensionCFM,1e-5);
+		dJointSetHinge2Param (joint[jointI+i],dParamVel2,0);
+		dJointSetHinge2Param (joint[jointI+i],dParamFMax2,FMAX);
+	}
+	
+	//center of mass offset body. (hang another copy of the body COMOFFSET units below it by a fixed joint)
+	dBodyID b = dBodyCreate (world);
+	dBodySetPosition (b,x,y,STARTZ+COMOFFSET);
+	dMassSetBox (&m,1,LENGTH,WIDTH,HEIGHT);
+	dMassAdjust (&m,CMASS/2.0);
+	dBodySetMass (b,&m);
+	dJointID j = dJointCreateFixed(world, 0);
+	dJointAttach(j, body[bodyI], b);
+	dJointSetFixed(j);
+	//box[boxI+1] = dCreateBox(space,LENGTH,WIDTH,HEIGHT);
+	//dGeomSetBody (box[boxI+1],b);
+	
+	bodyI	+= 5;
+	jointI	+= 4;
+	boxI	+= 1;
+	sphereI	+= 4;
+}
+
+void resetSimulation()
+{
+	int i;
+	i = 0;
+	// destroy world if it exists
+	if (bodies)
+	{
+		dJointGroupDestroy (contactgroup);
+		dSpaceDestroy (space);
+		dWorldDestroy (world);
+	}
+	
+	for (i = 0; i < 1000; i++)
+		wb_stepsdis[i] = 0;
+
+	// recreate world
+	
+	world = dWorldCreate();
+	space = dHashSpaceCreate (0);
+	contactgroup = dJointGroupCreate (0);
+	dWorldSetGravity (world,0,0,-1.5);
+	dWorldSetCFM (world, 1e-5);
+	dWorldSetERP (world, 0.8);
+	ground = dCreatePlane (space,0,0,1,0);
+	
+	bodies = 0;
+	joints = 0;
+	boxes = 0;
+	spheres = 0;
+	wb = 0;
+	
+#ifdef CARS
+	for (dReal x = 0.0; x < COLS*(LENGTH+RADIUS); x += LENGTH+RADIUS)
+		for (dReal y = -((ROWS-1)*(WIDTH/2+RADIUS)); y <= ((ROWS-1)*(WIDTH/2+RADIUS)); y += WIDTH+RADIUS*2)
+			makeCar(x, y, bodies, joints, boxes, spheres);
+#endif
+#ifdef WALL
+	bool offset = false;
+	for (dReal z = WBOXSIZE/2.0; z <= WALLHEIGHT; z+=WBOXSIZE)
+	{
+		offset = !offset;
+		for (dReal y = (-WALLWIDTH+z)/2; y <= (WALLWIDTH-z)/2; y+=WBOXSIZE)
+		{
+			wall_bodies[wb] = dBodyCreate (world);
+			dBodySetPosition (wall_bodies[wb],-20,y,z);
+			dMassSetBox (&m,1,WBOXSIZE,WBOXSIZE,WBOXSIZE);
+			dMassAdjust (&m, WALLMASS);
+			dBodySetMass (wall_bodies[wb],&m);
+			wall_boxes[wb] = dCreateBox (space,WBOXSIZE,WBOXSIZE,WBOXSIZE);
+			dGeomSetBody (wall_boxes[wb],wall_bodies[wb]);
+			//dBodyDisable(wall_bodies[wb++]);
+			wb++;
+		}
+	}
+	dMessage(0,"wall boxes: %i", wb);
+#endif
+#ifdef BALLS
+	for (dReal x = -7; x <= -4; x+=1)
+		for (dReal y = -1.5; y <= 1.5; y+=1)
+			for (dReal z = 1; z <= 4; z+=1)
+			{
+				b = dBodyCreate (world);
+				dBodySetPosition (b,x*RADIUS*2,y*RADIUS*2,z*RADIUS*2);
+				dMassSetSphere (&m,1,RADIUS);
+				dMassAdjust (&m, BALLMASS);
+				dBodySetMass (b,&m);
+				sphere[spheres] = dCreateSphere (space,RADIUS);
+				dGeomSetBody (sphere[spheres++],b);
+			}
+#endif
+#ifdef ONEBALL
+	b = dBodyCreate (world);
+	dBodySetPosition (b,0,0,2);
+	dMassSetSphere (&m,1,RADIUS);
+	dMassAdjust (&m, 1);
+	dBodySetMass (b,&m);
+	sphere[spheres] = dCreateSphere (space,RADIUS);
+	dGeomSetBody (sphere[spheres++],b);
+#endif
+#ifdef BALLSTACK
+	for (dReal z = 1; z <= 6; z+=1)
+	{
+		b = dBodyCreate (world);
+		dBodySetPosition (b,0,0,z*RADIUS*2);
+		dMassSetSphere (&m,1,RADIUS);
+		dMassAdjust (&m, 0.1);
+		dBodySetMass (b,&m);
+		sphere[spheres] = dCreateSphere (space,RADIUS);
+		dGeomSetBody (sphere[spheres++],b);
+	}
+#endif
+#ifdef CENTIPEDE
+	dBodyID lastb = 0;
+	for (dReal y = 0; y < 10*LENGTH; y+=LENGTH+0.1)
+	{
+		// chassis body
+		
+		b = body[bodies] = dBodyCreate (world);
+		dBodySetPosition (body[bodies],-15,y,STARTZ);
+		dMassSetBox (&m,1,WIDTH,LENGTH,HEIGHT);
+		dMassAdjust (&m,CMASS);
+		dBodySetMass (body[bodies],&m);
+		box[boxes] = dCreateBox (space,WIDTH,LENGTH,HEIGHT);
+		dGeomSetBody (box[boxes++],body[bodies++]);
+		
+		for (dReal x = -17; x > -20; x-=RADIUS*2)
+		{
+			body[bodies] = dBodyCreate (world);
+			dBodySetPosition(body[bodies], x, y, STARTZ);
+			dMassSetSphere(&m, 1, RADIUS);
+			dMassAdjust(&m, WMASS);
+			dBodySetMass(body[bodies], &m);
+			sphere[spheres] = dCreateSphere (space, RADIUS);
+			dGeomSetBody (sphere[spheres++], body[bodies]);
+			
+			joint[joints] = dJointCreateHinge2 (world,0);
+			if (x == -17)
+				dJointAttach (joint[joints],b,body[bodies]);
+			else
+				dJointAttach (joint[joints],body[bodies-2],body[bodies]);
+			const dReal *a = dBodyGetPosition (body[bodies++]);
+			dJointSetHinge2Anchor (joint[joints],a[0],a[1],a[2]);
+			dJointSetHinge2Axis1 (joint[joints],0,0,1);
+			dJointSetHinge2Axis2 (joint[joints],1,0,0);
+			dJointSetHinge2Param (joint[joints],dParamSuspensionERP,1.0);
+			dJointSetHinge2Param (joint[joints],dParamSuspensionCFM,1e-5);
+			dJointSetHinge2Param (joint[joints],dParamLoStop,0);
+			dJointSetHinge2Param (joint[joints],dParamHiStop,0);
+			dJointSetHinge2Param (joint[joints],dParamVel2,-10.0);
+			dJointSetHinge2Param (joint[joints++],dParamFMax2,FMAX);
+
+			body[bodies] = dBodyCreate (world);
+			dBodySetPosition(body[bodies], -30 - x, y, STARTZ);
+			dMassSetSphere(&m, 1, RADIUS);
+			dMassAdjust(&m, WMASS);
+			dBodySetMass(body[bodies], &m);
+			sphere[spheres] = dCreateSphere (space, RADIUS);
+			dGeomSetBody (sphere[spheres++], body[bodies]);
+			
+			joint[joints] = dJointCreateHinge2 (world,0);
+			if (x == -17)
+				dJointAttach (joint[joints],b,body[bodies]);
+			else
+				dJointAttach (joint[joints],body[bodies-2],body[bodies]);
+			const dReal *b = dBodyGetPosition (body[bodies++]);
+			dJointSetHinge2Anchor (joint[joints],b[0],b[1],b[2]);
+			dJointSetHinge2Axis1 (joint[joints],0,0,1);
+			dJointSetHinge2Axis2 (joint[joints],1,0,0);
+			dJointSetHinge2Param (joint[joints],dParamSuspensionERP,1.0);
+			dJointSetHinge2Param (joint[joints],dParamSuspensionCFM,1e-5);
+			dJointSetHinge2Param (joint[joints],dParamLoStop,0);
+			dJointSetHinge2Param (joint[joints],dParamHiStop,0);
+			dJointSetHinge2Param (joint[joints],dParamVel2,10.0);
+			dJointSetHinge2Param (joint[joints++],dParamFMax2,FMAX);
+		}
+		if (lastb)
+		{
+			dJointID j = dJointCreateFixed(world,0);
+			dJointAttach (j, b, lastb);
+			dJointSetFixed(j);
+		}
+		lastb = b;
+	}
+#endif
+#ifdef BOX
+	body[bodies] = dBodyCreate (world);
+	dBodySetPosition (body[bodies],0,0,HEIGHT/2);
+	dMassSetBox (&m,1,LENGTH,WIDTH,HEIGHT);
+	dMassAdjust (&m, 1);
+	dBodySetMass (body[bodies],&m);
+	box[boxes] = dCreateBox (space,LENGTH,WIDTH,HEIGHT);
+	dGeomSetBody (box[boxes++],body[bodies++]);	
+#endif
+}
+
+// called when a key pressed
+
+static void command (int cmd)
+{
+	switch (cmd) {
+	case 'a': case 'A':
+		speed += 0.3;
+	break;
+	case 'z': case 'Z':
+		speed -= 0.3;
+	break;
+	case ',':
+		turn += 0.1;
+		if (turn > 0.3)
+			turn = 0.3;
+	break;
+	case '.':
+		turn -= 0.1;
+		if (turn < -0.3)
+			turn = -0.3;
+	break;
+	case ' ':
+		speed = 0;
+		turn = 0;
+	break;
+	case 'f': case 'F':
+		doFast = !doFast;
+	break;
+	case '+':
+		dWorldSetAutoEnableDepth(world, dWorldGetAutoEnableDepth(world) + 1);
+	break;
+	case '-':
+		dWorldSetAutoEnableDepth(world, dWorldGetAutoEnableDepth(world) - 1);
+	break;
+	case 'r': case 'R':
+		resetSimulation();
+	break;
+	}
+}
+
+
+// simulation loop
+
+static void simLoop (int pause)
+{
+	int i, j;
+		
+	dsSetTexture (DS_WOOD);
+
+	if (!pause) {
+#ifdef BOX
+		dBodyAddForce(body[bodies-1],lspeed,0,0);
+#endif
+		for (j = 0; j < joints; j++)
+		{
+			dReal curturn = dJointGetHinge2Angle1 (joint[j]);
+			//dMessage (0,"curturn %e, turn %e, vel %e", curturn, turn, (turn-curturn)*1.0);
+			dJointSetHinge2Param(joint[j],dParamVel,(turn-curturn)*1.0);
+			dJointSetHinge2Param(joint[j],dParamFMax,dInfinity);
+			dJointSetHinge2Param(joint[j],dParamVel2,speed);
+			dJointSetHinge2Param(joint[j],dParamFMax2,FMAX);
+			dBodyEnable(dJointGetBody(joint[j],0));
+			dBodyEnable(dJointGetBody(joint[j],1));
+		}		
+		if (doFast)
+		{
+			dSpaceCollide (space,0,&nearCallback);
+			dWorldStepFast (world,0.05,ITERS);
+			dJointGroupEmpty (contactgroup);
+		}
+		else
+		{
+			dSpaceCollide (space,0,&nearCallback);
+			dWorldStep (world,0.05);
+			dJointGroupEmpty (contactgroup);
+		}
+		
+		for (i = 0; i < wb; i++)
+		{
+			b = dGeomGetBody(wall_boxes[i]);
+			if (dBodyIsEnabled(b)) 
+			{
+				bool disable = true;
+				const dReal *lvel = dBodyGetLinearVel(b);
+				dReal lspeed = lvel[0]*lvel[0]+lvel[1]*lvel[1]+lvel[2]*lvel[2];
+				if (lspeed > DISABLE_THRESHOLD)
+					disable = false;
+				const dReal *avel = dBodyGetAngularVel(b);
+				dReal aspeed = avel[0]*avel[0]+avel[1]*avel[1]+avel[2]*avel[2];
+				if (aspeed > DISABLE_THRESHOLD)
+					disable = false;
+				
+				if (disable)
+					wb_stepsdis[i]++;
+				else
+					wb_stepsdis[i] = 0;
+				
+				if (wb_stepsdis[i] > DISABLE_STEPS)
+				{
+					dBodyDisable(b);
+					dsSetColor(0.5,0.5,1);
+				}
+				else
+					dsSetColor(1,1,1);
+
+			}
+			else
+				dsSetColor(0.4,0.4,0.4);
+			dVector3 ss;
+			dGeomBoxGetLengths (wall_boxes[i], ss);
+			dsDrawBox(dGeomGetPosition(wall_boxes[i]), dGeomGetRotation(wall_boxes[i]), ss);
+		}
+	}
+	else
+	{
+		for (i = 0; i < wb; i++)
+		{
+			b = dGeomGetBody(wall_boxes[i]);
+			if (dBodyIsEnabled(b))
+				dsSetColor(1,1,1);
+			else
+				dsSetColor(0.4,0.4,0.4);
+			dVector3 ss;
+			dGeomBoxGetLengths (wall_boxes[i], ss);
+			dsDrawBox(dGeomGetPosition(wall_boxes[i]), dGeomGetRotation(wall_boxes[i]), ss);
+		}
+	}
+	
+	if (doFast)
+		dsSetColor(0,1,0);
+	else
+		dsSetColor(1,0,0);
+	dReal pos[3] = {-10,0,10};
+	dReal R[12] = {1,0,0,0,0,1,0,0,0,0,1,0};
+	dsDrawSphere(pos, R, 2);
+	
+	dsSetColor (0,1,1);
+	dReal sides[3] = {LENGTH,WIDTH,HEIGHT};
+	for (i = 0; i < boxes; i++)
+		dsDrawBox (dGeomGetPosition(box[i]),dGeomGetRotation(box[i]),sides);
+	dsSetColor (1,1,1);
+	for (i=0; i< spheres; i++) dsDrawSphere (dGeomGetPosition(sphere[i]),
+				   dGeomGetRotation(sphere[i]),RADIUS);
+	
+	
+}
+
+int main (int argc, char **argv)
+{
+	doFast = true;
+	
+	// setup pointers to drawstuff callback functions
+	dsFunctions fn;
+	fn.version = DS_VERSION;
+	fn.start = &start;
+	fn.step = &simLoop;
+	fn.command = &command;
+	fn.stop = 0;
+	fn.path_to_textures = "../../drawstuff/textures";
+	
+	bodies = 0;
+	joints = 0;
+	boxes = 0;
+	spheres = 0;
+	
+	resetSimulation();
+	
+	// run simulation
+	dsSimulationLoop (argc,argv,352,288,&fn);
+	
+	dJointGroupDestroy (contactgroup);
+	dSpaceDestroy (space);
+	dWorldDestroy (world);
+
+	return 0;
+}
