Parallel Programming in C for the Transputer
© D. Thiébaut, 1995



/* =======================================================================
   nbody.c

   DESCRIPTION:

     This program computes the interaction that N bodies exert on
     each other in 2-D space.  Each body is defined by its (x,y) position
     and its mass.

     The program simulates the interaction over some number of steps
     provided by the user (NoSteps).

   SYNTAX:
        ld-net nbody  NumberOfBodies NumberOfTransputers NumberOfSteps

   VIRTUAL network:

         +-----+   +-----+   +-----+   +-----+             +-----+
    +----|  1  +---|  2  +---|  3  +---|  4  +---  . . . --|  P  +----+
    |    +-----+   +-----+   +-----+   +-----+             +-----+    |
    |                                                                 |
    +-----------------------------------------------------------------+

     Example with 6 transputers
     Node[Vchan] ---> Node[Vchan]
     --------------------------
          1[6] --->2[12]
          2[7] --->3[13]
          3[8] --->4[14]
          4[9] --->5[15]
          5[10]--->6[16]
          6[11]--->1[17]

 ====================================================================== */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "conc.h"

#define G           6.670E-11           /* gravitational constant       */
#define sqr(x)      ((x)*(x))
#define swap(x,y,z) z=x; x=y; y=z
#define BODYSIZE    ((int) sizeof(BODY))
#define FORCESIZE   ((int) sizeof(FORCE))

typedef struct body BODY;
struct body
{
    float x;           /* coordinates */
    float y;
    float mass;
};

typedef struct force FORCE;
struct force
{
    float x;           /* x direction of force */
    float y;           /* y direction of force */
};

/* ============================= GLOBALS ============================== */
BODY  *MyBodies;    /* array of all bodies maintained by this node      */
FORCE *MyForces;    /* array of forces acting on them                   */
BODY  *OtherBodies1;/* array of bodies passed from node to node         */
BODY  *OtherBodies2;/* needs a second one when we receive the neighbor's*/
                    /* copy before we can send out our own.             */
BODY  *TempBodies;  /* used for swapping OtherBodies arrays             */
FORCE *OtherForces; /* array of forces for those bodies                 */
int   P;            /* number of transputers in network                 */
int   N;            /* total number of bodies in system                 */
int   MyN;          /* number of bodies maintained by this node         */
int   NoSteps;      /* total number of iterations required              */
Channel *left;      /* virtual channel with left neighbor               */
Channel *right;     /* virtual channel with right neighbor              */

/* =========================== PROTOTYPES ============================= */
void Initialize(void);
void ComputeForces(void);
void DoComputation(void);

/* -------------------------------------------------------------------- */
/*                                 MAIN                                 */
/* -------------------------------------------------------------------- */
void main(int argc, char *argv[])
{
    if (argc<4)
    {
        if (_node_number==1)
            fprintf(stderr,"%s%s",
                            "Syntax: ld-net nbody #bodies ",
                               "#Nodes #simulation_steps\n");
        exit(1);
    }

    _heapend = (void *)0x800FFFFC;  /* maximize heap size */

    N       = atoi(argv[1]);
    P       = atoi(argv[2]);
    NoSteps = atoi(argv[3]);

    Initialize();
    DoComputation();
    exit(0);
}

/* -------------------------------------------------------------------- */
/* INITIALIZE                                                           */
/* Allocates arrays for private and temporary slices                    */
/* Creates left and right virtual channels                              */
/* -------------------------------------------------------------------- */
void Initialize()
{
    int i;

    MyN            = N/P;

    /*--- allocate storage ---*/
    MyBodies     = (BODY *)  malloc((size_t) N*BODYSIZE);
    MyForces     = (FORCE *) malloc((size_t) N*FORCESIZE);
    OtherBodies1 = (BODY *)  malloc((size_t) N*BODYSIZE);
    OtherBodies2 = (BODY *)  malloc((size_t) N*BODYSIZE);
    OtherForces  = (FORCE *) malloc((size_t) N*FORCESIZE);

    if ((!MyBodies) || (!MyForces) || (!OtherBodies1)
          || (!OtherBodies2) || (!OtherForces))
    {
        if (_node_number==1)
            fprintf(stderr,"malloc error\n");
        exit(1);
    }

    /*--- initialize bodies maintained by this node ---*/
    for (i=0; i<MyN; i++)
    {
        MyBodies[i].x    = rand()*1.0;
        MyBodies[i].y    = rand()*1.0;
        MyBodies[i].mass = rand()*1.0;
    }

    /*--- initialize virtual channels ---*/
    right = VChan(5+_node_number);
    left        = (_node_number==1)? VChan(5+2*P)
                                : VChan(4+P+_node_number);
}

/* -------------------------------------------------------------------- */
/* CLEARFORCES                                                          */
/* -------------------------------------------------------------------- */
void ClearForces(void)
{
    int i;

    for (i=0; i<MyN; I++)
    {
        MyForces[i].x = 0;
        MyForces[i].y = 0;
    }
}

/* -------------------------------------------------------------------- */
/* DOCOMPUTATION                                                        */
/* Takes care of the simulation steps.  Each step start with the nodes  */
/* sending MyBodies to the right neighbor.  Then the cycle of (Compute, */
/* Shift) operations start.  This program does not do anything with     */
/* the resulting forces, but discards them.                             */
/* -------------------------------------------------------------------- */
void DoComputation()
{
    int i, step;

    printf("%d started\n",_node_number);

    /*--- do all simulation steps---*/
    for (step = 0; step < NoSteps; step++)
    {
        /*--- clear the forces ---*/
        ClearForces();

        /*--- First send our bodies to right neighbor ---*/
        if (_node_number%2) /* odd Ids send first*/
        {
            VChanOut(right,MyBodies,MyN*BODYSIZE);
            VChanIn(left,OtherBodies1,MyN*BODYSIZE);
        }
        else                /* even Ids receive first */
        {
            VChanIn(left,OtherBodies1,MyN*BODYSIZE);
            VChanOut(right,MyBodies, MyN*BODYSIZE);
        }

        /*--- Then we compute and shift the bodies ---*/
        /*--- so that we go once around the ring.  ---*/
        for (i=0; i<P-2; i++)
        {
            ComputeForces();
            if (_node_number%2) /* Odd Ids send first */
            {
                VChanOut(right,OtherBodies1,MyN*BODYSIZE);
                VChanIn(left,  OtherBodies1,MyN*BODYSIZE);
            }
            else                /* Even Ids receive first */
            {
                VChanIn(left,OtherBodies2, MyN*BODYSIZE);
                VChanOut(right,OtherBodies1, MyN*BODYSIZE);
                /*---swap OtherBodies arrays so that OtherBodies1 ---*/
                /*---contains the new slice                       ---*/
                swap(OtherBodies1, OtherBodies2, TempBodies);
            }
        }
    }
}

/* -------------------------------------------------------------------- */
/* COMPUTEFORCES                                                        */
/* Computes the gravitational force between private slice               */
/* (MyBodies) and temporary slice (OtherBodies1).                       */
/* -------------------------------------------------------------------- */
void ComputeForces()
{
    int   i, j;
    float distance2;                   /* Square of distance            */
    float distance;                    /* distance between two bodies   */
    float distancex;                   /* x component of distance       */
    float distancey;                   /* y component of distance       */
    float ReciprocalForce;             /* force between two bodies      */

    for (i=0; i<MyN; i++)
    {
        for (j=0; j<MyN; j++)
        {
            if (i==j) continue;
            distancex = MyBodies[i].x-OtherBodies1[j].x;
            distancey = MyBodies[i].y-OtherBodies1[j].y;
            distance2 = sqr(distancex) + sqr(distancey);
            distance  = sqrt((double) distance2);
            ReciprocalForce= G * MyBodies[i].mass * OtherBodies1[j].mass
                                 / distance2;
            MyForces[i].x += ReciprocalForce * distancex/distance;
            MyForces[i].y += ReciprocalForce * distancey/distance;
        }
    }
}

Listing 9-1: N-body program.

Note that our original description of the problem assumes that the shift of the temporary slice is done synchronously by all the transputers, and that the incoming slice replaces the outgoing one without any information getting lost. This is, of course, not possible with the transputers, since the communication is asynchronous and unbuffered. Therefore we must have a third buffer in the transputers with an even _node_number, so that the slice coming from the left can be saved before the outgoing slice leaves for the right neighbor. In the code above, we solved this problem by having two buffers, OtherBodies1 and OtherBodies2, and one pointer, OtherBodies, which we swap from one buffer to the next between shift-compute steps. The result is that OtherBodies1 always contains the newly received slice.

The Barnes-Hut N-body model

We should mention that we have concentrated here on a naive decomposition of the N-body problem so that we could focus our attention on the decomposition of the domain. More sophisticated domain decompositions exist for this problem. The Barnes-Hut model [BARN89] presents an elegant example of the combination of an efficient domain decomposition with ingenious model simplifications. In the Barnes-Hut model, the domain, which we assume is two-dimensional, is divided into squares, selected as large as possible, such that the whole domain is completely covered by squares, no two square overlapping each other, and each square containing at most 1 body. Figure 9-6a shows an example of such a decomposition, where we started by decomposing the square domain into four quarters, and kept subdividing the quarters into quarters as long as we had more than one body per square. The advantage of this decomposition is that a natural hierarchy of the bodies can be created with a quad-tree (we divide each block into four sub-blocks), where each leaf is a square containing one or zero bodies, and internal nodes represent groups of bodies stored in larger squares (Figure 9-6b). This tree can now replace the original domain, and be cut into subtrees distributed among the processors. Now that the processors contain a hierarchical grouping of trees, a simplifying assumption can make the computation even more efficient [SING93]. The simplification (already observed by Newton in 1687!) is that if a cluster of bodies is far enough from another body, their combined action on that body can be approximated by that of a single body located at the center of gravity of the cluster, with mass equal to the mass of the cluster. This means that if two processors contain clusters of bodies that are far away from each other[1] they only need to exchange the location of their center of gravity and their total mass. If the mapping of the subtrees to the processors is done in such a way that geographically close clusters are stored in near-neighbor processors, this means that the exchange of information between processors might also have a hierarchical level, with the amount of information decreasing with the distance separating the processors.


Figure 9- 6: Example of a domain decomposition for the
N-body problem proposed by Barnes and Hut. (a) The domain is decomposed in non-overlapping squares, such that each square contains at most one body. (b) the quad-tree representing the domain as a hierarchy of clusters.




[Previous] [HOME] [NEXT]