Sunday, February 7, 2010

comm_time.c, version 2: Process 0 starts off the ring pass. Times communication around a ring of processes. Guaranteed to have bugs.

/* comm_time.c
* Version 2: Process 0 starts off the ring pass.
* Time communication around a ring of processes.
* Guaranteed to have bugs.
*
* Input: None (see notes).
*
* Output: Average, minimum, and maximum time for messages
* of varying sizes to be forwarded around a ring of
* processes.
*
* Algorithm:
* 1. Allocate and initialize storage for messages
* and communication times
* 2. Compute ranks of neighbors in ring.
* 3. Foreach message size
* 3b. Foreach test
* 3a. Start clock
* 3c. Send message around loop
* 3d. Add elapsed time to running sum
* 3e. Update max/min elapsed time
* 4. Print times.
*
* Functions:
* Initialize: Allocate and initialize arrays
* Print_results: Send results to I/O process
* and print.
*
* Notes:
* 1. Due to difficulties some MPI implementations
* have with input, the number of tests, the max
* message size, the min message size, and the size
* increment are hardwired.
* 2. We assume that the size increment evenly divides
* the difference max_size - min_size
*
* See Chap 9, pp. 192 & ff and pp. 202 & ff in PPMPI.
*/
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#include "cio.h"

void Initialize(int max_size, int min_size, int size_incr,
int my_rank, float** x_ptr, double** times_ptr,
double** max_times_ptr, double** min_times_ptr,
int* order_ptr);

void Print_results(MPI_Comm io_comm, int my_rank,
int min_size, int max_size, int size_incr,
int time_array_order, int test_count,
double* times, double* max_times, double* min_times);

main(int argc, char* argv[]) {
int test_count = 2; /* Number of tests */
int max_size = 1000; /* Max msg. length */
int min_size = 1000; /* Min msg. length */
int size_incr = 1000; /* Increment for */
/* msg. sizes */
float* x; /* Message buffer */
double* times; /* Elapsed times */
double* max_times; /* Max times */
double* min_times; /* Min times */
int time_array_order; /* Size of timing */
/* arrays. */
double start; /* Start time */
double elapsed; /* Elapsed time */
int i, test, size; /* Loop variables */
int p, my_rank, source, dest;
MPI_Comm io_comm;
MPI_Status status;

MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_dup(MPI_COMM_WORLD, &io_comm);
Cache_io_rank(MPI_COMM_WORLD, io_comm);

Cprintf(io_comm,"","Before Initialize, p = %d, my_rank = %d",
p, my_rank);
Initialize(max_size, min_size, size_incr, my_rank,
&x, &times, &max_times, &min_times,
&time_array_order);

source = (my_rank - 1) % p;
dest = (my_rank + 1) % p;

/* For each message size, find average circuit time */
/* Loop var size = message size */
/* Loop var i = index into arrays for timings */
for (size = min_size, i = 0; size <= max_size;
size = size + size_incr, i++) {
Cprintf(io_comm,"",
"Before if, my_rank = %d, source = %d, dest = %d",
my_rank, source, dest);
if (my_rank == 0) {
times[i] =0.0;
max_times[i] = 0.0;
min_times[i] = 1000000.0;
for (test = 0; test < test_count; test++) {
start = MPI_Wtime();
MPI_Send(x, size, MPI_FLOAT, dest, 0,
MPI_COMM_WORLD);
MPI_Recv(x, size, MPI_FLOAT, source, 0,
MPI_COMM_WORLD, &status);
elapsed = MPI_Wtime() - start;
times[i] = times[i] + elapsed;
if (elapsed > max_times[i])
max_times[i] = elapsed;
if (elapsed < min_times[i])
min_times[i] = elapsed;
}
} else { /* my_rank != 0 */
for (test = 0; test < test_count; test++) {
MPI_Recv(x, size, MPI_FLOAT, source, 0,
MPI_COMM_WORLD, &status);
MPI_Send(x, size, MPI_FLOAT, dest, 0,
MPI_COMM_WORLD);
}
}
} /* for size . . . */

Print_results(io_comm, my_rank, min_size, max_size,
size_incr, time_array_order, test_count, times,
max_times, min_times);

MPI_Finalize();
} /* main */

/********************************************************/
void Initialize(int max_size, int min_size, int size_incr,
int my_rank, float** x_ptr, double** times_ptr,
double** max_times_ptr, double** min_times_ptr,
int* order_ptr) {
int i;

*x_ptr = (float *) malloc(max_size*sizeof(float));

*order_ptr = (max_size - min_size)/size_incr;
*times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));
*max_times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));
*min_times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));

/* Initialize buffer -- why this? */
for (i = 0; i < max_size; i++)
(*x_ptr)[i] = (float) my_rank;
} /* Initialize */


/********************************************************/
/* Send results from process 0 in MPI_COMM_WORLD to */
/* I/O process in io_comm, which prints the results. */
void Print_results(MPI_Comm io_comm, int my_rank,
int min_size, int max_size, int size_incr,
int time_array_order, int test_count, double* times,
double* max_times, double* min_times) {
int i;
int size;
MPI_Status status;
int io_process;
int io_rank;

Get_io_rank(io_comm, &io_process);
MPI_Comm_rank(io_comm, &io_rank);

if (my_rank == 0) {
MPI_Send(times, time_array_order, MPI_DOUBLE,
io_rank, 0, io_comm);
MPI_Send(max_times, time_array_order, MPI_DOUBLE,
io_process, 0, io_comm);
MPI_Send(min_times, time_array_order, MPI_DOUBLE,
io_process, 0, io_comm);
}
if (io_rank == io_process) {
MPI_Recv(times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);
MPI_Recv(max_times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);
MPI_Recv(min_times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);

printf("Message size (floats): ");
for (size = min_size;
size <= max_size; size += size_incr)
printf("%10d ", size);
printf("\n");
printf("Avg circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*times[i]/test_count);
printf("\n");
printf("Max circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*max_times[i]);
printf("\n");
printf("Min circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*min_times[i]);
printf("\n\n");
fflush(stdout);
}
} /* Print_results */

comm_time.c, version 2a: Process 0 starts off the ring pass. Includes an extra printf in Print_results. Times communication around a ring of processes.

/* comm_time.c
* Version 2a: Process 0 starts off the ring pass.
* Includes extra printf in Print_results.
* Time communication around a ring of processes.
* Guaranteed to have bugs.
*
* Input: None (see notes).
*
* Output: Average, minimum, and maximum time for messages
* of varying sizes to be forwarded around a ring of
* processes.
*
* Algorithm:
* 1. Allocate and initialize storage for messages
* and communication times
* 2. Compute ranks of neighbors in ring.
* 3. Foreach message size
* 3b. Foreach test
* 3a. Start clock
* 3c. Send message around loop
* 3d. Add elapsed time to running sum
* 3e. Update max/min elapsed time
* 4. Print times.
*
* Functions:
* Initialize: Allocate and initialize arrays
* Print_results: Send results to I/O process
* and print.
*
* Notes:
* 1. Due to difficulties some MPI implementations
* have with input, the number of tests, the max
* message size, the min message size, and the size
* increment are hardwired.
* 2. We assume that the size increment evenly divides
* the difference max_size - min_size
*
* See Chap 9, pp. 192 & ff and pp. 203 & ff in PPMPI.
*/
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#include "cio.h"

void Initialize(int max_size, int min_size, int size_incr,
int my_rank, float** x_ptr, double** times_ptr,
double** max_times_ptr, double** min_times_ptr,
int* order_ptr);

void Print_results(MPI_Comm io_comm, int my_rank,
int min_size, int max_size, int size_incr,
int time_array_order, int test_count,
double* times, double* max_times, double* min_times);

main(int argc, char* argv[]) {
int test_count = 2; /* Number of tests */
int max_size = 1000; /* Max msg. length */
int min_size = 1000; /* Min msg. length */
int size_incr = 1000; /* Increment for */
/* msg. sizes */
float* x; /* Message buffer */
double* times; /* Elapsed times */
double* max_times; /* Max times */
double* min_times; /* Min times */
int time_array_order; /* Size of timing */
/* arrays. */
double start; /* Start time */
double elapsed; /* Elapsed time */
int i, test, size; /* Loop variables */
int p, my_rank, source, dest;
MPI_Comm io_comm;
MPI_Status status;

MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_dup(MPI_COMM_WORLD, &io_comm);
Cache_io_rank(MPI_COMM_WORLD, io_comm);

Cprintf(io_comm,"","Before Initialize, p = %d, my_rank = %d",
p, my_rank);
Initialize(max_size, min_size, size_incr, my_rank,
&x, &times, &max_times, &min_times,
&time_array_order);

source = (my_rank - 1) % p;
dest = (my_rank + 1) % p;

/* For each message size, find average circuit time */
/* Loop var size = message size */
/* Loop var i = index into arrays for timings */
for (size = min_size, i = 0; size <= max_size;
size = size + size_incr, i++) {
Cprintf(io_comm,"",
"Before if, my_rank = %d, source = %d, dest = %d",
my_rank, source, dest);
if (my_rank == 0) {
times[i] =0.0;
max_times[i] = 0.0;
min_times[i] = 1000000.0;
for (test = 0; test < test_count; test++) {
start = MPI_Wtime();
MPI_Send(x, size, MPI_FLOAT, dest, 0,
MPI_COMM_WORLD);
MPI_Recv(x, size, MPI_FLOAT, source, 0,
MPI_COMM_WORLD, &status);
elapsed = MPI_Wtime() - start;
times[i] = times[i] + elapsed;
if (elapsed > max_times[i])
max_times[i] = elapsed;
if (elapsed < min_times[i])
min_times[i] = elapsed;
}
} else { /* my_rank != 0 */
for (test = 0; test < test_count; test++) {
MPI_Recv(x, size, MPI_FLOAT, source, 0,
MPI_COMM_WORLD, &status);
MPI_Send(x, size, MPI_FLOAT, dest, 0,
MPI_COMM_WORLD);
}
}
} /* for size . . . */

Print_results(io_comm, my_rank, min_size, max_size,
size_incr, time_array_order, test_count, times,
max_times, min_times);

MPI_Finalize();
} /* main */

/********************************************************/
void Initialize(int max_size, int min_size, int size_incr,
int my_rank, float** x_ptr, double** times_ptr,
double** max_times_ptr, double** min_times_ptr,
int* order_ptr) {
int i;

*x_ptr = (float *) malloc(max_size*sizeof(float));

*order_ptr = (max_size - min_size)/size_incr;
*times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));
*max_times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));
*min_times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));

/* Initialize buffer -- why this? */
for (i = 0; i < max_size; i++)
(*x_ptr)[i] = (float) my_rank;
} /* Initialize */


/********************************************************/
/* Send results from process 0 in MPI_COMM_WORLD to */
/* I/O process in io_comm, which prints the results. */
void Print_results(MPI_Comm io_comm, int my_rank,
int min_size, int max_size, int size_incr,
int time_array_order, int test_count, double* times,
double* max_times, double* min_times) {
int i;
int size;
MPI_Status status;
int io_process;
int io_rank;

Get_io_rank(io_comm, &io_process);
MPI_Comm_rank(io_comm, &io_rank);

if (my_rank == 0) {
MPI_Send(times, time_array_order, MPI_DOUBLE,
io_rank, 0, io_comm);
MPI_Send(max_times, time_array_order, MPI_DOUBLE,
io_process, 0, io_comm);
MPI_Send(min_times, time_array_order, MPI_DOUBLE,
io_process, 0, io_comm);
}
if (io_rank == io_process) {
MPI_Recv(times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);
MPI_Recv(max_times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);
MPI_Recv(min_times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);

printf("Message size (floats): ");
for (size = min_size;
size <= max_size; size += size_incr)
printf("%10d ", size);
printf("\n");

printf("io_process = %d, time_array_order = %d\n",
io_process, time_array_order);
fflush(stdout);

printf("Avg circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*times[i]/test_count);
printf("\n");
printf("Max circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*max_times[i]);
printf("\n");
printf("Min circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*min_times[i]);
printf("\n\n");
fflush(stdout);
}
} /* Print_results */

comm_time.c, version 3: Process 0 starts off the ring pass. Fixes the erroneous calculation of time_array_order in Initialize.

/* comm_time.c
* Version 3: Process 0 starts off the ring pass.
* Fixed erroneous calc of time_array_order in Initialize.
* Time communication around a ring of processes.
* Guaranteed to have bugs.
*
* Input: None (see notes).
*
* Output: Average, minimum, and maximum time for messages
* of varying sizes to be forwarded around a ring of
* processes.
*
* Algorithm:
* 1. Allocate and initialize storage for messages
* and communication times
* 2. Compute ranks of neighbors in ring.
* 3. Foreach message size
* 3b. Foreach test
* 3a. Start clock
* 3c. Send message around loop
* 3d. Add elapsed time to running sum
* 3e. Update max/min elapsed time
* 4. Print times.
*
* Functions:
* Initialize: Allocate and initialize arrays
* Print_results: Send results to I/O process
* and print.
*
* Notes:
* 1. Due to difficulties some MPI implementations
* have with input, the number of tests, the max
* message size, the min message size, and the size
* increment are hardwired.
* 2. We assume that the size increment evenly divides
* the difference max_size - min_size
*
* See Chap 9, pp. 192 & ff and p. 205 in PPMPI.
*/
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#include "cio.h"

void Initialize(int max_size, int min_size, int size_incr,
int my_rank, float** x_ptr, double** times_ptr,
double** max_times_ptr, double** min_times_ptr,
int* order_ptr);

void Print_results(MPI_Comm io_comm, int my_rank,
int min_size, int max_size, int size_incr,
int time_array_order, int test_count,
double* times, double* max_times, double* min_times);

main(int argc, char* argv[]) {
int test_count = 2; /* Number of tests */
int max_size = 1000; /* Max msg. length */
int min_size = 1000; /* Min msg. length */
int size_incr = 1000; /* Increment for */
/* msg. sizes */
float* x; /* Message buffer */
double* times; /* Elapsed times */
double* max_times; /* Max times */
double* min_times; /* Min times */
int time_array_order; /* Size of timing */
/* arrays. */
double start; /* Start time */
double elapsed; /* Elapsed time */
int i, test, size; /* Loop variables */
int p, my_rank, source, dest;
MPI_Comm io_comm;
MPI_Status status;

MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_dup(MPI_COMM_WORLD, &io_comm);
Cache_io_rank(MPI_COMM_WORLD, io_comm);

Cprintf(io_comm,"","Before Initialize, p = %d, my_rank = %d",
p, my_rank);
Initialize(max_size, min_size, size_incr, my_rank,
&x, &times, &max_times, &min_times,
&time_array_order);

source = (my_rank - 1) % p;
dest = (my_rank + 1) % p;

/* For each message size, find average circuit time */
/* Loop var size = message size */
/* Loop var i = index into arrays for timings */
for (size = min_size, i = 0; size <= max_size;
size = size + size_incr, i++) {
Cprintf(io_comm,"",
"Before if, my_rank = %d, source = %d, dest = %d",
my_rank, source, dest);
if (my_rank == 0) {
times[i] =0.0;
max_times[i] = 0.0;
min_times[i] = 1000000.0;
for (test = 0; test < test_count; test++) {
start = MPI_Wtime();
MPI_Send(x, size, MPI_FLOAT, dest, 0,
MPI_COMM_WORLD);
MPI_Recv(x, size, MPI_FLOAT, source, 0,
MPI_COMM_WORLD, &status);
elapsed = MPI_Wtime() - start;
times[i] = times[i] + elapsed;
if (elapsed > max_times[i])
max_times[i] = elapsed;
if (elapsed < min_times[i])
min_times[i] = elapsed;
}
} else { /* my_rank != 0 */
for (test = 0; test < test_count; test++) {
MPI_Recv(x, size, MPI_FLOAT, source, 0,
MPI_COMM_WORLD, &status);
MPI_Send(x, size, MPI_FLOAT, dest, 0,
MPI_COMM_WORLD);
}
}
} /* for size . . . */

Print_results(io_comm, my_rank, min_size, max_size,
size_incr, time_array_order, test_count, times,
max_times, min_times);

MPI_Finalize();
} /* main */

/********************************************************/
void Initialize(int max_size, int min_size, int size_incr,
int my_rank, float** x_ptr, double** times_ptr,
double** max_times_ptr, double** min_times_ptr,
int* order_ptr) {
int i;

*x_ptr = (float *) malloc(max_size*sizeof(float));

*order_ptr = (max_size - min_size)/size_incr + 1;
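/* The main loop visits (max_size - min_size)/size_incr + 1 different
 * message sizes (both endpoints included). The earlier versions omitted
 * the "+ 1", so with min_size == max_size the timing arrays were
 * allocated with zero elements and times[0] was written out of bounds. */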
*times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));
*max_times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));
*min_times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));

/* Initialize buffer -- why this? */
for (i = 0; i < max_size; i++)
(*x_ptr)[i] = (float) my_rank;
} /* Initialize */


/********************************************************/
/* Send results from process 0 in MPI_COMM_WORLD to */
/* I/O process in io_comm, which prints the results. */
void Print_results(MPI_Comm io_comm, int my_rank,
int min_size, int max_size, int size_incr,
int time_array_order, int test_count, double* times,
double* max_times, double* min_times) {
int i;
int size;
MPI_Status status;
int io_process;
int io_rank;

Get_io_rank(io_comm, &io_process);
MPI_Comm_rank(io_comm, &io_rank);

if (my_rank == 0) {
MPI_Send(times, time_array_order, MPI_DOUBLE,
io_rank, 0, io_comm);
MPI_Send(max_times, time_array_order, MPI_DOUBLE,
io_process, 0, io_comm);
MPI_Send(min_times, time_array_order, MPI_DOUBLE,
io_process, 0, io_comm);
}
if (io_rank == io_process) {
MPI_Recv(times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);
MPI_Recv(max_times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);
MPI_Recv(min_times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);

printf("Message size (floats): ");
for (size = min_size;
size <= max_size; size += size_incr)
printf("%10d ", size);
printf("\n");

printf("io_process = %d, time_array_order = %d\n",
io_process, time_array_order);
fflush(stdout);

printf("Avg circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*times[i]/test_count);
printf("\n");
printf("Max circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*max_times[i]);
printf("\n");
printf("Min circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*min_times[i]);
printf("\n\n");
fflush(stdout);
}
} /* Print_results */

comm_time.c, version 3a: Process 0 starts off the ring pass. Fixes the erroneous calculation of time_array_order in Initialize and changes the number of message sizes to 2.

/* comm_time.c
* Version 3a: Process 0 starts off the ring pass.
* Fixed erroneous calc of time_array_order in Initialize.
* Changed number of message sizes to 2.
* Time communication around a ring of processes.
* Guaranteed to have bugs.
*
* Input: None (see notes).
*
* Output: Average, minimum, and maximum time for messages
* of varying sizes to be forwarded around a ring of
* processes.
*
* Algorithm:
* 1. Allocate and initialize storage for messages
* and communication times
* 2. Compute ranks of neighbors in ring.
* 3. Foreach message size
* 3b. Foreach test
* 3a. Start clock
* 3c. Send message around loop
* 3d. Add elapsed time to running sum
* 3e. Update max/min elapsed time
* 4. Print times.
*
* Functions:
* Initialize: Allocate and initialize arrays
* Print_results: Send results to I/O process
* and print.
*
* Notes:
* 1. Due to difficulties some MPI implementations
* have with input, the number of tests, the max
* message size, the min message size, and the size
* increment are hardwired.
* 2. We assume that the size increment evenly divides
* the difference max_size - min_size
*
* See Chap 9, pp. 192 & ff and p. 206 in PPMPI.
*/
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#include "cio.h"

void Initialize(int max_size, int min_size, int size_incr,
int my_rank, float** x_ptr, double** times_ptr,
double** max_times_ptr, double** min_times_ptr,
int* order_ptr);

void Print_results(MPI_Comm io_comm, int my_rank,
int min_size, int max_size, int size_incr,
int time_array_order, int test_count,
double* times, double* max_times, double* min_times);

main(int argc, char* argv[]) {
int test_count = 2; /* Number of tests */
int max_size = 1000; /* Max msg. length */
int min_size = 0; /* Min msg. length */
int size_incr = 1000; /* Increment for */
/* msg. sizes */
float* x; /* Message buffer */
double* times; /* Elapsed times */
double* max_times; /* Max times */
double* min_times; /* Min times */
int time_array_order; /* Size of timing */
/* arrays. */
double start; /* Start time */
double elapsed; /* Elapsed time */
int i, test, size; /* Loop variables */
int p, my_rank, source, dest;
MPI_Comm io_comm;
MPI_Status status;

MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_dup(MPI_COMM_WORLD, &io_comm);
Cache_io_rank(MPI_COMM_WORLD, io_comm);

Cprintf(io_comm,"","Before Initialize, p = %d, my_rank = %d",
p, my_rank);
Initialize(max_size, min_size, size_incr, my_rank,
&x, &times, &max_times, &min_times,
&time_array_order);

source = (my_rank - 1) % p;
dest = (my_rank + 1) % p;

/* For each message size, find average circuit time */
/* Loop var size = message size */
/* Loop var i = index into arrays for timings */
for (size = min_size, i = 0; size <= max_size;
size = size + size_incr, i++) {
Cprintf(io_comm,"",
"Before if, my_rank = %d, source = %d, dest = %d",
my_rank, source, dest);
if (my_rank == 0) {
times[i] =0.0;
max_times[i] = 0.0;
min_times[i] = 1000000.0;
for (test = 0; test < test_count; test++) {
start = MPI_Wtime();
MPI_Send(x, size, MPI_FLOAT, dest, 0,
MPI_COMM_WORLD);
MPI_Recv(x, size, MPI_FLOAT, source, 0,
MPI_COMM_WORLD, &status);
elapsed = MPI_Wtime() - start;
times[i] = times[i] + elapsed;
if (elapsed > max_times[i])
max_times[i] = elapsed;
if (elapsed < min_times[i])
min_times[i] = elapsed;
}
} else { /* my_rank != 0 */
for (test = 0; test < test_count; test++) {
MPI_Recv(x, size, MPI_FLOAT, source, 0,
MPI_COMM_WORLD, &status);
MPI_Send(x, size, MPI_FLOAT, dest, 0,
MPI_COMM_WORLD);
}
}
} /* for size . . . */

Print_results(io_comm, my_rank, min_size, max_size,
size_incr, time_array_order, test_count, times,
max_times, min_times);

MPI_Finalize();
} /* main */

/********************************************************/
void Initialize(int max_size, int min_size, int size_incr,
int my_rank, float** x_ptr, double** times_ptr,
double** max_times_ptr, double** min_times_ptr,
int* order_ptr) {
int i;

*x_ptr = (float *) malloc(max_size*sizeof(float));

*order_ptr = (max_size - min_size)/size_incr + 1;
*times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));
*max_times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));
*min_times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));

/* Initialize buffer -- why this? */
for (i = 0; i < max_size; i++)
(*x_ptr)[i] = (float) my_rank;
} /* Initialize */


/********************************************************/
/* Send results from process 0 in MPI_COMM_WORLD to */
/* I/O process in io_comm, which prints the results. */
void Print_results(MPI_Comm io_comm, int my_rank,
int min_size, int max_size, int size_incr,
int time_array_order, int test_count, double* times,
double* max_times, double* min_times) {
int i;
int size;
MPI_Status status;
int io_process;
int io_rank;

Get_io_rank(io_comm, &io_process);
MPI_Comm_rank(io_comm, &io_rank);

if (my_rank == 0) {
MPI_Send(times, time_array_order, MPI_DOUBLE,
io_rank, 0, io_comm);
MPI_Send(max_times, time_array_order, MPI_DOUBLE,
io_process, 0, io_comm);
MPI_Send(min_times, time_array_order, MPI_DOUBLE,
io_process, 0, io_comm);
}
if (io_rank == io_process) {
MPI_Recv(times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);
MPI_Recv(max_times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);
MPI_Recv(min_times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);

printf("Message size (floats): ");
for (size = min_size;
size <= max_size; size += size_incr)
printf("%10d ", size);
printf("\n");

printf("io_process = %d, time_array_order = %d\n",
io_process, time_array_order);
fflush(stdout);

printf("Avg circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*times[i]/test_count);
printf("\n");
printf("Max circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*max_times[i]);
printf("\n");
printf("Min circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*min_times[i]);
printf("\n\n");
fflush(stdout);
}
} /* Print_results */

comm_time.c, version 4: Process 0 starts off the ring pass. Fixes the erroneous calculation of time_array_order in Initialize and removes the debug output from Print_results.

/* comm_time.c
* Version 4: Process 0 starts off the ring pass.
* Fixed erroneous calc of time_array_order in Initialize.
* Changed number of message sizes to 2.
* Removed debug output from Print_results.
* Time communication around a ring of processes.
* Guaranteed to have bugs.
*
* Input: None (see notes).
*
* Output: Average, minimum, and maximum time for messages
* of varying sizes to be forwarded around a ring of
* processes.
*
* Algorithm:
* 1. Allocate and initialize storage for messages
* and communication times
* 2. Compute ranks of neighbors in ring.
* 3. Foreach message size
* 3b. Foreach test
* 3a. Start clock
* 3c. Send message around loop
* 3d. Add elapsed time to running sum
* 3e. Update max/min elapsed time
* 4. Print times.
*
* Functions:
* Initialize: Allocate and initialize arrays
* Print_results: Send results to I/O process
* and print.
*
* Notes:
* 1. Due to difficulties some MPI implementations
* have with input, the number of tests, the max
* message size, the min message size, and the size
* increment are hardwired.
* 2. We assume that the size increment evenly divides
* the difference max_size - min_size
*
* See Chap 9, pp. 192 & ff and p. 206 in PPMPI.
*/
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#include "cio.h"

void Initialize(int max_size, int min_size, int size_incr,
int my_rank, float** x_ptr, double** times_ptr,
double** max_times_ptr, double** min_times_ptr,
int* order_ptr);

void Print_results(MPI_Comm io_comm, int my_rank,
int min_size, int max_size, int size_incr,
int time_array_order, int test_count,
double* times, double* max_times, double* min_times);

main(int argc, char* argv[]) {
int test_count = 2; /* Number of tests */
int max_size = 1000; /* Max msg. length */
int min_size = 0; /* Min msg. length */
int size_incr = 1000; /* Increment for */
/* msg. sizes */
float* x; /* Message buffer */
double* times; /* Elapsed times */
double* max_times; /* Max times */
double* min_times; /* Min times */
int time_array_order; /* Size of timing */
/* arrays. */
double start; /* Start time */
double elapsed; /* Elapsed time */
int i, test, size; /* Loop variables */
int p, my_rank, source, dest;
MPI_Comm io_comm;
MPI_Status status;

MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_dup(MPI_COMM_WORLD, &io_comm);
Cache_io_rank(MPI_COMM_WORLD, io_comm);

Cprintf(io_comm,"","Before Initialize, p = %d, my_rank = %d",
p, my_rank);
Initialize(max_size, min_size, size_incr, my_rank,
&x, &times, &max_times, &min_times,
&time_array_order);

source = (my_rank - 1) % p;
dest = (my_rank + 1) % p;

/* For each message size, find average circuit time */
/* Loop var size = message size */
/* Loop var i = index into arrays for timings */
for (size = min_size, i = 0; size <= max_size;
size = size + size_incr, i++) {
Cprintf(io_comm,"",
"Before if, my_rank = %d, source = %d, dest = %d",
my_rank, source, dest);
if (my_rank == 0) {
times[i] =0.0;
max_times[i] = 0.0;
min_times[i] = 1000000.0;
for (test = 0; test < test_count; test++) {
start = MPI_Wtime();
MPI_Send(x, size, MPI_FLOAT, dest, 0,
MPI_COMM_WORLD);
MPI_Recv(x, size, MPI_FLOAT, source, 0,
MPI_COMM_WORLD, &status);
elapsed = MPI_Wtime() - start;
times[i] = times[i] + elapsed;
if (elapsed > max_times[i])
max_times[i] = elapsed;
if (elapsed < min_times[i])
min_times[i] = elapsed;
}
} else { /* my_rank != 0 */
for (test = 0; test < test_count; test++) {
MPI_Recv(x, size, MPI_FLOAT, source, 0,
MPI_COMM_WORLD, &status);
MPI_Send(x, size, MPI_FLOAT, dest, 0,
MPI_COMM_WORLD);
}
}
} /* for size . . . */

Print_results(io_comm, my_rank, min_size, max_size,
size_incr, time_array_order, test_count, times,
max_times, min_times);

MPI_Finalize();
} /* main */

/********************************************************/
void Initialize(int max_size, int min_size, int size_incr,
int my_rank, float** x_ptr, double** times_ptr,
double** max_times_ptr, double** min_times_ptr,
int* order_ptr) {
int i;

*x_ptr = (float *) malloc(max_size*sizeof(float));

*order_ptr = (max_size - min_size)/size_incr + 1;
*times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));
*max_times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));
*min_times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));

/* Initialize buffer -- why this? */
for (i = 0; i < max_size; i++)
(*x_ptr)[i] = (float) my_rank;
} /* Initialize */


/********************************************************/
/* Send results from process 0 in MPI_COMM_WORLD to */
/* I/O process in io_comm, which prints the results. */
void Print_results(MPI_Comm io_comm, int my_rank,
int min_size, int max_size, int size_incr,
int time_array_order, int test_count, double* times,
double* max_times, double* min_times) {
int i;
int size;
MPI_Status status;
int io_process;
int io_rank;

Get_io_rank(io_comm, &io_process);
MPI_Comm_rank(io_comm, &io_rank);

if (my_rank == 0) {
MPI_Send(times, time_array_order, MPI_DOUBLE,
io_rank, 0, io_comm);
MPI_Send(max_times, time_array_order, MPI_DOUBLE,
io_process, 0, io_comm);
MPI_Send(min_times, time_array_order, MPI_DOUBLE,
io_process, 0, io_comm);
}
if (io_rank == io_process) {
MPI_Recv(times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);
MPI_Recv(max_times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);
MPI_Recv(min_times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);

printf("Message size (floats): ");
for (size = min_size;
size <= max_size; size += size_incr)
printf("%10d ", size);
printf("\n");

printf("Avg circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*times[i]/test_count);
printf("\n");
printf("Max circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*max_times[i]);
printf("\n");
printf("Min circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*min_times[i]);
printf("\n\n");
fflush(stdout);
}
} /* Print_results */

comm_time.c, version 5: Process 0 starts off the ring pass. Corrects the calculation of the source process rank and checks the sends and receives with Cprintfs in the loop.

/* comm_time.c
* Version 5: Process 0 starts off the ring pass.
* Fixed erroneous calc of time_array_order in Initialize.
* Changed number of message sizes to 2.
* Removed debug output from Print_results.
* Corrected calculation of source process rank.
* Try to check sends and receives using Cprintfs in loop.
* Time communication around a ring of processes.
* Guaranteed to have bugs.
*
* Input: None (see notes).
*
* Output: Average, minimum, and maximum time for messages
* of varying sizes to be forwarded around a ring of
* processes.
*
* Algorithm:
* 1. Allocate and initialize storage for messages
* and communication times
* 2. Compute ranks of neighbors in ring.
* 3. Foreach message size
* 3b. Foreach test
* 3a. Start clock
* 3c. Send message around loop
* 3d. Add elapsed time to running sum
* 3e. Update max/min elapsed time
* 4. Print times.
*
* Functions:
* Initialize: Allocate and initialize arrays
* Print_results: Send results to I/O process
* and print.
*
* Notes:
* 1. Due to difficulties some MPI implementations
* have with input, the number of tests, the max
* message size, the min message size, and the size
* increment are hardwired.
* 2. We assume that the size increment evenly divides
* the difference max_size - min_size
*
* See Chap 9, pp. 192 & ff and pp. 208 & ff in PPMPI.
*/
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#include "cio.h"

void Initialize(int max_size, int min_size, int size_incr,
int my_rank, float** x_ptr, double** times_ptr,
double** max_times_ptr, double** min_times_ptr,
int* order_ptr);

void Print_results(MPI_Comm io_comm, int my_rank,
int min_size, int max_size, int size_incr,
int time_array_order, int test_count,
double* times, double* max_times, double* min_times);

main(int argc, char* argv[]) {
int test_count = 2; /* Number of tests */
int max_size = 1000; /* Max msg. length */
int min_size = 0; /* Min msg. length */
int size_incr = 1000; /* Increment for */
/* msg. sizes */
float* x; /* Message buffer */
double* times; /* Elapsed times */
double* max_times; /* Max times */
double* min_times; /* Min times */
int time_array_order; /* Size of timing */
/* arrays. */
double start; /* Start time */
double elapsed; /* Elapsed time */
int i, test, size; /* Loop variables */
int p, my_rank, source, dest;
MPI_Comm io_comm;
MPI_Status status;

MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_dup(MPI_COMM_WORLD, &io_comm);
Cache_io_rank(MPI_COMM_WORLD, io_comm);

Cprintf(io_comm,"","Before Initialize, p = %d, my_rank = %d",
p, my_rank);
Initialize(max_size, min_size, size_incr, my_rank,
&x, &times, &max_times, &min_times,
&time_array_order);

source = (my_rank - 1 + p) % p;
dest = (my_rank + 1) % p;
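/* Why the "+ p": when my_rank is 0, (my_rank - 1) % p evaluates to -1
 * rather than p - 1, since C's % operator follows the sign of the
 * dividend. The earlier versions therefore asked process 0 to receive
 * from rank -1; adding p first keeps the source rank in 0..p-1. */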

/* For each message size, find average circuit time */
/* Loop var size = message size */
/* Loop var i = index into arrays for timings */
for (size = min_size, i = 0; size <= max_size;
size = size + size_incr, i++) {
Cprintf(io_comm,"",
"Before if, my_rank = %d, source = %d, dest = %d",
my_rank, source, dest);
if (my_rank == 0) {
times[i] =0.0;
max_times[i] = 0.0;
min_times[i] = 1000000.0;
for (test = 0; test < test_count; test++) {
start = MPI_Wtime();
Cprintf(io_comm,"",
"Before send x[0] = %f, size = %d, dest = %d",
my_rank, x[0], size, dest);
MPI_Send(x, size, MPI_FLOAT, dest, 0,
MPI_COMM_WORLD);
MPI_Recv(x, size, MPI_FLOAT, source, 0,
MPI_COMM_WORLD, &status);
Cprintf(io_comm,"",
"After recv x[0] = %f, size = %d, source = %d",
my_rank, x[0], size, source);
elapsed = MPI_Wtime() - start;
times[i] = times[i] + elapsed;
if (elapsed > max_times[i])
max_times[i] = elapsed;
if (elapsed < min_times[i])
min_times[i] = elapsed;
}
} else { /* my_rank != 0 */
for (test = 0; test < test_count; test++) {
MPI_Recv(x, size, MPI_FLOAT, source, 0,
MPI_COMM_WORLD, &status);
Cprintf(io_comm,"",
"After recv x[0] = %f, size = %d, source = %d",
my_rank, x[0], size, source);
Cprintf(io_comm,"",
"Before send x[0] = %f, size = %d, dest = %d",
my_rank, x[0], size, dest);

MPI_Send(x, size, MPI_FLOAT, dest, 0,
MPI_COMM_WORLD);
}
}
} /* for size . . . */

Print_results(io_comm, my_rank, min_size, max_size,
size_incr, time_array_order, test_count, times,
max_times, min_times);

MPI_Finalize();
} /* main */

/********************************************************/
void Initialize(int max_size, int min_size, int size_incr,
int my_rank, float** x_ptr, double** times_ptr,
double** max_times_ptr, double** min_times_ptr,
int* order_ptr) {
int i;

*x_ptr = (float *) malloc(max_size*sizeof(float));

*order_ptr = (max_size - min_size)/size_incr + 1;
*times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));
*max_times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));
*min_times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));

/* Initialize buffer -- why this? */
for (i = 0; i < max_size; i++)
(*x_ptr)[i] = (float) my_rank;
} /* Initialize */


/********************************************************/
/* Send results from process 0 in MPI_COMM_WORLD to */
/* I/O process in io_comm, which prints the results. */
void Print_results(MPI_Comm io_comm, int my_rank,
int min_size, int max_size, int size_incr,
int time_array_order, int test_count, double* times,
double* max_times, double* min_times) {
int i;
int size;
MPI_Status status;
int io_process;
int io_rank;

Get_io_rank(io_comm, &io_process);
MPI_Comm_rank(io_comm, &io_rank);

if (my_rank == 0) {
MPI_Send(times, time_array_order, MPI_DOUBLE,
io_rank, 0, io_comm);
MPI_Send(max_times, time_array_order, MPI_DOUBLE,
io_process, 0, io_comm);
MPI_Send(min_times, time_array_order, MPI_DOUBLE,
io_process, 0, io_comm);
}
if (io_rank == io_process) {
MPI_Recv(times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);
MPI_Recv(max_times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);
MPI_Recv(min_times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);

printf("Message size (floats): ");
for (size = min_size;
size <= max_size; size += size_incr)
printf("%10d ", size);
printf("\n");

printf("Avg circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*times[i]/test_count);
printf("\n");
printf("Max circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*max_times[i]);
printf("\n");
printf("Min circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*min_times[i]);
printf("\n\n");
fflush(stdout);
}
} /* Print_results */

comm_time.c, version 6: Process 0 starts off the ring pass. Relocates the Cprintfs in the loop.

/* comm_time.c
* Version 6: Process 0 starts off the ring pass.
* Fixed erroneous calc of time_array_order in Initialize.
* Changed number of message sizes to 2.
* Removed debug output from Print_results.
* Corrected calculation of source process rank.
* Try to check sends and receives using Cprintfs in loop.
* Relocate Cprintfs in loop.
* Time communication around a ring of processes.
* Guaranteed to have bugs.
*
* Input: None (see notes).
*
* Output: Average, minimum, and maximum time for messages
* of varying sizes to be forwarded around a ring of
* processes.
*
* Algorithm:
* 1. Allocate and initialize storage for messages
* and communication times
* 2. Compute ranks of neighbors in ring.
* 3. Foreach message size
* 3b. Foreach test
* 3a. Start clock
* 3c. Send message around loop
* 3d. Add elapsed time to running sum
* 3e. Update max/min elapsed time
* 4. Print times.
*
* Functions:
* Initialize: Allocate and initialize arrays
* Print_results: Send results to I/O process
* and print.
*
* Notes:
* 1. Due to difficulties some MPI implementations
* have with input, the number of tests, the max
* message size, the min message size, and the size
* increment are hardwired.
* 2. We assume that the size increment evenly divides
* the difference max_size - min_size
*
* See Chap 9, pp. 192 & ff and pp. 209 & ff in PPMPI.
*/
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#include "cio.h"

void Initialize(int max_size, int min_size, int size_incr,
int my_rank, float** x_ptr, double** times_ptr,
double** max_times_ptr, double** min_times_ptr,
int* order_ptr);

void Print_results(MPI_Comm io_comm, int my_rank,
int min_size, int max_size, int size_incr,
int time_array_order, int test_count,
double* times, double* max_times, double* min_times);

main(int argc, char* argv[]) {
int test_count = 2; /* Number of tests */
int max_size = 1000; /* Max msg. length */
int min_size = 0; /* Min msg. length */
int size_incr = 1000; /* Increment for */
/* msg. sizes */
float* x; /* Message buffer */
double* times; /* Elapsed times */
double* max_times; /* Max times */
double* min_times; /* Min times */
int time_array_order; /* Size of timing */
/* arrays. */
double start; /* Start time */
double elapsed; /* Elapsed time */
int i, test, size; /* Loop variables */
int p, my_rank, source, dest;
MPI_Comm io_comm;
MPI_Status status;

MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_dup(MPI_COMM_WORLD, &io_comm);
Cache_io_rank(MPI_COMM_WORLD, io_comm);

Cprintf(io_comm,"","Before Initialize, p = %d, my_rank = %d",
p, my_rank);
Initialize(max_size, min_size, size_incr, my_rank,
&x, &times, &max_times, &min_times,
&time_array_order);

source = (my_rank - 1 + p) % p;
dest = (my_rank + 1) % p;

/* For each message size, find average circuit time */
/* Loop var size = message size */
/* Loop var i = index into arrays for timings */
for (size = min_size, i = 0; size <= max_size;
size = size + size_incr, i++) {
Cprintf(io_comm,"",
"Before if, my_rank = %d, source = %d, dest = %d",
my_rank, source, dest);
if (my_rank == 0) {
times[i] =0.0;
max_times[i] = 0.0;
min_times[i] = 1000000.0;
for (test = 0; test < test_count; test++) {
start = MPI_Wtime();
Cprintf(io_comm,"",
"Before send x[0] = %f, size = %d, dest = %d",
my_rank, x[0], size, dest);
MPI_Send(x, size, MPI_FLOAT, dest, 0,
MPI_COMM_WORLD);
MPI_Recv(x, size, MPI_FLOAT, source, 0,
MPI_COMM_WORLD, &status);
Cprintf(io_comm,"",
"After recv x[0] = %f, size = %d, source = %d",
my_rank, x[0], size, source);
elapsed = MPI_Wtime() - start;
times[i] = times[i] + elapsed;
if (elapsed > max_times[i])
max_times[i] = elapsed;
if (elapsed < min_times[i])
min_times[i] = elapsed;
}
} else { /* my_rank != 0 */
for (test = 0; test < test_count; test++) {
Cprintf(io_comm,"",
"Before recv x[0] = %f, size = %d, source = %d",
my_rank, x[0], size, source);
MPI_Recv(x, size, MPI_FLOAT, source, 0,
MPI_COMM_WORLD, &status);
MPI_Send(x, size, MPI_FLOAT, dest, 0,
MPI_COMM_WORLD);
Cprintf(io_comm,"",
"After send x[0] = %f, size = %d, dest = %d",
my_rank, x[0], size, dest);
}
}
} /* for size . . . */

Print_results(io_comm, my_rank, min_size, max_size,
size_incr, time_array_order, test_count, times,
max_times, min_times);

MPI_Finalize();
} /* main */

/********************************************************/
void Initialize(int max_size, int min_size, int size_incr,
int my_rank, float** x_ptr, double** times_ptr,
double** max_times_ptr, double** min_times_ptr,
int* order_ptr) {
int i;

*x_ptr = (float *) malloc(max_size*sizeof(float));

*order_ptr = (max_size - min_size)/size_incr + 1;
*times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));
*max_times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));
*min_times_ptr =
(double *) malloc((*order_ptr)*sizeof(double));

/* Initialize buffer -- why this? */
for (i = 0; i < max_size; i++)
(*x_ptr)[i] = (float) my_rank;
} /* Initialize */


/********************************************************/
/* Send results from process 0 in MPI_COMM_WORLD to */
/* I/O process in io_comm, which prints the results. */
void Print_results(MPI_Comm io_comm, int my_rank,
int min_size, int max_size, int size_incr,
int time_array_order, int test_count, double* times,
double* max_times, double* min_times) {
int i;
int size;
MPI_Status status;
int io_process;
int io_rank;

Get_io_rank(io_comm, &io_process);
MPI_Comm_rank(io_comm, &io_rank);

if (my_rank == 0) {
MPI_Send(times, time_array_order, MPI_DOUBLE,
io_rank, 0, io_comm);
MPI_Send(max_times, time_array_order, MPI_DOUBLE,
io_process, 0, io_comm);
MPI_Send(min_times, time_array_order, MPI_DOUBLE,
io_process, 0, io_comm);
}
if (io_rank == io_process) {
MPI_Recv(times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);
MPI_Recv(max_times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);
MPI_Recv(min_times, time_array_order, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, io_comm, &status);

printf("Message size (floats): ");
for (size = min_size;
size <= max_size; size += size_incr)
printf("%10d ", size);
printf("\n");

printf("Avg circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*times[i]/test_count);
printf("\n");
printf("Max circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*max_times[i]);
printf("\n");
printf("Min circuit time (ms): ");
for (i = 0; i < time_array_order; i++)
printf("%10f ",1000.0*min_times[i]);
printf("\n\n");
fflush(stdout);
}
} /* Print_results */
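The comm_time listings above all depend on the book's cio library (Cache_io_rank, Cprintf, Get_io_rank). For comparison, here is a minimal, self-contained sketch of the same ring-timing idea, with the source-rank fix from version 5 applied, that uses only standard MPI calls and prints from process 0 instead of routing output through an I/O process. The file name, the fixed message size of 1000 floats, and the test count of 10 are choices made for this sketch, not part of the original listings.

/* ring_time_sketch.c -- minimal ring-pass timing without the cio library.
 * Process 0 starts the ring pass and reports the average circuit time.
 * Run with two or more processes.
 */
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"

int main(int argc, char* argv[]) {
    int p, my_rank, source, dest;
    int i, test;
    int size = 1000;      /* message length in floats (choice for this sketch) */
    int test_count = 10;  /* number of timed circuits (choice for this sketch) */
    float* x;
    double start, total = 0.0;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    /* Neighbors in the ring; adding p keeps the source rank nonnegative. */
    source = (my_rank - 1 + p) % p;
    dest = (my_rank + 1) % p;

    /* Fill the message buffer with defined values before sending. */
    x = (float*) malloc(size*sizeof(float));
    for (i = 0; i < size; i++)
        x[i] = (float) my_rank;

    for (test = 0; test < test_count; test++) {
        if (my_rank == 0) {
            start = MPI_Wtime();
            MPI_Send(x, size, MPI_FLOAT, dest, 0, MPI_COMM_WORLD);
            MPI_Recv(x, size, MPI_FLOAT, source, 0, MPI_COMM_WORLD, &status);
            total += MPI_Wtime() - start;
        } else {
            MPI_Recv(x, size, MPI_FLOAT, source, 0, MPI_COMM_WORLD, &status);
            MPI_Send(x, size, MPI_FLOAT, dest, 0, MPI_COMM_WORLD);
        }
    }

    if (my_rank == 0)
        printf("Avg circuit time for %d floats: %f ms\n",
            size, 1000.0*total/test_count);

    free(x);
    MPI_Finalize();
    return 0;
} /* main */

With a typical MPI installation this can be built and launched with the usual wrappers, e.g. mpicc ring_time_sketch.c -o ring_time and mpiexec -n 4 ./ring_time (the wrapper names vary between installations); the book's comm_time.c listings additionally need the cio source on the compile line.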

parallel_dot1.c: Computes a parallel dot product. Uses MPI_Allreduce.

/* parallel_dot1.c -- Computes a parallel dot product. Uses MPI_Allreduce.
*
* Input:
* n: order of vectors
* x, y: the vectors
*
* Output:
* the dot product of x and y as computed by each process.
*
* Note: Arrays containing vectors are statically allocated. Assumes that
* n, the global order of the vectors, is evenly divisible by p, the
* number of processes.
*
* See Chap 5, pp. 76 & ff in PPMPI.
*/
#include <stdio.h>
#include "mpi.h"

#define MAX_LOCAL_ORDER 100

main(int argc, char* argv[]) {
float local_x[MAX_LOCAL_ORDER];
float local_y[MAX_LOCAL_ORDER];
int n;
int n_bar; /* = n/p */
float dot;
int p;
int my_rank;

void Read_vector(char* prompt, float local_v[], int n_bar, int p,
int my_rank);
float Parallel_dot(float local_x[], float local_y[], int n_bar);
void Print_results(float dot, int my_rank, int p);

MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

if (my_rank == 0) {
printf("Enter the order of the vectors\n");
scanf("%d", &n);
}
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
n_bar = n/p;

Read_vector("the first vector", local_x, n_bar, p, my_rank);
Read_vector("the second vector", local_y, n_bar, p, my_rank);

dot = Parallel_dot(local_x, local_y, n_bar);

Print_results(dot, my_rank, p);

MPI_Finalize();
} /* main */


/*****************************************************************/
void Read_vector(
char* prompt /* in */,
float local_v[] /* out */,
int n_bar /* in */,
int p /* in */,
int my_rank /* in */) {
int i, q;
float temp[MAX_LOCAL_ORDER];
MPI_Status status;

if (my_rank == 0) {
printf("Enter %s\n", prompt);
for (i = 0; i < n_bar; i++)
scanf("%f", &local_v[i]);
for (q = 1; q < p; q++) {
for (i = 0; i < n_bar; i++)
scanf("%f", &temp[i]);
MPI_Send(temp, n_bar, MPI_FLOAT, q, 0, MPI_COMM_WORLD);
}
} else {
MPI_Recv(local_v, n_bar, MPI_FLOAT, 0, 0, MPI_COMM_WORLD,
&status);
}
} /* Read_vector */


/*****************************************************************/
float Serial_dot(
float x[] /* in */,
float y[] /* in */,
int n /* in */) {

int i;
float sum = 0.0;

for (i = 0; i < n; i++)
sum = sum + x[i]*y[i];
return sum;
} /* Serial_dot */


/*****************************************************************/
float Parallel_dot(
float local_x[] /* in */,
float local_y[] /* in */,
int n_bar /* in */) {

float local_dot;
float dot = 0.0;
float Serial_dot(float x[], float y[], int m);

local_dot = Serial_dot(local_x, local_y, n_bar);
MPI_Allreduce(&local_dot, &dot, 1, MPI_FLOAT,
MPI_SUM, MPI_COMM_WORLD);
return dot;
} /* Parallel_dot */


/*****************************************************************/
void Print_results(
float dot /* in */,
int my_rank /* in */,
int p /* in */) {
int q;
float temp;
MPI_Status status;

if (my_rank == 0) {
printf("dot = \n");
printf("Process 0 > %f\n", dot);
for (q = 1; q < p; q++) {
MPI_Recv(&temp, 1, MPI_FLOAT, q, 0, MPI_COMM_WORLD,
&status);
printf("Process %d > %f\n", q, temp);
}
} else {
MPI_Send(&dot, 1, MPI_FLOAT, 0, 0, MPI_COMM_WORLD);
}

} /* Print_results */
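As a small worked example (numbers chosen here for illustration): with p = 2 and n = 4, suppose the vectors are x = (1, 2, 3, 4) and y = (5, 6, 7, 8). Process 0 holds the first halves and computes the local dot product 1*5 + 2*6 = 17; process 1 holds the second halves and computes 3*7 + 4*8 = 53. MPI_Allreduce with MPI_SUM then leaves 17 + 53 = 70 in dot on both processes, which is why every process can report the same result in Print_results.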

sparse_row.c: Packs a row of a sparse matrix and sends it from process 0 to process 1. Process 1 allocates the required storage after partially unpacking.

/* sparse_row.c -- pack a row of a sparse matrix and send from process 0
* to process 1. Process 1 allocates required storage after partially
* unpacking.
*
* Input: none
* Output: the row received by process 1.
*
* Notes:
* 1. This program should only be run with 2 processes.
* 2. Only the row of the matrix is created on both processes.
*
* See Chap. 6, pp. 104 & ff in PPMPI
*/
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"

#define HUGE 100

main(int argc, char* argv[]) {
int p;
int my_rank;
float* entries;
int* column_subscripts;
int nonzeroes;
int position;
int row_number;
char buffer[HUGE]; /* HUGE is a predefined constant */
MPI_Status status;
int i;

MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

if (my_rank == 0) {
/* Get the number of nonzeros in the row. */
/* Allocate storage for the row. */
/* Initialize entries and column_subscripts */
nonzeroes = 10;
entries = (float*) malloc(nonzeroes*sizeof(float));
column_subscripts = (int*) malloc(nonzeroes*sizeof(int));
for (i = 0; i < nonzeroes; i++) {
entries[i] = (float) 2*i;
column_subscripts[i] = 3*i;
}

/* Now pack the data and send */
position = 0;
MPI_Pack(&nonzeroes, 1, MPI_INT, buffer, HUGE,
&position, MPI_COMM_WORLD);
MPI_Pack(&row_number, 1, MPI_INT, buffer, HUGE,
&position, MPI_COMM_WORLD);
MPI_Pack(entries, nonzeroes, MPI_FLOAT, buffer,
HUGE, &position, MPI_COMM_WORLD);
MPI_Pack(column_subscripts, nonzeroes, MPI_INT,
buffer, HUGE, &position, MPI_COMM_WORLD);
MPI_Send(buffer, position, MPI_PACKED, 1, 0,
MPI_COMM_WORLD);
} else { /* my_rank == 1 */
MPI_Recv(buffer, HUGE, MPI_PACKED, 0, 0,
MPI_COMM_WORLD, &status);
position = 0;
MPI_Unpack(buffer, HUGE, &position, &nonzeroes,
1, MPI_INT, MPI_COMM_WORLD);
MPI_Unpack(buffer, HUGE, &position, &row_number,
1, MPI_INT, MPI_COMM_WORLD);
/* Allocate storage for entries and column_subscripts */
entries = (float *) malloc(nonzeroes*sizeof(float));
column_subscripts = (int *) malloc(nonzeroes*sizeof(int));
MPI_Unpack(buffer,HUGE, &position, entries,
nonzeroes, MPI_FLOAT, MPI_COMM_WORLD);
MPI_Unpack(buffer, HUGE, &position, column_subscripts,
nonzeroes, MPI_INT, MPI_COMM_WORLD);
for (i = 0; i < nonzeroes; i++)
printf("%4.1f %2d\n", entries[i], column_subscripts[i]);
}

MPI_Finalize();
} /* main */
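Since process 0 fills entries[i] = 2*i and column_subscripts[i] = 3*i for i = 0, ..., 9, process 1 should print ten lines pairing the values 0.0, 2.0, ..., 18.0 with the subscripts 0, 3, ..., 27. Note that row_number is packed and unpacked but never assigned in this listing (only the row itself is created, per note 2), so the value process 1 receives for it is indeterminate.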

send_triangle.c: Sends the upper triangle of a matrix from process 0 to process 1.

/* send_triangle.c -- send the upper triangle of a matrix from process 0
* to process 1
*
* Input: None
* Output: The matrix received by process 1
*
* Note: This program should only be run with 2 processes.
*
* See Chap 6, p. 98, in PPMPI.
*/
#include <stdio.h>
#include "mpi.h"

#define n 10

main(int argc, char* argv[]) {
int p;
int my_rank;
float A[n][n]; /* Complete Matrix */
float T[n][n]; /* Upper Triangle */
int displacements[n];
int block_lengths[n];
MPI_Datatype index_mpi_t;
int i, j;
MPI_Status status;

MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

for (i = 0; i < n; i++) {
block_lengths[i] = n-i;
displacements[i] = (n+1)*i;
}
MPI_Type_indexed(n, block_lengths, displacements,
MPI_FLOAT, &index_mpi_t);
MPI_Type_commit(&index_mpi_t);

if (my_rank == 0) {
for (i = 0; i < n; i++)
for (j = 0; j < n; j++)
A[i][j] = (float) i + j;
MPI_Send(A, 1, index_mpi_t, 1, 0, MPI_COMM_WORLD);
} else {/* my_rank == 1 */
for (i = 0; i < n; i++)
for (j = 0; j < n; j++)
T[i][j] = 0.0;
MPI_Recv(T, 1, index_mpi_t, 0, 0, MPI_COMM_WORLD, &status);
for (i = 0; i < n; i++) {
for (j = 0; j < n; j++)
printf("%4.1f ", T[i][j]);
printf("\n");
}
}

MPI_Finalize();
} /* main */
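To see what the indexed type describes, take n = 3 as a small worked case: block_lengths is {3, 2, 1} and displacements is {0, 4, 8}, so the type picks out elements 0-2, 4-5, and 8 of the row-major 3x3 array, i.e. A[0][0..2], A[1][1..2], and A[2][2], which is exactly the upper triangle. With n = 10 the same pattern sends the 55 upper-triangular entries of A, and process 1 receives them into the matching positions of T.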

send_row.c: Sends the third row of a matrix from process 0 to process 1.

/* send_row.c -- send third row of a matrix from process 0 to process 1
*
* Input: none
* Output: the row received by process 1
*
* Note: Program should only be run with 2 processes
*
* See Chap 6, p. 96 in PPMPI
*/
#include <stdio.h>
#include "mpi.h"

main(int argc, char* argv[]) {
int p;
int my_rank;
float A[10][10];
MPI_Status status;
int i, j;

MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

if (my_rank == 0) {
for (i = 0; i < 10; i++)
for (j = 0; j < 10; j++)
A[i][j] = (float) i;
MPI_Send(&(A[2][0]), 10, MPI_FLOAT, 1, 0,
MPI_COMM_WORLD);
} else { /* my_rank = 1 */
MPI_Recv(&(A[2][0]), 10, MPI_FLOAT, 0, 0,
MPI_COMM_WORLD, &status);
for (j = 0; j < 10; j++)
printf("%3.1f ", A[2][j]);
printf("\n");
}

MPI_Finalize();
} /* main */

send_col_to_row.c: Sends column 1 of a matrix on process 0 to row 1 on process 1.

/* send_col_to_row.c -- send column 1 of a matrix on process 0 to row 1
* on process 1.
*
* Input: none
* Output: The row received by process 1.
*
* Note: This program should only be run with 2 processes
*
* See Chap 6., pp. 98 & ff in PPMPI
*/
#include <stdio.h>
#include "mpi.h"

main(int argc, char* argv[]) {
int p;
int my_rank;
float A[10][10];
MPI_Status status;
MPI_Datatype column_mpi_t;
int i, j;

MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

MPI_Type_vector(10, 1, 10, MPI_FLOAT, &column_mpi_t);
MPI_Type_commit(&column_mpi_t);

if (my_rank == 0) {
for (i = 0; i < 10; i++)
for (j = 0; j < 10; j++)
A[i][j] = (float) i;
MPI_Send(&(A[0][0]), 1, column_mpi_t, 1, 0,
MPI_COMM_WORLD);
} else { /* my_rank = 1 */
for (i = 0; i < 10; i++)
for (j = 0; j < 10; j++)
A[i][j] = 0.0;
MPI_Recv(&(A[0][0]), 10, MPI_FLOAT, 0, 0,
MPI_COMM_WORLD, &status);
for (j = 0; j < 10; j++)
printf("%3.1f ", A[0][j]);
printf("\n");
}

MPI_Finalize();
} /* main */
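The point of this listing is that the send and receive only have to have matching type signatures, not identical types: column_mpi_t describes ten floats spaced a row apart (column 0 of A on process 0), while the receive uses ten contiguous MPI_FLOATs, which land in row 0 of A on process 1. Since process 0 sets A[i][j] = i, the printed row should read 0.0 1.0 2.0 ... 9.0.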

send_col.c: Sends the third column of a matrix from process 0 to process 1.

/* send_col.c -- send the third column of a matrix from process 0 to
* process 1
*
* Input: None
* Output: The column received by process 1
*
* Note: This program should only be run with 2 processes
*
* See Chap 6., pp. 96 & ff in PPMPI
*/
#include <stdio.h>
#include "mpi.h"

main(int argc, char* argv[]) {
int p;
int my_rank;
float A[10][10];
MPI_Status status;
MPI_Datatype column_mpi_t;
int i, j;

MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

MPI_Type_vector(10, 1, 10, MPI_FLOAT, &column_mpi_t);
MPI_Type_commit(&column_mpi_t);

if (my_rank == 0) {
for (i = 0; i < 10; i++)
for (j = 0; j < 10; j++)
A[i][j] = (float) j;
MPI_Send(&(A[0][2]), 1, column_mpi_t, 1, 0,
MPI_COMM_WORLD);
} else { /* my_rank = 1 */
MPI_Recv(&(A[0][2]), 1, column_mpi_t, 0, 0,
MPI_COMM_WORLD, &status);
for (i = 0; i < 10; i++)
printf("%3.1f ", A[i][2]);
printf("\n");
}

MPI_Finalize();
} /* main */
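Here MPI_Type_vector(10, 1, 10, MPI_FLOAT, &column_mpi_t) builds a type of 10 blocks, each holding 1 float, with consecutive blocks starting 10 floats apart; starting the send at &(A[0][2]), that stride walks down the third column of the row-major 10x10 array. Process 0 sets A[i][j] = j, so the column received and printed by process 1 should be ten copies of 2.0.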

get_data4.c: Parallel trapezoidal rule. Uses MPI_Pack/MPI_Unpack in the distribution of the input data.

/* get_data4.c -- Parallel Trapezoidal Rule. Uses MPI_Pack/Unpack in
* distribution of input data.
*
* Input:
* a, b: limits of integration.
* n: number of trapezoids.
* Output: Estimate of the integral from a to b of f(x)
* using the trapezoidal rule and n trapezoids.
*
* Notes:
* 1. f(x) is hardwired.
* 2. the number of processes (p) should evenly divide
* the number of trapezoids (n).
*
* See Chap 6., pp. 100 & ff in PPMPI
*/
#include <stdio.h>

/* We'll be using MPI routines, definitions, etc. */
#include "mpi.h"

main(int argc, char** argv) {
int my_rank; /* My process rank */
int p; /* The number of processes */
float a; /* Left endpoint */
float b; /* Right endpoint */
int n; /* Number of trapezoids */
float h; /* Trapezoid base length */
float local_a; /* Left endpoint my process */
float local_b; /* Right endpoint my process */
int local_n; /* Number of trapezoids for */
/* my calculation */
float integral; /* Integral over my interval */
float total; /* Total integral */
int source; /* Process sending integral */
int dest = 0; /* All messages go to 0 */
int tag = 0;
MPI_Status status;

void Get_data4(float* a_ptr, float* b_ptr, int* n_ptr, int my_rank);
float Trap(float local_a, float local_b, int local_n,
float h); /* Calculate local integral */

/* Let the system do what it needs to start up MPI */
MPI_Init(&argc, &argv);

/* Get my process rank */
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

/* Find out how many processes are being used */
MPI_Comm_size(MPI_COMM_WORLD, &p);

Get_data4(&a, &b, &n, my_rank);

h = (b-a)/n; /* h is the same for all processes */
local_n = n/p; /* So is the number of trapezoids */

/* Length of each process' interval of
* integration = local_n*h. So my interval
* starts at: */
local_a = a + my_rank*local_n*h;
local_b = local_a + local_n*h;
integral = Trap(local_a, local_b, local_n, h);

/* Add up the integrals calculated by each process */
MPI_Reduce(&integral, &total, 1, MPI_FLOAT,
MPI_SUM, 0, MPI_COMM_WORLD);

/* Print the result */
if (my_rank == 0) {
printf("With n = %d trapezoids, our estimate\n",
n);
printf("of the integral from %f to %f = %f\n",
a, b, total);
}

/* Shut down MPI */
MPI_Finalize();
} /* main */


/********************************************************************/
void Get_data4(
float* a_ptr /* out */,
float* b_ptr /* out */,
int* n_ptr /* out */,
int my_rank /* in */) {

char buffer[100]; /* Store data in buffer */
int position; /* Keep track of where data is */
/* in the buffer */

if (my_rank == 0){
printf("Enter a, b, and n\n");
scanf("%f %f %d", a_ptr, b_ptr, n_ptr);

/* Now pack the data into buffer. Position = 0 */
/* says start at beginning of buffer. */
position = 0;

/* Position is in/out */
MPI_Pack(a_ptr, 1, MPI_FLOAT, buffer, 100,
&position, MPI_COMM_WORLD);
/* Position has been incremented: it now refer- */
/* ences the first free location in buffer. */

MPI_Pack(b_ptr, 1, MPI_FLOAT, buffer, 100,
&position, MPI_COMM_WORLD);
/* Position has been incremented again. */

MPI_Pack(n_ptr, 1, MPI_INT, buffer, 100,
&position, MPI_COMM_WORLD);
/* Position has been incremented again. */

/* Now broadcast contents of buffer */
MPI_Bcast(buffer, 100, MPI_PACKED, 0,
MPI_COMM_WORLD);
} else {
MPI_Bcast(buffer, 100, MPI_PACKED, 0,
MPI_COMM_WORLD);

/* Now unpack the contents of buffer */
position = 0;
MPI_Unpack(buffer, 100, &position, a_ptr, 1,
MPI_FLOAT, MPI_COMM_WORLD);
/* Once again position has been incremented: */
/* it now references the beginning of b. */

MPI_Unpack(buffer, 100, &position, b_ptr, 1,
MPI_FLOAT, MPI_COMM_WORLD);
MPI_Unpack(buffer, 100, &position, n_ptr, 1,
MPI_INT, MPI_COMM_WORLD);
}
} /* Get_data4 */


/********************************************************************/
float Trap(
float local_a /* in */,
float local_b /* in */,
int local_n /* in */,
float h /* in */) {

float integral; /* Store result in integral */
float x;
int i;

float f(float x); /* function we're integrating */

integral = (f(local_a) + f(local_b))/2.0;
x = local_a;
for (i = 1; i <= local_n-1; i++) {
x = x + h;
integral = integral + f(x);
}
integral = integral*h;
return integral;
} /* Trap */


/********************************************************************/
float f(float x) {
float return_val;
/* Calculate f(x). */
/* Store calculation in return_val. */
return_val = x*x;
return return_val;
} /* f */
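
The packed-buffer approach above distributes a, b, and n with a single broadcast. A simpler (also standard) alternative is to broadcast each value separately, at the cost of three collective calls instead of one. The sketch below is not part of the listing: the name Get_data_bcast is made up here, and the same #include lines as above are assumed.

/* Sketch only -- not in the listing above. Distributes a, b, and n with
 * three separate MPI_Bcast calls instead of packing them into one buffer.
 * Every process, including rank 0, executes the same broadcast calls. */
void Get_data_bcast(
float* a_ptr /* out */,
float* b_ptr /* out */,
int* n_ptr /* out */,
int my_rank /* in */) {

if (my_rank == 0) {
printf("Enter a, b, and n\n");
scanf("%f %f %d", a_ptr, b_ptr, n_ptr);
}
MPI_Bcast(a_ptr, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Bcast(b_ptr, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Bcast(n_ptr, 1, MPI_INT, 0, MPI_COMM_WORLD);
} /* Get_data_bcast */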

Parallel Trapezoidal Rule. Builds a derived type for use with the distribution of the input data.

/* get_data3.c -- Parallel Trapezoidal Rule. Builds a derived type
* for use with the distribution of the input data.
*
* Input:
* a, b: limits of integration.
* n: number of trapezoids.
* Output: Estimate of the integral from a to b of f(x)
* using the trapezoidal rule and n trapezoids.
*
* Notes:
* 1. f(x) is hardwired.
* 2. the number of processes (p) should evenly divide
* the number of trapezoids (n).
*
* See Chap 6, pp. 90 & ff in PPMPI
*/
#include <stdio.h>

/* We'll be using MPI routines, definitions, etc. */
#include "mpi.h"

void Build_derived_type(
float* a_ptr /* in */,
float* b_ptr /* in */,
int* n_ptr /* in */,
MPI_Datatype* mesg_mpi_t_ptr /* out */);

main(int argc, char** argv) {
int my_rank; /* My process rank */
int p; /* The number of processes */
float a; /* Left endpoint */
float b; /* Right endpoint */
int n; /* Number of trapezoids */
float h; /* Trapezoid base length */
float local_a; /* Left endpoint my process */
float local_b; /* Right endpoint my process */
int local_n; /* Number of trapezoids for */
/* my calculation */
float integral; /* Integral over my interval */
float total; /* Total integral */
int source; /* Process sending integral */
int dest = 0; /* All messages go to 0 */
int tag = 0;
MPI_Status status;

void Get_data3(float* a_ptr, float* b_ptr, int* n_ptr, int my_rank);
float Trap(float local_a, float local_b, int local_n,
float h); /* Calculate local integral */

/* Let the system do what it needs to start up MPI */
MPI_Init(&argc, &argv);

/* Get my process rank */
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

/* Find out how many processes are being used */
MPI_Comm_size(MPI_COMM_WORLD, &p);

Get_data3(&a, &b, &n, my_rank);

h = (b-a)/n; /* h is the same for all processes */
local_n = n/p; /* So is the number of trapezoids */

/* Length of each process' interval of
* integration = local_n*h. So my interval
* starts at: */
local_a = a + my_rank*local_n*h;
local_b = local_a + local_n*h;
integral = Trap(local_a, local_b, local_n, h);

/* Add up the integrals calculated by each process */
MPI_Reduce(&integral, &total, 1, MPI_FLOAT,
MPI_SUM, 0, MPI_COMM_WORLD);

/* Print the result */
if (my_rank == 0) {
printf("With n = %d trapezoids, our estimate\n",
n);
printf("of the integral from %f to %f = %f\n",
a, b, total);
}

/* Shut down MPI */
MPI_Finalize();
} /* main */


/********************************************************************/
void Build_derived_type(
float* a_ptr /* in */,
float* b_ptr /* in */,
int* n_ptr /* in */,
MPI_Datatype* mesg_mpi_t_ptr /* out */) {
/* pointer to new MPI type */

/* The number of elements in each "block" of the */
/* new type. For us, 1 each. */
int block_lengths[3];

/* Displacement of each element from start of new */
/* type. The "d_i's." */
/* MPI_Aint ("address int") is an MPI-defined C */
/* integer type wide enough to hold an address. */
MPI_Aint displacements[3];

/* MPI types of the elements. The "t_i's." */
MPI_Datatype typelist[3];

/* Use for calculating displacements */
MPI_Aint start_address;
MPI_Aint address;

block_lengths[0] = block_lengths[1]
= block_lengths[2] = 1;

/* Build a derived datatype consisting of */
/* two floats and an int */
typelist[0] = MPI_FLOAT;
typelist[1] = MPI_FLOAT;
typelist[2] = MPI_INT;

/* First element, a, is at displacement 0 */
displacements[0] = 0;

/* Calculate other displacements relative to a */
MPI_Address(a_ptr, &start_address);

/* Find address of b and displacement from a */
MPI_Address(b_ptr, &address);
displacements[1] = address - start_address;

/* Find address of n and displacement from a */
MPI_Address(n_ptr, &address);
displacements[2] = address - start_address;

/* Build the derived datatype */
MPI_Type_struct(3, block_lengths, displacements,
typelist, mesg_mpi_t_ptr);

/* Commit it -- tell system we'll be using it for */
/* communication. */
MPI_Type_commit(mesg_mpi_t_ptr);
} /* Build_derived_type */


/********************************************************************/
void Get_data3(
float* a_ptr /* out */,
float* b_ptr /* out */,
int* n_ptr /* out */,
int my_rank /* in */) {
MPI_Datatype mesg_mpi_t; /* MPI type corresponding */
/* to two floats and an int */

if (my_rank == 0){
printf("Enter a, b, and n\n");
scanf("%f %f %d", a_ptr, b_ptr, n_ptr);
}

Build_derived_type(a_ptr, b_ptr, n_ptr, &mesg_mpi_t);
MPI_Bcast(a_ptr, 1, mesg_mpi_t, 0, MPI_COMM_WORLD);
} /* Get_data3 */


/********************************************************************/
float Trap(
float local_a /* in */,
float local_b /* in */,
int local_n /* in */,
float h /* in */) {

float integral; /* Store result in integral */
float x;
int i;

float f(float x); /* function we're integrating */

integral = (f(local_a) + f(local_b))/2.0;
x = local_a;
for (i = 1; i <= local_n-1; i++) {
x = x + h;
integral = integral + f(x);
}
integral = integral*h;
return integral;
} /* Trap */


/********************************************************************/
float f(float x) {
float return_val;
/* Calculate f(x). */
/* Store calculation in return_val. */
return_val = x*x;
return return_val;
} /* f */
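
MPI_Address and MPI_Type_struct, used in Build_derived_type above (and again in fox.c further down), were deprecated in MPI-2 and removed from MPI-3. The sketch below is not part of the listing: it rewrites the same function with the replacement calls MPI_Get_address and MPI_Type_create_struct; the name Build_derived_type3 is made up here, and the logic is otherwise unchanged.

/* Sketch only: Build_derived_type rewritten with the MPI-3 routine names.
 * Displacements are still computed relative to the address of a. */
void Build_derived_type3(
float* a_ptr /* in */,
float* b_ptr /* in */,
int* n_ptr /* in */,
MPI_Datatype* mesg_mpi_t_ptr /* out */) {

int block_lengths[3] = {1, 1, 1};
MPI_Aint displacements[3];
MPI_Datatype typelist[3] = {MPI_FLOAT, MPI_FLOAT, MPI_INT};
MPI_Aint start_address;
MPI_Aint address;

/* First element, a, is at displacement 0 */
MPI_Get_address(a_ptr, &start_address);
displacements[0] = 0;

/* Displacements of b and n relative to a */
MPI_Get_address(b_ptr, &address);
displacements[1] = address - start_address;
MPI_Get_address(n_ptr, &address);
displacements[2] = address - start_address;

MPI_Type_create_struct(3, block_lengths, displacements,
typelist, mesg_mpi_t_ptr);
MPI_Type_commit(mesg_mpi_t_ptr);
} /* Build_derived_type3 */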

send a subvector from process 0 to process 1

/* count.c -- send a subvector from process 0 to process 1
*
* Input: none
* Output: contents of vector received by process 1
*
* Note: Program should only be run with 2 processes.
*
* See Chap 6, pp. 89 & ff. in PPMPI
*/
#include <stdio.h>
#include "mpi.h"

main(int argc, char* argv[]) {
float vector[100];
MPI_Status status;
int p;
int my_rank;
int i;

MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

/* Initialize vector and send */
if (my_rank == 0) {
for (i = 0; i < 50; i++)
vector[i] = 0.0;
for (i = 50; i < 100; i++)
vector[i] = 1.0;
MPI_Send(vector+50, 50, MPI_FLOAT, 1, 0,
MPI_COMM_WORLD);
} else { /* my_rank == 1 */
MPI_Recv(vector+50, 50, MPI_FLOAT, 0, 0,
MPI_COMM_WORLD, &status);
for (i = 50; i < 100; i++)
printf("%3.1f ",vector[i]);
printf("\n");
}

MPI_Finalize();
} /* main */
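
In count.c the receive posts a buffer of exactly 50 floats, so nothing needs to be checked. In general the count given to MPI_Recv is only an upper bound, and the actual message length can be read back from the status object with MPI_Get_count. The fragment below is a sketch, not part of count.c; it reuses the vector, status, and i variables from the listing and would stand in for the body of the my_rank == 1 branch.

/* Sketch only: receive into the full buffer and ask the status object
 * how many MPI_FLOATs actually arrived (50 for the send above). */
int recv_count;

MPI_Recv(vector, 100, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &status);
MPI_Get_count(&status, MPI_FLOAT, &recv_count);
printf("Process 1 received %d floats\n", recv_count);
for (i = 0; i < recv_count; i++)
printf("%3.1f ", vector[i]);
printf("\n");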

test basic topology functions

/* top_fcns.c -- test basic topology functions
*
* Input: none
* Output: results of calls to various functions testing topology
* creation
*
* Algorithm:
* 1. Build a 2-dimensional Cartesian communicator from
* MPI_Comm_world
* 2. Print topology information for each process
* 3. Use MPI_Cart_sub to build a communicator for each
* row of the Cartesian communicator
* 4. Carry out a broadcast across each row communicator
* 5. Print results of broadcast
* 6. Use MPI_Cart_sub to build a communicator for each
* column of the Cartesian communicator
* 7. Carry out a broadcast across each column communicator
* 8. Print results of broadcast
*
* Note: Assumes the number of processes, p, is a perfect square
*
* See Chap 7, pp. 121 & ff in PPMPI
*/
#include <stdio.h>
#include "mpi.h"
#include <math.h>

main(int argc, char* argv[]) {
int p;
int my_rank;
int q;
MPI_Comm grid_comm;
int dim_sizes[2];
int wrap_around[2];
int reorder = 1;
int coordinates[2];
int my_grid_rank;
int grid_rank;
int free_coords[2];
MPI_Comm row_comm;
MPI_Comm col_comm;
int row_test;
int col_test;


MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

q = (int) sqrt((double) p);

dim_sizes[0] = dim_sizes[1] = q;
wrap_around[0] = wrap_around[1] = 1;
MPI_Cart_create(MPI_COMM_WORLD, 2, dim_sizes,
wrap_around, reorder, &grid_comm);

MPI_Comm_rank(grid_comm, &my_grid_rank);
MPI_Cart_coords(grid_comm, my_grid_rank, 2,
coordinates);

MPI_Cart_rank(grid_comm, coordinates, &grid_rank);

printf("Process %d > my_grid_rank = %d, coords = (%d,%d), grid_rank = %d\n",
my_rank, my_grid_rank, coordinates[0], coordinates[1], grid_rank);

free_coords[0] = 0;
free_coords[1] = 1;
MPI_Cart_sub(grid_comm, free_coords, &row_comm);
if (coordinates[1] == 0)
row_test = coordinates[0];
else
row_test = -1;
MPI_Bcast(&row_test, 1, MPI_INT, 0, row_comm);
printf("Process %d > coords = (%d,%d), row_test = %d\n",
my_rank, coordinates[0], coordinates[1], row_test);

free_coords[0] = 1;
free_coords[1] = 0;
MPI_Cart_sub(grid_comm, free_coords, &col_comm);
if (coordinates[0] == 0)
col_test = coordinates[1];
else
col_test = -1;
MPI_Bcast(&col_test, 1, MPI_INT, 0, col_comm);
printf("Process %d > coords = (%d,%d), col_test = %d\n",
my_rank, coordinates[0], coordinates[1], col_test);

MPI_Finalize();
} /* main */
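
top_fcns.c assumes the number of processes is a perfect square so that a q x q grid can be built. An alternative (a sketch, not in the listing; variable names follow the listing) is to let MPI choose balanced dimensions with MPI_Dims_create, which works for any p.

/* Sketch only: choose the grid shape with MPI_Dims_create instead of
 * assuming p is a perfect square. A zero entry lets MPI pick that
 * dimension (e.g. p = 6 gives dim_sizes = {3, 2}). */
int dim_sizes[2] = {0, 0};
int wrap_around[2] = {1, 1};
MPI_Comm grid_comm;

MPI_Dims_create(p, 2, dim_sizes);
MPI_Cart_create(MPI_COMM_WORLD, 2, dim_sizes, wrap_around,
1 /* reorder */, &grid_comm);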

multiply two square matrices on a single processor

/* serial_mat_mult.c -- multiply two square matrices on a single processor
*
* Input:
* n: order of the matrices
* A,B: factor matrices
*
* Output:
* C: product matrix
*
* See Chap 7, pp. 111 & ff in PPMPI
*/
#include <stdio.h>

#define MAX_ORDER 10

typedef float MATRIX_T[MAX_ORDER][MAX_ORDER];

main() {
int n;
MATRIX_T A;
MATRIX_T B;
MATRIX_T C;

void Read_matrix(char* prompt, MATRIX_T A, int n);
void Serial_matrix_mult(MATRIX_T A, MATRIX_T B, MATRIX_T C, int n);
void Print_matrix(char* title, MATRIX_T C, int n);

printf("What's the order of the matrices?\n");
scanf("%d", &n);

Read_matrix("Enter A", A, n);
Print_matrix("A = ", A, n);
Read_matrix("Enter B", B, n);
Print_matrix("B = ", B, n);
Serial_matrix_mult(A, B, C, n);
Print_matrix("Their product is", C, n);

} /* main */


/*****************************************************************/
void Read_matrix(
char* prompt /* in */,
MATRIX_T A /* out */,
int n /* in */) {
int i, j;

printf("%s\n", prompt);
for (i = 0; i < n; i++)
for (j = 0; j < n; j++)
scanf("%f", &A[i][j]);
} /* Read_matrix */


/*****************************************************************/
/* MATRIX_T is a two-dimensional array of floats */
void Serial_matrix_mult(
MATRIX_T A /* in */,
MATRIX_T B /* in */,
MATRIX_T C /* out */,
int n /* in */) {

int i, j, k;

void Print_matrix(char* title, MATRIX_T C, int n);

Print_matrix("In Serial_matrix_mult A = ", A, n);
Print_matrix("In Serial_matrix_mult B = ", B, n);

for (i = 0; i < n; i++)
for (j = 0; j < n; j++) {
C[i][j] = 0.0;
for (k = 0; k < n; k++)
C[i][j] = C[i][j] + A[i][k]*B[k][j];
printf("i = %d, j = %d, c_ij = %f\n", i, j, C[i][j]);
}
} /* Serial_matrix_mult */


/*****************************************************************/
void Print_matrix(
char* title /* in */,
MATRIX_T C /* out */,
int n /* in */) {
int i, j;

printf("%s\n", title);
for (i = 0; i < n; i++) {
for (j = 0; j < n; j++)
printf("%4.1f ", C[i][j]);
printf("\n");
}
} /* Print_matrix */
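
Serial_matrix_mult walks B column by column in its innermost loop, which strides through memory. A common variation (a sketch, not part of the listing; the name Serial_matrix_mult_ikj is made up here) moves the k loop to the middle so the innermost loop runs along rows of B and C contiguously, which is usually friendlier to the cache.

/* Sketch only: same product, computed in i-k-j loop order. C is cleared
 * first; the innermost loop then runs along rows of B and C. */
void Serial_matrix_mult_ikj(
MATRIX_T A /* in */,
MATRIX_T B /* in */,
MATRIX_T C /* out */,
int n /* in */) {

int i, j, k;

for (i = 0; i < n; i++)
for (j = 0; j < n; j++)
C[i][j] = 0.0;

for (i = 0; i < n; i++)
for (k = 0; k < n; k++)
for (j = 0; j < n; j++)
C[i][j] = C[i][j] + A[i][k]*B[k][j];
} /* Serial_matrix_mult_ikj */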

uses Fox's algorithm to multiply two square matrices

/* fox.c -- uses Fox's algorithm to multiply two square matrices
*
* Input:
* n: global order of matrices
* A,B: the factor matrices
* Output:
* C: the product matrix
*
* Notes:
* 1. Assumes the number of processes is a perfect square
* 2. The array member of the matrices is statically allocated
* 3. Assumes the global order of the matrices is evenly
* divisible by sqrt(p).
*
* See Chap 7, pp. 113 & ff and pp. 125 & ff in PPMPI
*/
#include <stdio.h>
#include "mpi.h"
#include <math.h>
#include <stdlib.h>

typedef struct {
int p; /* Total number of processes */
MPI_Comm comm; /* Communicator for entire grid */
MPI_Comm row_comm; /* Communicator for my row */
MPI_Comm col_comm; /* Communicator for my col */
int q; /* Order of grid */
int my_row; /* My row number */
int my_col; /* My column number */
int my_rank; /* My rank in the grid comm */
} GRID_INFO_T;


#define MAX 65536
typedef struct {
int n_bar;
#define Order(A) ((A)->n_bar)
float entries[MAX];
#define Entry(A,i,j) (*(((A)->entries) + ((A)->n_bar)*(i) + (j)))
} LOCAL_MATRIX_T;

/* Function Declarations */
LOCAL_MATRIX_T* Local_matrix_allocate(int n_bar);
void Free_local_matrix(LOCAL_MATRIX_T** local_A);
void Read_matrix(char* prompt, LOCAL_MATRIX_T* local_A,
GRID_INFO_T* grid, int n);
void Print_matrix(char* title, LOCAL_MATRIX_T* local_A,
GRID_INFO_T* grid, int n);
void Set_to_zero(LOCAL_MATRIX_T* local_A);
void Local_matrix_multiply(LOCAL_MATRIX_T* local_A,
LOCAL_MATRIX_T* local_B, LOCAL_MATRIX_T* local_C);
void Build_matrix_type(LOCAL_MATRIX_T* local_A);
MPI_Datatype local_matrix_mpi_t;

LOCAL_MATRIX_T* temp_mat;
void Print_local_matrices(char* title, LOCAL_MATRIX_T* local_A,
GRID_INFO_T* grid);

/*********************************************************/
main(int argc, char* argv[]) {
int p;
int my_rank;
GRID_INFO_T grid;
LOCAL_MATRIX_T* local_A;
LOCAL_MATRIX_T* local_B;
LOCAL_MATRIX_T* local_C;
int n;
int n_bar;

void Setup_grid(GRID_INFO_T* grid);
void Fox(int n, GRID_INFO_T* grid, LOCAL_MATRIX_T* local_A,
LOCAL_MATRIX_T* local_B, LOCAL_MATRIX_T* local_C);

MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

Setup_grid(&grid);
if (my_rank == 0) {
printf("What's the order of the matrices?\n");
scanf("%d", &n);
}

MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
n_bar = n/grid.q;

local_A = Local_matrix_allocate(n_bar);
Order(local_A) = n_bar;
Read_matrix("Enter A", local_A, &grid, n);
Print_matrix("We read A =", local_A, &grid, n);

local_B = Local_matrix_allocate(n_bar);
Order(local_B) = n_bar;
Read_matrix("Enter B", local_B, &grid, n);
Print_matrix("We read B =", local_B, &grid, n);

Build_matrix_type(local_A);
temp_mat = Local_matrix_allocate(n_bar);

local_C = Local_matrix_allocate(n_bar);
Order(local_C) = n_bar;
Fox(n, &grid, local_A, local_B, local_C);

Print_matrix("The product is", local_C, &grid, n);

Free_local_matrix(&local_A);
Free_local_matrix(&local_B);
Free_local_matrix(&local_C);

MPI_Finalize();
} /* main */


/*********************************************************/
void Setup_grid(
GRID_INFO_T* grid /* out */) {
int old_rank;
int dimensions[2];
int wrap_around[2];
int coordinates[2];
int free_coords[2];

/* Set up Global Grid Information */
MPI_Comm_size(MPI_COMM_WORLD, &(grid->p));
MPI_Comm_rank(MPI_COMM_WORLD, &old_rank);

/* We assume p is a perfect square */
grid->q = (int) sqrt((double) grid->p);
dimensions[0] = dimensions[1] = grid->q;

/* Fox's algorithm needs a circular shift of B along the */
/* row coordinate, so make the grid periodic. */
wrap_around[0] = wrap_around[1] = 1;
MPI_Cart_create(MPI_COMM_WORLD, 2, dimensions,
wrap_around, 1, &(grid->comm));
MPI_Comm_rank(grid->comm, &(grid->my_rank));
MPI_Cart_coords(grid->comm, grid->my_rank, 2,
coordinates);
grid->my_row = coordinates[0];
grid->my_col = coordinates[1];

/* Set up row communicators */
free_coords[0] = 0;
free_coords[1] = 1;
MPI_Cart_sub(grid->comm, free_coords,
&(grid->row_comm));

/* Set up column communicators */
free_coords[0] = 1;
free_coords[1] = 0;
MPI_Cart_sub(grid->comm, free_coords,
&(grid->col_comm));
} /* Setup_grid */


/*********************************************************/
void Fox(
int n /* in */,
GRID_INFO_T* grid /* in */,
LOCAL_MATRIX_T* local_A /* in */,
LOCAL_MATRIX_T* local_B /* in */,
LOCAL_MATRIX_T* local_C /* out */) {

LOCAL_MATRIX_T* temp_A; /* Storage for the sub- */
/* matrix of A used during */
/* the current stage */
int stage;
int bcast_root;
int n_bar; /* n/sqrt(p) */
int source;
int dest;
MPI_Status status;

n_bar = n/grid->q;
Set_to_zero(local_C);

/* Calculate addresses for circular shift of B */
source = (grid->my_row + 1) % grid->q;
dest = (grid->my_row + grid->q - 1) % grid->q;

/* Set aside storage for the broadcast block of A */
temp_A = Local_matrix_allocate(n_bar);

for (stage = 0; stage < grid->q; stage++) {
bcast_root = (grid->my_row + stage) % grid->q;
if (bcast_root == grid->my_col) {
MPI_Bcast(local_A, 1, local_matrix_mpi_t,
bcast_root, grid->row_comm);
Local_matrix_multiply(local_A, local_B,
local_C);
} else {
MPI_Bcast(temp_A, 1, local_matrix_mpi_t,
bcast_root, grid->row_comm);
Local_matrix_multiply(temp_A, local_B,
local_C);
}
MPI_Sendrecv_replace(local_B, 1, local_matrix_mpi_t,
dest, 0, source, 0, grid->col_comm, &status);
} /* for */

} /* Fox */


/*********************************************************/
LOCAL_MATRIX_T* Local_matrix_allocate(int local_order) {
LOCAL_MATRIX_T* temp;

temp = (LOCAL_MATRIX_T*) malloc(sizeof(LOCAL_MATRIX_T));
return temp;
} /* Local_matrix_allocate */


/*********************************************************/
void Free_local_matrix(
LOCAL_MATRIX_T** local_A_ptr /* in/out */) {
free(*local_A_ptr);
} /* Free_local_matrix */


/*********************************************************/
/* Read and distribute matrix:
* foreach global row of the matrix,
* foreach grid column
* read a block of n_bar floats on process 0
* and send them to the appropriate process.
*/
void Read_matrix(
char* prompt /* in */,
LOCAL_MATRIX_T* local_A /* out */,
GRID_INFO_T* grid /* in */,
int n /* in */) {

int mat_row, mat_col;
int grid_row, grid_col;
int dest;
int coords[2];
float* temp;
MPI_Status status;

if (grid->my_rank == 0) {
temp = (float*) malloc(Order(local_A)*sizeof(float));
printf("%s\n", prompt);
fflush(stdout);
for (mat_row = 0; mat_row < n; mat_row++) {
grid_row = mat_row/Order(local_A);
coords[0] = grid_row;
for (grid_col = 0; grid_col < grid->q; grid_col++) {
coords[1] = grid_col;
MPI_Cart_rank(grid->comm, coords, &dest);
if (dest == 0) {
for (mat_col = 0; mat_col < Order(local_A); mat_col++)
scanf("%f",
(local_A->entries)+mat_row*Order(local_A)+mat_col);
} else {
for(mat_col = 0; mat_col < Order(local_A); mat_col++)
scanf("%f", temp + mat_col);
MPI_Send(temp, Order(local_A), MPI_FLOAT, dest, 0,
grid->comm);
}
}
}
free(temp);
} else {
for (mat_row = 0; mat_row < Order(local_A); mat_row++)
MPI_Recv(&Entry(local_A, mat_row, 0), Order(local_A),
MPI_FLOAT, 0, 0, grid->comm, &status);
}

} /* Read_matrix */


/*********************************************************/
void Print_matrix(
char* title /* in */,
LOCAL_MATRIX_T* local_A /* out */,
GRID_INFO_T* grid /* in */,
int n /* in */) {
int mat_row, mat_col;
int grid_row, grid_col;
int source;
int coords[2];
float* temp;
MPI_Status status;

if (grid->my_rank == 0) {
temp = (float*) malloc(Order(local_A)*sizeof(float));
printf("%s\n", title);
for (mat_row = 0; mat_row < n; mat_row++) {
grid_row = mat_row/Order(local_A);
coords[0] = grid_row;
for (grid_col = 0; grid_col < grid->q; grid_col++) {
coords[1] = grid_col;
MPI_Cart_rank(grid->comm, coords, &source);
if (source == 0) {
for(mat_col = 0; mat_col < Order(local_A); mat_col++)
printf("%4.1f ", Entry(local_A, mat_row, mat_col));
} else {
MPI_Recv(temp, Order(local_A), MPI_FLOAT, source, 0,
grid->comm, &status);
for(mat_col = 0; mat_col < Order(local_A); mat_col++)
printf("%4.1f ", temp[mat_col]);
}
}
printf("\n");
}
free(temp);
} else {
for (mat_row = 0; mat_row < Order(local_A); mat_row++)
MPI_Send(&Entry(local_A, mat_row, 0), Order(local_A),
MPI_FLOAT, 0, 0, grid->comm);
}

} /* Print_matrix */


/*********************************************************/
void Set_to_zero(
LOCAL_MATRIX_T* local_A /* out */) {

int i, j;

for (i = 0; i < Order(local_A); i++)
for (j = 0; j < Order(local_A); j++)
Entry(local_A,i,j) = 0.0;

} /* Set_to_zero */


/*********************************************************/
void Build_matrix_type(
LOCAL_MATRIX_T* local_A /* in */) {
MPI_Datatype temp_mpi_t;
int block_lengths[2];
MPI_Aint displacements[2];
MPI_Datatype typelist[2];
MPI_Aint start_address;
MPI_Aint address;

MPI_Type_contiguous(Order(local_A)*Order(local_A),
MPI_FLOAT, &temp_mpi_t);

block_lengths[0] = block_lengths[1] = 1;

typelist[0] = MPI_INT;
typelist[1] = temp_mpi_t;

MPI_Address(local_A, &start_address);
MPI_Address(&(local_A->n_bar), &address);
displacements[0] = address - start_address;

MPI_Address(local_A->entries, &address);
displacements[1] = address - start_address;

MPI_Type_struct(2, block_lengths, displacements,
typelist, &local_matrix_mpi_t);
MPI_Type_commit(&local_matrix_mpi_t);
} /* Build_matrix_type */


/*********************************************************/
void Local_matrix_multiply(
LOCAL_MATRIX_T* local_A /* in */,
LOCAL_MATRIX_T* local_B /* in */,
LOCAL_MATRIX_T* local_C /* out */) {
int i, j, k;

for (i = 0; i < Order(local_A); i++)
for (j = 0; j < Order(local_A); j++)
for (k = 0; k < Order(local_B); k++)
Entry(local_C,i,j) = Entry(local_C,i,j)
+ Entry(local_A,i,k)*Entry(local_B,k,j);

} /* Local_matrix_multiply */


/*********************************************************/
void Print_local_matrices(
char* title /* in */,
LOCAL_MATRIX_T* local_A /* in */,
GRID_INFO_T* grid /* in */) {

int coords[2];
int i, j;
int source;
MPI_Status status;

if (grid->my_rank == 0) {
printf("%s\n", title);
printf("Process %d > grid_row = %d, grid_col = %d\n",
grid->my_rank, grid->my_row, grid->my_col);
for (i = 0; i < Order(local_A); i++) {
for (j = 0; j < Order(local_A); j++)
printf("%4.1f ", Entry(local_A,i,j));
printf("\n");
}
for (source = 1; source < grid->p; source++) {
MPI_Recv(temp_mat, 1, local_matrix_mpi_t, source, 0,
grid->comm, &status);
MPI_Cart_coords(grid->comm, source, 2, coords);
printf("Process %d > grid_row = %d, grid_col = %d\n",
source, coords[0], coords[1]);
for (i = 0; i < Order(temp_mat); i++) {
for (j = 0; j < Order(temp_mat); j++)
printf("%4.1f ", Entry(temp_mat,i,j));
printf("\n");
}
}
fflush(stdout);
} else {
MPI_Send(local_A, 1, local_matrix_mpi_t, 0, 0, grid->comm);
}

} /* Print_local_matrices */
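
In Fox(), the source and dest ranks for the circular shift of local_B are computed by hand from grid->my_row. Since the column communicator built in Setup_grid is a periodic, one-dimensional Cartesian communicator, the same ranks can be obtained from MPI_Cart_shift. The fragment below is a sketch, not part of fox.c; it assumes the grid, local_B, local_matrix_mpi_t, and status variables from the listing.

/* Sketch only: let MPI compute the shift partners in the column
 * communicator. A displacement of -1 sends each block one grid row
 * "up" and receives from the row below, matching the hand-computed
 * source = (my_row + 1) % q and dest = (my_row + q - 1) % q above. */
int source, dest;

MPI_Cart_shift(grid->col_comm, 0 /* dimension */, -1 /* displacement */,
&source, &dest);
MPI_Sendrecv_replace(local_B, 1, local_matrix_mpi_t,
dest, 0, source, 0, grid->col_comm, &status);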