List Info

Thread: LAM: please any body solve my problem




LAM: please any body solve my problem
country flaguser name
India
2007-03-06 01:23:34
Hi,
Could anyone please help me solve my problem?
I have a matrix multiplication program (below) which runs on multiple machines (using Linux).
The program works fine with small matrices (up to 900), but doesn't work with large
matrices (>900).
I am allocating memory dynamically.
The error is as follows
It seems that [at least] one of the processes that was started with mpirun did not invoke MPI_INIT before quitting (it is possible that more than one process  did not invoke MPI_INIT--- mpirun was only notified of the first one which was on node n0).
Mpirun can only be used with MPI programs.
Mpirun failed with exit status 252
 
Can anyone tell me the possible reason that causes this error?
/***********  MULTIPLY_MATRICES WITH MPI AND
 CANNON ALGORITHM    *******/
 
 
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "mpi.h"
 
/* Matrix order: every matrix in this program is N x N.
 * Original author's note: < 900 ok, above 900 problem exists. */
#define N 1200

/* Element (i,j) of a flat row-major matrix. Expands to an lvalue and relies
 * on an `int *mat` and an `int n` (row length) being in scope at the point
 * of use. */
#define _mat(i,j) (mat[(i)*n+(j)])
 
 /*
  * Read an n x n matrix of whitespace-separated decimal integers from the
  * text file `fname` into `mat`, a caller-owned flat row-major buffer of at
  * least n*n ints.
  *
  * Returns 0 on success, -1 if the file cannot be opened or does not supply
  * n*n parseable integers.
  */
int readmat(char *fname, int *mat, int n)
{
    FILE *fp;
    int i, j;

    if ((fp = fopen(fname, "r")) == NULL)
        return -1;

    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            /* fscanf returns EOF at end of input but 0 on a non-numeric
             * token; comparing only against EOF would accept malformed
             * files and leave elements unset, so require exactly one
             * converted item. */
            if (fscanf(fp, "%d", &mat[(size_t)i * n + j]) != 1) {
                fclose(fp);
                return -1;
            }
        }
    }

    fclose(fp);
    return 0;
}
 
 
 
/*
 * Write the n x n matrix held in the flat row-major buffer `mat` to the
 * text file `fname` (created or truncated). Each value is followed by a
 * tab and each row is terminated by a newline.
 *
 * Returns 0 on success, -1 if the file cannot be opened or if any write or
 * the final close fails.
 */
int writemat(char *fname, int *mat, int n)
{
    FILE *fp;
    int i, j;
    int status = 0;

    if ((fp = fopen(fname, "w")) == NULL)
        return -1;

    for (i = 0; i < n && status == 0; i++) {
        for (j = 0; j < n; j++) {
            if (fprintf(fp, "%d\t", mat[(size_t)i * n + j]) < 0) {
                status = -1;
                break;
            }
        }
        if (status == 0 && fputc('\n', fp) == EOF)
            status = -1;
    }

    /* fclose flushes buffered output, so a failure here means data was lost. */
    if (fclose(fp) != 0)
        status = -1;

    return status;
}
 
 
 &nbsp; &nbsp; &nbsp;   &nbsp;  
 
int main(int argc,char *argv[])
{
   
 &nbsp; int
 myrank_old,myrank_new,process_rank,numprocs,situation=0,namelen,source,count;
 &nbsp; double t1,t2;
 &nbsp; 
 &nbsp; int M,S,start_block_x,start_block_y;
 &nbsp; int i,j,k,l,coord_nbr_proc;
 &nbsp; float sum;
 &nbsp; int *package_A,*package_B,*C_blocks,*temp_A,*temp_B,*temp_C;
 &nbsp; char processor_name[MPI_MAX_PROCESSOR_NAME]; 
 &nbsp; MPI_Status stat;
 &nbsp; MPI_Comm comm_new;
 &nbsp; int
 num_dims=2,dims[2],periods[2],coords[2];
 &nbsp; int rank_source,rank_dest;
 &nbsp; 
 &nbsp; MPI_Init(&;argc,&argv);
 &nbsp; MPI_Comm_rank(MPI_COMM_WORLD,&amp;myrank_old);
 &nbsp; MPI_Comm_size(MPI_COMM_WORLD,&amp;numprocs);
 
 &nbsp; M=(int)sqrt((double)numprocs);
 &nbsp; S=N/M;
 &nbsp; 
 &nbsp; dims[0]=dims[1]=M;
 &nbsp; periods[0]=periods[1]=1;
 &nbsp;
 MPI_Cart_create(MPI_COMM_WORLD,num_dims,dims,periods,0,&comm_new);
 &nbsp; MPI_Comm_rank(comm_new,&myrank_new);
 &nbsp; 
 &nbsp; MPI_Get_processor_name(processor_name,&namelen);
 &nbsp; fprintf(stdout,"Process %d of %d on %sn",myrank_new, numprocs, processor_name);
 &nbsp; 
 &nbsp; package_A=(int *)malloc(S*S*sizeof(int));
 &nbsp; package_B=(int *)malloc(S*S*sizeof(int));
 &nbsp; C_blocks=(int *)malloc(S*S*sizeof(int));
&nbsp;
 &nbsp; temp_A=(int
 *)malloc(S*S*sizeof(int));
 &nbsp; temp_B=(int *)malloc(S*S*sizeof(int));
 &nbsp; temp_C=(int *)malloc(S*S*sizeof(int));
 &nbsp; &nbsp; &nbsp; 
 &nbsp; 
 &nbsp; for(i=0; i<S*S; i++)
 &nbsp; &nbsp; &nbsp;  temp_C[i]=0;
 
 &nbsp; if(myrank_new==0)
 &nbsp; {
 &nbsp; &nbsp; &nbsp; 
 &nbsp; &nbsp; &nbsp; int A[N][N],B[N][N],C[N][N];
 &nbsp; &nbsp; &nbsp; 
 
 &nbsp; &nbsp; &nbsp; if(readmat("A_file",(int *)A,N)<0)
 &nbsp; &nbsp; &nbsp; &nbsp;   ; situation=1;
 &nbsp; &nbsp; &nbsp; &nbsp;  
 &nbsp; &nbsp; &nbsp; if(readmat("B_file",(int *)B,N)<0)
 &nbsp; &nbsp; &nbsp; &nbsp;   ; situation=1;
 &nbsp; &nbsp; &nbsp; &nbsp;   ; 
 &nbsp; &nbsp; &nbsp; MPI_Bcast(&amp;situation,1,MPI_INT,0,comm_new);
 &nbsp; &nbsp; &nbsp; if(situation==1)
 &nbsp; &nbsp; &nbsp; { &nbsp; 
  &nbsp; &nbsp; &nbsp; &nbsp;   ;printf("File A_file or B_file has problem.n");
 &nbsp; &nbsp; &nbsp;   &nbsp; &nbsp;  MPI_Finalize();
 &nbsp; &nbsp; &nbsp; &nbsp;   ; return(0);  
 &nbsp; &nbsp; &nbsp; }
 &nbsp; &nbsp; &nbsp;
 t1=MPI_Wtime();
 &nbsp; &nbsp; &nbsp; 
 &nbsp; &nbsp; &nbsp; for(process_rank=1;process_rank&lt;numprocs;process_rank++)
 &nbsp; &nbsp; &nbsp; {
 &nbsp; &nbsp; &nbsp;   &nbsp; &nbsp;  start_block_x=(process_rank/M);
 &nbsp; &nbsp; &nbsp;   &nbsp; &nbsp;  start_block_y=(process_rank%M);
 &nbsp; &nbsp; &nbsp; &nbsp;   ; 
 &nbsp; &nbsp; &nbsp; &nbsp;   count=0;
 &nbsp; &nbsp; &nbsp; &nbsp;   ; for(i=start_block_x*S;i<(start_block_x+1)*S;i++)
 &nbsp; &nbsp; &nbsp; &nbsp;   ; {
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; for(j=start_block_y*S;j<(start_block_y+1)*S;j++)
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; {
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; &nbsp; &nbsp;  package_A[count]=A[i][j];
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; &nbsp; &nbsp;  package_B[count]=B[i][j];
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; &nbsp; &nbsp;  count++;
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; }
 &nbsp; &nbsp; &nbsp; &nbsp;   }
 &nbsp; &nbsp; &nbsp;   &nbsp; &nbsp; 
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp;MPI_Send(package_A,S*S,MPI_INT,process_rank,0,comm_new);
 &nbsp; &nbsp; &nbsp; &nbsp;   MPI_Send(package_B,S*S,MPI_INT,process_rank,0,comm_new);
 &nbsp; &nbsp; &nbsp;  }
 &nbsp; &nbsp; &nbsp;  
 &nbsp; &nbsp; &nbsp; &nbsp;start_block_x=(myrank_new/M);
 &nbsp; &nbsp; &nbsp;  start_block_y=(myrank_new%M);
 
 &nbsp; &nbsp; &nbsp;  count=0;
 &nbsp; &nbsp; &nbsp;  
 &nbsp; &nbsp; &nbsp;  for(i=start_block_x*S;i<(start_block_x+1)*S;i++)
 &nbsp; &nbsp; &nbsp;  {
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; for(j=start_block_y*S;j<(start_block_y+1)*S;j++)
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; {
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; &nbsp; &nbsp; temp_A[count]=A[i][j];
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; &nbsp; &nbsp; temp_B[count]=B[i][j];
 &nbsp; &nbsp; &nbsp; &nbsp;   ;  &nbsp;  
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; &nbsp; &nbsp; count++;
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; }
 &nbsp; &nbsp; &nbsp;  }
 &nbsp; &nbsp; &nbsp;   &nbsp;   &nbsp; &nbsp; &nbsp;  
 &nbsp; &nbsp; &nbsp; &nbsp;for(coord_nbr_proc=0;coord_nbr_proc<;M;coord_nbr_proc++)
 &nbsp; &nbsp; &nbsp;  {
 &nbsp; &nbsp; &nbsp;   &nbsp; 
  
 &nbsp; &nbsp; &nbsp; &nbsp;   for(i=0;i<S;i++)
 &nbsp; &nbsp; &nbsp; &nbsp;   ; {  
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; &nbsp;for(j=0;j<S;j++)
 &nbsp; &nbsp; &nbsp; &nbsp;   ;  { &nbsp; &nbsp; &nbsp; &nbsp; 
 &nbsp; &nbsp; &nbsp;   &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; for(k=0;k<;S;k++)
 &nbsp; &nbsp; &nbsp; &nbsp;   ;  &nbsp; &nbsp; &nbsp; &nbsp;   temp_C[i*S+j]+=temp_A[i*S+k]*temp_B[k*S+j];
 &nbsp; &nbsp; &nbsp;   &nbsp; &nbsp; &nbsp; &nbsp; } &nbsp; &nbsp; &nbsp; 
 &nbsp; &nbsp; &nbsp;   &nbsp; &nbsp; &nbsp; &nbsp;   ;
 &nbsp; &nbsp; &nbsp;   &nbsp; &nbsp; &nbsp;}
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; MPI_Cart_shift(comm_new,1,-1,&amp;rank_source,&rank_dest);
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; MPI_Sendrecv_replace(temp_A,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; 
 &nbsp; &nbsp; &nbsp;   &nbsp; &nbsp; &nbsp;MPI_Cart_shift(comm_new,0,-1,&rank_source,&;rank_dest);
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp;
 MPI_Sendrecv_replace(temp_B,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
 
 &nbsp; &nbsp; &nbsp;    } 
 &nbsp; &nbsp; &nbsp; &nbsp;  for(i=0;i<S;i++)
 &nbsp; &nbsp; &nbsp; &nbsp;  {
 &nbsp; &nbsp; &nbsp; &nbsp;   for(j=0;j<S;j++)
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; C[i][j]=temp_C[i*S+j];
 &nbsp; &nbsp; &nbsp; &nbsp;  }
 &nbsp; &nbsp; &nbsp;  
 &nbsp; &nbsp; &nbsp;  
 &nbsp; &nbsp; &nbsp;  for(i=1;i<numprocs;i++)
 &nbsp; &nbsp; &nbsp;    {
 &nbsp; &nbsp; &nbsp;   &nbsp; &nbsp; &nbsp; &nbsp;   ; 
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; &nbsp; 
 &nbsp; &nbsp; &nbsp; &nbsp;   ;  &nbsp; MPI_Recv(C_blocks,S*S,MPI_INT,MPI_ANY_SOURCE,MPI_ANY_TAG,comm_new,&amp;stat);
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; &nbsp;  count=0;
 &nbsp; &nbsp; &nbsp; &nbsp;   ;  &nbsp; l=0;
 &nbsp; &nbsp; &nbsp; &nbsp;   ;  &nbsp; source=stat.MPI_SOURCE;
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; &nbsp;  for(j=0;j<S;j++)
 &nbsp; &nbsp; &nbsp; &nbsp;   ;  &nbsp;
 {
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; &nbsp; &nbsp;   ; for(k=0;k<S;k++)
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; &nbsp; &nbsp;   &nbsp;{
 &nbsp; &nbsp; &nbsp; &nbsp;   ;  &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; C[(source/M)*S+j][(source%M)*S+k]=C_blocks[l*S+count];
 &nbsp; &nbsp; &nbsp; &nbsp;   ;  &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; count++;
 &nbsp; &nbsp; &nbsp; &nbsp;   ;  &nbsp; &nbsp; &nbsp; } 
 &nbsp; &nbsp; &nbsp; &nbsp;   ;  &nbsp; &nbsp; &nbsp; count=0; 
 &nbsp; &nbsp; &nbsp; &nbsp;   ;  &nbsp; &nbsp; &nbsp; l++; 
 &nbsp; &nbsp; &nbsp; &nbsp;   ;  &nbsp; } &nbsp; &nbsp;  
 &nbsp; &nbsp; &nbsp; &nbsp;   ;  &nbsp; &nbsp; &nbsp; &nbsp;
 &nbsp; &nbsp; &nbsp; &nbsp;   ;}
 &nbsp; &nbsp; &nbsp; &nbsp; 
 &nbsp; &nbsp; &nbsp; &nbsp; t2= MPI_Wtime(); 
 &nbsp; &nbsp; &nbsp;   &nbsp; printf("Execution_time=%lfn",(t2-t1)); &nbsp; 
 &nbsp; &nbsp; &nbsp; &nbsp; writemat("C_Cannon",(int *)C,N); 
 &nbsp; &nbsp; } &nbsp; &nbsp; &nbsp; &nbsp;  
 
 
 &nbsp; &nbsp; if(myrank_new!=0)
 &nbsp; &nbsp; {
 &nbsp; &nbsp; &nbsp; &nbsp;  
 &nbsp; &nbsp; &nbsp; &nbsp;  MPI_Cart_coords(comm_new,myrank_new,2,coords);
 &nbsp; &nbsp; &nbsp; &nbsp;  MPI_Bcast(&situation,1,MPI_INT,0,comm_new); &nbsp;  
 &nbsp; &nbsp; &nbsp; &nbsp;  if(situation!=0)
 &nbsp; &nbsp; &nbsp;    {
 &nbsp; &nbsp; &nbsp; &nbsp;   MPI_Finalize();
 &nbsp; &nbsp; &nbsp; &nbsp;   return(0);
 &nbsp; &nbsp; &nbsp;    } &nbsp;  
 &nbsp; &nbsp; &nbsp; &nbsp;  MPI_Recv(temp_A,S*S,MPI_INT,0,0,comm_new,&stat);
 &nbsp; &nbsp; &nbsp; &nbsp;  MPI_Recv(temp_B,S*S,MPI_INT,0,0,comm_new,&amp;stat);
 
 &nbsp; &nbsp; &nbsp;   
 MPI_Cart_shift(comm_new,1,-coords[0],&rank_source,&rank_dest);
 &nbsp;   &nbsp; &nbsp; &nbsp;MPI_Sendrecv_replace(temp_A,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
 
 &nbsp; &nbsp; &nbsp;    MPI_Cart_shift(comm_new,0,-coords[1],&rank_source,&rank_dest);
 &nbsp; &nbsp; &nbsp; &nbsp;  MPI_Sendrecv_replace(temp_B,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
 
 &nbsp; &nbsp;   &nbsp; &nbsp;for(coord_nbr_proc=0;coord_nbr_proc<M;coord_nbr_proc++)
 &nbsp; &nbsp; &nbsp; &nbsp;  {
 &nbsp; &nbsp; &nbsp;   &nbsp; &nbsp; 
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; for(i=0;i<S;i++)
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; {  
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; &nbsp; &nbsp;for(j=0;j<S;j++)
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; &nbsp;  {
 &nbsp; &nbsp; &nbsp; &nbsp;   ;  &nbsp; &nbsp; &nbsp; &nbsp;  
 &nbsp; &nbsp; &nbsp;   &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; for(k=0;k<;S;k++)
 &nbsp; &nbsp; &nbsp;   &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; &nbsp; &nbsp;  temp_C[i*S+j]+=temp_A[i*S+k]*temp_B[k*S+j];
 &nbsp; &nbsp; &nbsp;   &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; &nbsp; 
 &nbsp; &nbsp; &nbsp;   &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp;}
 &nbsp; &nbsp; &nbsp;   &nbsp; &nbsp;  }
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; MPI_Cart_shift(comm_new,1,-1,&amp;rank_source,&rank_dest);
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; MPI_Sendrecv_replace(temp_A,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
 
 &nbsp;   &nbsp; &nbsp; &nbsp; &nbsp;  MPI_Cart_shift(comm_new,0,-1,&rank_source,&amp;rank_dest);
 &nbsp; &nbsp; &nbsp; &nbsp;   ; &nbsp; MPI_Sendrecv_replace(temp_B,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
 
 &nbsp; &nbsp; &nbsp;    } 
 &nbsp; &nbsp; &nbsp; &nbsp;MPI_Send(temp_C,S*S,MPI_INT,0,0,comm_new); 
 &nbsp;} &nbsp;  
 &nbsp;MPI_Finalize();
  return(0); &nbsp; &nbsp; &nbsp; &nbsp;   ;  
}
 


Here’s a new way to find what you're looking for - Yahoo! Answers
Re: LAM: please any body solve my problem
user name
2007-03-06 02:33:00
Why don't you debug it? Debug it the way child processes are
debugged after fork().
Put the following code snippet at the start of the slave
code.

[snip]
int j = 0, k = 0;
while (!j) k++;
[snip]

The CPU will keep spinning here; after looking at the "ps
ax" output to find the process ID you can
attach gdb using
"gdb <executable> <pid>", do "(gdb) p j = 1",
and then keep on single-stepping
through the code.

Rajesh vanaparthi wrote:
> Hi,
> Please any one solve my problem.
> I have a matrix multiplication program below which run
on multiple 
> machines(using linux).
> The program works fine with small size matrices(till
900), but doesn't 
> work with large
> size matrices(>900).
> I am allocating memory dynamically.
> The error is as follows
> */It seems that [at least] one of the processes that
was started with 
> mpirun did not invoke MPI_INIT before quitting (it is
possible that 
> more than one process did not invoke MPI_INIT--- mpirun
was only 
> notified of the first one which was on node n0)./*
> */Mpirun can only be sed with MPI programs ./*
> */Mpirun failed with exit staus 252/*
> Can anyone tell me the possible reason that causes this
error?
> /***********  MULTIPLY_MATRICES WITH MPI AND
>  CANNON ALGORITHM    *******/
>  
>  
> #include <stdio.h>     
> #include <math.h>
> #include "mpi.h"       
>  
> #define N     1200      /* < 900 ok above 900
problem exists */
> #define _mat(i,j) (mat[(i)*n+(j)
>  
>  
>  readmat(char *fname,int *mat,int n)
> {
>     FILE *fp;
>     int i,j;
>    
>  if((fp=fopen(fname,"r"))==NULL)
>       return(-1);
>     for(i=0;i<n;i++)
>        for(j=0;j<n;j++)
>          
if(fscanf(fp,"%d",&_mat(i,j))==EOF)
>           {
>              fclose(fp);
>              return(-1);
>           }
>     fclose(fp);
>     return(0);
> }         
>  
>  
>  
> writemat(char *fname,int *mat,int n)
> {
>     FILE *fp;
>     int i,j;
>     if((fp=fopen(fname,"w"))==NULL)
>         return(-1);
>     for(i=0;i<n; fprintf(fp,"n"),i++)
>         for(j=0;j<n;j++)
>            fprintf(fp,"%dt",_mat(i,j));
>     fclose(fp);
>     return(0);
> }  
>  
>  
>             
>  
> int main(int argc,char *argv[])
> {
>    
>    int
> 
myrank_old,myrank_new,process_rank,numprocs,situation=0,name
len,source,count;
>    double t1,t2;
>    
>    int M,S,start_block_x,start_block_y;
>    int i,j,k,l,coord_nbr_proc;
>    float sum;
>    int
*package_A,*package_B,*C_blocks,*temp_A,*temp_B,*temp_C;
>    char processor_name[MPI_MAX_PROCESSOR_NAME]; 
>    MPI_Status stat;
>    MPI_Comm comm_new;
>    int
>  num_dims=2,dims[2],periods[2],coords[2];
>    int rank_source,rank_dest;
>    
>    MPI_Init(&argc,&argv);
>    MPI_Comm_rank(MPI_COMM_WORLD,&myrank_old);
>    MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
>  
>    M=(int)sqrt((double)numprocs);
>    S=N/M;
>    
>    dims[0]=dims[1]=M;
>    periods[0]=periods[1]=1;
>   
> 
MPI_Cart_create(MPI_COMM_WORLD,num_dims,dims,periods,0,&
comm_new);
>    MPI_Comm_rank(comm_new,&myrank_new);
>    
>   
MPI_Get_processor_name(processor_name,&namelen);
>    fprintf(stdout,"Process %d of %d on
%sn",myrank_new, numprocs, processor_name);
>    
>    package_A=(int *)malloc(S*S*sizeof(int));
>    package_B=(int *)malloc(S*S*sizeof(int));
>    C_blocks=(int *)malloc(S*S*sizeof(int));
>  
>    temp_A=(int
>  *)malloc(S*S*sizeof(int));
>    temp_B=(int *)malloc(S*S*sizeof(int));
>    temp_C=(int *)malloc(S*S*sizeof(int));
>        
>    
>    for(i=0; i<S*S; i++)
>         temp_C[i]=0;
>  
>    if(myrank_new==0)
>    {
>        
>        int A[N][N],B[N][N],C[N][N];
>        
>  
>        if(readmat("A_file",(int *)A,N)<0)
>             situation=1;
>           
>        if(readmat("B_file",(int *)B,N)<0)
>             situation=1;
>             
>        MPI_Bcast(&situation,1,MPI_INT,0,comm_new);
>        if(situation==1)
>        {   
>             printf("File A_file or B_file has
problem.n");
>               MPI_Finalize();
>             return(0);  
>        }
>       
>  t1=MPI_Wtime();
>        
>       
for(process_rank=1;process_rank<numprocs;process_rank++)
>        {
>               start_block_x=(process_rank/M);
>               start_block_y=(process_rank%M);
>             
>             count=0;
>            
for(i=start_block_x*S;i<(start_block_x+1)*S;i++)
>             {
>                
for(j=start_block_y*S;j<(start_block_y+1)*S;j++)
>                 {
>                      package_A[count]=A[i][j];
>                      package_B[count]=B[i][j];
>                      count++;
>                 }
>              }
>              
>             
MPI_Send(package_A,S*S,MPI_INT,process_rank,0,comm_new);
>             
MPI_Send(package_B,S*S,MPI_INT,process_rank,0,comm_new);
>         }
>         
>         start_block_x=(myrank_new/M);
>         start_block_y=(myrank_new%M);
>  
>         count=0;
>         
>        
for(i=start_block_x*S;i<(start_block_x+1)*S;i++)
>         {
>              
for(j=start_block_y*S;j<(start_block_y+1)*S;j++)
>               {
>                     temp_A[count]=A[i][j];
>                     temp_B[count]=B[i][j];
>                 
>                     count++;
>               }
>         }
>                     
>        
for(coord_nbr_proc=0;coord_nbr_proc<M;coord_nbr_proc++)
>         {
>            
>   
>             for(i=0;i<S;i++)
>             {  
>                  for(j=0;j<S;j++)
>              {         
>                         for(k=0;k<S;k++)
>                       
temp_C[i*S+j]+=temp_A[i*S+k]*temp_B[k*S+j];
>                  }       
>                    
>               }
>              
MPI_Cart_shift(comm_new,1,-1,&rank_source,&rank_dest
);
>              
MPI_Sendrecv_replace(temp_A,S*S,MPI_INT,rank_dest,0,rank_sou
rce,0,comm_new,&stat);
>               
>              
MPI_Cart_shift(comm_new,0,-1,&rank_source,&rank_dest
);
>              
> 
MPI_Sendrecv_replace(temp_B,S*S,MPI_INT,rank_dest,0,rank_sou
rce,0,comm_new,&stat);
>  
>           } 
>           for(i=0;i<S;i++)
>           {
>              for(j=0;j<S;j++)
>                 C[i][j]=temp_C[i*S+j];
>           }
>         
>         
>         for(i=1;i<numprocs;i++)
>           {
>                     
>                   
>               
MPI_Recv(C_blocks,S*S,MPI_INT,MPI_ANY_SOURCE,MPI_ANY_TAG,com
m_new,&stat);
>                    count=0;
>                l=0;
>                source=stat.MPI_SOURCE;
>                    for(j=0;j<S;j++)
>               
>  {
>                        for(k=0;k<S;k++)
>                        {
>                          
C[(source/M)*S+j][(source%M)*S+k]=C_blocks[l*S+count];
>                           count++;
>                    } 
>                    count=0; 
>                    l++; 
>                }      
>                     
>            }
>          
>          t2= MPI_Wtime(); 
>           
printf("Execution_time=%lfn",(t2-t1));   
>          writemat("C_Cannon",(int *)C,N); 
>      }          
>  
>  
>      if(myrank_new!=0)
>      {
>           
>          
MPI_Cart_coords(comm_new,myrank_new,2,coords);
>          
MPI_Bcast(&situation,1,MPI_INT,0,comm_new);    
>           if(situation!=0)
>           {
>              MPI_Finalize();
>              return(0);
>           }    
>          
MPI_Recv(temp_A,S*S,MPI_INT,0,0,comm_new,&stat);
>          
MPI_Recv(temp_B,S*S,MPI_INT,0,0,comm_new,&stat);
>  
>          
> 
MPI_Cart_shift(comm_new,1,-coords[0],&rank_source,&r
ank_dest);
>          
MPI_Sendrecv_replace(temp_A,S*S,MPI_INT,rank_dest,0,rank_sou
rce,0,comm_new,&stat);
>  
>          
MPI_Cart_shift(comm_new,0,-coords[1],&rank_source,&r
ank_dest);
>          
MPI_Sendrecv_replace(temp_B,S*S,MPI_INT,rank_dest,0,rank_sou
rce,0,comm_new,&stat);
>  
>          
for(coord_nbr_proc=0;coord_nbr_proc<M;coord_nbr_proc++)
>           {
>              
>               for(i=0;i<S;i++)
>               {  
>                    for(j=0;j<S;j++)
>                    {
>                       
>                         for(k=0;k<S;k++)
>                             
temp_C[i*S+j]+=temp_A[i*S+k]*temp_B[k*S+j];
>                         
>                      }
>               }
>              
MPI_Cart_shift(comm_new,1,-1,&rank_source,&rank_dest
);
>              
MPI_Sendrecv_replace(temp_A,S*S,MPI_INT,rank_dest,0,rank_sou
rce,0,comm_new,&stat);
>  
>              
MPI_Cart_shift(comm_new,0,-1,&rank_source,&rank_dest
);
>              
MPI_Sendrecv_replace(temp_B,S*S,MPI_INT,rank_dest,0,rank_sou
rce,0,comm_new,&stat);
>  
>           } 
>         MPI_Send(temp_C,S*S,MPI_INT,0,0,comm_new); 
>   }    
>   MPI_Finalize();
>   return(0);             
> }
>
>
------------------------------------------------------------
------------
> Here’s a new way to find what you're looking for -
Yahoo! Answers 
> <http://us.rd.yahoo.com/mail/in/yanswers
/*http://in.answers.yahoo.com/>
>
------------------------------------------------------------
------------
>
> _______________________________________________
> This list is archived at http://www.l
am-mpi.org/MailArchives/lam/

_______________________________________________
This list is archived at http://www.l
am-mpi.org/MailArchives/lam/

Re: LAM: ****** please any body solve my problem
country flaguser name
United States
2007-03-06 06:09:23
rajesh_ju05yahoo.co.in wrote:
> Hi,
>   Please any one solve my problem.
>   I have a matrix multiplication program below which 
run on multiple machines(using linux). 
> The program works fine with small size matrices(till
900), but doesn't work with large 
> size matrices(>900). 
>   I am allocating memory dynamically.

Without checking for failure of malloc() ?  How does the
memory you are 
attempting to allocate compare with the available address
space on your 
(unspecified) type of system?
_______________________________________________
This list is archived at http://www.l
am-mpi.org/MailArchives/lam/

[1-3]

about | contact  Other archives ( Real Estate discussion Medical topics )