// Header preamble for a distributed (MPI) sparse-matrix template class.
// NOTE(review): identifiers with leading double underscores
// (__PAR_MATRIX_SPARSE_H__, __USE_COMPLEX__) are reserved to the
// implementation per the C++ standard; consider renaming (e.g.
// PAR_MATRIX_SPARSE_HPP_) in a full pass over the file.
24 #ifndef __PAR_MATRIX_SPARSE_H__
25 #define __PAR_MATRIX_SPARSE_H__
35 #include <parVector/parVector.hpp>
36 #include <smg2s/nilpotent.hpp>
37 #include <smg2s/spectrum.hpp>
38 #include <utils/MPI_DataType.hpp>
39 #include <utils/utils.hpp>
40 #include <parMatrix/MatrixCSR.hpp>
// NOTE(review): the body/#endif of this conditional block is not visible in
// this excerpt.
42 #ifdef __USE_COMPLEX__
// Function-style macros: each argument is evaluated twice, so do not pass
// expressions with side effects. (std::min/std::max would require identical
// argument types, which the call sites here mix — verify before replacing.)
46 #define MIN(a,b) (((a)<(b))?(a):(b))
47 #define MAX(a,b) (((a)>(b))?(a):(b))
// NOTE(review): fragment of the class template declaration; the class header
// (orig. lines ~59-151) is missing from this excerpt, so only these member
// declarations are visible.
58 template<
typename T,
typename S>
// Insert a value at (global row, global col); only the owning rank stores it.
152 void SetValue(S row, S col, T value);
// Accumulate a value into (global row, global col) — adds if already present.
173 void AddValue(S row, S col, T value);
// Fill the matrix band between diagonals diag_l..diag_u with random entries
// scaled/shifted as given; `sparsity` controls the fraction of zero entries.
216 void initMat(S diag_l, S diag_u, Base<T> scale, T shift, Base<T> sparsity );
// Convenience overload with default scale/shift/sparsity (see definition).
222 void initMat(S diag_l, S diag_u);
// Print the locally stored rows, prefixing each line with `matName`.
310 void MatView(std::string matName);
329 template<
typename T,
typename S>
// NOTE(review): body fragment — the defining signature (orig. lines ~330-332)
// is missing from this excerpt; appears to initialise the distributed-matrix
// state (bounds, communicator, local/global sizes) from `index_map`.
// Global index range [lower_b, upper_b) owned by this rank.
333 lower_b = index_map.GetLowerBound();
334 upper_b = index_map.GetUpperBound();
335 comm = index_map.GetCurrentComm();
336 MPI_Comm_rank(comm, &ProcID);
337 MPI_Comm_size(comm, &nProcs);
// nrows is the LOCAL row count; ncols the GLOBAL column count.
338 nrows = index_map.GetLocalSize();
339 ncols = index_map.GetGlobalSize();
346 template<
typename T,
typename S>
// NOTE(review): body fragment — defining signature (orig. lines ~347-350)
// missing. Same initialisation pattern as the previous fragment, but no
// `lower_b = ...` assignment is visible here; it may live in the missing
// lines — TODO confirm against the full file.
351 upper_b = index_map.GetUpperBound();
352 comm = index_map.GetCurrentComm();
353 MPI_Comm_rank(comm, &ProcID);
354 MPI_Comm_size(comm, &nProcs);
355 nrows = index_map.GetLocalSize();
356 ncols = index_map.GetGlobalSize();
363 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing. Prints a one-line summary
// of this rank's partition (local size, local nnz, owned index range).
366 std::cout <<
"Id " << ProcID <<
": local matrix size = " << nrows <<
"x" << ncols <<
", nnz = " << nnz_loc <<
", lower_b = " << lower_b <<
", upper_b = " << upper_b << std::endl;
369 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing (presumably MatView()).
// Dumps every locally stored row as "row <global>: (col,val); ...".
// Each rank prints its own rows, so output interleaving across ranks is
// unordered.
372 typename std::map<S,T>::iterator it;
373 if(ProcID == 0) {std::cout <<
"Parallel MatView: " << std::endl;}
375 for (S i = 0; i < nrows; i++){
// dynmat_loc is lazily allocated elsewhere; NULL means "no entries yet".
376 if(dynmat_loc != NULL){
377 std::cout <<
"row " << index_map.Loc2Glob(i) <<
": ";
378 for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); ++it){
379 std::cout <<
"("<<it->first <<
"," << it->second <<
"); ";
381 std::cout << std::endl;
387 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing (presumably
// MatView(std::string matName), declared above at orig. line 310).
// Same row dump as the unnamed overload, but prefixes each row with matName.
390 typename std::map<S,T>::iterator it;
391 if(ProcID == 0) {std::cout <<
"Parallel MatView: " << std::endl;}
393 for (S i = 0; i < nrows; i++){
394 if(dynmat_loc != NULL){
395 std::cout << matName +
" ]> row " << index_map.Loc2Glob(i) <<
": ";
396 for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); ++it){
397 std::cout <<
"("<<it->first <<
"," << it->second <<
"); ";
399 std::cout << std::endl;
405 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing; from the body this looks
// like SetValueLocal(local row, global col, value) — TODO confirm.
408 typename std::map<S,T>::iterator it;
// Lazily allocate the per-row map array on first insertion.
// NOTE(review): raw new[] — ownership/delete[] site not visible here.
410 if(dynmat_loc == NULL){
411 dynmat_loc =
new std::map<S,T> [nrows];
413 it = dynmat_loc[row].find(col);
// Only inserts when absent; the else branch (if any) is in missing lines.
416 if(it == dynmat_loc[row].end()){
417 dynmat_loc[row][col] = value;
427 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing. Batched insert: forwards
// nindex (row, col, value) triples to SetValueLocal one at a time.
430 for( S i = 0; i < nindex; i++){
431 SetValueLocal(rows[i],cols[i],values[i]);
435 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing (presumably the
// SetValue(S,S,T) declared at orig. line 152). Translates a GLOBAL row to a
// local one and stores the entry only if this rank owns the row; remote rows
// are silently ignored (Glob2Loc presumably returns an out-of-range value
// for non-owned rows — verify).
438 S local_row = index_map.Glob2Loc(row);
440 if(local_row >= 0 && local_row < nrows){
441 SetValueLocal(local_row, col, value);
446 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing; `diag`, `local_size` and
// `a` are declared in missing lines. Appears to copy a distributed vector's
// local entries onto this rank's diagonal (column = global index of local
// row i) — TODO confirm against the full definition.
452 for(
auto i = 0; i < local_size; i++){
453 SetValueLocal(i, diag.
Loc2Glob(i), a[i]);
458 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing; likely AddValueLocal.
// Visible part is identical to SetValueLocal (insert when absent); the
// accumulate-on-existing branch presumably lives in the missing lines
// (orig. ~470-475) — TODO confirm.
461 typename std::map<S,T>::iterator it;
463 if(dynmat_loc == NULL){
464 dynmat_loc =
new std::map<S,T> [nrows];
466 it = dynmat_loc[row].find(col);
468 if(it == dynmat_loc[row].end()){
469 dynmat_loc[row][col] = value;
477 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing (presumably the
// AddValue(S,S,T) declared at orig. line 173). Global-row wrapper around
// AddValueLocal; non-owned rows are ignored.
480 S local_row = index_map.Glob2Loc(row);
482 if(local_row >= 0 && local_row < nrows){
483 AddValueLocal(local_row, col, value);
488 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing; likely GetValueLocal.
// The branch taken when the entry is absent (orig. ~496-497) and the
// dynmat_loc==NULL return path are in missing lines.
491 typename std::map<S,T>::iterator it;
493 if(dynmat_loc != NULL){
494 it = dynmat_loc[row].find(col);
495 if(it == dynmat_loc[row].end()){
// NOTE(review): operator[] here would default-insert if the key were
// absent; reachable only on the found path given the surrounding
// (partially missing) control flow — verify in the full file.
498 return dynmat_loc[row][col];
505 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing. Global-row read wrapper
// around GetValueLocal; the return value for non-owned rows is in missing
// lines.
508 S local_row = index_map.Glob2Loc(row);
509 if(local_row >= 0 && local_row < nrows){
510 return GetValueLocal(local_row, col);
517 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing; scales every stored
// local entry in place by `scale` (declared in a missing line). Zero-valued
// results are NOT pruned here.
519 typename std::map<S,T>::iterator it;
521 if(dynmat_loc != NULL){
522 for(i = 0; i < nrows; i++){
523 for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); it++){
524 it->second = it->second*scale;
531 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing; the error text below
// identifies this as an AXPY-style in-place combine (this += scale * X, by
// the usual convention — confirm the scaling in the missing lines).
533 typename std::map<S,T>::iterator it;
534 typename std::map<S,T>::iterator it1;
// Both operands must share the same row distribution.
538 if(index_map != X.
GetMap()){
542 std::cout <<
"SMG2S]> Caught Exception: for AXPY, matrix should be in the same vectorMap" << std::endl;
547 for(i = 0; i < nrows; i++){
555 for(i = 0; i < nrows; i++){
// Row-wise merge: fold X's row into this row, adding coincident entries.
// (Likely used with std::accumulate over X's row map — call site is in
// missing lines.)
558 []( std::map<S, T> &m,
const std::pair<const S, T> &p)
560 return ( m[p.first] += p.second, m );
// Prune entries that cancelled to exactly zero; erase() returns the next
// valid iterator, so iteration stays well-defined.
563 for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end();){
565 if(it->second == T(0)){
566 it=dynmat_loc[i].erase(it);
576 }
else if(dynmat_loc == NULL && X.
GetDynMatLoc() != NULL){
// This side has no storage yet: allocate and (in missing lines) copy X's
// rows across.
577 dynmat_loc =
new std::map<S,T> [nrows];
578 for(i = 0; i < nrows; i++){
586 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing. Pattern (scale applied to
// the entries of *this* matrix first, then a row-wise merge-add) suggests an
// AYPX-style operation (this = scale*this + X) — TODO confirm.
588 typename std::map<S,T>::iterator it;
// scale == 1 would be a no-op, so skip the scaling pass entirely.
591 if(dynmat_loc != NULL && scale != T(1)){
592 for(i = 0; i < nrows; i++){
593 for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); it++){
// NOTE(review): `k` is declared in a missing line (presumably
// k = it->first); operator[] on an existing key does not invalidate `it`.
595 dynmat_loc[i][k] = dynmat_loc[i][k]*scale;
601 for(i = 0; i < nrows; i++){
// Row-wise merge-add of the other operand's entries (same fold as AXPY).
604 []( std::map<S, T> &m,
const std::pair<const S, T> &p)
606 return ( m[p.first] += p.second, m );
// Prune exact zeros produced by cancellation.
609 for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end();){
611 if(it->second == T(0)){
612 it=dynmat_loc[i].erase(it);
// No local storage yet: allocate and copy (copy loop body in missing lines).
623 dynmat_loc =
new std::map<S,T> [nrows];
624 for(i = 0; i < nrows; i++){
631 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing. Allocates fresh per-row
// storage and then iterates the local rows; the per-row copy body is in
// missing lines (looks like a deep-copy/duplicate routine — TODO confirm).
634 dynmat_loc =
new std::map<S,T> [nrows];
636 typename std::map<S,T>::iterator it;
641 for(i = 0; i < nrows; i++){
648 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing. Recomputes the local
// nonzero count by summing per-row map sizes (nnz_loc is presumably reset
// to 0 in a missing line — verify).
651 typename std::map<S,T>::iterator it;
657 if(dynmat_loc != NULL){
658 for(i = 0; i < nrows; i++){
659 nnz_loc += dynmat_loc[i].size();
665 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing. Erases stored entries
// whose magnitude is below the smallest positive subnormal of Base<T>.
// NOTE(review): abs(x) < denorm_min() is true essentially only for exact
// zero — if the intent was to drop "tiny" values, a tolerance such as
// epsilon() may have been intended; TODO confirm.
667 typename std::map<S,T>::iterator it;
669 if(dynmat_loc != NULL){
670 for(i = 0; i < nrows; i++){
// erase() returns the next valid iterator, keeping the loop well-defined.
671 for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end();){
673 if(std::abs(it->second) < (std::numeric_limits<Base<T>>::denorm_min())){
674 it=dynmat_loc[i].erase(it);
686 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing and the per-entry
// statement (orig. ~694+) is in missing lines; only the full traversal of
// the local entries is visible here.
689 typename std::map<S,T>::iterator it;
691 if(dynmat_loc != NULL){
692 for(i = 0; i < nrows; i++){
693 for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); it++){
700 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing (presumably the
// initMat(S,S,Base<T>,T,Base<T>) declared at orig. line 216). Fills the band
// between the diag_u-th and diag_l-th sub-diagonals with random values.
703 bool isnonsymm =
false;
// NOTE(review): suspected bug — `scale` and `sparsity` are both Base<T>
// objects, so sizeof(scale)/sizeof(sparsity) is always 1; the analogous
// checks later in this file use sizeof(T)/sizeof(Base<T>) to distinguish
// real from complex T. TODO confirm intent against the full file.
705 if (
sizeof(scale)/
sizeof(sparsity) == 1){
// Work with magnitudes of the diagonal offsets.
712 diag_l = abs(diag_l);
713 diag_u = abs(diag_u);
// Non-symmetric generation needs at least two free sub-diagonals.
716 if (isnonsymm && diag_u < 2){
720 std::cout <<
"SMG2S]> Caught Exception: for initialisationg of non-symmetric matrix, please ensure abs(diag_u) >= 2" << std::endl;
// The band must be non-empty: lower offset strictly larger than upper.
724 if (diag_l < diag_u){
728 std::cout <<
"SMG2S]> Caught Exception: for initialisationg of matrix, please ensure abs(diag_l) < abs(diag_u)" << std::endl;
736 std::cout <<
"SMG2S]> Caught Exception: for initialisationg of matrix, please ensure abs(diag_u) < size" << std::endl;
// Per-rank PRNG seeded with the rank id: deterministic per layout, but the
// generated matrix depends on the number of processes.
741 std::mt19937_64 rd(ProcID);
742 std::uniform_real_distribution<> d(0, 1);
// Visit only rows this rank owns that intersect the band.
744 for (
auto i = MAX(diag_u, lower_b); i < upper_b; i++){
745 for (
auto j = MAX(0, i - diag_l); j <= i - diag_u; j++){
746 auto sampling = d(rd);
// With probability (1 - sparsity) emit a random scaled+shifted value,
// otherwise an explicit zero entry.
747 if (sampling < 1.0 - sparsity){
748 SetValue(i, j, T(scale * d(rd)) + shift );
750 SetValue(i, j, T(0) );
757 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing (presumably the
// initMat(S,S) overload declared at orig. line 222). Delegates with defaults
// scale=1.0, shift=0.0, sparsity=0.9.
759 initMat(diag_l, diag_u, 1.0, 0.0, 0.9);
763 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing. Guards that T is real
// (sizeof(T)/sizeof(Base<T>) == 1 for real, 2 for complex), then installs
// the given spectrum on the diagonal; the branch using size1 is in missing
// lines.
765 int size1 =
sizeof(T) /
sizeof(Base<T>);
768 printf(
"Info ]> For generating non-Hermtian matrices, the matrices should be real\n");
772 SetDiagonal(spectrum);
775 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing. Same real-type guard as
// the non-Hermitian setter, then installs the (real) spectrum on the
// diagonal.
778 int size1 =
sizeof(T) /
sizeof(Base<T>);
781 printf(
"Info ]> For generating non-Symmetric matrices, the matrices should be real\n");
785 SetDiagonal(spectrum);
788 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing. Installs a conjugate-pair
// spectrum for a REAL matrix: each complex pair (a±bi) becomes a 2x2 block
// [[a, |b|], [-|b|, a]] on the diagonal; real eigenvalues become scalar
// diagonal entries. Several control-flow lines are missing, so the exact
// handling of `overlap`/`nzeros` is partly inferred — TODO confirm.
791 int size1 =
sizeof(T) /
sizeof(Base<T>);
794 printf(
"Info ]> For generating non-Symmetric matrices, the matrices should be real\n");
// overlap: whether a conjugate pair straddles this rank's boundary.
798 auto overlap = checkNonSymmSpec<T,S>(spectrum);
800 auto *array = spectrum.GetArray();
802 auto local_size = spectrum.GetLocalSize();
// One-element halo exchange with the neighbouring ranks.
803 auto nu = spectrum.GetUpperNeighbor(1);
804 auto nl = spectrum.GetLowerNeighbor(1);
// First local row completes a pair started on the previous rank.
811 SetValueLocal(0, lower_b, array[0].real());
812 SetValueLocal(0, lower_b-1, -abs(array[0].imag()));
813 }
else if(overlap == 0){
819 while(idx < (local_size - 1) ){
// Real eigenvalue: scalar diagonal entry; advance is in missing lines.
820 if(array[idx].imag() == 0){
821 SetValueLocal(idx, idx + lower_b, array[idx].real());
// Complex pair: write the 2x2 rotation-like block.
825 SetValueLocal(idx, idx + lower_b, array[idx].real());
826 SetValueLocal(idx, idx + lower_b + 1, abs(array[idx].imag()) );
827 SetValueLocal(idx + 1, idx + lower_b + 1, array[idx+1].real());
828 SetValueLocal(idx + 1, idx + lower_b , -abs(array[idx+1].imag()) );
// Trailing element when an odd count remains after pairs/zeros/overlap.
834 if( (local_size - nzeros - overlap) % 2 != 0 ){
835 if(array[local_size - 1].imag() != 0){
// Pair straddles into the next rank: emit this half and the
// super-diagonal coupling.
836 SetValueLocal(local_size - 1, lower_b+local_size-1, array[local_size - 1].real());
837 SetValueLocal(local_size - 1, lower_b+local_size, abs(array[local_size - 1].imag()));
839 SetValueLocal(local_size - 1, local_size - 1 + lower_b, array[local_size - 1].real());
844 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing. Converts the per-row
// std::map storage into a CSR structure (`csr`), skipping explicit zeros;
// `count`/`v`/`j` are declared/updated in missing lines.
849 typename std::map<S, T>::iterator it;
853 if(dynmat_loc != NULL){
// Leading row-pointer entry (offset of row 0).
855 csr.
rows.push_back(count);
856 for(i = 0; i < nrows; i++){
// std::map iterates keys in ascending order, so CSR columns come out
// sorted within each row.
857 for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); it++){
858 if(it->second != T(0)){
861 csr.
vals.push_back(v);
862 csr.
cols.push_back(j);
// Close the current row: next row starts at `count`.
866 csr.
rows.push_back(count);
873 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing. Appears to compute a
// product with a nilpotent shift: each local entry is moved `offset`
// columns, except columns whose (unshifted) index is listed in IndOfZeros
// (binary_search ⇒ IndOfZeros must be sorted). Result accumulates into
// `prod`. TODO confirm against the full definition.
880 typename std::map<S,T>::iterator it;
882 if(dynmat_loc != NULL){
883 for(
auto i = 0; i < nrows; i++){
884 for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); ++it){
885 S j = it->first + offset;
887 if(!std::binary_search(IndOfZeros.begin(), IndOfZeros.end(), j-offset) && it->second != T(0)){
888 prod.SetValueLocal(i, j, it->second);
900 template<
typename T,
typename S>
// NOTE(review): large body fragment — signature missing and many interior
// lines absent. Appears to be the left-multiplication by a nilpotent
// operator: rows are shifted UP by `offset`, which requires sending the
// first `offset` rows of each rank to the preceding rank(s); the transfers
// are organised into non-conflicting "paths" so each rank participates in
// at most one send/recv per round. All comments below are reviewer
// reconstructions — confirm against the full file.
906 auto lprocbound_map = index_map.GetLBoundMap();
907 auto uprocbound_map = index_map.GetUBoundMap();
909 std::vector<S> targetProcs;
910 std::vector<S> originProcs;
911 std::vector<S> procsDiff;
913 typename std::map<S,T>::iterator it;
915 if(prod.dynmat_loc == NULL){
916 prod.dynmat_loc =
new std::map<S,T> [nrows];
// Local part: rows whose shifted position stays on this rank.
921 if(dynmat_loc != NULL){
922 for(
auto row = lprocbound_map[ProcID] + offset; row < uprocbound_map[ProcID]; row++){
923 auto i = index_map.Glob2Loc(row);
924 for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); ++it){
// IndOfZeros must be sorted for binary_search to be valid.
927 if (!std::binary_search(IndOfZeros.begin(), IndOfZeros.end(), row-offset-offset)&& it->second != T(0) ){
928 prod.SetValueLocal(i-offset, j, it->second);
// Map (sender, receiver) -> global row indices that must move between them.
934 std::map<std::pair<int, int>, std::vector<S>> rowSendToProc;
935 typename std::map<std::pair<int, int>, std::vector<S>>::iterator itm;
937 for(
auto id = 0;
id < nProcs;
id++){
938 for(
auto rid = 0; rid < id; rid++){
939 std::vector<S> rcollect;
940 for(
auto r = lprocbound_map[
id]; r < lprocbound_map[id] + MIN(offset, uprocbound_map[
id] - lprocbound_map[
id]); r++){
941 if( (r - offset >= lprocbound_map[rid]) && (r - offset < uprocbound_map[rid]) ){
942 if ( !std::binary_search(IndOfZeros.begin(), IndOfZeros.end(), r-offset-offset) ){
943 rcollect.push_back(r);
947 if(rcollect.size() != 0){
// NOTE(review): key is built as std::pair<int, S> but the map key type is
// std::pair<int, int> — relies on S -> int conversion; consider making the
// types consistent in a full pass.
948 rowSendToProc.insert(std::pair<std::pair<int, int>, std::vector<S>>( std::pair<int, S>(
id, rid), rcollect ) );
// Greedily partition the (sender, receiver) pairs into rounds ("paths")
// where every rank appears at most once as sender and once as receiver.
953 std::vector<std::map<std::pair<int, int>,std::vector<S>>> rowSendToProc_Path;
954 while(rowSendToProc.size() != 0){
955 std::map<std::pair<int, int>,std::vector<S>> single_path;
956 std::vector<int> first_collected;
957 std::vector<int> second_collected;
958 for(itm = rowSendToProc.begin(); itm != rowSendToProc.end(); ++itm){
959 if(std::find(first_collected.begin(), first_collected.end(), itm->first.first) == first_collected.end()){
960 if(std::find(second_collected.begin(), second_collected.end(), itm->first.second) == second_collected.end()){
961 first_collected.push_back(itm->first.first);
962 second_collected.push_back(itm->first.second);
963 single_path.insert(std::pair<std::pair<int, int>,std::vector<S>>(itm->first, itm->second) );
969 for(
auto i = 0; i < first_collected.size(); i++){
970 rowSendToProc.erase(std::pair<int, int>(first_collected[i], second_collected[i]) );
973 rowSendToProc_Path.push_back(single_path);
976 int comm_path_nb = rowSendToProc_Path.size();
// One communication round per path. Wire format per transfer:
//   sSize  = {value count, row count}
//   sBufs  = the nonzero values, row by row
//   sIndices = per-row cumulative counts (prepended, reversed) followed by
//              the column indices.
979 for(
auto path = 0; path < comm_path_nb; path++){
981 std::vector<S> sSize;
982 std::vector<S> sIndices;
983 std::vector<T> sBufs;
985 std::vector<S> rSize;
986 std::vector<S> rIndices;
987 std::vector<T> rBufs;
989 for(itm = rowSendToProc_Path[path].begin(); itm != rowSendToProc_Path[path].end(); ++itm){
990 MPI_Request stypereq;
991 MPI_Request rtypereq;
// Sender side: pack rows destined for itm->first.second.
993 if(ProcID == itm->first.first){
995 sIndices.insert(sIndices.begin(), count);
996 for(
auto i = 0; i < itm->second.size(); i++){
997 S j = index_map.Glob2Loc(itm->second[i]);
998 for(it = dynmat_loc[j].begin(); it != dynmat_loc[j].end(); ++it){
999 sIndices.push_back(it->first);
1000 sBufs.push_back(it->second);
1003 sIndices.insert(sIndices.begin(), count);
1006 sSize[0] = sBufs.size();
1007 sSize[1] = sIndices.size() - sBufs.size() - 1;
// NOTE(review): no matching MPI_Wait on `stypereq` is visible for any of
// the three sends — the buffers must stay live until completion; verify
// the waits exist in the missing lines.
1009 MPI_Isend(sSize.data(), 2, getMPI_Type<S>(), itm->first.second, itm->first.second, comm, &stypereq);
// Receiver side: sizes first, then values, then indices.
1013 if(ProcID == itm->first.second){
1015 MPI_Irecv(rSize.data(), 2, getMPI_Type<S>(), itm->first.first, ProcID, comm, &rtypereq);
1016 MPI_Wait(&rtypereq, &typestat);
1018 rBufs.resize(rSize[0]);
1019 rIndices.resize(rSize[1]+rSize[0]+1);
1022 if(ProcID == itm->first.first){
1023 MPI_Isend(sBufs.data(), sSize[0], getMPI_Type<T>(), itm->first.second, itm->first.second, comm, &stypereq);
1026 if(ProcID == itm->first.second){
1027 MPI_Irecv(rBufs.data(), rSize[0], getMPI_Type<T>(), itm->first.first, ProcID, comm, &rtypereq);
1028 MPI_Wait(&rtypereq, &typestat);
1031 if(ProcID == itm->first.first){
1032 MPI_Isend(sIndices.data(), sSize[0]+sSize[1]+1, getMPI_Type<S>(), itm->first.second, itm->first.second, comm, &stypereq);
1035 if(ProcID == itm->first.second){
1036 MPI_Irecv(rIndices.data(), rSize[0]+rSize[1]+1, getMPI_Type<S>(), itm->first.first, ProcID, comm, &rtypereq);
1037 MPI_Wait(&rtypereq, &typestat);
// Unpack: place each received row at its shifted (row - offset) position.
1039 for(
auto i = 0; i < itm->second.size(); i++){
1040 auto row = index_map.Glob2Loc(itm->second[i] - offset);
1041 for(
auto cnt = rIndices[rSize[1] - i] ; cnt < rIndices[rSize[1] - i - 1]; cnt++){
1042 auto col = rIndices[rSize[1]+1+cnt];
1043 prod.SetValueLocal(row, col, rBufs[cnt]);
1055 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing (a real-valued
// MatrixMarket writer taking `file_name`). Builds the header on every rank,
// serialises the local entries as 1-based "row col value" lines, then uses
// MPI-IO to write header + per-rank data at computed offsets.
// Rejects complex T (sizeof ratio != 1).
1061 if( (
sizeof(T)/
sizeof(Base<T>) != 1) ){
1065 std::cout <<
"SMG2S]> Caught Exception: for complex matrix, please use writeToMatrixMarketCmplx" << std::endl;
1070 header.append(
"%%MatrixMarket matrix coordinate ");
1072 header.append(
"real general\n");
1074 header.append(
"%=================================================================================\n%\n");
1075 header.append(
"% This matrix is generated by SMG2S: Scalable Matrix with Given Spectrum\n");
1076 header.append(
"% Opensource codes is available: https://github.com/SMG2S/SMG2S\n");
1077 header.append(
"% @article{wu2020parallel,\n");
1078 header.append(
"% title={A parallel generator of non-Hermitian matrices computed from given spectra},\n");
1079 header.append(
"% author={Wu, Xinzhe and Petiton, Serge G and Lu, Yutong},\n");
1080 header.append(
"% journal={Concurrency and Computation: Practice and Experience},\n");
1081 header.append(
"% volume={32},\n");
1082 header.append(
"% number={20},\n");
1083 header.append(
"% pages={e5710},\n");
1084 header.append(
"% year={2020},\n");
1085 header.append(
"% publisher={Wiley Online Library},\n");
1086 header.append(
"% doi={https://doi.org/10.1002/cpe.5710}\n");
1087 header.append(
"% }\n");
1088 header.append(
"%\n%=================================================================================\n");
// Global dimensions and global nnz ("size size nnz" line).
1091 S gsize = index_map.GetGlobalSize();
1094 MPI_Allreduce(&nnz_loc, &gnnz, 1, getMPI_Type<S>(), MPI_SUM, comm);
1096 std::string dim_info = std::to_string(gsize) +
" " + std::to_string(gsize) +
" " + std::to_string(gnnz) +
"\n";
1097 header.append(dim_info);
// Serialise local entries; MatrixMarket is 1-based, hence the +1s.
1099 typename std::map<S,T>::iterator it;
1101 if(dynmat_loc != NULL){
1102 for(
auto i = 0; i < nrows; i++){
1103 for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); it++){
1105 auto v = it->second;
1106 data += std::to_string(index_map.Loc2Glob(i)+1) +
" " + std::to_string(k+1) +
" " + std::to_string(v) +
"\n";
// Compute each rank's byte offset = header + sum of earlier ranks' sizes.
// NOTE(review): raw malloc/free — a std::vector<MPI_Offset> would be safer
// if this is ever reworked in full.
1112 MPI_Offset write_offset;
1113 MPI_Offset text_size;
1114 MPI_Offset *write_size_per_proc;
1117 write_size_per_proc = (MPI_Offset *)malloc(
sizeof(MPI_Offset) * nProcs);
1119 text_size = data.size();
1121 MPI_Allgather(&text_size, 1, MPI_OFFSET, write_size_per_proc, 1, MPI_OFFSET, comm);
1124 write_offset = header.size();
1126 for (
auto i = 0; i < ProcID; ++i) {
1127 write_offset += write_size_per_proc[i];
// Recreate the file, write header (presumably rank-0 only — the guard is in
// missing lines; verify) then each rank's data at its offset.
1131 MPI_File_delete (file_name.c_str(), MPI_INFO_NULL);
1132 MPI_File_open(comm, file_name.c_str(), MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
1136 MPI_File_write_at(fh, 0, header.c_str(), header.size(), MPI_CHAR, &sts);
1139 MPI_File_write_at(fh, write_offset, data.c_str(), data.size(), MPI_CHAR, &sts);
1141 MPI_File_close(&fh);
1142 free(write_size_per_proc);
1147 template<
typename T,
typename S>
// NOTE(review): body fragment — signature missing (the complex-valued
// MatrixMarket writer). Mirrors the real writer, but emits
// "row col real imag" per entry. Rejects real T (sizeof ratio != 2).
1153 if( (
sizeof(T)/
sizeof(Base<T>) != 2) ){
1157 std::cout <<
"SMG2S]> Caught Exception: for real matrix, please use writeToMatrixMarket" << std::endl;
1162 header.append(
"%%MatrixMarket matrix coordinate ");
// NOTE(review): unlike the real writer's "real general\n", this literal has
// no trailing newline — the banner line may run into the comment block;
// likely a missing "\n" (cannot be fixed in a comment-only pass).
1164 header.append(
"complex general");
1167 header.append(
"%=================================================================================\n%\n");
1168 header.append(
"% This matrix is generated by SMG2S: Scalable Matrix with Given Spectrum\n");
1169 header.append(
"% Opensource codes is available: https://github.com/SMG2S/SMG2S\n");
1170 header.append(
"% @article{wu2020parallel,\n");
1171 header.append(
"% title={A parallel generator of non-Hermitian matrices computed from given spectra},\n");
1172 header.append(
"% author={Wu, Xinzhe and Petiton, Serge G and Lu, Yutong},\n");
1173 header.append(
"% journal={Concurrency and Computation: Practice and Experience},\n");
1174 header.append(
"% volume={32},\n");
1175 header.append(
"% number={20},\n");
1176 header.append(
"% pages={e5710},\n");
1177 header.append(
"% year={2020},\n");
1178 header.append(
"% publisher={Wiley Online Library},\n");
1179 header.append(
"% doi={https://doi.org/10.1002/cpe.5710}\n");
1180 header.append(
"% }\n");
1181 header.append(
"%\n%=================================================================================\n");
// Global dimensions and global nnz line.
1184 S gsize = index_map.GetGlobalSize();
1187 MPI_Allreduce(&nnz_loc, &gnnz, 1, getMPI_Type<S>(), MPI_SUM, comm);
1189 std::string dim_info = std::to_string(gsize) +
" " + std::to_string(gsize) +
" " + std::to_string(gnnz) +
"\n";
1190 header.append(dim_info);
// Serialise local entries, 1-based, with real and imaginary parts.
1192 typename std::map<S,T>::iterator it;
1194 if(dynmat_loc != NULL){
1195 for(
auto i = 0; i < nrows; i++){
1196 for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); it++){
1198 auto v = it->second;
1199 data += std::to_string(index_map.Loc2Glob(i)+1) +
" " + std::to_string(k+1) +
" " + std::to_string(v.real()) +
" " + std::to_string(v.imag()) +
"\n";
// Offset computation and MPI-IO write, identical to the real writer.
1205 MPI_Offset write_offset;
1206 MPI_Offset text_size;
1207 MPI_Offset *write_size_per_proc;
1210 write_size_per_proc = (MPI_Offset *)malloc(
sizeof(MPI_Offset) * nProcs);
1212 text_size = data.size();
1214 MPI_Allgather(&text_size, 1, MPI_OFFSET, write_size_per_proc, 1, MPI_OFFSET, comm);
1217 write_offset = header.size();
1219 for (
auto i = 0; i < ProcID; ++i) {
1220 write_offset += write_size_per_proc[i];
1224 MPI_File_delete (file_name.c_str(), MPI_INFO_NULL);
1225 MPI_File_open(comm, file_name.c_str(), MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
1229 MPI_File_write_at(fh, 0, header.c_str(), header.size(), MPI_CHAR, &sts);
1232 MPI_File_write_at(fh, write_offset, data.c_str(), data.size(), MPI_CHAR, &sts);
1234 MPI_File_close(&fh);
1235 free(write_size_per_proc);