SMG2S
Sparse Matrix Generator with Given Spectrum
parMatrixSparse.hpp
1 /*
2 MIT License
3 Copyright (c) 2019 Xinzhe WU @ Maison de la Simulation, France
4 Copyright (c) 2019-2022, Xinzhe Wu @ Simulation and Data Laboratory Quantum
5  Materials, Forschungszentrum Juelich GmbH.
6 
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13 The above copyright notice and this permission notice shall be included in all
14 copies or substantial portions of the Software.
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 SOFTWARE.
22 */
23 
24 #ifndef __PAR_MATRIX_SPARSE_H__
25 #define __PAR_MATRIX_SPARSE_H__
26 
27 #include <mpi.h>
28 #include <iostream>
29 #include <fstream>
30 #include <sstream>
31 #include <string>
32 #include <vector>
33 #include <random>
34 #include <algorithm>
35 #include <parVector/parVector.hpp>
36 #include <smg2s/nilpotent.hpp>
37 #include <smg2s/spectrum.hpp>
38 #include <utils/MPI_DataType.hpp>
39 #include <utils/utils.hpp>
40 #include <parMatrix/MatrixCSR.hpp>
41 #include <limits>
42 #ifdef __USE_COMPLEX__
43 #include <complex>
44 #endif
45 
46 #define MIN(a,b) (((a)<(b))?(a):(b))
47 #define MAX(a,b) (((a)>(b))?(a):(b))
48 
58 template<typename T, typename S>
59 class parMatrixSparse
60 {
61  private:
63  std::map<S, T> *dynmat_loc;
64 
65  //size of local matrix
67  S ncols;
69  S nrows;
70  //number of non-zeros in local
72  S nnz_loc;
74  parVectorMap<S> index_map;
76  S lower_b;
78  S upper_b;
80  MPI_Comm comm;
81 
82  // mpi size and rank
84  int ProcID;
86  int nProcs;
87 
88  public:
91 
98  parMatrixSparse(parVector<T,S> vec);
104  parMatrixSparse(parVectorMap<S> map);
105  //get
107  S GetNRows(){return nrows;};
109  S GetNCols(){return ncols;};
111  S GetNNzLoc(){return nnz_loc;};
113  parVectorMap<S> GetMap(){return index_map;};
115  S GetLowerBound(){return lower_b;};
117  S GetUpperBound(){return upper_b;};
119  MPI_Comm GetComm(){return comm;};
121  int GetProcId(){return ProcID;};
123  int GetNProcs(){return nProcs;};
124 
126  std::map<S, T> *GetDynMatLoc(){return dynmat_loc;};
127 
128  //set value
130 
135  void SetValueLocal(S row, S col, T value);
136  //set value
138 
144  void SetValuesLocal(S nindex, S *rows, S *cols, T *values);
145  //set value
147 
152  void SetValue(S row, S col, T value);
154 
157  void SetDiagonal(parVector<T,S> diag);
158 
159  //add value
161 
166  void AddValueLocal(S row, S col, T value);
168 
173  void AddValue(S row, S col, T value);
174 
175  //get value
177 
181  T GetValueLocal(S row, S col);
183 
187  T GetValue(S row, S col);
188 
190 
193  void MatScale(T scale);
195 
199  void MatAXPY(parMatrixSparse<T,S> X, T scale);
201 
205  void MatAYPX(parMatrixSparse<T,S> X, T scale);
206 
207  //init matrix lower part of matrix
209 
216  void initMat(S diag_l, S diag_u, Base<T> scale, T shift, Base<T> sparsity );
218 
222  void initMat(S diag_l, S diag_u);
224 
231  void setSpecNonHerm(parVector<T,S> spectrum);
233 
240  void setSpecNonSymm(parVector<T,S> spectrum);
242 
250  void setSpecNonSymmCmplx(parVector<std::complex<Base<T>>,S> spectrum);
251 
253 
256  void updateNnz();
257 
259 
262  void copy(parMatrixSparse<T,S> X);
264 
267  void rmZeros();
268 
269  //matrix multiplies a nilpotent matrix
271 
275  parMatrixSparse<T,S> MA(Nilpotent<S> nilp);
276  //a nilpotent matrix multiplies a matrix
278 
282  parMatrixSparse<T,S> AM(Nilpotent<S> nilp);
284  void ZeroEntries();
285 
287 
291  MatrixCSR<T,S> ConvertToCSR();
293 
300  void show();
302 
305  void MatView();
307 
310  void MatView(std::string matName);
312 
317  void writeToMatrixMarket(std::string file_name);
319 
324  void writeToMatrixMarketCmplx(std::string file_name);
325 
326 };
327 
328 
329 template<typename T, typename S>
330 parMatrixSparse<T,S>::parMatrixSparse(parVector<T,S> vec)
331 {
332  index_map = vec.GetVecMap();
333  lower_b = index_map.GetLowerBound();
334  upper_b = index_map.GetUpperBound();
335  comm = index_map.GetCurrentComm();
336  MPI_Comm_rank(comm, &ProcID);
337  MPI_Comm_size(comm, &nProcs);
338  nrows = index_map.GetLocalSize();
339  ncols = index_map.GetGlobalSize();
340 
341  //at the beginning, empty matrix, and elements will be dynamically added
342  nnz_loc = 0;
343  dynmat_loc = NULL;
344 }
345 
346 template<typename T, typename S>
347 parMatrixSparse<T,S>::parMatrixSparse(parVectorMap<S> map)
348 {
349  index_map = map;
350  lower_b = index_map.GetLowerBound();
351  upper_b = index_map.GetUpperBound();
352  comm = index_map.GetCurrentComm();
353  MPI_Comm_rank(comm, &ProcID);
354  MPI_Comm_size(comm, &nProcs);
355  nrows = index_map.GetLocalSize();
356  ncols = index_map.GetGlobalSize();
357 
358  //at the beginning, empty matrix, and elements will be dynamically added
359  nnz_loc = 0;
360  dynmat_loc = NULL;
361 }
362 
363 template<typename T, typename S>
364 void parMatrixSparse<T,S>::show()
365 {
366  std::cout << "Id " << ProcID << ": local matrix size = " << nrows << "x" << ncols << ", nnz = " << nnz_loc << ", lower_b = " << lower_b << ", upper_b = " << upper_b << std::endl;
367 }
368 
369 template<typename T, typename S>
370 void parMatrixSparse<T,S>::MatView()
371 {
372  typename std::map<S,T>::iterator it;
373  if(ProcID == 0) {std::cout << "Parallel MatView: " << std::endl;}
374 
375  for (S i = 0; i < nrows; i++){
376  if(dynmat_loc != NULL){
377  std::cout << "row " << index_map.Loc2Glob(i) << ": ";
378  for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); ++it){
379  std::cout <<"("<<it->first << "," << it->second << "); ";
380  }
381  std::cout << std::endl;
382  }
383  }
384 
385 }
386 
387 template<typename T, typename S>
388 void parMatrixSparse<T,S>::MatView(std::string matName)
389 {
390  typename std::map<S,T>::iterator it;
391  if(ProcID == 0) {std::cout << "Parallel MatView: " << std::endl;}
392 
393  for (S i = 0; i < nrows; i++){
394  if(dynmat_loc != NULL){
395  std::cout << matName + " ]> row " << index_map.Loc2Glob(i) << ": ";
396  for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); ++it){
397  std::cout <<"("<<it->first << "," << it->second << "); ";
398  }
399  std::cout << std::endl;
400  }
401  }
402 
403 }
404 
405 template<typename T,typename S>
406 void parMatrixSparse<T,S>::SetValueLocal(S row, S col, T value)
407 {
408  typename std::map<S,T>::iterator it;
409 
410  if(dynmat_loc == NULL){
411  dynmat_loc = new std::map<S,T> [nrows];
412  }
413  it = dynmat_loc[row].find(col);
414 
415  if(value != T(0)){
416  if(it == dynmat_loc[row].end()){
417  dynmat_loc[row][col] = value;
418  nnz_loc++;
419  }
420  else{
421  it->second = value;
422  }
423  }
424 }
425 
426 
427 template<typename T,typename S>
428 void parMatrixSparse<T,S>::SetValuesLocal(S nindex, S *rows, S *cols, T *values)
429 {
430  for( S i = 0; i < nindex; i++){
431  SetValueLocal(rows[i],cols[i],values[i]);
432  }
433 }
434 
435 template<typename T,typename S>
436 void parMatrixSparse<T,S>::SetValue(S row, S col, T value)
437 {
438  S local_row = index_map.Glob2Loc(row);
439 
440  if(local_row >= 0 && local_row < nrows){
441  SetValueLocal(local_row, col, value);
442  }
443 }
444 
445 
446 template<typename T,typename S>
447 void parMatrixSparse<T,S>::SetDiagonal(parVector<T,S> diag)
448 {
449  T *a = diag.GetArray();
450  S local_size = diag.GetLocalSize();
451 
452  for(auto i = 0; i < local_size; i++){
453  SetValueLocal(i, diag.Loc2Glob(i), a[i]);
454  }
455 }
456 
457 
458 template<typename T,typename S>
459 void parMatrixSparse<T,S>::AddValueLocal(S row, S col, T value)
460 {
461  typename std::map<S,T>::iterator it;
462 
463  if(dynmat_loc == NULL){
464  dynmat_loc = new std::map<S,T> [nrows];
465  }
466  it = dynmat_loc[row].find(col);
467 
468  if(it == dynmat_loc[row].end()){
469  dynmat_loc[row][col] = value;
470  nnz_loc++;
471  }
472  else{
473  it->second += value;
474  }
475 }
476 
477 template<typename T,typename S>
478 void parMatrixSparse<T,S>::AddValue(S row, S col, T value)
479 {
480  S local_row = index_map.Glob2Loc(row);
481 
482  if(local_row >= 0 && local_row < nrows){
483  AddValueLocal(local_row, col, value);
484  }
485 }
486 
487 
488 template<typename T,typename S>
489 T parMatrixSparse<T,S>::GetValueLocal(S row, S col)
490 {
491  typename std::map<S,T>::iterator it;
492 
493  if(dynmat_loc != NULL){
494  it = dynmat_loc[row].find(col);
495  if(it == dynmat_loc[row].end()){
496  return T(0);
497  }else{
498  return dynmat_loc[row][col];
499  }
500  }else{
501  return T(0);
502  }
503 }
504 
505 template<typename T,typename S>
506 T parMatrixSparse<T,S>::GetValue(S row, S col)
507 {
508  S local_row = index_map.Glob2Loc(row);
509  if(local_row >= 0 && local_row < nrows){
510  return GetValueLocal(local_row, col);
511  }
512  else{
513  return 0.0;
514  }
515 }
516 
517 template<typename T,typename S>
518 void parMatrixSparse<T,S>::MatScale(T scale){
519  typename std::map<S,T>::iterator it;
520  S i;
521  if(dynmat_loc != NULL){
522  for(i = 0; i < nrows; i++){
523  for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); it++){
524  it->second = it->second*scale;
525  }
526  }
527  }
528 }
529 
530 
531 template<typename T,typename S>
532 void parMatrixSparse<T,S>::MatAXPY(parMatrixSparse<T,S> X, T scale){
533  typename std::map<S,T>::iterator it;
534  typename std::map<S,T>::iterator it1;
535 
536  S i, k;
537 
538  if(index_map != X.GetMap()){
539  try{
540  throw 505;
541  }catch(...){
542  std::cout << "SMG2S]> Caught Exception: for AXPY, the matrices should be distributed with the same vectorMap" << std::endl;
543  }
544  }
545 
546  if(X.GetDynMatLoc() != NULL && scale != T(1)){
547  for(i = 0; i < nrows; i++){
548  for(it1 = X.GetDynMatLoc()[i].begin(); it1 != X.GetDynMatLoc()[i].end();++it1){
549  X.GetDynMatLoc()[i][it1->first] = scale * it1->second;
550  }
551 
552  }
553  }
554  if(dynmat_loc != NULL && X.GetDynMatLoc() != NULL){
555  for(i = 0; i < nrows; i++){
556 
557  dynmat_loc[i] = std::accumulate( X.GetDynMatLoc()[i].begin(), X.GetDynMatLoc()[i].end(), dynmat_loc[i],
558  []( std::map<S, T> &m, const std::pair<const S, T> &p)
559  {
560  return ( m[p.first] += p.second, m );
561  } );
562 
563  for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end();){
564  k = it->first;
565  if(it->second == T(0)){
566  it=dynmat_loc[i].erase(it);
567  }else{
568  it++;
569  }
570  }
571 
572  }
573 
574  updateNnz();
575 
576  } else if(dynmat_loc == NULL && X.GetDynMatLoc() != NULL){
577  dynmat_loc = new std::map<S,T> [nrows];
578  for(i = 0; i < nrows; i++){
579  dynmat_loc[i].insert(X.GetDynMatLoc()[i].begin(), X.GetDynMatLoc()[i].end());
580  }
581  nnz_loc = X.nnz_loc;
582  }
583 }
584 
585 
586 template<typename T,typename S>
587 void parMatrixSparse<T,S>::MatAYPX(parMatrixSparse<T,S> X, T scale){
588  typename std::map<S,T>::iterator it;
589 
590  S i, k;
591  if(dynmat_loc != NULL && scale != T(1)){
592  for(i = 0; i < nrows; i++){
593  for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); it++){
594  k = it->first;
595  dynmat_loc[i][k] = dynmat_loc[i][k]*scale;
596  }
597  }
598  }
599 
600  if(dynmat_loc != NULL && X.GetDynMatLoc() != NULL){
601  for(i = 0; i < nrows; i++){
602 
603  dynmat_loc[i] = std::accumulate( X.GetDynMatLoc()[i].begin(), X.GetDynMatLoc()[i].end(), dynmat_loc[i],
604  []( std::map<S, T> &m, const std::pair<const S, T> &p)
605  {
606  return ( m[p.first] += p.second, m );
607  } );
608 
609  for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end();){
610  k = it->first;
611  if(it->second == T(0)){
612  it=dynmat_loc[i].erase(it);
613  }else{
614  it++;
615  }
616  }
617  }
618 
619  updateNnz();
620  }
621 
622  if(dynmat_loc == NULL && X.GetDynMatLoc() != NULL){
623  dynmat_loc = new std::map<S,T> [nrows];
624  for(i = 0; i < nrows; i++){
625  dynmat_loc[i].insert(X.GetDynMatLoc()[i].begin(), X.GetDynMatLoc()[i].end() );
626  }
627  nnz_loc = X.nnz_loc;
628  }
629 }
630 
631 template<typename T, typename S>
632 void parMatrixSparse<T,S>::copy(parMatrixSparse<T,S> X){
633 
634  dynmat_loc = new std::map<S,T> [nrows];
635 
636  typename std::map<S,T>::iterator it;
637 
638  S i, k;
639 
640  if(X.GetDynMatLoc() != NULL){
641  for(i = 0; i < nrows; i++){
642  dynmat_loc[i].insert(X.GetDynMatLoc()[i].begin(),X.GetDynMatLoc()[i].end());
643  }
644  }
645 }
646 
647 
648 template<typename T, typename S>
649 void parMatrixSparse<T,S>::updateNnz(){
650 
651  typename std::map<S,T>::iterator it;
652 
653  S i, k;
654 
655  nnz_loc = 0;
656 
657  if(dynmat_loc != NULL){
658  for(i = 0; i < nrows; i++){
659  nnz_loc += dynmat_loc[i].size();
660  }
661  }
662 }
663 
664 
665 template<typename T, typename S>
666 void parMatrixSparse<T,S>::rmZeros(){
667  typename std::map<S,T>::iterator it;
668  S i, k;
669  if(dynmat_loc != NULL){
670  for(i = 0; i < nrows; i++){
671  for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end();){
672  k = it->first;
673  if(std::abs(it->second) < (std::numeric_limits<Base<T>>::denorm_min())){
674  it=dynmat_loc[i].erase(it);
675  }else{
676  it++;
677  }
678  }
679  }
680 
681  updateNnz();
682  }
683 
684 }
685 
686 template<typename T,typename S>
687 void parMatrixSparse<T,S>::ZeroEntries()
688 {
689  typename std::map<S,T>::iterator it;
690  S i;
691  if(dynmat_loc != NULL){
692  for(i = 0; i < nrows; i++){
693  for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); it++){
694  it->second = T(0);
695  }
696  }
697  }
698 }
699 
700 template<typename T, typename S>
701 void parMatrixSparse<T,S>::initMat(S diag_l, S diag_u, Base<T> scale, T shift, Base<T> sparsity){
702 
703  bool isnonsymm = false;
704 
705  if (sizeof(shift)/sizeof(sparsity) == 1){
706  //shift has type T and sparsity has type Base<T>, so this ratio is 1 only for a real scalar type
707  isnonsymm = true;
708  }
709 
710  S size = ncols;
711 
712  diag_l = abs(diag_l);
713  diag_u = abs(diag_u);
714 
715 
716  if (isnonsymm && diag_u < 2){
717  try{
718  throw 505;
719  }catch(...){
720  std::cout << "SMG2S]> Caught Exception: for the initialisation of a non-symmetric matrix, please ensure abs(diag_u) >= 2" << std::endl;
721  }
722  }
723 
724  if (diag_l < diag_u){
725  try{
726  throw 505;
727  }catch(...){
728  std::cout << "SMG2S]> Caught Exception: for the initialisation of the matrix, please ensure abs(diag_l) >= abs(diag_u)" << std::endl;
729  }
730  }
731 
732  if (diag_u >= size){
733  try{
734  throw 505;
735  }catch(...){
736  std::cout << "SMG2S]> Caught Exception: for the initialisation of the matrix, please ensure abs(diag_u) < size" << std::endl;
737  }
738  }
739 
740  //std::random_device rd;
741  std::mt19937_64 rd(ProcID);
742  std::uniform_real_distribution<> d(0, 1);
743 
744  for (auto i = MAX(diag_u, lower_b); i < upper_b; i++){
745  for (auto j = MAX(0, i - diag_l); j <= i - diag_u; j++){
746  auto sampling = d(rd);
747  if (sampling < 1.0 - sparsity){
748  SetValue(i, j, T(scale * d(rd)) + shift );
749  }else{
750  SetValue(i, j, T(0) );
751  }
752  }
753  }
754 
755 }
756 
757 template<typename T, typename S>
758 void parMatrixSparse<T,S>::initMat(S diag_l, S diag_u){
759  initMat(diag_l, diag_u, 1.0, 0.0, 0.9);
760 }
761 
762 
763 template<typename T, typename S>
764 void parMatrixSparse<T,S>::setSpecNonHerm(parVector<T,S> spectrum){
765  int size1 = sizeof(T) / sizeof(Base<T>);
766 
767  if(size1 == 1){
768  printf("Info ]> For generating non-Hermitian matrices, the matrix should be complex\n");
769  return;
770  }
771 
772  SetDiagonal(spectrum);
773 }
774 
775 template<typename T, typename S>
776 void parMatrixSparse<T,S>::setSpecNonSymm(parVector<T,S> spectrum){
777 
778  int size1 = sizeof(T) / sizeof(Base<T>);
779 
780  if(size1 == 2){
781  printf("Info ]> For generating non-Symmetric matrices, the matrices should be real\n");
782  return;
783  }
784 
785  SetDiagonal(spectrum);
786 }
787 
788 template<typename T, typename S>
789 void parMatrixSparse<T,S>::setSpecNonSymmCmplx(parVector<std::complex<Base<T>>,S> spectrum){
790 
791  int size1 = sizeof(T) / sizeof(Base<T>);
792 
793  if(size1 == 2){
794  printf("Info ]> For generating non-Symmetric matrices, the matrices should be real\n");
795  return;
796  }
797 
798  auto overlap = checkNonSymmSpec<T,S>(spectrum);
799 
800  auto *array = spectrum.GetArray();
801 
802  auto local_size = spectrum.GetLocalSize();
803  auto nu = spectrum.GetUpperNeighbor(1);
804  auto nl = spectrum.GetLowerNeighbor(1);
805 
806  S idx = 0;
807  S step;
808  S nzeros;
809  if(overlap == 1){
810  idx = 1;
811  SetValueLocal(0, lower_b, array[0].real());
812  SetValueLocal(0, lower_b-1, -abs(array[0].imag()));
813  }else if(overlap == 0){
814  idx = 0;
815  }
816 
817  nzeros = 0;
818 
819  while(idx < (local_size - 1) ){
820  if(array[idx].imag() == 0){
821  SetValueLocal(idx, idx + lower_b, array[idx].real());
822  step = 1;
823  nzeros++;
824  }else{
825  SetValueLocal(idx, idx + lower_b, array[idx].real());
826  SetValueLocal(idx, idx + lower_b + 1, abs(array[idx].imag()) );
827  SetValueLocal(idx + 1, idx + lower_b + 1, array[idx+1].real());
828  SetValueLocal(idx + 1, idx + lower_b , -abs(array[idx+1].imag()) );
829  step = 2;
830  }
831  idx += step;
832  }
833 
834  if( (local_size - nzeros - overlap) % 2 != 0 ){
835  if(array[local_size - 1].imag() != 0){
836  SetValueLocal(local_size - 1, lower_b+local_size-1, array[local_size - 1].real());
837  SetValueLocal(local_size - 1, lower_b+local_size, abs(array[local_size - 1].imag()));
838  }else{
839  SetValueLocal(local_size - 1, local_size - 1 + lower_b, array[local_size - 1].real());
840  }
841  }
842 }
843 
844 template<typename T,typename S>
845 MatrixCSR<T,S> parMatrixSparse<T,S>::ConvertToCSR()
846 {
847  S count, i, j;
848  T v;
849  typename std::map<S, T>::iterator it;
850 
851  MatrixCSR<T,S> csr = MatrixCSR<T,S>(nnz_loc, nrows);
852 
853  if(dynmat_loc != NULL){
854  count = 0;
855  csr.rows.push_back(count);
856  for(i = 0; i < nrows; i++){
857  for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); it++){
858  if(it->second != T(0)){
859  j = it->first;
860  v = it->second;
861  csr.vals.push_back(v);
862  csr.cols.push_back(j);
863  count++;
864  }
865  }
866  csr.rows.push_back(count);
867  }
868  }
869  return csr;
870 }
871 
872 //matrix multiplies a nilpotent matrix
873 template<typename T,typename S>
874 parMatrixSparse<T,S> parMatrixSparse<T,S>::MA(Nilpotent<S> nilp){
875 
876  auto offset = nilp.getOffset();
877  auto IndOfZeros = nilp.getIndOfZeros();
878  auto prod = parMatrixSparse<T, S>(index_map);
879 
880  typename std::map<S,T>::iterator it;
881 
882  if(dynmat_loc != NULL){
883  for(auto i = 0; i < nrows; i++){
884  for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); ++it){
885  S j = it->first + offset;
886  if(j < ncols){
887  if(!std::binary_search(IndOfZeros.begin(), IndOfZeros.end(), j-offset) && it->second != T(0)){
888  prod.SetValueLocal(i, j, it->second);
889  }
890  }
891  }
892 
893  }
894  }
895 
896  return prod;
897 }
898 
899 //a nilpotent matrix multiplies a matrix
900 template<typename T,typename S>
901 parMatrixSparse<T,S> parMatrixSparse<T,S>::AM(Nilpotent<S> nilp){
902  auto offset = nilp.getOffset();
903  auto IndOfZeros = nilp.getIndOfZeros();
904  auto prod = parMatrixSparse<T, S>(index_map);
905 
906  auto lprocbound_map = index_map.GetLBoundMap();
907  auto uprocbound_map = index_map.GetUBoundMap();
908 
909  std::vector<S> targetProcs;
910  std::vector<S> originProcs;
911  std::vector<S> procsDiff;
912 
913  typename std::map<S,T>::iterator it;
914 
915  if(prod.dynmat_loc == NULL){
916  prod.dynmat_loc = new std::map<S,T> [nrows];
917  }
918 
919 
920  //shift the in-memory part
921  if(dynmat_loc != NULL){
922  for(auto row = lprocbound_map[ProcID] + offset; row < uprocbound_map[ProcID]; row++){
923  auto i = index_map.Glob2Loc(row);
924  for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); ++it){
925  auto j = it->first;
926  //if not the index of zeros in nilpotent
927  if (!std::binary_search(IndOfZeros.begin(), IndOfZeros.end(), row-offset-offset)&& it->second != T(0) ){
928  prod.SetValueLocal(i-offset, j, it->second);
929  }
930  }
931  }
932  }
933 
934  std::map<std::pair<int, int>, std::vector<S>> rowSendToProc;
935  typename std::map<std::pair<int, int>, std::vector<S>>::iterator itm;
936 
937  for(auto id = 0; id < nProcs; id++){
938  for(auto rid = 0; rid < id; rid++){
939  std::vector<S> rcollect;
940  for(auto r = lprocbound_map[id]; r < lprocbound_map[id] + MIN(offset, uprocbound_map[id] - lprocbound_map[id]); r++){
941  if( (r - offset >= lprocbound_map[rid]) && (r - offset < uprocbound_map[rid]) ){
942  if ( !std::binary_search(IndOfZeros.begin(), IndOfZeros.end(), r-offset-offset) ){
943  rcollect.push_back(r);
944  }
945  }
946  }
947  if(rcollect.size() != 0){
948  rowSendToProc.insert(std::pair<std::pair<int, int>, std::vector<S>>( std::pair<int, S>(id, rid), rcollect ) );
949  }
950  }
951  }
952 
953  std::vector<std::map<std::pair<int, int>,std::vector<S>>> rowSendToProc_Path;
954  while(rowSendToProc.size() != 0){
955  std::map<std::pair<int, int>,std::vector<S>> single_path;
956  std::vector<int> first_collected;
957  std::vector<int> second_collected;
958  for(itm = rowSendToProc.begin(); itm != rowSendToProc.end(); ++itm){
959  if(std::find(first_collected.begin(), first_collected.end(), itm->first.first) == first_collected.end()){
960  if(std::find(second_collected.begin(), second_collected.end(), itm->first.second) == second_collected.end()){
961  first_collected.push_back(itm->first.first);
962  second_collected.push_back(itm->first.second);
963  single_path.insert(std::pair<std::pair<int, int>,std::vector<S>>(itm->first, itm->second) );
964 
965  }
966  }
967  }
968 
969  for(auto i = 0; i < first_collected.size(); i++){
970  rowSendToProc.erase(std::pair<int, int>(first_collected[i], second_collected[i]) );
971  }
972 
973  rowSendToProc_Path.push_back(single_path);
974  }
975 
976  int comm_path_nb = rowSendToProc_Path.size();
977  //package for sending and receving
978 
979  for(auto path = 0; path < comm_path_nb; path++){
980 
981  std::vector<S> sSize;
982  std::vector<S> sIndices;
983  std::vector<T> sBufs;
984 
985  std::vector<S> rSize;
986  std::vector<S> rIndices;
987  std::vector<T> rBufs;
988 
989  for(itm = rowSendToProc_Path[path].begin(); itm != rowSendToProc_Path[path].end(); ++itm){
990  MPI_Request stypereq;
991  MPI_Request rtypereq;
992  MPI_Status typestat;
993  if(ProcID == itm->first.first){
994  S count = 0;
995  sIndices.insert(sIndices.begin(), count);
996  for(auto i = 0; i < itm->second.size(); i++){
997  S j = index_map.Glob2Loc(itm->second[i]);
998  for(it = dynmat_loc[j].begin(); it != dynmat_loc[j].end(); ++it){
999  sIndices.push_back(it->first);
1000  sBufs.push_back(it->second);
1001  count++;
1002  }
1003  sIndices.insert(sIndices.begin(), count);
1004  }
1005  sSize.resize(2);
1006  sSize[0] = sBufs.size();
1007  sSize[1] = sIndices.size() - sBufs.size() - 1;
1008 
1009  MPI_Isend(sSize.data(), 2, getMPI_Type<S>(), itm->first.second, itm->first.second, comm, &stypereq);
1010 
1011  }
1012 
1013  if(ProcID == itm->first.second){
1014  rSize.resize(2);
1015  MPI_Irecv(rSize.data(), 2, getMPI_Type<S>(), itm->first.first, ProcID, comm, &rtypereq);
1016  MPI_Wait(&rtypereq, &typestat);
1017 
1018  rBufs.resize(rSize[0]);
1019  rIndices.resize(rSize[1]+rSize[0]+1);
1020  }
1021 
1022  if(ProcID == itm->first.first){
1023  MPI_Isend(sBufs.data(), sSize[0], getMPI_Type<T>(), itm->first.second, itm->first.second, comm, &stypereq);
1024  }
1025 
1026  if(ProcID == itm->first.second){
1027  MPI_Irecv(rBufs.data(), rSize[0], getMPI_Type<T>(), itm->first.first, ProcID, comm, &rtypereq);
1028  MPI_Wait(&rtypereq, &typestat);
1029  }
1030 
1031  if(ProcID == itm->first.first){
1032  MPI_Isend(sIndices.data(), sSize[0]+sSize[1]+1, getMPI_Type<S>(), itm->first.second, itm->first.second, comm, &stypereq);
1033  }
1034 
1035  if(ProcID == itm->first.second){
1036  MPI_Irecv(rIndices.data(), rSize[0]+rSize[1]+1, getMPI_Type<S>(), itm->first.first, ProcID, comm, &rtypereq);
1037  MPI_Wait(&rtypereq, &typestat);
1038 
1039  for(auto i = 0; i < itm->second.size(); i++){
1040  auto row = index_map.Glob2Loc(itm->second[i] - offset);
1041  for(auto cnt = rIndices[rSize[1] - i] ; cnt < rIndices[rSize[1] - i - 1]; cnt++){
1042  auto col = rIndices[rSize[1]+1+cnt];
1043  prod.SetValueLocal(row, col, rBufs[cnt]);
1044  }
1045  }
1046 
1047 
1048  }
1049  }
1050  }
1051 
1052  return prod;
1053 }
1054 
1055 template<typename T,typename S>
1056 void parMatrixSparse<T,S>::writeToMatrixMarket(std::string file_name){
1057 
1058  std::string header;
1059  std::string data;
1060 
1061  if( (sizeof(T)/sizeof(Base<T>) != 1) ){
1062  try{
1063  throw 505;
1064  }catch(...){
1065  std::cout << "SMG2S]> Caught Exception: for complex matrix, please use writeToMatrixMarketCmplx" << std::endl;
1066  }
1067  }
1068 
1069  //generate MatrixMarket header
1070  header.append("%%MatrixMarket matrix coordinate ");
1071 
1072  header.append("real general\n");
1073 
1074  header.append("%=================================================================================\n%\n");
1075  header.append("% This matrix is generated by SMG2S: Scalable Matrix with Given Spectrum\n");
1076  header.append("% Opensource codes is available: https://github.com/SMG2S/SMG2S\n");
1077  header.append("% @article{wu2020parallel,\n");
1078  header.append("% title={A parallel generator of non-Hermitian matrices computed from given spectra},\n");
1079  header.append("% author={Wu, Xinzhe and Petiton, Serge G and Lu, Yutong},\n");
1080  header.append("% journal={Concurrency and Computation: Practice and Experience},\n");
1081  header.append("% volume={32},\n");
1082  header.append("% number={20},\n");
1083  header.append("% pages={e5710},\n");
1084  header.append("% year={2020},\n");
1085  header.append("% publisher={Wiley Online Library},\n");
1086  header.append("% doi={https://doi.org/10.1002/cpe.5710}\n");
1087  header.append("% }\n");
1088  header.append("%\n%=================================================================================\n");
1089 
1090 
1091  S gsize = index_map.GetGlobalSize();
1092  S gnnz;
1093 
1094  MPI_Allreduce(&nnz_loc, &gnnz, 1, getMPI_Type<S>(), MPI_SUM, comm);
1095 
1096  std::string dim_info = std::to_string(gsize) + " " + std::to_string(gsize) + " " + std::to_string(gnnz) + "\n";
1097  header.append(dim_info);
1098 
1099  typename std::map<S,T>::iterator it;
1100 
1101  if(dynmat_loc != NULL){
1102  for(auto i = 0; i < nrows; i++){
1103  for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); it++){
1104  auto k = it->first;
1105  auto v = it->second;
1106  data += std::to_string(index_map.Loc2Glob(i)+1) + " " + std::to_string(k+1) + " " + std::to_string(v) + "\n";
1107  }
1108  }
1109  }
1110 
1111  MPI_File fh;
1112  MPI_Offset write_offset;
1113  MPI_Offset text_size;
1114  MPI_Offset *write_size_per_proc;
1115  MPI_Status sts;
1116 
1117  write_size_per_proc = (MPI_Offset *)malloc(sizeof(MPI_Offset) * nProcs);
1118 
1119  text_size = data.size();
1120 
1121  MPI_Allgather(&text_size, 1, MPI_OFFSET, write_size_per_proc, 1, MPI_OFFSET, comm);
1122 
1123 
1124  write_offset = header.size();
1125 
1126  for (auto i = 0; i < ProcID; ++i) {
1127  write_offset += write_size_per_proc[i];
1128  }
1129 
1130 
1131  MPI_File_delete (file_name.c_str(), MPI_INFO_NULL);
1132  MPI_File_open(comm, file_name.c_str(), MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
1133 
1134 
1135  if (ProcID == 0) {
1136  MPI_File_write_at(fh, 0, header.c_str(), header.size(), MPI_CHAR, &sts);
1137  }
1138 
1139  MPI_File_write_at(fh, write_offset, data.c_str(), data.size(), MPI_CHAR, &sts);
1140 
1141  MPI_File_close(&fh);
1142  free(write_size_per_proc);
1143 }
1144 
1145 
1146 
1147 template<typename T,typename S>
1148 void parMatrixSparse<T,S>::writeToMatrixMarketCmplx(std::string file_name){
1149 
1150  std::string header;
1151  std::string data;
1152 
1153  if( (sizeof(T)/sizeof(Base<T>) != 2) ){
1154  try{
1155  throw 505;
1156  }catch(...){
1157  std::cout << "SMG2S]> Caught Exception: for real matrix, please use writeToMatrixMarket" << std::endl;
1158  }
1159  }
1160 
1161  //generate MatrixMarket header
1162  header.append("%%MatrixMarket matrix coordinate ");
1163 
1164  header.append("complex general\n");
1165 
1166 
1167  header.append("%=================================================================================\n%\n");
1168  header.append("% This matrix is generated by SMG2S: Scalable Matrix with Given Spectrum\n");
1169  header.append("% Open-source code is available at: https://github.com/SMG2S/SMG2S\n");
1170  header.append("% @article{wu2020parallel,\n");
1171  header.append("% title={A parallel generator of non-Hermitian matrices computed from given spectra},\n");
1172  header.append("% author={Wu, Xinzhe and Petiton, Serge G and Lu, Yutong},\n");
1173  header.append("% journal={Concurrency and Computation: Practice and Experience},\n");
1174  header.append("% volume={32},\n");
1175  header.append("% number={20},\n");
1176  header.append("% pages={e5710},\n");
1177  header.append("% year={2020},\n");
1178  header.append("% publisher={Wiley Online Library},\n");
1179  header.append("% doi={https://doi.org/10.1002/cpe.5710}\n");
1180  header.append("% }\n");
1181  header.append("%\n%=================================================================================\n");
1182 
1183 
1184  S gsize = index_map.GetGlobalSize();
1185  S gnnz;
1186 
1187  MPI_Allreduce(&nnz_loc, &gnnz, 1, getMPI_Type<S>(), MPI_SUM, comm);
1188 
1189  std::string dim_info = std::to_string(gsize) + " " + std::to_string(gsize) + " " + std::to_string(gnnz) + "\n";
1190  header.append(dim_info);
1191 
1192  typename std::map<S,T>::iterator it;
1193 
1194  if(dynmat_loc != NULL){
1195  for(auto i = 0; i < nrows; i++){
1196  for(it = dynmat_loc[i].begin(); it != dynmat_loc[i].end(); it++){
1197  auto k = it->first;
1198  auto v = it->second;
1199  data += std::to_string(index_map.Loc2Glob(i)+1) + " " + std::to_string(k+1) + " " + std::to_string(v.real()) + " " + std::to_string(v.imag()) + "\n";
1200  }
1201  }
1202  }
1203 
1204  MPI_File fh;
1205  MPI_Offset write_offset;
1206  MPI_Offset text_size;
1207  MPI_Offset *write_size_per_proc;
1208  MPI_Status sts;
1209 
1210  write_size_per_proc = (MPI_Offset *)malloc(sizeof(MPI_Offset) * nProcs);
1211 
1212  text_size = data.size();
1213 
1214  MPI_Allgather(&text_size, 1, MPI_OFFSET, write_size_per_proc, 1, MPI_OFFSET, comm);
1215 
1216 
1217  write_offset = header.size();
1218 
1219  for (auto i = 0; i < ProcID; ++i) {
1220  write_offset += write_size_per_proc[i];
1221  }
1222 
1223 
1224  MPI_File_delete (file_name.c_str(), MPI_INFO_NULL);
1225  MPI_File_open(comm, file_name.c_str(), MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
1226 
1227 
1228  if (ProcID == 0) {
1229  MPI_File_write_at(fh, 0, header.c_str(), header.size(), MPI_CHAR, &sts);
1230  }
1231 
1232  MPI_File_write_at(fh, write_offset, data.c_str(), data.size(), MPI_CHAR, &sts);
1233 
1234  MPI_File_close(&fh);
1235  free(write_size_per_proc);
1236 }
1237 
1238 
1239 #endif
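A minimal usage sketch, not part of the header above: the driver function, the include path, the objects `map` and `spec`, and the output file name are assumptions. It only calls parMatrixSparse members declared in this file, and assumes a parVectorMap<int> `map` and a real parVector<double,int> `spec` holding the target spectrum have been built elsewhere.

#include <parMatrix/parMatrixSparse.hpp>   // assumed include path, alongside parMatrix/MatrixCSR.hpp

void generate_example(parVectorMap<int> map, parVector<double,int> spec)
{
    parMatrixSparse<double,int> A(map);   // empty distributed matrix over the rows of `map`

    // fill the band between the 3rd and 7th lower diagonals;
    // initMat(7, 3) is equivalent to initMat(7, 3, 1.0, 0.0, 0.9), i.e. roughly 10% of the band entries are kept
    A.initMat(7, 3);

    A.setSpecNonSymm(spec);               // put the (real) target eigenvalues on the diagonal

    A.show();                             // per-process size/nnz summary
    A.writeToMatrixMarket("A.mtx");       // collective MatrixMarket dump (real scalars)
}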
parMatrixSparse::GetNRows
S GetNRows()
Return the number of rows of local matrix on each MPI proc.
Definition: parMatrixSparse.hpp:107
parMatrixSparse::dynmat_loc
std::map< S, T > * dynmat_loc
An array of `std::map` in which each map stores the column index and related non-zero entry value of each row of the local matrix.
Definition: parMatrixSparse.hpp:63
parVectorMap
A class which determines the way to distribute a vector across MPI procs.
Definition: parVectorMap.hpp:40
parMatrixSparse::MatView
void MatView()
Print a parMatrixSparse object in a distributed COO format.
Definition: parMatrixSparse.hpp:370
MatrixCSR::vals
std::vector< T > vals
An array stores the values of non-zero elements.
Definition: MatrixCSR.hpp:86
parMatrixSparse::setSpecNonSymm
void setSpecNonSymm(parVector< T, S > spectrum)
Set a given spectrum (all eigenvalues are in real scalars) onto a parMatrixSparse object for constructing a non-Symmetric matrix.
Definition: parMatrixSparse.hpp:776
parMatrixSparse::ncols
S ncols
number of columns of local matrix on each MPI proc
Definition: parMatrixSparse.hpp:67
parMatrixSparse::setSpecNonHerm
void setSpecNonHerm(parVector< T, S > spectrum)
Set a given spectrum onto a parMatrixSparse object for constructing a non-Hermitian matrix.
Definition: parMatrixSparse.hpp:764
parVectorMap::GetLowerBound
S GetLowerBound()
Return parVectorMap<S>::lower_bound.
Definition: parVectorMap.hpp:126
MatrixCSR::rows
std::vector< S > rows
An array points to row starts in indices and data.
Definition: MatrixCSR.hpp:72
parMatrixSparse::GetNProcs
int GetNProcs()
Return parMatrixSparse::nProcs.
Definition: parMatrixSparse.hpp:123
parMatrixSparse::nrows
S nrows
number of rows of local matrix on each MPI proc
Definition: parMatrixSparse.hpp:69
parMatrixSparse::GetValue
T GetValue(S row, S col)
Return the value of an entry of a distributed sparse matrix with a given global index on each MPI proc.
Definition: parMatrixSparse.hpp:506
parMatrixSparse::index_map
parVectorMap< S > index_map
the parVectorMap object used to distribute the global matrix across 1D MPI grid
Definition: parMatrixSparse.hpp:74
parMatrixSparse::AddValue
void AddValue(S row, S col, T value)
Add a value to an entry of a distributed sparse matrix with a given global index on each MPI proc.
Definition: parMatrixSparse.hpp:478
parMatrixSparse::ZeroEntries
void ZeroEntries()
Make all the entries of a sparse matrix zero, but keep the sparsity structure as it is.
Definition: parMatrixSparse.hpp:687
parMatrixSparse::lower_b
S lower_b
the smallest index of row of a distributed matrix on each MPI proc
Definition: parMatrixSparse.hpp:76
parMatrixSparse::upper_b
S upper_b
upper_b-1 = the largest index of row of a distributed matrix on each MPI proc
Definition: parMatrixSparse.hpp:78
parMatrixSparse::nnz_loc
S nnz_loc
number of non-zero entries of local matrix on each MPI proc
Definition: parMatrixSparse.hpp:72
parMatrixSparse::GetLowerBound
S GetLowerBound()
Return parMatrixSparse::lower_b.
Definition: parMatrixSparse.hpp:115
parMatrixSparse::writeToMatrixMarket
void writeToMatrixMarket(std::string file_name)
A parallel IO to write a parMatrixSparse object into a file of MatrixMarket format.
Definition: parMatrixSparse.hpp:1056
parMatrixSparse::ConvertToCSR
MatrixCSR< T, S > ConvertToCSR()
Convert a parMatrixSparse with dynamic memory into a distributed CSR matrix.
Definition: parMatrixSparse.hpp:845
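A hypothetical consumption sketch (not from the original documentation; `A` is an assumed, already-filled parMatrixSparse<double,int>) showing how the CSR view built at line 845 can be traversed with the `rows`, `cols` and `vals` members of MatrixCSR listed on this page:

MatrixCSR<double,int> csr = A.ConvertToCSR();
for (int i = 0; i < A.GetNRows(); ++i)                   // local row index
    for (int p = csr.rows[i]; p < csr.rows[i+1]; ++p)    // csr.rows holds GetNRows()+1 offsets
        std::cout << i << " " << csr.cols[p] << " " << csr.vals[p] << "\n";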
Nilpotent
A class which determines the information of a nilpotent matrix.
Definition: nilpotent.hpp:45
parMatrixSparse
A class which defines a sparse matrix distributed across 1D MPI grid.
Definition: parMatrixSparse.hpp:59
MatrixCSR
A struct which stores a sparse matrix in CSR format.
Definition: MatrixCSR.hpp:46
Nilpotent::getIndOfZeros
std::vector< S > getIndOfZeros()
Get indices of all zero entries on the off-diagonal.
Definition: nilpotent.hpp:164
parMatrixSparse::MA
parMatrixSparse< T, S > MA(Nilpotent< S > nilp)
Perform `M*A`, in which `A` is a nilpotent matrix.
Definition: parMatrixSparse.hpp:874
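A worked illustration (not from the original documentation) of the effect implemented at line 874: with ncols = 4, a nilpotent A of offset 1 and getIndOfZeros() = {1}, every column c of M not listed in getIndOfZeros() moves to column c + offset of the product, and columns shifted past the last column are dropped:

A = \begin{pmatrix} 0 & 1 & 0 & 0 \\ 0 & 0 & 0 & 0 \\ 0 & 0 & 0 & 1 \\ 0 & 0 & 0 & 0 \end{pmatrix},
\qquad
M A = \begin{pmatrix} 0 & M_{:,0} & 0 & M_{:,2} \end{pmatrix},

where M_{:,c} denotes column c of M.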
parMatrixSparse::setSpecNonSymmCmplx
void setSpecNonSymmCmplx(parVector< std::complex< Base< T >>, S > spectrum)
Set a given spectrum (all eigenvalues can be in real scalars or appear as pairs of conjugate complex scalars) onto a parMatrixSparse object for constructing a non-Symmetric matrix.
Definition: parMatrixSparse.hpp:789
parMatrixSparse::MatAYPX
void MatAYPX(parMatrixSparse< T, S > X, T scale)
Perform `A = scale * A + X`.
Definition: parMatrixSparse.hpp:587
parVector::Loc2Glob
S Loc2Glob(S local_index)
Convert an index of the local vector on each MPI proc into its index in the global distributed vector.
Definition: parVector.hpp:137
parMatrixSparse::show
void show()
Display summary information of a parMatrixSparse object.
Definition: parMatrixSparse.hpp:364
parMatrixSparse::GetComm
MPI_Comm GetComm()
Return parMatrixSparse::comm.
Definition: parMatrixSparse.hpp:119
parMatrixSparse::initMat
void initMat(S diag_l, S diag_u, Base< T > scale, T shift, Base< T > sparsity)
Fill the lower part of the matrix, between the diagonal of offset `diag_l` and the diagonal of offset `diag_u`, with random values of a given sparsity.
Definition: parMatrixSparse.hpp:701
parMatrixSparse::SetValuesLocal
void SetValuesLocal(S nindex, S *rows, S *cols, T *values)
Set multiple values to a distributed sparse matrix with given multiple local indices on each MPI proc.
Definition: parMatrixSparse.hpp:428
parMatrixSparse::GetProcId
int GetProcId()
Return parMatrixSparse::ProcID.
Definition: parMatrixSparse.hpp:121
parVector::GetVecMap
parVectorMap< S > GetVecMap()
Return parVector::index_map.
Definition: parVector.hpp:80
parMatrixSparse::SetValue
void SetValue(S row, S col, T value)
Set a value to a distributed sparse matrix with a given global index on each MPI proc.
Definition: parMatrixSparse.hpp:436
parMatrixSparse::GetDynMatLoc
std::map< S, T > * GetDynMatLoc()
Return parMatrixSparse::dynmat_loc.
Definition: parMatrixSparse.hpp:126
parMatrixSparse::ProcID
int ProcID
rank of each MPI proc within the working MPI communicator
Definition: parMatrixSparse.hpp:84
parMatrixSparse::GetNNzLoc
S GetNNzLoc()
Return the number of non-zero entries of the local matrix on each MPI proc.
Definition: parMatrixSparse.hpp:111
initMat
A struct which stores the information for the initial input matrix of SMG2S.
Definition: initMat.hpp:42
parMatrixSparse::updateNnz
void updateNnz()
Explicitly re-compute the number of nnz on each MPI proc.
Definition: parMatrixSparse.hpp:649
parMatrixSparse::writeToMatrixMarketCmplx
void writeToMatrixMarketCmplx(std::string file_name)
A parallel IO to write a parMatrixSparse object with complex scalars into a file of MatrixMarket format.
Definition: parMatrixSparse.hpp:1148
parMatrixSparse::SetValueLocal
void SetValueLocal(S row, S col, T value)
Set a value to a distributed sparse matrix with a given local index on each MPI proc.
Definition: parMatrixSparse.hpp:406
parMatrixSparse::MatScale
void MatScale(T scale)
Perform an operation `A = scale * A`, in which `scale` is a scalar.
Definition: parMatrixSparse.hpp:518
parMatrixSparse::copy
void copy(parMatrixSparse< T, S > X)
Duplicate from another parMatrixSparse object.
Definition: parMatrixSparse.hpp:632
parVector::GetArray
T * GetArray()
Get the pointer `*array` which stores the local piece of vector on each MPI proc.
Definition: parVector.hpp:128
Nilpotent::getOffset
S getOffset()
Get off-diagonal offset of nilpotent matrix.
Definition: nilpotent.hpp:159
MatrixCSR::cols
std::vector< S > cols
An array stores the column indices of non-zero elements.
Definition: MatrixCSR.hpp:79
parMatrixSparse::GetMap
parVectorMap< S > GetMap()
Return the parVectorMap object used to distribute sparse Matrix.
Definition: parMatrixSparse.hpp:113
parMatrixSparse::SetDiagonal
void SetDiagonal(parVector< T, S > diag)
Set the diagonal of a distributed sparse matrix with a given parVector object.
Definition: parMatrixSparse.hpp:447
parVector::GetLocalSize
S GetLocalSize()
Return parVector<S>::local_size.
Definition: parVector.hpp:89
parMatrixSparse::AddValueLocal
void AddValueLocal(S row, S col, T value)
Add a value to an entry of a distributed sparse matrix with a given local index on each MPI proc.
Definition: parMatrixSparse.hpp:459
parMatrixSparse::MatAXPY
void MatAXPY(parMatrixSparse< T, S > X, T scale)
Perform `A = A + scale * X`.
Definition: parMatrixSparse.hpp:532
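A two-line hypothetical snippet (the objects `A` and `X` are assumptions) showing the in-place semantics of MatAXPY and its companion MatAYPX; the implementation of MatAXPY at line 532 checks that both matrices use the same vectorMap:

A.MatAXPY(X, 2.0);   // A <- A + 2*X
A.MatAYPX(X, 0.5);   // A <- 0.5*A + X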
parMatrixSparse::GetNCols
S GetNCols()
Return the number of columns of local matrix on each MPI proc.
Definition: parMatrixSparse.hpp:109
parVector
A class which defines a vector distributed across 1D MPI grid.
Definition: parVector.hpp:41
parMatrixSparse::AM
parMatrixSparse< T, S > AM(Nilpotent< S > nilp)
Perform `A*M`, in which `A` is a nilpotent matrix.
Definition: parMatrixSparse.hpp:901
parMatrixSparse::rmZeros
void rmZeros()
Explicitly remove all the zeros of parMatrixSparse object.
Definition: parMatrixSparse.hpp:666
parMatrixSparse::GetValueLocal
T GetValueLocal(S row, S col)
Return the value of an entry of a distributed sparse matrix with a given local index on each MPI proc.
Definition: parMatrixSparse.hpp:489
parMatrixSparse::GetUpperBound
S GetUpperBound()
Return parMatrixSparse::upper_b.
Definition: parMatrixSparse.hpp:117
parMatrixSparse::comm
MPI_Comm comm
the working MPI communicator
Definition: parMatrixSparse.hpp:80
parMatrixSparse::nProcs
int nProcs
number of MPI procs within the working MPI communicator
Definition: parMatrixSparse.hpp:86