14 std::unique_ptr<helpers::matrix> doci::DM2::sp2tp =
nullptr;
15 std::unique_ptr<helpers::matrix> doci::DM2::tp2sp =
nullptr;
18 #define HDF5_STATUS_CHECK(status) if(status < 0) std::cerr << __FILE__ << ":" << __LINE__ << ": Problem with writing to file. Status code=" << status << std::endl;
26 DM2::DM2(
unsigned int n_sp,
unsigned int n)
32 diag.resize((n_sp*(n_sp-1))/2);
51 diag.resize((n_sp*(n_sp-1))/2);
57 assert(sp2tp && tp2sp);
66 block = std::move(orig.block);
67 diag = std::move(orig.diag);
82 block = std::move(orig.block);
83 diag = std::move(orig.diag);
97 std::fill(diag.begin(), diag.end(), val);
124 int i = (*sp2tp)(a,b);
125 int j = (*sp2tp)(c,d);
127 if(i<block->getn() && j<block->getn())
128 return sign * (*block)(i,j);
130 return sign * diag[(i-block->getn())%diag.size()];
142 (*block) += (*a.block);
145 int dim = diag.size();
148 double *tmp =
const_cast<double *
>(a.diag.data());
150 daxpy_(&dim,&alpha,tmp,&inc,diag.data(),&inc);
159 void DM2::fill_lists(
unsigned int n_sp)
163 auto n_tp = M*(M-1)/2;
175 (*sp2tp)(a,a+L) = (*sp2tp)(a+L,a) = tel++;
179 for(
int b=a+1;b<L;b++)
180 (*sp2tp)(a,b) = (*sp2tp)(b,a) = tel++;
184 for(
int b=a+1;b<M;b++)
185 (*sp2tp)(a,b) = (*sp2tp)(b,a) = tel++;
189 for(
int b=L+a+1;b<M;b++)
191 (*sp2tp)(a,b) = (*sp2tp)(b,a) = tel++;
195 for(
int b=a%L+1;b<L;b++)
197 (*sp2tp)(a,b) = (*sp2tp)(b,a) = tel++;
202 for(
int b=a+1;b<M;b++)
204 (*tp2sp)((*sp2tp)(a,b),0) = a;
205 (*tp2sp)((*sp2tp)(a,b),1) = b;
215 hid_t file_id, group_id, dataset_id, attribute_id, dataspace_id;
218 file_id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
220 group_id = H5Gcreate(file_id,
"/RDM", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
222 hsize_t dimblock = block->getn() * block->getn();
224 dataspace_id = H5Screate_simple(1, &dimblock, NULL);
226 dataset_id = H5Dcreate(group_id,
"Block", H5T_IEEE_F64LE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
230 status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data);
233 status = H5Sclose(dataspace_id);
236 status = H5Dclose(dataset_id);
239 dimblock = diag.size();
241 dataspace_id = H5Screate_simple(1, &dimblock, NULL);
243 dataset_id = H5Dcreate(group_id,
"Vector", H5T_IEEE_F64LE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
245 status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, diag.data());
248 status = H5Sclose(dataspace_id);
251 status = H5Dclose(dataset_id);
255 dataspace_id = H5Screate(H5S_SCALAR);
256 unsigned int L = block->getn();
258 attribute_id = H5Acreate (group_id,
"L", H5T_STD_U32LE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
259 status = H5Awrite (attribute_id, H5T_NATIVE_UINT, &L );
262 status = H5Aclose(attribute_id);
265 attribute_id = H5Acreate (group_id,
"N", H5T_STD_U32LE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
266 status = H5Awrite (attribute_id, H5T_NATIVE_UINT, &N );
269 status = H5Aclose(attribute_id);
272 status = H5Sclose(dataspace_id);
275 status = H5Gclose(group_id);
278 status = H5Fclose(file_id);
289 hid_t file_id, dataset_id, group_id, attribute_id;
292 file_id = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
295 group_id = H5Gopen(file_id,
"/RDM", H5P_DEFAULT);
298 attribute_id = H5Aopen(group_id,
"L", H5P_DEFAULT);
302 status = H5Aread(attribute_id, H5T_NATIVE_UINT, &L);
305 status = H5Aclose(attribute_id);
308 attribute_id = H5Aopen(group_id,
"N", H5P_DEFAULT);
312 status = H5Aread(attribute_id, H5T_NATIVE_UINT, &N);
315 status = H5Aclose(attribute_id);
320 dataset_id = H5Dopen(file_id,
"/RDM/Block", H5P_DEFAULT);
323 status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dm2.block->getpointer());
326 status = H5Dclose(dataset_id);
329 dataset_id = H5Dopen(file_id,
"/RDM/Vector", H5P_DEFAULT);
332 status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dm2.diag.data());
335 status = H5Dclose(dataset_id);
338 status = H5Gclose(group_id);
341 status = H5Fclose(file_id);
360 return block->getn();
371 auto num_t = omp_get_max_threads();
372 unsigned long long num_elems = (eigv.size()*1ul*(eigv.size()+1ul))/2;
373 unsigned long long size_part = num_elems/num_t + 1;
377 std::vector<unsigned int> workload(num_t+1);
378 workload.front() = 0;
379 workload.back() = eigv.size();
381 for(
int i=1;i<num_t;i++)
383 auto num_lines = workload[i-1];
384 unsigned long long num_elems = 0;
386 while(num_elems < size_part)
387 num_elems += eigv.size() - num_lines++;
389 if(num_lines > eigv.size())
390 num_lines = eigv.size();
392 workload[i] = num_lines;
395 std::vector< std::unique_ptr<DM2> > dm2_parts(num_t);
397 std::cout <<
"Running with " << num_t <<
" threads." << std::endl;
403 auto start = std::chrono::high_resolution_clock::now();
404 auto me = omp_get_thread_num();
406 dm2_parts[me].reset(
new DM2(block->getn(),N));
407 (*dm2_parts[me]) = 0;
411 for(
auto idx_begin=0;idx_begin<workload[me];++idx_begin)
414 auto vec_copy = eigv;
416 build_iter(my_perm, vec_copy, workload[me], workload[me+1], (*dm2_parts[me]));
418 auto end = std::chrono::high_resolution_clock::now();
421 std::cout << me <<
"\t" << std::chrono::duration_cast<std::chrono::duration<double,std::ratio<1>>>(end-start).count() <<
" s" << std::endl;
426 for(
auto &cur_dm2: dm2_parts)
427 (*this) += (*cur_dm2);
430 void DM2::build_iter(
Permutation& perm, std::vector<double> &eigv,
unsigned int i_start,
unsigned int i_end,
DM2 &cur_2dm)
432 auto& perm_bra = perm;
434 for(
unsigned int i=i_start;i<i_end;++i)
436 const auto bra = perm_bra.
get();
446 auto ksp = cur & (~cur + 1);
454 (*cur_2dm.block)(s,s) += eigv[i] * eigv[i];
461 auto ksp2 = cur2 & (~cur2 + 1);
468 unsigned int idx = (*sp2tp)(r,s);
470 idx -= block->getn();
473 cur_2dm.diag[idx] += eigv[i] * eigv[i];
478 for(
unsigned int j=i+1;j<eigv.size();++j)
480 const auto ket = perm_ket.next();
482 const auto diff = bra ^ ket;
490 auto ksp1 = diff_c & (~diff_c + 1);
494 auto ksp2 = diff_c & (~diff_c + 1);
503 (*cur_2dm.block)(r,s) += eigv[i] * eigv[j];
504 (*cur_2dm.block)(s,r) += eigv[i] * eigv[j];
515 output <<
"Block: " << std::endl;
518 output << i <<
"\t" << j <<
"\t|\t" << (*dm2.tp2sp)(i,0) <<
" " << (*dm2.tp2sp)(i,1) <<
" ; " << (*dm2.tp2sp)(j,0) <<
" " << (*dm2.tp2sp)(j,1) <<
"\t\t" << (*dm2.block)(i,j) << std::endl;
522 output <<
"Vector (4x): " << std::endl;
523 for(
int i=0;i<dm2.diag.size();i++)
524 output << i <<
"\t|\t" << (*dm2.tp2sp)(L+i,0) <<
" " << (*dm2.tp2sp)(L+i,1) <<
"\t\t" << dm2.diag[i] << std::endl;
535 auto L = block->getn();
538 auto calc_elem = [
this,&mol,L] (
int i,
int j) {
539 int a = (*tp2sp)(i,0);
540 int b = (*tp2sp)(i,1);
541 int c = (*tp2sp)(j,0);
542 int d = (*tp2sp)(j,1);
553 result += (mol.
getT(a_,a_) + mol.
getT(b_,b_))/(N - 1.0);
558 if(b==(a+L) && d==(c+L))
559 result += mol.
getV(a_,b_,c_,d_);
563 if(i==j && a/L == b/L && a!=b)
564 result += mol.
getV(a_,b_,c_,d_) - mol.
getV(a_,b_,d_,c_);
568 if(i==j && a/L != b/L && a%L!=b%L)
569 result += mol.
getV(a_,b_, c_,d_);
574 for(
int i=0;i<block->getn();++i)
575 for(
int j=i;j<block->getn();++j)
576 (*block)(i,j) = (*block)(j,i) = calc_elem(i,j);
578 for(
int i=0;i<diag.size();i++)
581 diag[i] = 0.5*calc_elem(L+i,L+i) + 0.5*calc_elem(L*L+i,L*L+i);
593 int n = block->getn()*block->getn();
596 result +=
ddot_(&n,block->getpointer(),&inc,x.block->getpointer(),&inc);
599 result += 4 *
ddot_(&n,diag.data(),&inc,x.diag.data(),&inc);
610 for(
auto elem : diag)
613 return block->trace() + 4 * result;
626 std::pair<double,bool>
DM2::find_min_angle(
int k,
int l,
double start_angle, std::function<
double(
int,
int)> &T, std::function<
double(
int,
int,
int,
int)> &V)
const
630 const int L = block->getn();
632 double theta = start_angle;
634 const DM2 &rdm = *
this;
636 double cos2 = 2.0/(N-1.0)*(T(k,k)*rdm(k,k+L,k,k+L)+T(l,l)*rdm(l,l+L,l,l+L));
638 double sin2 = 2.0/(N-1.0)*(T(l,l)*rdm(k,k+L,k,k+L)+T(k,k)*rdm(l,l+L,l,l+L));
641 double sincos = 2.0/(N-1.0)*T(k,l)*(rdm(l,l+L,l,l+L)-rdm(k,k+L,k,k+L));
648 cos2 += 2*V(k,k,a,a)*rdm(k,k+L,a,a+L)+2*V(l,l,a,a)*rdm(l,l+L,a,a+L)+2*(2*V(k,a,k,a)-V(k,a,a,k)+2.0/(N-1.0)*T(k,k))*rdm(k,a,k,a)+2*(2*V(l,a,l,a)-V(l,a,a,l)+2.0/(N-1.0)*T(l,l))*rdm(l,a,l,a);
650 sin2 += 2*V(l,l,a,a)*rdm(k,k+L,a,a+L)+2*V(k,k,a,a)*rdm(l,l+L,a,a+L)+2*(2*V(k,a,k,a)-V(k,a,a,k)+2.0/(N-1.0)*T(k,k))*rdm(l,a,l,a)+2*(2*V(l,a,l,a)-V(l,a,a,l)+2.0/(N-1.0)*T(l,l))*rdm(k,a,k,a);
652 sincos += 2*V(k,l,a,a)*(rdm(l,l+L,a,a+L)-rdm(k,k+L,a,a+L))+2*(2*V(k,a,l,a)-V(k,a,a,l)+2.0/(N-1.0)*T(k,l))*(rdm(l,a,l,a)-rdm(k,a,k,a));
655 const double cos4 = V(k,k,k,k)*rdm(k,k+L,k,k+L)+V(l,l,l,l)*rdm(l,l+L,l,l+L)+2*V(k,k,l,l)*rdm(k,k+L,l,l+L)+2*(2*V(k,l,k,l)-V(k,k,l,l))*rdm(k,l,k,l);
657 const double sin4 = V(k,k,k,k)*rdm(l,l+L,l,l+L)+V(l,l,l,l)*rdm(k,k+L,k,k+L)+2*V(k,k,l,l)*rdm(k,k+L,l,l+L)+2*(2*V(k,l,k,l)-V(k,k,l,l))*rdm(k,l,k,l);
660 const double cos2sin2 = (2*V(k,k,l,l)+V(k,l,k,l))*(rdm(k,k+L,k,k+L)+rdm(l,l+L,l,l+L))+((V(k,k,k,k)+V(l,l,l,l)-2*(V(k,l,k,l)+V(k,k,l,l))))*rdm(k,k+L,l,l+L)+(V(k,k,k,k)+V(l,l,l,l)-6*V(k,k,l,l)+2*V(k,l,k,l))*rdm(k,l,k,l);
663 const double sin3cos = V(k,l,k,k)*rdm(l,l+L,l,l+L)-V(k,l,l,l)*rdm(k,k+L,k,k+L)-(V(k,l,k,k)-V(k,l,l,l))*(rdm(k,k+L,l,l+L)+rdm(k,l,k,l));
666 const double cos3sin = V(k,l,l,l)*rdm(l,l+L,l,l+L)-V(k,l,k,k)*rdm(k,k+L,k,k+L)+(V(k,l,k,k)-V(k,l,l,l))*(rdm(k,k+L,l,l+L)+rdm(k,l,k,l));
671 auto gradient = [&] (
double theta) ->
double {
672 double cos = std::cos(theta);
673 double sin = std::sin(theta);
675 double res = 16*(cos3sin-sin3cos)*cos*cos*cos*cos - 4*(cos4+sin4-2*cos2sin2)*sin*cos*cos*cos + 4*(sincos-3*cos3sin+5*sin3cos)*cos*cos + 2*(2*sin4-cos2+sin2-2*cos2sin2)*sin*cos-2*sincos-4*sin3cos;
681 auto hessian = [&] (
double theta) ->
double {
682 double cos = std::cos(theta);
683 double sin = std::sin(theta);
685 double res = -16*(cos4+sin4-2*cos2sin2)*cos*cos*cos*cos+64*(sin3cos-cos3sin)*sin*cos*cos*cos+4*(3*cos4+5*sin4-cos2+sin2-8*cos2sin2)*cos*cos-8*(sincos-3*cos3sin+5*sin3cos)*sin*cos+2*(cos2-2*sin4-sin2+2*cos2sin2);
697 const int max_iters = 20;
698 const double convergence = 1e-12;
700 double change = gradient(theta)*theta+hessian(theta)*theta*theta/2.0;
707 for(iter=0;iter<max_iters;iter++)
709 double dx = gradient(theta)/hessian(theta);
713 if(fabs(dx) < convergence)
718 std::cout <<
"Reached max iters in find_min_angle: " << k <<
" " << l << std::endl;
721 std::cout <<
"Found max!" << std::endl;
723 return std::make_pair(theta, hessian(theta)>0);
736 double DM2::calc_rotate(
int k,
int l,
double theta, std::function<
double(
int,
int)> &T, std::function<
double(
int,
int,
int,
int)> &V)
const
740 const int L = block->getn();
742 const DM2 &rdm = *
this;
744 double energy = 4/(N-1.0)*(T(k,k)+T(l,l)) * rdm(k,l,k,l);
746 double cos2 = 2.0/(N-1.0)*(T(k,k)*rdm(k,k+L,k,k+L)+T(l,l)*rdm(l,l+L,l,l+L));
748 double sin2 = 2.0/(N-1.0)*(T(l,l)*rdm(k,k+L,k,k+L)+T(k,k)*rdm(l,l+L,l,l+L));
751 double sincos = 2.0/(N-1.0)*T(k,l)*(rdm(l,l+L,l,l+L)-rdm(k,k+L,k,k+L));
758 energy += 2.0/(N-1.0) * T(a,a) * (rdm(a,a+L,a,a+L)+2*rdm(a,k,a,k)+2*rdm(a,l,a,l));
765 energy += 2.0/(N-1.0) * (T(a,a)+T(b,b)) * rdm(a,b,a,b);
767 energy += V(a,a,b,b) * rdm(a,a+L,b,b+L);
769 energy += (2*V(a,b,a,b)-V(a,b,b,a)) * rdm(a,b,a,b);
772 cos2 += 2*V(k,k,a,a)*rdm(k,k+L,a,a+L)+2*V(l,l,a,a)*rdm(l,l+L,a,a+L)+2*(2*V(k,a,k,a)-V(k,a,a,k)+2.0/(N-1.0)*T(k,k))*rdm(k,a,k,a)+2*(2*V(l,a,l,a)-V(l,a,a,l)+2.0/(N-1.0)*T(l,l))*rdm(l,a,l,a);
774 sin2 += 2*V(l,l,a,a)*rdm(k,k+L,a,a+L)+2*V(k,k,a,a)*rdm(l,l+L,a,a+L)+2*(2*V(k,a,k,a)-V(k,a,a,k)+2.0/(N-1.0)*T(k,k))*rdm(l,a,l,a)+2*(2*V(l,a,l,a)-V(l,a,a,l)+2.0/(N-1.0)*T(l,l))*rdm(k,a,k,a);
776 sincos += 2*V(k,l,a,a)*(rdm(l,l+L,a,a+L)-rdm(k,k+L,a,a+L))+2*(2*V(k,a,l,a)-V(k,a,a,l)+2.0/(N-1.0)*T(k,l))*(rdm(l,a,l,a)-rdm(k,a,k,a));
779 const double cos4 = V(k,k,k,k)*rdm(k,k+L,k,k+L)+V(l,l,l,l)*rdm(l,l+L,l,l+L)+2*V(k,k,l,l)*rdm(k,k+L,l,l+L)+2*(2*V(k,l,k,l)-V(k,k,l,l))*rdm(k,l,k,l);
781 const double sin4 = V(k,k,k,k)*rdm(l,l+L,l,l+L)+V(l,l,l,l)*rdm(k,k+L,k,k+L)+2*V(k,k,l,l)*rdm(k,k+L,l,l+L)+2*(2*V(k,l,k,l)-V(k,k,l,l))*rdm(k,l,k,l);
784 const double cos2sin2 = (2*V(k,k,l,l)+V(k,l,k,l))*(rdm(k,k+L,k,k+L)+rdm(l,l+L,l,l+L))+((V(k,k,k,k)+V(l,l,l,l)-2*(V(k,l,k,l)+V(k,k,l,l))))*rdm(k,k+L,l,l+L)+(V(k,k,k,k)+V(l,l,l,l)-6*V(k,k,l,l)+2*V(k,l,k,l))*rdm(k,l,k,l);
787 const double sin3cos = V(k,l,k,k)*rdm(l,l+L,l,l+L)-V(k,l,l,l)*rdm(k,k+L,k,k+L)-(V(k,l,k,k)-V(k,l,l,l))*(rdm(k,k+L,l,l+L)+rdm(k,l,k,l));
790 const double cos3sin = V(k,l,l,l)*rdm(l,l+L,l,l+L)-V(k,l,k,k)*rdm(k,k+L,k,k+L)+(V(k,l,k,k)-V(k,l,l,l))*(rdm(k,k+L,l,l+L)+rdm(k,l,k,l));
792 const double cos = std::cos(theta);
793 const double sin = std::sin(theta);
795 energy += cos*cos*cos*cos*cos4;
797 energy += sin*sin*sin*sin*sin4;
799 energy += cos*cos*cos2;
801 energy += sin*sin*sin2;
803 energy += 2*sin*cos*sincos;
805 energy += 2*cos*cos*sin*sin*cos2sin2;
807 energy += 4*cos*sin*sin*sin*sin3cos;
809 energy += 4*cos*cos*cos*sin*cos3sin;
std::pair< double, bool > find_min_angle(int k, int l, double start_angle, std::function< double(int, int)> &T, std::function< double(int, int, int, int)> &V) const
static DM2 ReadFromFile(const std::string)
double calc_rotate(int k, int l, double theta, std::function< double(int, int)> &T, std::function< double(int, int, int, int)> &V) const
double ddot_(int *n, double *x, int *incx, double *y, int *incy)
void WriteToFile(const std::string) const
double operator()(int, int, int, int) const
static unsigned int CountBits(mybitset)
unsigned int get_n_sp() const
std::ostream & operator<<(std::ostream &output, doci::DM2 &dm2)
virtual mybitset get() const
void Build(Permutation &, std::vector< double > &)
void BuildHamiltonian(const Molecule &)
DM2(unsigned int, unsigned int)
virtual unsigned int get_n_electrons() const
virtual double getT(int, int) const =0
#define HDF5_STATUS_CHECK(status)
void daxpy_(int *n, double *alpha, double *x, int *incx, double *y, int *incy)
DM2 & operator=(const DM2 &)
virtual double getV(int, int, int, int) const =0
unsigned int get_n_electrons() const
double Dot(const DM2 &) const
double * getpointer() const
DM2 & operator+=(const DM2 &)
virtual unsigned int get_n_sp() const