9 #include "DOCIHamtilonian.h"
14 std::unique_ptr<helpers::matrix> doci::DM2::sp2tp =
nullptr;
15 std::unique_ptr<helpers::matrix> doci::DM2::tp2sp =
nullptr;
18 #define HDF5_STATUS_CHECK(status) if(status < 0) std::cerr << __FILE__ << ":" << __LINE__ << ": Problem with writing to file. Status code=" << status << std::endl;
32 diag.resize((n_sp*(n_sp-1))/2);
51 diag.resize((n_sp*(n_sp-1))/2);
57 assert(sp2tp && tp2sp);
66 block = std::move(orig.block);
67 diag = std::move(orig.diag);
82 block = std::move(orig.block);
83 diag = std::move(orig.diag);
97 std::fill(diag.begin(), diag.end(), val);
124 int i = (*sp2tp)(a,b);
125 int j = (*sp2tp)(c,d);
127 if(i<block->getn() && j<block->getn())
128 return sign * (*block)(i,j);
130 return sign * diag[(i-block->getn())%diag.size()];
142 (*block) += (*a.block);
145 int dim = diag.size();
148 double *tmp =
const_cast<double *
>(a.diag.data());
150 daxpy_(&dim,&alpha,tmp,&inc,diag.data(),&inc);
159 void DM2::fill_lists(
unsigned int n_sp)
163 auto n_tp = M*(M-1)/2;
175 (*sp2tp)(a,a+L) = (*sp2tp)(a+L,a) = tel++;
179 for(
int b=a+1;b<L;b++)
180 (*sp2tp)(a,b) = (*sp2tp)(b,a) = tel++;
184 for(
int b=a+1;b<M;b++)
185 (*sp2tp)(a,b) = (*sp2tp)(b,a) = tel++;
189 for(
int b=L+a+1;b<M;b++)
191 (*sp2tp)(a,b) = (*sp2tp)(b,a) = tel++;
195 for(
int b=a%L+1;b<L;b++)
197 (*sp2tp)(a,b) = (*sp2tp)(b,a) = tel++;
202 for(
int b=a+1;b<M;b++)
204 (*tp2sp)((*sp2tp)(a,b),0) = a;
205 (*tp2sp)((*sp2tp)(a,b),1) = b;
215 hid_t file_id, group_id, dataset_id, attribute_id, dataspace_id;
218 file_id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
221 group_id = H5Gcreate(file_id,
"/RDM", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
224 hsize_t dimblock = block->getn() * block->getn();
226 dataspace_id = H5Screate_simple(1, &dimblock, NULL);
228 dataset_id = H5Dcreate(group_id,
"Block", H5T_IEEE_F64LE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
233 status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data);
236 status = H5Sclose(dataspace_id);
239 status = H5Dclose(dataset_id);
242 dimblock = diag.size();
244 dataspace_id = H5Screate_simple(1, &dimblock, NULL);
246 dataset_id = H5Dcreate(group_id,
"Vector", H5T_IEEE_F64LE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
249 status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, diag.data());
252 status = H5Sclose(dataspace_id);
255 status = H5Dclose(dataset_id);
259 dataspace_id = H5Screate(H5S_SCALAR);
260 unsigned int L = block->getn();
262 attribute_id = H5Acreate (group_id,
"L", H5T_STD_U32LE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
263 status = H5Awrite (attribute_id, H5T_NATIVE_UINT, &L );
266 status = H5Aclose(attribute_id);
269 attribute_id = H5Acreate (group_id,
"N", H5T_STD_U32LE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
270 status = H5Awrite (attribute_id, H5T_NATIVE_UINT, &N );
273 status = H5Aclose(attribute_id);
276 status = H5Sclose(dataspace_id);
279 status = H5Gclose(group_id);
282 status = H5Fclose(file_id);
293 hid_t file_id, dataset_id, group_id, attribute_id;
296 file_id = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
299 group_id = H5Gopen(file_id,
"/RDM", H5P_DEFAULT);
302 attribute_id = H5Aopen(group_id,
"L", H5P_DEFAULT);
306 status = H5Aread(attribute_id, H5T_NATIVE_UINT, &L);
309 status = H5Aclose(attribute_id);
312 attribute_id = H5Aopen(group_id,
"N", H5P_DEFAULT);
316 status = H5Aread(attribute_id, H5T_NATIVE_UINT, &N);
319 status = H5Aclose(attribute_id);
324 dataset_id = H5Dopen(file_id,
"/RDM/Block", H5P_DEFAULT);
327 status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dm2.block->getpointer());
330 status = H5Dclose(dataset_id);
333 dataset_id = H5Dopen(file_id,
"/RDM/Vector", H5P_DEFAULT);
336 status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dm2.diag.data());
339 status = H5Dclose(dataset_id);
342 status = H5Gclose(group_id);
345 status = H5Fclose(file_id);
364 return block->getn();
375 auto num_t = omp_get_max_threads();
376 unsigned long long num_elems = (eigv.size()*1ul*(eigv.size()+1ul))/2;
377 unsigned long long size_part = num_elems/num_t + 1;
381 std::vector<unsigned int> workload(num_t+1);
382 workload.front() = 0;
383 workload.back() = eigv.size();
385 for(
int i=1;i<num_t;i++)
387 auto num_lines = workload[i-1];
388 unsigned long long num_elems = 0;
390 while(num_elems < size_part)
391 num_elems += eigv.size() - num_lines++;
393 if(num_lines > eigv.size())
394 num_lines = eigv.size();
396 workload[i] = num_lines;
399 std::vector< std::unique_ptr<DM2> > dm2_parts(num_t);
401 std::cout <<
"Running with " << num_t <<
" threads." << std::endl;
407 auto start = std::chrono::high_resolution_clock::now();
408 auto me = omp_get_thread_num();
410 dm2_parts[me].reset(
new DM2(block->getn(),N));
411 (*dm2_parts[me]) = 0;
415 for(
auto idx_begin=0;idx_begin<workload[me];++idx_begin)
418 auto vec_copy = eigv;
420 build_iter(my_perm, vec_copy, workload[me], workload[me+1], (*dm2_parts[me]));
422 auto end = std::chrono::high_resolution_clock::now();
425 std::cout << me <<
"\t" << std::chrono::duration_cast<std::chrono::duration<double,std::ratio<1>>>(end-start).count() <<
" s" << std::endl;
430 for(
auto &cur_dm2: dm2_parts)
431 (*this) += (*cur_dm2);
434 void DM2::build_iter(
Permutation& perm, std::vector<double> &eigv,
unsigned int i_start,
unsigned int i_end,
DM2 &cur_2dm)
436 auto& perm_bra = perm;
438 for(
unsigned int i=i_start;i<i_end;++i)
440 const auto bra = perm_bra.
get();
450 auto ksp = cur & (~cur + 1);
458 (*cur_2dm.block)(s,s) += eigv[i] * eigv[i];
465 auto ksp2 = cur2 & (~cur2 + 1);
472 unsigned int idx = (*sp2tp)(r,s);
474 idx -= block->getn();
477 cur_2dm.diag[idx] += eigv[i] * eigv[i];
482 for(
unsigned int j=i+1;j<eigv.size();++j)
484 const auto ket = perm_ket.next();
486 const auto diff = bra ^ ket;
494 auto ksp1 = diff_c & (~diff_c + 1);
498 auto ksp2 = diff_c & (~diff_c + 1);
507 (*cur_2dm.block)(r,s) += eigv[i] * eigv[j];
508 (*cur_2dm.block)(s,r) += eigv[i] * eigv[j];
519 output <<
"Block: " << std::endl;
522 output << i <<
"\t" << j <<
"\t|\t" << (*dm2.tp2sp)(i,0) <<
" " << (*dm2.tp2sp)(i,1) <<
" ; " << (*dm2.tp2sp)(j,0) <<
" " << (*dm2.tp2sp)(j,1) <<
"\t\t" << (*dm2.block)(i,j) << std::endl;
526 output <<
"Vector (4x): " << std::endl;
527 for(
int i=0;i<dm2.diag.size();i++)
528 output << i <<
"\t|\t" << (*dm2.tp2sp)(L+i,0) <<
" " << (*dm2.tp2sp)(L+i,1) <<
"\t\t" << dm2.diag[i] << std::endl;
539 auto L = block->getn();
542 auto calc_elem = [
this,&mol,L] (
int i,
int j) {
543 int a = (*tp2sp)(i,0);
544 int b = (*tp2sp)(i,1);
545 int c = (*tp2sp)(j,0);
546 int d = (*tp2sp)(j,1);
557 result += (mol.
getT(a_,a_) + mol.
getT(b_,b_))/(N - 1.0);
562 if(b==(a+L) && d==(c+L))
563 result += mol.
getV(a_,b_,c_,d_);
567 if(i==j && a/L == b/L && a!=b)
568 result += mol.
getV(a_,b_,c_,d_) - mol.
getV(a_,b_,d_,c_);
572 if(i==j && a/L != b/L && a%L!=b%L)
573 result += mol.
getV(a_,b_, c_,d_);
578 for(
int i=0;i<block->getn();++i)
579 for(
int j=i;j<block->getn();++j)
580 (*block)(i,j) = (*block)(j,i) = calc_elem(i,j);
582 for(
int i=0;i<diag.size();i++)
585 diag[i] = 0.5*calc_elem(L+i,L+i) + 0.5*calc_elem(L*L+i,L*L+i);
597 int n = block->getn()*block->getn();
600 result +=
ddot_(&n,block->getpointer(),&inc,x.block->getpointer(),&inc);
603 result += 4 *
ddot_(&n,diag.data(),&inc,x.diag.data(),&inc);
614 for(
auto elem : diag)
617 return block->trace() + 4 * result;
630 std::pair<double,bool>
DM2::find_min_angle(
int k,
int l,
double start_angle, std::function<
double(
int,
int)> &T, std::function<
double(
int,
int,
int,
int)> &V)
const
634 const int L = block->getn();
636 double theta = start_angle;
638 const DM2 &rdm = *
this;
640 double cos2 = 2.0/(N-1.0)*(T(k,k)*rdm(k,k+L,k,k+L)+T(l,l)*rdm(l,l+L,l,l+L));
642 double sin2 = 2.0/(N-1.0)*(T(l,l)*rdm(k,k+L,k,k+L)+T(k,k)*rdm(l,l+L,l,l+L));
645 double sincos = 2.0/(N-1.0)*T(k,l)*(rdm(l,l+L,l,l+L)-rdm(k,k+L,k,k+L));
652 cos2 += 2*V(k,k,a,a)*rdm(k,k+L,a,a+L)+2*V(l,l,a,a)*rdm(l,l+L,a,a+L)+2*(2*V(k,a,k,a)-V(k,a,a,k)+2.0/(N-1.0)*T(k,k))*rdm(k,a,k,a)+2*(2*V(l,a,l,a)-V(l,a,a,l)+2.0/(N-1.0)*T(l,l))*rdm(l,a,l,a);
654 sin2 += 2*V(l,l,a,a)*rdm(k,k+L,a,a+L)+2*V(k,k,a,a)*rdm(l,l+L,a,a+L)+2*(2*V(k,a,k,a)-V(k,a,a,k)+2.0/(N-1.0)*T(k,k))*rdm(l,a,l,a)+2*(2*V(l,a,l,a)-V(l,a,a,l)+2.0/(N-1.0)*T(l,l))*rdm(k,a,k,a);
656 sincos += 2*V(k,l,a,a)*(rdm(l,l+L,a,a+L)-rdm(k,k+L,a,a+L))+2*(2*V(k,a,l,a)-V(k,a,a,l)+2.0/(N-1.0)*T(k,l))*(rdm(l,a,l,a)-rdm(k,a,k,a));
659 const double cos4 = V(k,k,k,k)*rdm(k,k+L,k,k+L)+V(l,l,l,l)*rdm(l,l+L,l,l+L)+2*V(k,k,l,l)*rdm(k,k+L,l,l+L)+2*(2*V(k,l,k,l)-V(k,k,l,l))*rdm(k,l,k,l);
661 const double sin4 = V(k,k,k,k)*rdm(l,l+L,l,l+L)+V(l,l,l,l)*rdm(k,k+L,k,k+L)+2*V(k,k,l,l)*rdm(k,k+L,l,l+L)+2*(2*V(k,l,k,l)-V(k,k,l,l))*rdm(k,l,k,l);
664 const double cos2sin2 = (2*V(k,k,l,l)+V(k,l,k,l))*(rdm(k,k+L,k,k+L)+rdm(l,l+L,l,l+L))+((V(k,k,k,k)+V(l,l,l,l)-2*(V(k,l,k,l)+V(k,k,l,l))))*rdm(k,k+L,l,l+L)+(V(k,k,k,k)+V(l,l,l,l)-6*V(k,k,l,l)+2*V(k,l,k,l))*rdm(k,l,k,l);
667 const double sin3cos = V(k,l,k,k)*rdm(l,l+L,l,l+L)-V(k,l,l,l)*rdm(k,k+L,k,k+L)-(V(k,l,k,k)-V(k,l,l,l))*(rdm(k,k+L,l,l+L)+rdm(k,l,k,l));
670 const double cos3sin = V(k,l,l,l)*rdm(l,l+L,l,l+L)-V(k,l,k,k)*rdm(k,k+L,k,k+L)+(V(k,l,k,k)-V(k,l,l,l))*(rdm(k,k+L,l,l+L)+rdm(k,l,k,l));
675 auto gradient = [&] (
double theta) ->
double {
676 double cos = std::cos(theta);
677 double sin = std::sin(theta);
679 double res = 16*(cos3sin-sin3cos)*cos*cos*cos*cos - 4*(cos4+sin4-2*cos2sin2)*sin*cos*cos*cos + 4*(sincos-3*cos3sin+5*sin3cos)*cos*cos + 2*(2*sin4-cos2+sin2-2*cos2sin2)*sin*cos-2*sincos-4*sin3cos;
685 auto hessian = [&] (
double theta) ->
double {
686 double cos = std::cos(theta);
687 double sin = std::sin(theta);
689 double res = -16*(cos4+sin4-2*cos2sin2)*cos*cos*cos*cos+64*(sin3cos-cos3sin)*sin*cos*cos*cos+4*(3*cos4+5*sin4-cos2+sin2-8*cos2sin2)*cos*cos-8*(sincos-3*cos3sin+5*sin3cos)*sin*cos+2*(cos2-2*sin4-sin2+2*cos2sin2);
701 const int max_iters = 20;
702 const double convergence = 1e-12;
704 double change = gradient(theta)*theta+hessian(theta)*theta*theta/2.0;
711 for(iter=0;iter<max_iters;iter++)
713 double dx = gradient(theta)/hessian(theta);
717 if(fabs(dx) < convergence)
722 std::cout <<
"Reached max iters in find_min_angle: " << k <<
" " << l << std::endl;
725 std::cout <<
"Found max!" << std::endl;
727 return std::make_pair(theta, hessian(theta)>0);
740 double DM2::calc_rotate(
int k,
int l,
double theta, std::function<
double(
int,
int)> &T, std::function<
double(
int,
int,
int,
int)> &V)
const
744 const int L = block->getn();
746 const DM2 &rdm = *
this;
748 double energy = 4/(N-1.0)*(T(k,k)+T(l,l)) * rdm(k,l,k,l);
750 double cos2 = 2.0/(N-1.0)*(T(k,k)*rdm(k,k+L,k,k+L)+T(l,l)*rdm(l,l+L,l,l+L));
752 double sin2 = 2.0/(N-1.0)*(T(l,l)*rdm(k,k+L,k,k+L)+T(k,k)*rdm(l,l+L,l,l+L));
755 double sincos = 2.0/(N-1.0)*T(k,l)*(rdm(l,l+L,l,l+L)-rdm(k,k+L,k,k+L));
762 energy += 2.0/(N-1.0) * T(a,a) * (rdm(a,a+L,a,a+L)+2*rdm(a,k,a,k)+2*rdm(a,l,a,l));
769 energy += 2.0/(N-1.0) * (T(a,a)+T(b,b)) * rdm(a,b,a,b);
771 energy += V(a,a,b,b) * rdm(a,a+L,b,b+L);
773 energy += (2*V(a,b,a,b)-V(a,b,b,a)) * rdm(a,b,a,b);
776 cos2 += 2*V(k,k,a,a)*rdm(k,k+L,a,a+L)+2*V(l,l,a,a)*rdm(l,l+L,a,a+L)+2*(2*V(k,a,k,a)-V(k,a,a,k)+2.0/(N-1.0)*T(k,k))*rdm(k,a,k,a)+2*(2*V(l,a,l,a)-V(l,a,a,l)+2.0/(N-1.0)*T(l,l))*rdm(l,a,l,a);
778 sin2 += 2*V(l,l,a,a)*rdm(k,k+L,a,a+L)+2*V(k,k,a,a)*rdm(l,l+L,a,a+L)+2*(2*V(k,a,k,a)-V(k,a,a,k)+2.0/(N-1.0)*T(k,k))*rdm(l,a,l,a)+2*(2*V(l,a,l,a)-V(l,a,a,l)+2.0/(N-1.0)*T(l,l))*rdm(k,a,k,a);
780 sincos += 2*V(k,l,a,a)*(rdm(l,l+L,a,a+L)-rdm(k,k+L,a,a+L))+2*(2*V(k,a,l,a)-V(k,a,a,l)+2.0/(N-1.0)*T(k,l))*(rdm(l,a,l,a)-rdm(k,a,k,a));
783 const double cos4 = V(k,k,k,k)*rdm(k,k+L,k,k+L)+V(l,l,l,l)*rdm(l,l+L,l,l+L)+2*V(k,k,l,l)*rdm(k,k+L,l,l+L)+2*(2*V(k,l,k,l)-V(k,k,l,l))*rdm(k,l,k,l);
785 const double sin4 = V(k,k,k,k)*rdm(l,l+L,l,l+L)+V(l,l,l,l)*rdm(k,k+L,k,k+L)+2*V(k,k,l,l)*rdm(k,k+L,l,l+L)+2*(2*V(k,l,k,l)-V(k,k,l,l))*rdm(k,l,k,l);
788 const double cos2sin2 = (2*V(k,k,l,l)+V(k,l,k,l))*(rdm(k,k+L,k,k+L)+rdm(l,l+L,l,l+L))+((V(k,k,k,k)+V(l,l,l,l)-2*(V(k,l,k,l)+V(k,k,l,l))))*rdm(k,k+L,l,l+L)+(V(k,k,k,k)+V(l,l,l,l)-6*V(k,k,l,l)+2*V(k,l,k,l))*rdm(k,l,k,l);
791 const double sin3cos = V(k,l,k,k)*rdm(l,l+L,l,l+L)-V(k,l,l,l)*rdm(k,k+L,k,k+L)-(V(k,l,k,k)-V(k,l,l,l))*(rdm(k,k+L,l,l+L)+rdm(k,l,k,l));
794 const double cos3sin = V(k,l,l,l)*rdm(l,l+L,l,l+L)-V(k,l,k,k)*rdm(k,k+L,k,k+L)+(V(k,l,k,k)-V(k,l,l,l))*(rdm(k,k+L,l,l+L)+rdm(k,l,k,l));
796 const double cos = std::cos(theta);
797 const double sin = std::sin(theta);
799 energy += cos*cos*cos*cos*cos4;
801 energy += sin*sin*sin*sin*sin4;
803 energy += cos*cos*cos2;
805 energy += sin*sin*sin2;
807 energy += 2*sin*cos*sincos;
809 energy += 2*cos*cos*sin*sin*cos2sin2;
811 energy += 4*cos*sin*sin*sin*sin3cos;
813 energy += 4*cos*cos*cos*sin*cos3sin;
std::pair< double, bool > find_min_angle(int k, int l, double start_angle, std::function< double(int, int)> &T, std::function< double(int, int, int, int)> &V) const
static DM2 ReadFromFile(const std::string)
double calc_rotate(int k, int l, double theta, std::function< double(int, int)> &T, std::function< double(int, int, int, int)> &V) const
double ddot_(int *n, double *x, int *incx, double *y, int *incy)
void WriteToFile(const std::string) const
double operator()(int, int, int, int) const
static unsigned int CountBits(mybitset)
unsigned int get_n_sp() const
std::ostream & operator<<(std::ostream &output, doci::DM2 &dm2)
virtual mybitset get() const
void Build(Permutation &, std::vector< double > &)
void BuildHamiltonian(const Molecule &)
DM2(unsigned int, unsigned int)
virtual unsigned int get_n_electrons() const
virtual double getT(int, int) const =0
void daxpy_(int *n, double *alpha, double *x, int *incx, double *y, int *incy)
DM2 & operator=(const DM2 &)
#define HDF5_STATUS_CHECK(status)
virtual double getV(int, int, int, int) const =0
unsigned int get_n_electrons() const
double Dot(const DM2 &) const
double * getpointer() const
DM2 & operator+=(const DM2 &)
virtual unsigned int get_n_sp() const