From 3774b8b506f3e6543bbf36c808d188ef4cf05bfd Mon Sep 17 00:00:00 2001
From: ydan <ydanuiuc@gmail.com>
Date: Thu, 9 Jan 2025 23:12:02 -0600
Subject: [PATCH] add the charge smoothing code

---
 README.md                           |  37 ++
 chgcar-smoothing/3dfft.H            | 184 +++++++
 chgcar-smoothing/Makefile           |  20 +
 chgcar-smoothing/chgcar.H           | 158 ++++++
 chgcar-smoothing/chgcar_smoothing.C | 279 +++++++++++
 chgcar-smoothing/dcomp.H            |  39 ++
 chgcar-smoothing/io.H               | 742 ++++++++++++++++++++++++++++
 chgcar-smoothing/matrix.H           | 332 +++++++++++++
 chgcar-smoothing/smoothing.H        | 294 +++++++++++
 9 files changed, 2085 insertions(+)
 create mode 100644 chgcar-smoothing/3dfft.H
 create mode 100644 chgcar-smoothing/Makefile
 create mode 100644 chgcar-smoothing/chgcar.H
 create mode 100644 chgcar-smoothing/chgcar_smoothing.C
 create mode 100644 chgcar-smoothing/dcomp.H
 create mode 100644 chgcar-smoothing/io.H
 create mode 100644 chgcar-smoothing/matrix.H
 create mode 100644 chgcar-smoothing/smoothing.H

diff --git a/README.md b/README.md
index 4a07fb4..3ecda46 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,40 @@
+# Charge Smoothing
+This is the charge smoothing branch of the Bader Integration repository. Although it is not directly related to Bader integration or the weight method, it was developed from some of the code pieces in this repository, especially the CHGCAR manipulation routines in directory ```chgcar-manipulation``` written by Prof. Trinkle.
+
+Here, charge smoothing refers to suppressing the high-frequency components of a charge density in the reciprocal space. Because these high-frequency components could lead to numerical noise in the real space, such operation is possible to make the charge density a smoother quantity. In this code, the smoothing procedure is decribed as
+
+$$\rho(\mathbf{r}) \xrightarrow{\text{FFT}} \rho(\mathbf{G}) \xrightarrow{\text{smoothing}} \rho'(\mathbf{G}) \xrightarrow{\text{IFFT}} \rho'(\mathbf{r}) $$
+
+where we take the original charge density $\rho(\mathbf{r})$, do a fast Fourier transform (FFT), apply the smoothing method or a low-pass filter, and get the smoothed charge density $\rho'(\mathbf{r})$ after an inverse fast fourier transform (IFFT).
+
+Two smoothing methods or low-pass filters are implemented in file ```smooth.H```: a power-law truncation filter and a Gaussian filter. The code extracts the parameters of the chosen filter by fitting a power-law/Gaussian curve to a reference (expected to be smooth and well-behaved) charge density, and then apply the filter to the target charge density to smooth it. See functions ```extract_filter()``` and ```apply_filter()```, or ```extract_filter_gaussian()``` and ```apply_filter_gaussian()``` in file ```smooth.H``` for more details.
+
+Two parameters are currently hard-coded in this code: an integer ```GAUSSIAN``` which controls whether the Gaussian filter or the power-law truncation filter is to be used, and a double-type ```FILTER_MULT``` which manually scales the parameter ```l0``` of the Gaussian filter. (Yang found in his own research that the extracted Gaussian filter was not the optimal one, so he made manual adjustments and chose to determine the parameter with other methods.) 
+
+Please read the source code in ```chgcar_smoothing.C``` and ```smoothing.H```, understand what it is doing before reusing it.
+
+The charge smoothing source code is put in directory ```chgcar-smoothing```. To compile the code, run command
+
+    make
+
+A cpp compiler such as g++ is required for compilation. This code uses the GNU Scientific Library (gsl library). In Ubuntu, this library can be acquired by running ```sudo apt-get install libgsl-dev```.
+
+To run the compiled binary ```chgcar_smoothing```, run command
+
+    ./chgcar_smoothing CHGCAR_ref CHGCAR_tgt > CHGCAR_smoothed
+
+where ```CHGCAR_ref``` is the reference charge density for filter extraction, ```CHGCAR_tgt``` is the target charge density to be smoothed, and ```CHGCAR_smoothed``` is the smoothed charge density. It is assumed that the charge density files are in the CHGCAR-format of VASP 4. For files generated by VASP 5 or newer versions, please remove the line in the CHGCAR file header that specifies the ion species; alternatively, consider updating the code (one can replace the ```chgcar.H``` file here with the updated ```chgcar.H``` file in the master branch, and add a line in file ```chgcar_smoothing.C``` to output the ion species).
+
+This code was developed by Yang Dan, and used in the publication below. Details about charge smoothing are provided in the supplementary material.
+
+* Y. Dan and D. R. Trinkle, "First-principles core energies of isolated basal and prism screw dislocations in magnesium." *Mater. Res. Lett.* **10**, 360–368 (2022). [doi](https://doi.org/10.1080/21663831.2022.2051763)
+
+***
+
+Below is the original README.
+
+***
+
 # Bader integration using weighted integration
 This code takes the grid-based output from [VASP](https://www.vasp.at/) as from CHGCAR or AECCAR files and performs *Bader integration* over the basins of attraction. The call
 
diff --git a/chgcar-smoothing/3dfft.H b/chgcar-smoothing/3dfft.H
new file mode 100644
index 0000000..671ab04
--- /dev/null
+++ b/chgcar-smoothing/3dfft.H
@@ -0,0 +1,184 @@
+#ifndef __3DFFT_H
+#define __3DFFT_H
+#include <math.h>
+#include <gsl/gsl_fft_complex.h>
+
+#define REAL(z,i) ((z)[2*(i)])
+#define IMAG(z,i) ((z)[2*(i)+1])
+
+int forward_3dfft (double* Z, int N0, int N1, int N2, double* Xr, double* Xi);
+
+int inverse_3dfft (double* Xr, double* Xi, int N0, int N1, int N2, double* Z);
+
+// A grid N0 x N1 x N2.  Our indexing of Z is (fortran?) compatible with
+// CHGCAR: Z[i + N0*(j + N1*k)] == Z[i][j][k]
+// Assume EVERYTHING is already allocated.
+int forward_3dfft (double* Z, int N0, int N1, int N2, double* Xr, double* Xi) 
+{
+  // Wavetables and workspace:
+  gsl_fft_complex_wavetable* wave[3];
+  wave[0] = gsl_fft_complex_wavetable_alloc(N0);
+  wave[1] = gsl_fft_complex_wavetable_alloc(N1);
+  wave[2] = gsl_fft_complex_wavetable_alloc(N2);
+  
+  gsl_fft_complex_workspace* work[3];
+  work[0] = gsl_fft_complex_workspace_alloc(N0);
+  work[1] = gsl_fft_complex_workspace_alloc(N1);
+  work[2] = gsl_fft_complex_workspace_alloc(N2);
+  
+  // dataspace: ("packed" gsl complex arrays)
+  double* data[3], *d;
+  data[0] = new double[2*N0];
+  data[1] = new double[2*N1];
+  data[2] = new double[2*N2];
+  
+  int i, j, k;
+  int nind, N01=N0*N1;
+  
+  for (k=0; k<N2; ++k) {
+
+    d = data[0];
+    for (j=0; j<N1; ++j) {
+      // transform Z(i)[j,k] -> Z~(i)[j,k]
+      nind = N0*(j+N1*k);
+      for (i=0; i<N0; ++i) {
+	REAL(d, i) = Z[i+nind];
+	IMAG(d, i) = 0;
+      }
+      gsl_fft_complex_forward(d, 1, N0, wave[0], work[0]);
+      for (i=0; i<N0; ++i) {
+	Xr[i+nind] = REAL(d, i);
+	Xi[i+nind] = IMAG(d, i);
+      }
+    }
+
+    d = data[1];
+    for (i=0; i<N0; ++i) {
+      // transform X[i](j)[k] -> X~[i](j)[k]
+      nind = i+N0*N1*k;
+      for (j=0; j<N1; ++j) {
+	REAL(d, j) = Xr[j*N0+nind];
+	IMAG(d, j) = Xi[j*N0+nind];
+      }
+      gsl_fft_complex_forward(d, 1, N1, wave[1], work[1]);
+      for (j=0; j<N1; ++j) {
+	Xr[j*N0+nind] = REAL(d, j);
+	Xi[j*N0+nind] = IMAG(d, j);
+      }
+    }
+  }
+
+  d = data[2];
+  for (i=0; i<N0; ++i)
+    for (j=0; j<N1; ++j) {
+      nind = i + j*N0;
+      for (k=0; k<N2; ++k) {
+	REAL(d, k) = Xr[k*N01+nind];
+	IMAG(d, k) = Xi[k*N01+nind];
+      }
+      gsl_fft_complex_forward(d, 1, N2, wave[2], work[2]);
+      for (k=0; k<N2; ++k) {
+	Xr[k*N01+nind] = REAL(d, k);
+	Xi[k*N01+nind] = IMAG(d, k);
+      }
+    }
+  
+  // Garbage collection:
+  for (i=0; i<3; ++i) delete[] data[i];
+  for (i=0; i<3; ++i) gsl_fft_complex_workspace_free(work[i]);
+  for (i=0; i<3; ++i) gsl_fft_complex_wavetable_free(wave[i]);
+
+  return 0;
+}
+
+// Note: this is *destructive* with Xr and Xi.
+int inverse_3dfft (double* Xr, double* Xi, int N0, int N1, int N2, double* Z) 
+{
+  // Wavetables and workspace:
+  gsl_fft_complex_wavetable* wave[3];
+  wave[0] = gsl_fft_complex_wavetable_alloc(N0);
+  wave[1] = gsl_fft_complex_wavetable_alloc(N1);
+  wave[2] = gsl_fft_complex_wavetable_alloc(N2);
+  
+  gsl_fft_complex_workspace* work[3];
+  work[0] = gsl_fft_complex_workspace_alloc(N0);
+  work[1] = gsl_fft_complex_workspace_alloc(N1);
+  work[2] = gsl_fft_complex_workspace_alloc(N2);
+  
+  // dataspace: ("packed" gsl complex arrays)
+  double* data[3], *d;
+  data[0] = new double[2*N0];
+  data[1] = new double[2*N1];
+  data[2] = new double[2*N2];
+  
+  int i, j, k;
+  int nind, N01=N0*N1;
+  
+  for (k=0; k<N2; ++k) {
+
+    d = data[0];
+    for (j=0; j<N1; ++j) {
+      // transform Z(i)[j,k] -> Z~(i)[j,k]
+      nind = N0*(j+N1*k);
+      for (i=0; i<N0; ++i) {
+	REAL(d, i) = Xr[i+nind];
+	IMAG(d, i) = Xi[i+nind];;
+      }
+      gsl_fft_complex_inverse(d, 1, N0, wave[0], work[0]);
+      for (i=0; i<N0; ++i) {
+	Xr[i+nind] = REAL(d, i);
+	Xi[i+nind] = IMAG(d, i);
+      }
+    }
+
+    d = data[1];
+    for (i=0; i<N0; ++i) {
+      // transform X[i](j)[k] -> X~[i](j)[k]
+      nind = i+N0*N1*k;
+      for (j=0; j<N1; ++j) {
+	REAL(d, j) = Xr[j*N0+nind];
+	IMAG(d, j) = Xi[j*N0+nind];
+      }
+      gsl_fft_complex_inverse(d, 1, N1, wave[1], work[1]);
+      for (j=0; j<N1; ++j) {
+	Xr[j*N0+nind] = REAL(d, j);
+	Xi[j*N0+nind] = IMAG(d, j);
+      }
+    }
+  }
+
+  // Last step--finally overwrite Z:
+  int is_real=1;
+  double sum_imag = 0;
+  d = data[2];
+  for (i=0; i<N0; ++i)
+    for (j=0; j<N1; ++j) {
+      nind = i + j*N0;
+      for (k=0; k<N2; ++k) {
+	REAL(d, k) = Xr[k*N01+nind];
+	IMAG(d, k) = Xi[k*N01+nind];
+      }
+      gsl_fft_complex_inverse(d, 1, N2, wave[2], work[2]);
+      for (k=0; k<N2; ++k) {
+	Z[k*N01+nind] = REAL(d, k);
+	sum_imag += IMAG(d,k) * IMAG(d,k);
+	is_real = is_real && (zero(IMAG(d, k)));
+      }
+    }
+
+  sum_imag = sqrt(sum_imag/(N01*N2));
+  if (! is_real)
+    fprintf(stderr, "## iFFT had rms imaginary component: %.8le\n", sum_imag);
+  
+  // Garbage collection:
+  for (i=0; i<3; ++i) delete[] data[i];
+  for (i=0; i<3; ++i) gsl_fft_complex_workspace_free(work[i]);
+  for (i=0; i<3; ++i) gsl_fft_complex_wavetable_free(wave[i]);
+
+  return !is_real;
+}
+
+#undef REAL
+#undef IMAG
+
+#endif
diff --git a/chgcar-smoothing/Makefile b/chgcar-smoothing/Makefile
new file mode 100644
index 0000000..7fbb0c4
--- /dev/null
+++ b/chgcar-smoothing/Makefile
@@ -0,0 +1,20 @@
+CC:=g++
+INCLUDE:=-I.
+LIBS:=-L.
+LIBM:=-lm
+LIBGSL:=-lgsl -lgslcblas
+
+INCLUDES = *.H 
+
+TARGET = chgcar_smoothing
+
+all: $(TARGET)
+
+chgcar_smoothing.o: chgcar_smoothing.C $(INCLUDES)
+	$(CC) -Wall -c chgcar_smoothing.C
+
+chgcar_smoothing: chgcar_smoothing.o
+	$(CC) -o chgcar_smoothing chgcar_smoothing.o $(LIBGSL) $(LIBM)
+
+clean:
+	rm -f *.o chgcar_smoothing
\ No newline at end of file
diff --git a/chgcar-smoothing/chgcar.H b/chgcar-smoothing/chgcar.H
new file mode 100644
index 0000000..31a8406
--- /dev/null
+++ b/chgcar-smoothing/chgcar.H
@@ -0,0 +1,158 @@
+#ifndef __CHGCAR_H
+#define __CHGCAR_H
+#include <math.h>
+
+void fort_printf(char* str, const double& x);
+
+void read_grid(FILE* infile, double* rho, int Ng);
+
+void write_grid(FILE* outfile, double* rho, int Ng);
+
+void read_CHGCAR_header(FILE* infile, char* comment, double& a0,
+                        double latt[9], int* Natom, double** &uatom);
+
+void write_CHGCAR_header(FILE* outfile, char* comment, double a0,
+			 double latt[9], int* Natom, double** uatom);
+
+
+/*================================= main ==================================*/
+
+// this is how many entries appear per line...
+const int READ_stride = 5;
+
+void read_grid(FILE* infile, double* rho, int Ng) 
+{
+    int n, line, nl;
+    char dump[512];
+    
+    if (Ng == 0) {
+        fgets(dump, sizeof(dump), infile);
+        return;
+    }
+    
+    nl = Ng/READ_stride;
+    n=0;
+    
+    for (line=0, n=0; line<nl; ++line, n+=READ_stride) {
+        fgets(dump, sizeof(dump), infile);
+        sscanf(dump, "%lf %lf %lf %lf %lf", rho+n, rho+n+1, rho+n+2, rho+n+3, rho+n+4);
+    }
+    
+    if (n!=Ng) {
+        // last line...
+        fgets(dump, sizeof(dump), infile);
+        char *strp, *endp;
+        for (strp=dump; n<Ng; ++n, strp=endp) rho[n] = strtod(strp, &endp);
+        strp=NULL; endp=NULL;
+    }
+}
+
+
+// outputs in true fortran style... we seem to be having trouble
+// because we weren't using this... *sigh*
+void fort_printf(char* str, const double& x) 
+{
+    sprintf(str, "%+17.10lE", x);
+    str[2] = str[1]; str[1] = '.';
+    if (x >= 0) str[0] = '0';
+    
+    // fix the exponent (only if non-zero)
+    if (x != 0) {
+        int expon = strtol(str+14, (char**)NULL, 10);
+        expon++;
+        if (expon>=0) str[14] = '+';
+        else         {str[14] = '-'; expon = -expon;}
+        sprintf(str+15, "%02d", expon);
+    }
+}
+
+// not quite the same as what is output by CHGCAR, but maintains the same
+// accuracy, so "just as good", I'd say.
+void write_grid(FILE* outfile, double* rho, int Ng) 
+{
+    if (Ng == 0) {
+        fprintf(outfile, "\n");
+        return;
+    }
+    int n;
+    char dump[18];
+    for (n=0; n<Ng; ++n) {
+        fort_printf(dump, rho[n]);
+        fprintf(outfile, " %s", dump);
+        if (n%5 == 4) fprintf(outfile, "\n");
+    }
+    if (Ng%5 != 0) fprintf(outfile, "\n");
+}
+
+void read_CHGCAR_header(FILE* infile, char* comment, double& a0,
+			double latt[9], int* Natom, double** &uatom) 
+{
+    char dump[512];
+    int i;
+
+    //++ comment line
+    fgets(dump, sizeof(dump), infile);
+    char *p = stpcpy(comment, dump);
+    (p-1)[0] = '\0'; // remove the newline at the end of dump.
+
+    //++ a0
+    fgets(dump, sizeof(dump), infile);
+    sscanf(dump, "%lf", &a0);
+
+    //++ lattice vectors
+    for (i=0; i<3; ++i) {
+        fgets(dump, sizeof(dump), infile);
+
+        // was using a different definition before
+        sscanf(dump, "%lf %lf %lf", latt+i, latt+3+i, latt+6+i);
+    }
+
+    //++ number of atoms
+    fgets(dump, sizeof(dump), infile);
+    char *strp, *endp=dump;
+    int types = 0, Natoms=0;
+    do {
+        strp=endp;
+        i = strtol(strp, &endp, 10);
+        if (strp!=endp) {
+            Natoms += i;
+            Natom[types] = i;
+            ++types;
+        }
+    } while (strp!=endp);
+    Natom[types] = -1;
+    
+    //++ direct coord. (CHGCAR should always be output with "Direct")
+    fgets(dump, sizeof(dump), infile);
+    
+    //++ atom positions
+    uatom = new double*[Natoms];
+    for (i=0; i<Natoms; ++i) {
+        uatom[i] = new double[3];
+        double* in_vect = uatom[i];
+        fgets(dump, sizeof(dump), infile);
+        sscanf(dump, "%lf %lf %lf", in_vect, in_vect+1, in_vect+2);
+    }
+  
+    //++ blank line
+    fgets(dump, sizeof(dump), infile);
+}
+
+void write_CHGCAR_header(FILE* outfile, char* comment, double a0,
+			 double latt[9], int* Natom, double** uatom) 
+{
+    fprintf(outfile, "%s\n", comment);
+    fprintf(outfile, "%20.17lf\n", a0);
+    for (int i=0; i<3; ++i)
+        fprintf(outfile, "  %11.6lf %11.6lf %11.6lf\n", latt[i], latt[i+3], latt[i+6]);
+    int Natoms=0;
+    for (int n=0; Natom[n]>=0; ++n) {
+        Natoms += Natom[n];
+        if (Natom[n]>0) fprintf(outfile, " %3d", Natom[n]);
+    }
+    fprintf(outfile, "\nDirect\n");
+    for (int n=0; n<Natoms; ++n)
+        fprintf(outfile, " %9.6lf %9.6lf %9.6lf\n", uatom[n][0],uatom[n][1],uatom[n][2]);
+    fprintf(outfile, "\n");
+}
+#endif
\ No newline at end of file
diff --git a/chgcar-smoothing/chgcar_smoothing.C b/chgcar-smoothing/chgcar_smoothing.C
new file mode 100644
index 0000000..3fb2217
--- /dev/null
+++ b/chgcar-smoothing/chgcar_smoothing.C
@@ -0,0 +1,279 @@
+//************************** COMPILIATION OPTIONS ************************
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "io.H"
+//#include <string.h>
+//#include <math.h>
+#include <unistd.h>
+#include "chgcar.H"
+#include "3dfft.H"
+#include "matrix.H"
+#include "smoothing.H"
+
+/*================================= main ==================================*/
+
+// Arguments first, then flags, then explanation.
+const int NUMARGS = 2;
+const char* ARGLIST = "[-rvt] CHGCAR.bulk CHGCAR.new [rcut]";
+
+const char* ARGEXPL = 
+"  CHGCAR.bulk: output from VASP; handles US, PAW, spin- and non-spin-polar\n\
+  CHGCAR.new:  header portion of CHGCAR, including grid\n\
+  rcut:        cutoff distance for being \"vacuum\"; default = WS radius\n\
+\n\
+  -k  keep charge density; no \"resetting\" to correct amount\n\
+  -r  \"raw\" density -- no smoothing\n\
+  -v  VERBOSE\n\
+  -t  TESTING\n\
+\n\
+** WARNING: CHGCAR.bulk and CHGCAR.new should _not_ both be stdin";
+
+const char* FILEEXPL =
+"Both CHGCAR.bulk and CHGCAR.new are in VASP format; CHGCAR.new only requires\n\
+the atom positions and the new grid dimensions, while CHGCAR.bulk must be a\n\
+complete CHGCAR file, output from VASP to be meaningful.";
+
+int VERBOSE = 0;	// The infamous verbose flag.
+int TESTING = 0;	// Extreme verbosity (testing purposes)
+int ERROR = 0;		// Analysis: Error flag (for analysis purposes)
+
+int main(int argc, char **argv) {
+
+    // ************************** INITIALIZATION ***********************
+    char* progname = basename(argv[0]);
+    int RESCALE = 1;	// rescale total charge
+    int SMOOTH = 1;	// smooth total charge
+    int GAUSSIAN = 1; // use Gaussian filter (1) or Power law truncation (0)
+
+    char ch;
+    while ((ch = getopt(argc, argv, "krvthp")) != -1) {
+        switch (ch) {
+            case 'k': RESCALE = 0; break;
+            case 'r': SMOOTH = 0; break;
+            case 'v': VERBOSE = 1; break;
+            case 't': TESTING = 1; VERBOSE = 1; break;
+            case 'h':
+            case '?':
+            case 'p': GAUSSIAN = 0; break;
+            default: ERROR = 1;
+        }
+    }
+    argc -= optind; argv += optind;
+    if (argc < NUMARGS) ERROR = 2;
+    // All hell broken loose yet?
+    if (ERROR != 0) {
+        fprintf(stderr, "%s %s\n", progname, ARGLIST);
+        fprintf(stderr, "%s\n", ARGEXPL);
+        fprintf(stderr, "Input file format:\n%s\n", FILEEXPL);
+        exit(ERROR);
+    }
+
+    // ****************************** INPUT ****************************
+    char dump[512];
+    FILE* infile_bulk, *infile;
+    char* chgcar_bulk_name = argv[0];
+    char *chgcar_name = argv[1];
+    double rcut = 0;
+    if (argc > NUMARGS) rcut = strtod(argv[2], (char**)NULL);
+    if (rcut < 0) {
+        fprintf(stderr, "rcut=%g < 0?\n", rcut);
+        exit(ERROR_BADFLAG);
+    }
+
+    infile_bulk = myopenr(chgcar_bulk_name);
+    if (infile_bulk == NULL) {
+        fprintf(stderr, "Couldn't open %s for reading.\n", chgcar_bulk_name);
+        exit(ERROR_NOFILE);
+    }
+
+    infile = myopenr(chgcar_name);
+    if (infile == NULL) {
+        fprintf(stderr, "Couldn't open %s for reading.\n", chgcar_name);
+        exit(ERROR_NOFILE);
+    }
+
+    if (infile_bulk == infile) {
+        fprintf(stderr, "Cannot have both CHGCAR.bulk and CHGCAR.new be stdin.\n");
+        exit(ERROR_NOFILE);
+    }
+
+    // ***************************** ANALYSIS **************************
+    double a0;
+    double latt_bulk[9], latt[9];
+    int atomlist[100];
+    int Natom_bulk=0, Natom=0;
+    double **uatom_bulk = NULL, **uatom = NULL;
+    double rsmall;
+
+    // read header
+    read_CHGCAR_header(infile_bulk, dump, a0, latt_bulk, atomlist, uatom_bulk);
+    for (int i=0; atomlist[i]>=0; ++i) Natom_bulk += atomlist[i];
+    for (int d=0; d<9; ++d) latt_bulk[d] *= a0;
+
+    read_CHGCAR_header(infile, dump, a0, latt, atomlist, uatom);
+    write_CHGCAR_header(stdout, dump, a0, latt, atomlist, uatom);
+    for (int i=0; atomlist[i]>=0; ++i) Natom += atomlist[i];
+    for (int d=0; d<9; ++d) latt[d] *= a0;
+
+    // cutoff:
+    if (rcut == 0) 
+        // Wigner-Seitz radius:
+        rcut = cube_root( 3./(4.*M_PI) *det(latt_bulk) );
+    // minimum distance:
+    rsmall = 0.5*min_nn(latt, Natom, uatom);
+
+    // read grid values
+    int N[3];
+    fgets(dump, sizeof(dump), infile);
+    sscanf(dump, "%d %d %d", N+0, N+1, N+2);
+    int Ng=N[0]*N[1]*N[2];
+    double* rho = new double[Ng]; // charge density
+    read_grid(infile, rho, Ng);
+    myclose(infile);
+
+    // non-spin polarized case, hard-coded though...
+    for (int ISPIN=0; ISPIN<1; ++ISPIN) {
+        //++ grid values
+        int Nb[3];
+        fgets(dump, sizeof(dump), infile_bulk);
+        sscanf(dump, "%d %d %d", Nb+0, Nb+1, Nb+2);
+
+        int Nbg=Nb[0]*Nb[1]*Nb[2];
+        double* rhob = new double[Nbg];
+
+        // ++ read in our bulk grid
+        read_grid(infile_bulk, rhob, Nbg);
+
+        // Now, all of our analysis.
+        // 1. determine our low-pass filter values (from bulk)
+        if (TESTING) {
+            double cell=0;
+            for (int ng=0; ng<Nbg; ++ng) cell += rhob[ng];
+            fprintf(stderr, "# sum rhob= %g x %d x %d\n", 
+                cell/(Natom_bulk*Nbg), Natom_bulk, Nbg);
+            int negcount=0;
+            for (int ng=0; ng<Nbg; ++ng) if (rhob[ng]<0) ++negcount;
+            fprintf(stderr, "# negcount= %d/%d = %g\n", 
+                negcount, Nbg, (double)negcount/(double)Nbg);
+        }
+        double l0, filter_a, filter_b;
+        double cell_charge;
+        const double FILTER_MULT = 0.45;  // at some point, should make this a param...
+        {
+            double* rhobR = new double[Nbg];
+            double* rhobI = new double[Nbg];
+            forward_3dfft(rhob, Nb[0], Nb[1], Nb[2], rhobR, rhobI);
+            cell_charge = rhobR[0]/Nbg; // need this for later
+            if (GAUSSIAN)
+                extract_filter_gaussian(rhobR, rhobI, Nb, latt_bulk, l0);
+            else
+                extract_filter(rhobR, rhobI, Nb, latt_bulk, filter_a, filter_b);
+            if (VERBOSE) {
+                fprintf(stderr, "# rsmall= %g\n", rsmall);
+                fprintf(stderr, "# rcut=   %g\n", rcut);
+                if (GAUSSIAN) {
+                    fprintf(stderr, "# rho l0= %g\n", l0);
+                    fprintf(stderr, "# mult_fact= %.8lf\n", FILTER_MULT);
+                }
+                else
+                    fprintf(stderr, "# rho_filter(G)= %.8le |G|^ %.8lf\n", filter_a, filter_b);
+                fprintf(stderr, "# scale_fact= %d\n", Natom);
+            }
+            // garbage collection
+            delete[] rhob; delete[] rhobR; delete[] rhobI;
+        }
+
+        // 2. low-pass truncation filter
+        double* rhoR = new double[Ng];
+        double* rhoI = new double[Ng];
+        double basescale = (double)Ng * (double)Natom; // needed due to size
+        if (TESTING) {
+            double cell=0;
+            for (int ng=0; ng<Ng; ++ng) cell += rho[ng];
+            fprintf(stderr, "# sum rho=  %g x %d x %d\n", 
+                    cell/basescale, Natom, Ng);
+            int negcount=0;
+            for (int ng=0; ng<Ng; ++ng) if (rho[ng]<0) ++negcount;
+            fprintf(stderr, "# negcount= %d/%d = %g\n", 
+                    negcount, Ng, (double)negcount/(double)Ng);      
+        }
+        forward_3dfft(rho, N[0], N[1], N[2], rhoR, rhoI);
+        
+        if (VERBOSE) {
+            fprintf(stderr, "# rhoR[G=0]= %g x %d x %d\n", 
+                    rhoR[0]/basescale, Natom, Ng);
+            fprintf(stderr, "# should be= %g\n", cell_charge);
+            fprintf(stderr, "# RESCALE= %d\n", RESCALE);
+            fprintf(stderr, "# SMOOTH= %d\n", SMOOTH);
+        }
+        // scale values to correct charge / spin:
+        if (RESCALE) {
+            double scale = cell_charge * basescale/ rhoR[0];
+            for (int ng=0; ng<Ng; ++ng) {
+                rhoR[ng] *= scale; rhoI[ng] *= scale;
+            }
+        }
+
+        // smooth it out
+        if (SMOOTH) {
+            if (GAUSSIAN)
+                apply_filter_gaussian(rhoR, rhoI, N, latt, l0/FILTER_MULT);
+            else
+                apply_filter(rhoR, rhoI, N, latt, filter_a, filter_b);
+        }
+        inverse_3dfft(rhoR, rhoI, N[0], N[1], N[2], rho);
+
+        // garbage collection
+        delete[] rhoR; delete[] rhoI;
+
+        // quick sanity check against "negative" charge:
+        if (ISPIN == 0) {
+            for (int ng=0; ng<Ng; ++ng)
+                if (rho[ng]<0) rho[ng]=0;
+            if (RESCALE) {
+                double cell=0;
+                for (int ng=0; ng<Ng; ++ng) cell += rho[ng];
+                if (TESTING) {
+                    fprintf(stderr, "# sum rho=  %g x %d x %d\n", 
+                    cell/basescale, Natom, Ng);
+                }
+                double scale = cell_charge * basescale/ cell;
+                for (int ng=0; ng<Ng; ++ng) rho[ng] *= scale;
+            }
+        }
+
+        // *** OUTPUT ***
+        printf(" %4d %4d %4d\n", N[0], N[1], N[2]);
+        write_grid(stdout, rho, Ng);
+    
+        if (feof(infile_bulk)) {
+            myclose(infile_bulk);
+            return ERROR;
+        }
+
+        //++ (augmentation charges?)
+        fgets(dump, sizeof(dump), infile_bulk);
+        if (dump[0] == 'a') {
+            // augmentation charges...
+            int aug_grid;
+            sscanf(dump, "%*s %*s %*d %d", &aug_grid);
+            double* aug = new double[aug_grid];
+            read_grid(infile_bulk, aug, aug_grid);
+            
+            for (int n=0; n<Natom; ++n) {
+                printf("augmentation occupancies %3d %3d\n", n+1, aug_grid);
+                write_grid(stdout, aug, aug_grid);
+            }
+            fgets(dump, sizeof(dump), infile_bulk);
+        }
+
+        if (feof(infile_bulk)) {
+            myclose(infile_bulk);
+            return ERROR;
+        }
+
+    }
+    myclose(infile_bulk);
+    return ERROR;
+}
diff --git a/chgcar-smoothing/dcomp.H b/chgcar-smoothing/dcomp.H
new file mode 100644
index 0000000..9823bbb
--- /dev/null
+++ b/chgcar-smoothing/dcomp.H
@@ -0,0 +1,39 @@
+#ifndef __DCOMP_H
+#define __DCOMP_H
+#include <math.h>
+
+/*
+  Program: dcomp.H
+  Author:  D. Trinkle
+  Date:    Sept. 27, 2002
+  Purpose: Define dcomp, zero, and round (obsolete in Mac OS X -- math.h)
+*/
+
+// Some general functions, etc.
+
+const double TOLER = 1e-7;
+
+inline int dcomp (double a, double b) 
+{
+  if (a>b)
+    return ( (a-b) < TOLER );
+  else
+    return ( (b-a) < TOLER );
+}
+
+inline int zero (double a) 
+{
+  return ( (a < TOLER) && (a > -TOLER) );
+}
+
+/*
+inline int round (double x) 
+{
+  if (x > 0)
+    return (int) (x+0.5);
+  else
+    return -((int) (-x+0.5));
+}
+*/
+
+#endif
diff --git a/chgcar-smoothing/io.H b/chgcar-smoothing/io.H
new file mode 100644
index 0000000..afb32ee
--- /dev/null
+++ b/chgcar-smoothing/io.H
@@ -0,0 +1,742 @@
+#ifndef __IO_H
+#define __IO_H
+
+/*
+  Program: io.H
+  Author:  D. Trinkle
+  Date:    Jan. 22, 2003
+  Purpose: This is the general part of the code, designed to do all
+           parsing of both the command line and the (ohmms style)
+	   metafiles.
+
+	   This header file contains the following subroutines:
+
+	   parse_commandline (argc, argv, Narg, args, 
+                              VERBOSE, TESTING, MEMORY,
+			      Nflags, flaglist[], flagon[]);
+	     --separates the commandline arguments given in argv
+	       into the Narg pieces (in order), and removes any
+	       verbose, testing, or memory flags and puts them into the
+	       associated flags.  We also check flags against a user
+	       defined list of possible flags in flaglist, and
+	       turn on the appropriate flagon if found.  Doesn't
+	       support flags with parameters.  Return value: ERROR code.
+	       Note: there is a special error code, for help (ERROR_HELP).
+
+	   read_meta (meta_file, cart, u, Natoms)
+
+	     --reads in the input meta file (formatted for OHMMS).
+               Assumes that meta_file is an already open file (so that
+               we can feed stdin as an option).  Doesn't close the
+               file either.  Allocates memory for each of the
+               pointers, and fills it up.  Return value: ERROR code.
+
+           free_meta (u, Natoms)
+	     --frees up the associated memory (done for completeness only).
+
+	   verbose_output_meta (cart, u, Natoms, opening) 
+	     --outputs everything we read in from the input file,
+	       preceding each line with opening.
+
+	   write_meta (meta_file, cart, u, atom_type, Natoms)
+	     --creates a meta_file with the correctly labeled atoms
+
+	   write_xyz (meta_file, cart, u, atom_type, Natoms, basename,
+	              Nexpand, expand)
+	     --creates an xyz file with the correctly labeled atoms.
+	       We name each atom as "basename" with the atom_type
+	       number concatenated.  Nexpand is the number of "expansion"
+	       integer shifts we give (we output Natoms * Nexpand atoms).
+
+
+	   print_long_help (prog_name, arglist, Nflags, flaglist, argexpl)
+	     --prints a standard help message (includes input file
+	       format, list of crystal classes), but also lists
+	       the correct format for the program by calling
+	       print_short_help().
+
+	   print_short_help (prog_name, arglist, Nflags, flaglist, argexpl)
+	     --lists the correct format for the program.  Called if
+	       the number of arguments is too low, or by long_help.
+
+	   insidecell (double x)
+	     --translate x to within [0,1)
+
+	   myopenr (char *filename)
+	     --open a file; if filename is '-', set to stdin
+
+	   myopenw (char *filename)
+	     --open a file; if filename is '-', set to stdout
+
+	   myclose (FILE *f)
+	     --close a file, unless its stdin or stdout
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <iostream>
+#include <iomanip>
+#include "dcomp.H"
+
+//****************************** ERROR FLAGS ***************************
+
+// Note: we reuse error codes, since once we throw an error, everything
+//       will stop anyway.
+
+// errors from parse_commandline:
+const int ERROR_SHORT   = 1; // Not enough arguments on line
+const int ERROR_BADFLAG = 2; // We got a bad flag
+const int ERROR_HELP    = 4; // Not really an error, but we can't continue
+
+// errors from read_meta:
+const int ERROR_NOFILE =      1; // General error if a file can't be opened.
+const int ERROR_BADFILE =     2; // File has wrong format.
+const int ERROR_ZEROVOL =     4; // Cartesian cell has zero volume
+const int ERROR_LEFTHANDED =  8; // Cartesian cell has negative volume
+const int ERROR_MEMORY =     16; // Not enough memory
+
+
+inline int has_error (int ERROR, int ERROR_CODE) 
+{
+  return ( (ERROR & ERROR_CODE) != 0 );
+}
+
+
+//****************************** SUBROUTINES ***************************
+// Headers for all:
+inline double insidecell (double x);
+int parse_commandline (int argc, char **argv, 
+		       int Narg, char **args, 
+		       int &VERBOSE, int &TESTING, int &MEMORY,
+		       int Nflags, const char* flaglist, int* flagon);
+
+// handle a variable number of arguments... (called by other)
+int parse_commandline_var (int argc, char **argv, 
+			   int& Nargs, char **args, 
+			   int &VERBOSE, int &TESTING, int &MEMORY,
+			   int Nflags, const char* flaglist, int* flagon);
+
+int  read_meta (FILE *meta_file, double cart[9], double** &u, int &Natoms);
+void free_meta (double** &u, int Natoms);
+void verbose_output_meta (double cart[9], double** u, int Natoms,
+			  const char* opening);
+void write_meta (FILE *meta_file, double cart[9], double** u,
+		 int* atom_type, int Natoms, char* basename, 
+		 char** phase_name, int output_mask);
+void write_xyz (FILE *meta_file, double cart[9], double** u,
+		int* atom_type, int Natoms, char* basename,
+		char** phase_name, int output_mask, 
+		int Nexpand, int** expand);
+
+void print_short_help (char *prog_name, const char *arglist, 
+		       int Nflags, const char* flaglist, const char *argexpl);
+void print_long_help (char *prog_name, const char *arglist,
+		      int Nflags, const char* flaglist, const char *argexpl);
+
+
+inline FILE* myopenr(char *filename) 
+{
+  if (filename[0] == '-') return stdin;
+  else                    return fopen(filename, "r");
+}
+
+inline FILE* myopenw(char *filename) 
+{
+  if (filename[0] == '-') return stdout;
+  else                    return fopen(filename, "w");
+}
+  
+inline void myclose(FILE *f) 
+{
+  if ((f != stdin) && (f != stdout) && (f != NULL)) fclose(f);
+}
+
+
+//******************************* insidecell ***************************
+inline double insidecell (double x) 
+{
+  double y;
+  // Return a value less than 1, and greater than or equal to 0.
+  for (y = x + 1 + 1.e-13 - (int)(x); y >= 1.0; y -= 1.0) ;
+  y -= 1.e-13;
+  if (y < 0.) return 0;
+  return y;
+}
+
+
+// ***************************** is_blank ******************************
+inline int is_blank (char c) {return (c == ' ') || (c == '\t') || (c == '\n');}
+
+
+// Move to the next non-blank part of string
+inline void nextnonblank (char* &p) 
+{
+  for ( ; ((*p)!='\0') && is_blank(*p); ++p) ;
+}
+
+// Move to the end of string (find the first blank)
+inline void nextblank (char* &p) 
+{
+  for ( ; ((*p)!='\0') && (!is_blank(*p)); ++p) ;
+}
+
+const char COMMENT_CHAR = '#';
+
+inline void nextnoncomment (FILE* infile, char* dump, const int &size) 
+{
+  do {fgets(dump, size, infile);
+  } while ((!feof(infile)) && (dump[0] == COMMENT_CHAR));
+}
+
+
+//*************************** parse_commandline ************************
+// We separate the commandline arguments given in argv into the Narg 
+// pieces (in order), and remove verbose, testing, or memory flags and
+// puts them into the associated flags.  Return value: ERROR code.
+// Note the error codes listed above.
+// 
+// The flags (not case sensitive) -- anything starting with -, but not
+// a dash by itself (could refer to stdin / stdout)
+//   -v       : verbose
+//   -t       : testing mode (also flips verbose, by default)
+//   -m <mem> : a general memory flag; set MEMORY to value of second param.
+//   -h       : we _want_ help
+//   (all other flags generate an error)
+// Everything that's _not_ a flag gets put into args, one at a time.
+// We assume that args has NOT BEEN ALLOCATED, and that
+// VERBOSE, TESTING, and MEMORY have already been set to default
+// values--we will only change them if they show up in the command line.
+// 
+// We are going to do pointer _assignments_ on args, so it should
+// simply be an array of N pointers to char* (there's no reason to 
+// go around _copying_ these arguments).
+
+// Now backwards compatible; the _var version allows for a variable
+// number of arguments (-1), but still throws errors like a champ
+// if given a constant number of arguments
+
+int parse_commandline(int argc, char **argv, 
+		      int Narg, char **args, 
+		      int &VERBOSE, int &TESTING, int &MEMORY,
+		      int Nflags, const char* flaglist, int* flagon) 
+{
+  int N, ERROR;
+  N=Narg;
+  ERROR = parse_commandline_var(argc, argv, N, args, VERBOSE,
+				TESTING, MEMORY, Nflags, flaglist, flagon);
+  if (N<Narg)
+    ERROR = ERROR | ERROR_SHORT;
+  return ERROR;
+}
+
+int parse_commandline_var(int argc, char **argv, 
+			  int& Narg, char **args, 
+			  int &VERBOSE, int &TESTING, int &MEMORY,
+			  int Nflags, const char* flaglist, int* flagon) 
+{
+  int ERROR, HELP;
+  int count; // Count of how many args we have.
+  int n;     // Counting through argc.
+  int i, j;  // Counting through our flaglist.
+  char flag; // Our flag (maybe)
+  int FLAGFOUND;
+  int MEMFOUND; // Special flag indicating that we hit -m
+  
+  count = 0; // No arguments yet.
+  ERROR = 0; // We just started, so nothing's gone wrong yet.  
+  HELP = 0;  // Assume they don't want help till they ask for it.
+  // Note: we DON'T set verbose or testing... if they're already
+  // true before we get to this point, we don't need to upset that.
+
+  // First argument is always the program name, so start at 1.
+  for (n=1; n<argc; ++n) {
+    if ((argv[n][0] == '-') && (argv[n][1] != 0) && (!isdigit(argv[n][1]))) {
+      // We've got a flag!!
+      MEMFOUND = 0;
+      for (j=1; argv[n][j] != 0; ++j) {
+	flag = tolower(argv[n][j]);
+	switch (flag) {
+	case 'v': VERBOSE = 1; break;
+	case 't': TESTING = 1; break;
+	case 'h': HELP = 1; break;
+	case 'm': MEMFOUND = 1; break; // We'll extract MEMORY later...
+	  // If we get this far, that means we may have a user-defined
+	  // flag
+	default:
+	  FLAGFOUND = 0;
+	  for (i=0; i<Nflags; ++i)
+	    if (flag == tolower(flaglist[i])) {
+	      flagon[i] = 1;
+	      FLAGFOUND = -1;
+	    }
+	  if (!FLAGFOUND)
+	    // Throw an error--we didn't find our flag.
+	    ERROR = ERROR | ERROR_BADFLAG;
+	}
+      }
+      if (MEMFOUND) {
+	// Extract the "memory" value:
+	if ((n+1)<argc) {
+	  ++n; // Increment arg counter:
+	  sscanf(argv[n], "%d", &MEMORY);
+	}
+	else
+	  ERROR = ERROR | ERROR_SHORT;
+      }
+    }
+    else {
+      // We didn't get a flag, so copy over the arg. info, if 
+      // we haven't read all the arguments yet.
+      // Narg < 0 --> variable number of arguments
+      if ( (Narg<0) || (count < Narg) ) {
+	// POINTER ASSIGNMENT:
+	args[count] = argv[n];
+	++count;
+      }
+    }
+  }
+  // If we're testing, we're _definitely_ verbose:
+  VERBOSE = VERBOSE || TESTING;
+  if (HELP)         ERROR = ERROR | ERROR_HELP;  // We called help.
+  if (count < abs(Narg)) ERROR = ERROR | ERROR_SHORT; // Not enough args.
+  Narg = count;
+  
+  return ERROR;
+}
+
+
+//****************************** read_meta *****************************
+// We read in the input file (formatted as an OHMMS metafile).  Assumes
+// that meta_file is an already open file (so that we can feed stdin as
+// an option).  Doesn't close the file either.
+// Note: an OHMMS meta_file contains lots of information; we're looking
+// for just two tags: <lattice>, for the lattice information, and
+// <TBMD>, for the atom location information.  We ignore everything else.
+// Also assumes the atoms are in unit coord. (though this could be
+// changed for future versions to convert back to unit cell coord).
+// Allocates memory for each of the pointers, and fills it up.
+// Return value: ERROR code (see above)
+// ==== input file ====
+// ...                        # extra stuff (we ignore)
+// <lattice>
+// a1.x a1.y a1.z             # Cartesian coord of unit cell
+// a2.x a2.y a2.z
+// a3.x a3.y a3.z
+// </lattice>
+// ...                        # extra stuff
+// <TBMD>
+// number_of_ions = Natoms    # Number of atoms in unit cell
+// positions unit/cartesian   # Default is cartesian, unless "u" is first
+// i1 u1.1 u1.2 u1.3          # Atom type, and locations in direct coord.
+// ...
+// iN uN.1 uN.2 uN.3
+// </TBMD>
+// ...                        # extra stuff
+// ==== input file ====
+
+int read_meta(FILE *meta_file, double cart[9], double** &u, int &Natoms) 
+{
+  char dump[512]; // For reading files (allows extra characters at the end
+                  // of each line)
+  int i, k;
+  int READ_LATTICE, READ_ATOMS;
+  int IN_UNIT;
+  char *p;        // String used to "parse" our tags
+
+  if (meta_file == NULL)
+    return ERROR_NOFILE;
+
+  // ========================== read input file ========================
+
+  // We're gonna read until either:
+  // a) we've read both our lattice and atoms, or
+  // b) EOF
+  READ_LATTICE = 0;
+  READ_ATOMS = 0;
+  while ( ( (!READ_LATTICE) || (!READ_ATOMS) )
+	  && (! feof(meta_file)) ) {
+    fgets(dump, sizeof(dump), meta_file); // Grab our next line.
+    // First, convert dump to lower case:
+    for (p = dump; *p != 0; ++p) *p = tolower(*p);
+
+    // <lattice>
+    p = strstr(dump, "lattice");
+    if ((p != NULL) && (!READ_LATTICE)) {
+      // We've got lattice info!
+      READ_LATTICE = -1;
+      // a1.x a1.y a1.z                # Cartesian coord of unit cell
+      // a2.x a2.y a2.z
+      // a3.x a3.y a3.z
+      for (i=0; i<3; ++i) {
+	fgets(dump, sizeof(dump), meta_file);
+	sscanf(dump, "%lf %lf %lf", 
+	       cart+(3*i), cart+(3*i+1), cart+(3*i+2));
+      }
+      continue; // Go on to the next line in the file.
+    }
+
+    // number_of_ions (easier to search for than TBMD)
+    p = strstr(dump, "number_of_ions");
+    if ((p != NULL) && (!READ_ATOMS)) {
+      // We've got atom info!
+      READ_ATOMS = -1;
+
+      // number_of_ions = Natoms
+      // Let's jump to the first integer on that line
+      p = dump + (strcspn (dump, "0123456789"));
+      sscanf(p, "%d", &Natoms);
+
+      // positions unit/cartesian  # Read that second word.
+      fgets(dump, sizeof(dump), meta_file);
+      p = dump; nextnonblank(p); // First word
+      // Now, let's look for a word beginning with u (after "positions"):
+      do {
+	nextblank(p); nextnonblank(p); // Next word
+	IN_UNIT = ((*p == 'u') || (*p == 'U'));
+      } while ((!IN_UNIT) && (*p != '\0') && (*p != '#'));
+      
+      // i1 u1.1 u1.2 u1.3          # Atom type, and locations in direct coord.
+      // ...
+      // iN uN.1 uN.2 uN.3
+      u = new double *[Natoms];
+      for (i=0; i<Natoms; ++i) {
+	u[i] = new double[3];
+	fgets(dump, sizeof(dump), meta_file);
+	// Note: we ignore the "atom type" in the metafile.
+	sscanf(dump, "%d %lf %lf %lf", &k, u[i], u[i]+1, u[i]+2);
+	if (IN_UNIT) {
+	  for (k=0; k<3; ++k)
+	    u[i][k] = insidecell(u[i][k]);
+	}
+	else {
+	  // Do nothing... we'll do the conversion *after*, in case
+	  // we haven't read our cell information yet.
+	}
+      }
+      continue;
+    }
+    // Space reserved for possible future parsing
+  }
+
+  if ( (!READ_LATTICE) || (!READ_ATOMS) ) // We didn't get everything
+    return ERROR_BADFILE;
+
+  // ============================ test values ==========================
+
+  // Determine determinants:
+  double det_cart;
+  
+  det_cart = cart[0]*(cart[4]*cart[8] - cart[7]*cart[5]) -
+    cart[1]*(cart[3]*cart[8] - cart[6]*cart[5]) +
+    cart[2]*(cart[3]*cart[7] - cart[6]*cart[4]);
+  
+  if (dcomp(det_cart, 0)) return ERROR_ZEROVOL;
+  if (det_cart < 0)       return ERROR_LEFTHANDED;
+
+  // Now, convert cartesian to unit cell if necessary:
+  if (! IN_UNIT) {
+    // First, invert the cell.
+    double inv_cart[9];
+    double unit[3];
+    inv_cart[0] = cart[4]*cart[8] - cart[5]*cart[7];
+    inv_cart[1] = cart[2]*cart[7] - cart[1]*cart[8];
+    inv_cart[2] = cart[1]*cart[5] - cart[2]*cart[4];
+    
+    inv_cart[3] = cart[5]*cart[6] - cart[3]*cart[8];
+    inv_cart[4] = cart[0]*cart[8] - cart[2]*cart[6];
+    inv_cart[5] = cart[2]*cart[3] - cart[0]*cart[5];
+    
+    inv_cart[6] = cart[3]*cart[7] - cart[4]*cart[6];
+    inv_cart[7] = cart[1]*cart[6] - cart[0]*cart[7];
+    inv_cart[8] = cart[0]*cart[4] - cart[1]*cart[3];
+    for (i=0; i<9; ++i) inv_cart[i] *= (1./det_cart);
+
+    // Now, let's convert 'em:
+    for (i=0; i<Natoms; ++i) {
+      for (k=0; k<3; ++k)
+	unit[k] = insidecell(u[i][0]*inv_cart[k] +
+			     u[i][1]*inv_cart[3+k] +
+			     u[i][2]*inv_cart[6+k]);
+      // Now, copy over.
+      for (k=0; k<3; ++k) u[i][k] = unit[k];
+    }
+  }
+
+  // If we've gotten this far, it all went well.
+  return 0;
+}
+
+
+// ************************* verbose_output_meta ***********************
+// Little routine to output everything we read in from cell.
+
+void verbose_output_meta (double cart[9], double** u, int Natoms,
+			  const char* opening) 
+{
+  int i;
+
+  printf("%sCartesian coordinates:\n", opening);
+  for (i=0; i<3; ++i)
+    printf("%sa%1d = %8.5lf %8.5lf %8.5lf\n", opening, i+1, 
+	   cart[3*i], cart[3*i+1], cart[3*i+2]);
+
+  printf("%sAtoms in cell (%d):\n", opening, Natoms);
+  if (u == NULL)
+    printf("%satom positions not read.\n", opening);
+  else
+    for (i=0; i<Natoms; ++i)
+      printf("%su%1d = %8.5lf %8.5lf %8.5lf\n", opening, i+1, 
+	     u[i][0], u[i][1], u[i][2]);
+
+}
+
+
+//****************************** write_meta ****************************
+// Output the metafile with the correct atom labels
+void write_meta (FILE *meta_file, double cart[9], double** u,
+		 int* atom_type, int Natoms, char* basename, 
+		 char** phase_name, int output_mask) 
+{
+  int n, j;
+
+  int omask, WITH_NONE;
+  if (output_mask <= 0) {
+    WITH_NONE = -1;
+    omask = -output_mask;
+  }
+  else {
+    WITH_NONE = 0;
+    omask = output_mask;
+  }
+    
+  // How many atoms?
+  int count;
+  int *show_atom;
+  show_atom = new int[Natoms];
+
+  for (n=0; n<Natoms; ++n) {
+    show_atom[n] = 0;
+    if (atom_type[n] == 0) {
+      if (WITH_NONE) show_atom[n] = 1;
+    }
+    else {
+      for (j=0; j<30; ++j) {
+	if ( ( (1<<j) & omask ) != 0)
+	  if ( ( (1<<j) & atom_type[n]) != 0) show_atom[n] = 1;
+      }
+    }
+  }
+  for (n=0, count=0; n<Natoms; ++n)
+    count += show_atom[n];
+
+  fprintf(meta_file, "<unitcell>\n");
+  fprintf(meta_file, "  <lattice>\n");
+  fprintf(meta_file, "    %19.15lf %19.15lf %19.15lf  # a1\n",
+	  cart[0], cart[1], cart[2]);
+  fprintf(meta_file, "    %19.15lf %19.15lf %19.15lf  # a2\n",
+	  cart[3], cart[4], cart[5]);
+  fprintf(meta_file, "    %19.15lf %19.15lf %19.15lf  # a3\n",
+	  cart[6], cart[7], cart[8]);
+  fprintf(meta_file, "  </lattice>\n");
+  fprintf(meta_file, "  <bconds> p p p </bconds>\n");
+  fprintf(meta_file, "</unitcell>\n");
+  fprintf(meta_file, "<ptclset>\n");
+  fprintf(meta_file, "  <format> tbmd </format>\n");
+  fprintf(meta_file, "  <name> none </name>\n");
+  fprintf(meta_file, "<TBMD>\n");
+  fprintf(meta_file, "number_of_ions = %d\n", count);
+  fprintf(meta_file, "positions = unit\n");
+
+  for (n=0; n<Natoms; ++n)
+    if (show_atom[n]) {
+      fprintf(meta_file, "  1  %17.15lf %17.15lf %17.15lf  # %d %s",
+	      u[n][0], u[n][1], u[n][2], n+1, basename);
+      for (j=0; j<30; ++j)
+	if ( ( (1<<j) & atom_type[n]) != 0)
+	  fprintf(meta_file, " %s", phase_name[j]);
+      fprintf(meta_file, "\n");
+    }
+  
+  fprintf(meta_file, "</TBMD>\n");
+  fprintf(meta_file, "</ptclset>\n");
+}
+
+
+//****************************** write_xyz *****************************
+// Output an XYZ format file.  We make "copies" of the atoms by using
+// the "expand" vectors.  If Nexpand == 0, we just output what we've
+// got; else, we'll output Natoms * Nexpand atoms.
+void write_xyz (FILE *meta_file, double cart[9], double** u,
+		int* atom_type, int Natoms, char* basename,
+		char** phase_name, int output_mask, 
+		int Nexpand, int** expvect)
+{
+  int n, i, j;
+  double xyz[3];
+  double* uvect;
+  int expand_out;
+  int nexpand, **expand;
+  
+  expand_out = ((Nexpand > 1) && (expvect != NULL));
+  if (expand_out) {
+    nexpand = Nexpand;
+    expand = expvect;
+  }
+  else {
+    nexpand = 1;
+    expand = new int *[1];
+    expand[0] = new int[3];
+    for (i=0; i<3; ++i) expand[0][i] = 0;
+  }
+
+  int omask, WITH_NONE;
+  if (output_mask <= 0) {
+    WITH_NONE = -1;
+    omask = -output_mask;
+  }
+  else {
+    WITH_NONE = 0;
+    omask = output_mask;
+  }
+    
+  // How many atoms?
+  int count;
+  int *show_atom;
+  show_atom = new int[Natoms];
+
+  for (n=0; n<Natoms; ++n) {
+    show_atom[n] = 0;
+    if (atom_type[n] == 0) {
+      if (WITH_NONE) show_atom[n] = 1;
+    }
+    else {
+      for (j=0; j<30; ++j) {
+	if ( ( (1<<j) & omask ) != 0)
+	  if ( ( (1<<j) & atom_type[n]) != 0)
+	    show_atom[n] = 1;
+      }
+    }
+  }
+  for (n=0, count=0; n<Natoms; ++n)
+    count += show_atom[n];
+
+  fprintf(meta_file, "%d\n", count*nexpand);
+  fprintf(meta_file, "  %s phases: ", basename);
+  for (j=0; j<30; ++j)
+    if ( ( (1<<j) & output_mask ) != 0)
+      fprintf(meta_file, "%s ", phase_name[j]);
+  if (WITH_NONE)
+    fprintf(meta_file, "unidentified ");
+  fprintf(meta_file, ": flagged, expanded %d times\n", nexpand);
+
+  for (n=0; n<Natoms; ++n)
+    if (show_atom[n]) {
+      uvect = u[n];
+      for (i=0; i<nexpand; ++i) {
+	xyz[0] = cart[0]*(uvect[0] + expand[i][0])
+	  + cart[3]*(uvect[1] + expand[i][1])
+	  + cart[6]*(uvect[2] + expand[i][2]);
+	xyz[1] = cart[1]*(uvect[0] + expand[i][0])
+	  + cart[4]*(uvect[1] + expand[i][1])
+	  + cart[7]*(uvect[2] + expand[i][2]);
+	xyz[2] = cart[2]*(uvect[0] + expand[i][0])
+	  + cart[5]*(uvect[1] + expand[i][1])
+	  + cart[8]*(uvect[2] + expand[i][2]);
+	fprintf(meta_file, "%s", basename);
+	for (j=0; j<30; ++j)
+	  if ( ( (1<<j) & atom_type[n]) != 0)
+	    fprintf(meta_file, "_%s", phase_name[j]);
+	fprintf(meta_file, " %19.15lf %19.15lf %19.15lf\n",
+		xyz[0], xyz[1], xyz[2]);
+      }
+    }
+  
+  if (!expand_out) {
+    delete[] expand[0];
+    delete[] expand;
+  }
+  else expand = NULL;
+
+  delete[] show_atom;
+}
+
+
+
+//****************************** free_meta *****************************
+// Free up the group and atom info:
+
+void free_meta(double** &u, int Natoms) 
+{
+  int i;
+
+  if (u != NULL) {
+    for (i=0; i<Natoms; ++i)
+      delete[] u[i];
+    delete[] u;
+  }
+}
+
+
+
+// ************************** print_short_help *************************
+// List the correct format for the program.  Called if the number of
+// arguments is too low, or by print_long_help.
+// Prog_name should argv[0] (the invoked program name)
+// arglist does NOT need to contain the flag info
+// argexpl can contain an optional explanation of the command line param
+
+const char* FLAGLIST = "[-v] [-t] [-m <memorysize>] [-h]";
+const char* FLAGexpl = 
+"  -v               verbose output\n\
+  -t               testing (sets -v as well)\n\
+  -m <memorysize>  set memory allocation to <memorysize>\n\
+  -h               print detailed help";
+
+void print_short_help (char *prog_name, const char *arglist, 
+		       int Nflags, const char* flaglist, const char *argexpl)
+{
+  int i;
+  fprintf(stderr, "%s %s", prog_name, arglist);
+  if (Nflags > 0) fprintf(stderr, " [-");
+  for (i=0; i<Nflags; ++i)
+    fprintf(stderr, "%c", flaglist[i]);
+  if (Nflags > 0) fprintf(stderr, "]");
+  fprintf(stderr, " %s\n", FLAGLIST);
+  if (argexpl != NULL)
+    fprintf(stderr, "%s\n", argexpl);
+  fprintf(stderr, "%s\n", FLAGexpl);
+  fprintf(stderr, "\n");
+}
+
+
+
+// *************************** print_long_help *************************
+// Print a standard help message (includes input file format),
+// but also lists the correct format for the program by calling
+// print_short_help().
+
+const char* INPUTFILE = 
+"  We ignore all of the tags in the meta_file except:\n\
+   <lattice>\n\
+   a1.x a1.y a1.z             # Cartesian coord of unit cell\n\
+   a2.x a2.y a2.z\n\
+   a3.x a3.y a3.z\n\
+\n\
+   and:\n\
+   <TBMD>\n\
+   number_of_ions = Natoms    # Number of atoms in unit cell\n\
+   positions unit/cartesian   # Default is cartesian, unless \"u\" is first\n\
+   i1 u1.1 u1.2 u1.3          # Atom type, and locations in direct coord.\n\
+   ...\n\
+   iN uN.1 uN.2 uN.3";
+
+void print_long_help (char *prog_name, const char *arglist,
+		      int Nflags, const char* flaglist, const char *argexpl)
+{
+  // Call short help first.
+  print_short_help(prog_name, arglist, Nflags, flaglist, argexpl);
+  fprintf(stderr, "OHMMS meta-file format:\n%s\n\n", INPUTFILE);
+}
+#endif
diff --git a/chgcar-smoothing/matrix.H b/chgcar-smoothing/matrix.H
new file mode 100644
index 0000000..1cd6519
--- /dev/null
+++ b/chgcar-smoothing/matrix.H
@@ -0,0 +1,332 @@
+#ifndef __MATRIX_H
+#define __MATRIX_H
+#include <math.h>
+
+const double ident[9] = {1,0,0,0,1,0,0,0,1};
+
+// **** Matrix manipulation, all done in 9-v notation
+
+// DET: Returns the det. of an integer matrix
+inline int det (const int x[9]) 
+{
+  return (x[0]*(x[4]*x[8] - x[5]*x[7])
+    + x[1]*(x[6]*x[5] - x[3]*x[8])
+    + x[2]*(x[3]*x[7] - x[6]*x[4]));
+}
+
+inline double det (const double x[9]) 
+{
+  return (x[0]*(x[4]*x[8] - x[5]*x[7])
+    + x[1]*(x[6]*x[5] - x[3]*x[8])
+    + x[2]*(x[3]*x[7] - x[6]*x[4]));
+}
+
+// MULT: evaluates the product of a * b, returns in c:
+inline void mult (const int a[9], const int b[9], int c[9]) 
+{
+  c[0] = a[0]*b[0] + a[1]*b[3] + a[2]*b[6];
+  c[1] = a[0]*b[1] + a[1]*b[4] + a[2]*b[7];
+  c[2] = a[0]*b[2] + a[1]*b[5] + a[2]*b[8];
+  c[3] = a[3]*b[0] + a[4]*b[3] + a[5]*b[6];
+  c[4] = a[3]*b[1] + a[4]*b[4] + a[5]*b[7];
+  c[5] = a[3]*b[2] + a[4]*b[5] + a[5]*b[8];
+  c[6] = a[6]*b[0] + a[7]*b[3] + a[8]*b[6];
+  c[7] = a[6]*b[1] + a[7]*b[4] + a[8]*b[7];
+  c[8] = a[6]*b[2] + a[7]*b[5] + a[8]*b[8];
+}
+
+inline void mult (const double a[9], const int b[9], double c[9]) 
+{
+  c[0] = a[0]*b[0] + a[1]*b[3] + a[2]*b[6];
+  c[1] = a[0]*b[1] + a[1]*b[4] + a[2]*b[7];
+  c[2] = a[0]*b[2] + a[1]*b[5] + a[2]*b[8];
+  c[3] = a[3]*b[0] + a[4]*b[3] + a[5]*b[6];
+  c[4] = a[3]*b[1] + a[4]*b[4] + a[5]*b[7];
+  c[5] = a[3]*b[2] + a[4]*b[5] + a[5]*b[8];
+  c[6] = a[6]*b[0] + a[7]*b[3] + a[8]*b[6];
+  c[7] = a[6]*b[1] + a[7]*b[4] + a[8]*b[7];
+  c[8] = a[6]*b[2] + a[7]*b[5] + a[8]*b[8];
+}
+
+inline void mult (const int a[9], const double b[9], double c[9]) 
+{
+  c[0] = a[0]*b[0] + a[1]*b[3] + a[2]*b[6];
+  c[1] = a[0]*b[1] + a[1]*b[4] + a[2]*b[7];
+  c[2] = a[0]*b[2] + a[1]*b[5] + a[2]*b[8];
+  c[3] = a[3]*b[0] + a[4]*b[3] + a[5]*b[6];
+  c[4] = a[3]*b[1] + a[4]*b[4] + a[5]*b[7];
+  c[5] = a[3]*b[2] + a[4]*b[5] + a[5]*b[8];
+  c[6] = a[6]*b[0] + a[7]*b[3] + a[8]*b[6];
+  c[7] = a[6]*b[1] + a[7]*b[4] + a[8]*b[7];
+  c[8] = a[6]*b[2] + a[7]*b[5] + a[8]*b[8];
+}
+
+inline void mult (const double a[9], const double b[9], double c[9]) 
+{
+  c[0] = a[0]*b[0] + a[1]*b[3] + a[2]*b[6];
+  c[1] = a[0]*b[1] + a[1]*b[4] + a[2]*b[7];
+  c[2] = a[0]*b[2] + a[1]*b[5] + a[2]*b[8];
+  c[3] = a[3]*b[0] + a[4]*b[3] + a[5]*b[6];
+  c[4] = a[3]*b[1] + a[4]*b[4] + a[5]*b[7];
+  c[5] = a[3]*b[2] + a[4]*b[5] + a[5]*b[8];
+  c[6] = a[6]*b[0] + a[7]*b[3] + a[8]*b[6];
+  c[7] = a[6]*b[1] + a[7]*b[4] + a[8]*b[7];
+  c[8] = a[6]*b[2] + a[7]*b[5] + a[8]*b[8];
+}
+
+// MULT3 routines: D = ABC
+// only dii is a little different... the rest are all the same
+inline void mult (const int a[9], const int b[9], const int c[9],
+		  int d[9]) 
+{ int temp[9];  mult(a,b,temp);  mult(temp,c,d);}
+
+inline void mult (const double a[9], const int b[9], const int c[9],
+		  double d[9]) 
+{ int temp[9];  mult(b,c,temp);  mult(a, temp,d);}
+
+inline void mult (const int a[9], const double b[9], const int c[9],
+		  double d[9]) 
+{ double temp[9];  mult(a,b,temp);  mult(temp,c,d);}
+
+inline void mult (const double a[9], const double b[9], const int c[9],
+		  double d[9]) 
+{ double temp[9];  mult(a,b,temp);  mult(temp,c,d);}
+
+inline void mult (const int a[9], const int b[9], const double c[9],
+		  double d[9]) 
+{ int temp[9];  mult(a,b,temp);  mult(temp,c,d);}
+
+inline void mult (const double a[9], const int b[9], const double c[9],
+		  double d[9]) 
+{ double temp[9];  mult(a,b,temp);  mult(temp,c,d);}
+
+inline void mult (const int a[9], const double b[9], const double c[9],
+		  double d[9]) 
+{ double temp[9];  mult(a,b,temp);  mult(temp,c,d);}
+
+inline void mult (const double a[9], const double b[9], const double c[9],
+		  double d[9]) 
+{ double temp[9];  mult(a,b,temp);  mult(temp,c,d);}
+
+// MULT_SCALAR: Multiplies matrix a by scalar b, result in c
+inline void mult (const int a[9], const int b, int c[9]) 
+{ for (int i=0; i<9; ++i) c[i] = b*a[i]; }
+
+inline void mult (const int b, const int a[9], int c[9]) 
+{ for (int i=0; i<9; ++i) c[i] = b*a[i]; }
+
+inline void mult (const int a[9], const double b, double c[9]) 
+{ for (int i=0; i<9; ++i) c[i] = b*a[i]; }
+
+inline void mult (const double b, const int a[9], double c[9]) 
+{ for (int i=0; i<9; ++i) c[i] = b*a[i]; }
+
+inline void mult (const double a[9], const int b, double c[9]) 
+{ for (int i=0; i<9; ++i) c[i] = b*a[i]; }
+
+inline void mult (const int b, const double a[9], double c[9]) 
+{ for (int i=0; i<9; ++i) c[i] = b*a[i]; }
+
+inline void mult (const double a[9], const double b, double c[9]) 
+{ for (int i=0; i<9; ++i) c[i] = b*a[i]; }
+
+inline void mult (const double b, const double a[9], double c[9]) 
+{ for (int i=0; i<9; ++i) c[i] = b*a[i]; }
+
+
+// SQUARE: Calculate xT * x
+//   Note: the product is symmetric, so we get to do a smaller number
+//   of multiplications.
+
+inline void square (const int x[9], int s[9]) 
+{
+  s[0] = x[0]*x[0] + x[3]*x[3] + x[6]*x[6];
+  s[1] = x[0]*x[1] + x[3]*x[4] + x[6]*x[7];
+  s[2] = x[0]*x[2] + x[3]*x[5] + x[6]*x[8];
+  s[3] = s[1];
+  s[4] = x[1]*x[1] + x[4]*x[4] + x[7]*x[7];
+  s[5] = x[1]*x[2] + x[4]*x[5] + x[7]*x[8];
+  s[6] = s[2];
+  s[7] = s[5];
+  s[8] = x[2]*x[2] + x[5]*x[5] + x[8]*x[8];
+}
+
+inline void square (const int x[9], double s[9]) 
+{
+  s[0] = x[0]*x[0] + x[3]*x[3] + x[6]*x[6];
+  s[1] = x[0]*x[1] + x[3]*x[4] + x[6]*x[7];
+  s[2] = x[0]*x[2] + x[3]*x[5] + x[6]*x[8];
+  s[3] = s[1];
+  s[4] = x[1]*x[1] + x[4]*x[4] + x[7]*x[7];
+  s[5] = x[1]*x[2] + x[4]*x[5] + x[7]*x[8];
+  s[6] = s[2];
+  s[7] = s[5];
+  s[8] = x[2]*x[2] + x[5]*x[5] + x[8]*x[8];
+}
+
+inline void square (const double x[9], double s[9]) 
+{
+  s[0] = x[0]*x[0] + x[3]*x[3] + x[6]*x[6];
+  s[1] = x[0]*x[1] + x[3]*x[4] + x[6]*x[7];
+  s[2] = x[0]*x[2] + x[3]*x[5] + x[6]*x[8];
+  s[3] = s[1];
+  s[4] = x[1]*x[1] + x[4]*x[4] + x[7]*x[7];
+  s[5] = x[1]*x[2] + x[4]*x[5] + x[7]*x[8];
+  s[6] = s[2];
+  s[7] = s[5];
+  s[8] = x[2]*x[2] + x[5]*x[5] + x[8]*x[8];
+}
+
+// INVERSE: Evaluates inverse of x; returns it in inv / inverse (return value
+// is det x).  Need to divide by det to get correct inverse.
+inline int inverse (const int x[9], int inv[9])
+{
+  inv[0] = x[4]*x[8] - x[5]*x[7];
+  inv[1] = x[2]*x[7] - x[1]*x[8];
+  inv[2] = x[1]*x[5] - x[2]*x[4];
+
+  inv[3] = x[5]*x[6] - x[3]*x[8];
+  inv[4] = x[0]*x[8] - x[2]*x[6];
+  inv[5] = x[2]*x[3] - x[0]*x[5];
+
+  inv[6] = x[3]*x[7] - x[4]*x[6];
+  inv[7] = x[1]*x[6] - x[0]*x[7];
+  inv[8] = x[0]*x[4] - x[1]*x[3];
+  
+  return det(x);
+}
+
+inline double inverse (const double x[9], double inv[9])
+{
+  inv[0] = x[4]*x[8] - x[5]*x[7];
+  inv[1] = x[2]*x[7] - x[1]*x[8];
+  inv[2] = x[1]*x[5] - x[2]*x[4];
+
+  inv[3] = x[5]*x[6] - x[3]*x[8];
+  inv[4] = x[0]*x[8] - x[2]*x[6];
+  inv[5] = x[2]*x[3] - x[0]*x[5];
+
+  inv[6] = x[3]*x[7] - x[4]*x[6];
+  inv[7] = x[1]*x[6] - x[0]*x[7];
+  inv[8] = x[0]*x[4] - x[1]*x[3];
+  
+  return det(x);
+}
+
+// ********************************* magnsq ****************************
+// Returns the squared magnitude of a vector, where metric[9] is the
+// SYMMETRIC metric, and u is the vector in direct coordinates
+inline double magnsq (const double metric[9], const double u[3]) 
+{
+  return metric[0]*u[0]*u[0] + metric[4]*u[1]*u[1] + metric[8]*u[2]*u[2]
+    + 2*metric[1]*u[0]*u[1] + 2*metric[2]*u[0]*u[2] + 2*metric[5]*u[1]*u[2];
+}
+
+inline double magnsq (const int metric[9], const double u[3]) 
+{
+  return metric[0]*u[0]*u[0] + metric[4]*u[1]*u[1] + metric[8]*u[2]*u[2]
+    + 2*metric[1]*u[0]*u[1] + 2*metric[2]*u[0]*u[2] + 2*metric[5]*u[1]*u[2];
+}
+
+inline double magnsq (const double metric[9], const int u[3]) 
+{
+  return metric[0]*u[0]*u[0] + metric[4]*u[1]*u[1] + metric[8]*u[2]*u[2]
+    + 2*metric[1]*u[0]*u[1] + 2*metric[2]*u[0]*u[2] + 2*metric[5]*u[1]*u[2];
+}
+
+inline int magnsq (const int metric[9], const int u[3]) 
+{
+  return metric[0]*u[0]*u[0] + metric[4]*u[1]*u[1] + metric[8]*u[2]*u[2]
+    + 2*metric[1]*u[0]*u[1] + 2*metric[2]*u[0]*u[2] + 2*metric[5]*u[1]*u[2];
+}
+
+// ZERO: determine if matrix is zero
+inline int zero (const int a[9]) 
+{
+  return (a[0]==0) && (a[1]==0) &&(a[2]==0) &&
+    (a[3]==0) && (a[4]==0) && (a[5]==0) &&
+    (a[6]==0) && (a[7]==0) && (a[8]==0);
+}
+
+inline int zero (const double a[9]) 
+{
+  return zero(a[0]) && zero(a[1]) && zero(a[2]) &&
+    zero(a[3]) && zero(a[4]) && zero(a[5]) &&
+    zero(a[6]) && zero(a[7]) && zero(a[8]);
+}
+
+// ZERO_VECT: same thing for vectors
+inline int zero_vect(const int a[3]) {
+  return (a[0]==0) && (a[1]==0) && (a[2]==0);
+}
+
+inline int zero_vect(const double a[3]) {
+  return zero(a[0]) && zero(a[1]) && zero(a[2]);
+}
+
+// CAREFUL_INVERSE: inverts a matrix, symmetric or not, but polishes
+// the final result to within a set tolerance.
+// NOTE: unlike all of our other inverses, we go ahead and scale by
+// the determinant.
+const double CAREFUL_INV_TOL = 1e-15;
+const int MAX_ITER = 20; // maximum number of iterations
+
+void careful_inverse (const double a[9], double inv[9], double &err) 
+{
+  double b[9], ba[9], bab[9];
+  double deta;
+  int d;
+  
+  deta = inverse(a, b); // our first crack at it...
+  if (deta == 0) {
+    err = 1; // a pretty obvious flag...
+    return;
+  }
+  mult(b, 1./deta, b); // now b should be scaled accordingly.
+  
+  mult(b, a, ba);
+  for (err=0, d=0; d<9; ++d) err += fabs(ba[d]-ident[d]);
+  // polish the result, until err is below our desired tolerance
+  // but don't do more than n steps.
+  for (int n=0; (n<MAX_ITER) && (err > CAREFUL_INV_TOL); ++n) {
+    mult(ba, b, bab);
+    // polish in place:
+    for (d=0; d<9; ++d) b[d] = 2.*b[d] - bab[d];
+    mult(b, a, ba);
+    for (err=0, d=0; d<9; ++d) err += fabs(ba[d]-ident[d]);
+  }
+  // final result:
+  for (d=0; d<9; ++d) inv[d] = b[d];
+}
+
+inline void careful_inverse (const double a[9], double inv[9]) 
+{
+    double err;
+    careful_inverse(a, inv, err);
+}
+
+// Do the transpose "in place," with a loop "unrolled" version:
+
+#define __ISWAP__(x,y) ({temp = x; x = y; y = temp;})
+#define __DSWAP__(x,y) ({temp = x; x = y; y = temp;})
+
+inline void self_transpose (int a[9]) 
+{
+    int temp;
+    __ISWAP__(a[1], a[3]);
+    __ISWAP__(a[2], a[6]);
+    __ISWAP__(a[5], a[7]);
+}
+
+inline void self_transpose (double a[9]) 
+{
+    double temp;
+    __DSWAP__(a[1], a[3]);
+    __DSWAP__(a[2], a[6]);
+    __DSWAP__(a[5], a[7]);
+}
+
+#undef __ISWAP__
+#undef __DSWAP__
+
+#endif
\ No newline at end of file
diff --git a/chgcar-smoothing/smoothing.H b/chgcar-smoothing/smoothing.H
new file mode 100644
index 0000000..afd08b3
--- /dev/null
+++ b/chgcar-smoothing/smoothing.H
@@ -0,0 +1,294 @@
+#ifndef __SMOOTHING_H
+#define __SMOOTHING_H
+#include <math.h>
+#include <stdio.h>
+
+const double RHO_MIN_SCALE = 1e-4;
+
+double min_nn(double latt[9], int N, double** u);
+
+inline double cube_root (double x) { return exp(log(x)/3.); }
+
+void add_list (double* G_l, double* rho_l, int& Nl, double G, double rho);
+
+void extract_filter_gaussian(double* rhoR, double* rhoI, 
+                    int N[3], double latt[9], double& l0);
+
+void apply_filter_gaussian(double* rhoR, double* rhoI, 
+			   int N[3], double latt[9],
+			   double l0);
+
+void extract_filter(double* rhoR, double* rhoI, 
+		    int N[3], double latt[9],
+		    double& filter_a, double& filter_b);
+
+void apply_filter(double* rhoR, double* rhoI, 
+		  int N[3], double latt[9],
+		  double filter_a, double filter_b);
+
+// 3^3 * N*(N+1)/2 algorithm for finding shortest NN distance
+double min_nn(double latt[9], int N, double** u) 
+{
+    double metric[9];
+    square(latt, metric);
+    double rmin2=metric[0];
+    for (int m=0; m<N; ++m)
+    for (int n=m; n<N; ++n) {
+        double ud0[3] = {u[n][0]-u[m][0],u[n][1]-u[m][1],u[n][2]-u[m][2]}, ud[3];
+        int shift[3];
+        for (shift[0]=-1; shift[0]<=1; ++shift[0])
+            for (shift[1]=-1; shift[1]<=1; ++shift[1])
+                for (shift[2]=-1; shift[2]<=1; ++shift[2]) {
+	                for (int d=0; d<3; ++d) ud[d] = ud0[d] + shift[d];
+                    double rmagn2 = magnsq(metric, ud);
+                    if (rmagn2 == 0) continue;
+                    if (rmagn2 < rmin2) rmin2 = rmagn2;
+                }
+    }
+    return sqrt(rmin2);
+}
+
+void add_list (double* G_l, double* rho_l, int& Nl, double G, double rho) {
+    // I don't know why this was here... I've removed it
+    //  if (zero(rho)) return;
+    if (Nl==0) {
+        Nl++; G_l[0] = G; rho_l[0] = rho; return;
+    }
+    
+    int i;
+    for (i=0; (i<Nl) && (G>G_l[i]); ++i) ;
+    if (dcomp(G_l[i], G)) {
+        if (rho_l[i] < rho) rho_l[i] = rho;
+    }
+    else {
+        for (int j=Nl; j>i; --j) {
+            G_l[j] = G_l[j-1]; rho_l[j] = rho_l[j-1];
+        }
+        G_l[i] = G; rho_l[i] = rho; ++Nl;
+    }
+}
+
+// *************************** GAUSSIAN FILTERS ************************
+
+void extract_filter_gaussian(double* rhoR, double* rhoI, int N[3], double latt[9],
+                             double& l0) {
+    double Gmetric[9];
+    {
+        double temp[9];
+        careful_inverse(latt, temp);	// temp = [a]^-1
+        self_transpose(temp); // Definition is changed here, to avoid a problem.
+        mult(temp, 2*M_PI, temp);		// temp = 2pi[a]^-1
+        square(temp, Gmetric);		// Gmetric = (2pi)^2 [a]^-T[a]^-1
+    }
+    
+    int n[3], nc[3];
+    int Nh[3] = {N[0]/2, N[1]/2, N[2]/2};
+    
+    int nind=0;
+    int h[3], half[3];
+    int MEMORY = (Nh[0]+1)*(Nh[1]+1)*(Nh[2]+1); // hardcoded, I know...
+    double* G_l = new double[MEMORY];
+    double* rho_l = new double[MEMORY];
+    int Nl = 0;
+    
+    double rho_min = rhoR[0] * RHO_MIN_SCALE;
+
+    for (n[2]=0; n[2]<N[2]; ++(n[2]))
+        for (n[1]=0; n[1]<N[1]; ++(n[1]))
+            for (n[0]=0; n[0]<N[0]; ++(n[0])) {
+                for (int d=0; d<3; ++d) {
+                    if (n[d]>Nh[d]) nc[d] = n[d]-N[d];
+                    else            nc[d] = n[d];
+                    if (n[d]==Nh[d]) half[d] = 1;
+                    else             half[d] = 0;
+                }
+                double scaled_rho = hypot(rhoR[nind], rhoI[nind])
+                /( (half[0]+1)*(half[1]+1)*(half[2]+1) );
+                ++nind;
+                if (scaled_rho < rho_min) continue;
+
+                for (h[0]=0; h[0]<=half[0]; ++(h[0]))
+                    for (h[1]=0; h[1]<=half[1]; ++(h[1]))
+                        for (h[2]=0; h[2]<=half[2]; ++(h[2])) {
+                            double nc_h[3];
+                            for (int d=0; d<3; ++d) nc_h[d] = nc[d] - h[d]*N[d];
+                            double Gabs2 = magnsq(Gmetric, nc_h);
+                            add_list(G_l, rho_l, Nl, Gabs2, log(scaled_rho));
+                            if (Nl >= MEMORY) {
+                                fprintf(stderr, "** BAD MOJO: exceeded memory requirement in extract_filter_gaussian **\n");
+                                exit(-1);
+                            }
+                        }
+            }
+
+    // now, fit to a Gaussian (which is a linear fit, in this case)
+    double G_sum=0, G2_sum=0, rho_sum=0, rhoG_sum=0;
+    for (int i=0; i<Nl; ++i) {
+        G_sum    += G_l[i];
+        G2_sum   += G_l[i] * G_l[i];
+        rho_sum  += rho_l[i];
+        rhoG_sum += rho_l[i] * G_l[i];
+    }
+
+    // garbage collection
+    delete[] G_l; delete[] rho_l;
+    
+    l0 = (Nl*rhoG_sum - G_sum*rho_sum)/(Nl*G2_sum - G_sum*G_sum);
+    if (l0>0) {
+        fprintf(stderr, "Somehow did not get a _decreasing_ Gaussian in extract_filter_gaussian\n");
+        l0 = 0;
+    }
+    l0 = sqrt(-l0)*(2.*M_PI);  // scale accordingly.
+}
+
+// assumes that l0 has been scaled by some factor
+void apply_filter_gaussian(double* rhoR, double* rhoI, 
+			   int N[3], double latt[9],
+			   double l0) {
+
+    double Gmetric[9];
+    {
+        double temp[9];
+        careful_inverse(latt, temp);	// temp = [a]^-1
+        self_transpose(temp); // Definition is changed here, to avoid a problem.
+        mult(temp, 2*M_PI, temp);		// temp = 2pi[a]^-1
+        square(temp, Gmetric);		// Gmetric = (2pi)^2 [a]^-T[a]^-1
+    }
+    
+    int n[3], nc[3];
+    int Nh[3] = {N[0]/2, N[1]/2, N[2]/2};
+
+    int nind=0;
+    double lsquare = (l0*l0)/(4.*M_PI*M_PI);
+
+    for (n[2]=0; n[2]<N[2]; ++(n[2]))
+        for (n[1]=0; n[1]<N[1]; ++(n[1]))
+            for (n[0]=0; n[0]<N[0]; ++(n[0])) {
+                for (int d=0; d<3; ++d) {
+                    if (n[d]>Nh[d]) nc[d] = n[d]-N[d];
+                    else            nc[d] = n[d];
+                }
+                double Gabs2 = magnsq(Gmetric, nc);
+                double scale = exp(-Gabs2*lsquare);
+                rhoR[nind] *= scale; rhoI[nind] *= scale; ++nind;
+            }
+}
+
+//? *************************** POWER LAW TRUNCATION FILTERS ************************
+
+void extract_filter(double* rhoR, double* rhoI, 
+		    int N[3], double latt[9],
+		    double& filter_a, double& filter_b) 
+{
+    double Gmetric[9];
+    {
+        double temp[9];
+        careful_inverse(latt, temp);	// temp = [a]^-1
+        self_transpose(temp); // Definition is changed here, to avoid a problem.
+        mult(temp, 2*M_PI, temp);		// temp = 2pi[a]^-1
+        square(temp, Gmetric);		// Gmetric = (2pi)^2 [a]^-T[a]^-1
+    }
+    
+    int n[3], nc[3];
+    int Nh[3] = {N[0]/2, N[1]/2, N[2]/2};
+    
+    int nind=0;
+    int h[3], half[3];
+    double Gabs;
+    int MEMORY = (Nh[0]+1)*(Nh[1]+1)*(Nh[2]+1); // hardcoded, I know...
+    double* G_l = new double[MEMORY];
+    double* rho_l = new double[MEMORY];
+    int Nl = 0;
+    
+    double rho_min = rhoR[0] * RHO_MIN_SCALE;
+    
+    for (n[2]=0; n[2]<N[2]; ++(n[2]))
+        for (n[1]=0; n[1]<N[1]; ++(n[1]))
+            for (n[0]=0; n[0]<N[0]; ++(n[0])) {
+                for (int d=0; d<3; ++d) {
+                    if (n[d]>Nh[d]) nc[d] = n[d]-N[d];
+                    else            nc[d] = n[d];
+                    if (n[d]==Nh[d]) half[d] = 1;
+                    else             half[d] = 0;
+                }
+                double scaled_rho = hypot(rhoR[nind], rhoI[nind])
+                /( (half[0]+1)*(half[1]+1)*(half[2]+1) );
+                ++nind;
+                
+                if (zero_vect(nc)) continue;
+                if (scaled_rho < rho_min) continue;
+                
+                for (h[0]=0; h[0]<=half[0]; ++(h[0]))
+                    for (h[1]=0; h[1]<=half[1]; ++(h[1]))
+                        for (h[2]=0; h[2]<=half[2]; ++(h[2])) {
+                            double nc_h[3];
+                            for (int d=0; d<3; ++d) nc_h[d] = nc[d] - h[d]*N[d];
+                            Gabs = sqrt(magnsq(Gmetric, nc_h));
+                            add_list(G_l, rho_l, Nl, log(Gabs), log(scaled_rho));
+                            if (Nl >= MEMORY) {
+                                fprintf(stderr, "** BAD MOJO: exceeded memory requirement in extract_filter **\n");
+                                exit(-1);
+                            }
+                        }
+            }
+    
+    // now, fit to a power law...
+    double G_sum=0, G2_sum=0, rho_sum=0, rhoG_sum=0;
+    for (int i=0; i<Nl; ++i) {
+        G_sum    += G_l[i];
+        G2_sum   += G_l[i] * G_l[i];
+        rho_sum  += rho_l[i];
+        rhoG_sum += rho_l[i] * G_l[i];
+    }
+    filter_b = (Nl*rhoG_sum - G_sum*rho_sum)/(Nl*G2_sum - G_sum*G_sum);
+    
+    filter_a = 0;
+    for (int i=0; i<Nl; ++i) {
+        double guess_a = exp(rho_l[i] - filter_b*G_l[i]);
+        if (guess_a > filter_a) filter_a = guess_a;
+    }
+    
+    // garbage collection
+    delete[] G_l; delete[] rho_l;
+}
+
+void apply_filter(double* rhoR, double* rhoI, 
+		  int N[3], double latt[9],
+		  double filter_a, double filter_b) 
+{
+    double Gmetric[9];
+    {
+        double temp[9];
+        careful_inverse(latt, temp);	// temp = [a]^-1
+        self_transpose(temp); // Definition is changed here, to avoid a problem.
+        mult(temp, 2*M_PI, temp);		// temp = 2pi[a]^-1
+        square(temp, Gmetric);		// Gmetric = (2pi)^2 [a]^-T[a]^-1
+    }
+    
+    int n[3], nc[3];
+    int Nh[3] = {N[0]/2, N[1]/2, N[2]/2};
+    
+    int nind=0;
+    double Gabs, rho_magn, rho_max;
+    
+    for (n[2]=0; n[2]<N[2]; ++(n[2]))
+        for (n[1]=0; n[1]<N[1]; ++(n[1]))
+            for (n[0]=0; n[0]<N[0]; ++(n[0])) {
+                for (int d=0; d<3; ++d) {
+                    if (n[d]>Nh[d]) nc[d] = n[d]-N[d];
+                    else            nc[d] = n[d];
+                }
+                rho_magn = hypot(rhoR[nind], rhoI[nind]);
+                ++nind;
+                
+                if (zero_vect(nc)) continue;
+                Gabs = sqrt(magnsq(Gmetric, nc));
+                rho_max = filter_a * exp(filter_b*log(Gabs));
+                if (rho_magn > rho_max) {
+                    rhoR[nind-1] *= rho_max/rho_magn;
+                    rhoI[nind-1] *= rho_max/rho_magn;
+                }
+            }
+}
+
+#endif
\ No newline at end of file