Source code for pythtb.w90

from pathlib import Path

import numpy as np

from .tbmodel import TBModel
from .io.w90 import load_w90_dataset, read_kpoint_path, read_bands_w90
from .io.qe import read_bands_qe
from .lattice import Lattice
from .utils import _red_to_cart, deprecated, kpath_distance

__all__ = ["W90"]

BOHRTOANG = 0.52917721092  # Bohr radius in Angstroms



[docs]
class W90:
    r"""Interface to Wannier90

    `Wannier90 <http://www.wannier.org>`_ is a post-processing tool
    that takes as input Bloch wavefunctions and energies generated
    by first-principles electronic structure codes such as
    Quantum-Espresso (PWscf), ABINIT, SIESTA, FLEUR,
    WIEN2k, or VASP. It produces maximally localized Wannier functions
    together with a tight-binding Hamiltonian in the Wannier basis [1]_.

    This class imports tight-binding model parameters from a
    Wannier90 calculation and makes them available in PythTB.
    Upon construction, it reads the relevant Wannier90 output
    files from the specified directory. Use :meth:`model` to
    convert the imported data into a :class:`TBModel` instance.

    The PythTB interface uses the following Wannier90 output files:

    - ``prefix.win``
    - ``prefix_hr.dat``
    - ``prefix_centres.xyz``
    - ``prefix_band.kpt`` (optional)
    - ``prefix_band.dat`` (optional)

    The ``prefix.win`` file provides general input to Wannier90 and is here
    primarily to obtain the lattice vectors.

    To ensure the required files ``prefix_hr.dat`` and ``prefix_centres.xyz``
    are written, include the following flags in the ``prefix.win`` file::

       write_hr = True
       write_xyz = True
       translate_home_cell = False

    These directives instruct Wannier90 to output (i) the real-space
    tight-binding Hamiltonian in ``prefix_hr.dat`` and (ii) the centers of
    the Wannier functions in ``prefix_centres.xyz`` without translating
    them to the home unit cell.

    The optional files ``prefix_band.kpt`` and ``prefix_band.dat``
    can be used to import the Wannier-interpolated band structures
    computed by Wannier90. Please see documentation of function
    :meth:`bands_w90` for more detail.

    Parameters
    ----------
    path : str
        Relative path to the folder that contains Wannier90
        files. These are ``prefix.win``, ``prefix_hr.dat``,
        ``prefix_centres.xyz`` and optionally ``prefix_band.kpt`` and
        ``prefix_band.dat``.

    prefix : str
        This is the prefix used by `Wannier90` code.
        Typically the input to the `Wannier90` code is name ``prefix.win``.

    See Also
    --------
    :ref:`w90-nb`

    Notes
    -----
    Units used throught this interface with Wannier90 are
    electron-volts (eV) and Angstroms.

    .. warning::
        This class has been tested on Wannier90 v3.1.0. Compatibility
        with other versions is not guaranteed.

    .. warning::
        The user needs to make sure that the Wannier functions
        computed using Wannier90 code are well localized. Otherwise the
        tight-binding model may not accurately interpolate the band
        structure. To ensure that the Wannier functions are well
        localized it is often enough to check that the total spread at
        the beginning of the minimization procedure (first total spread
        printed in .wout file) is not more than 20% larger than the
        total spread at the end of the minimization procedure. If those
        spreads differ by much more than 20% user needs to specify
        better initial projection functions.

    .. warning::
        The interpolation is only exact within the frozen energy window
        of the disentanglement procedure.

    .. warning::
        So far PythTB assumes that the position operator is
        diagonal in the tight-binding basis. This is discussed in the
        :download:`notes on tight-binding formalism
        </_static/formalism/pythtb-formalism.pdf>` in Eq. 2.7.,
        :math:`\langle\phi_{{\bf R} i} \vert {\bf r} \vert \phi_{{\bf
        R}' j} \rangle = ({\bf R} + {\bf t}_j) \delta_{{\bf R} {\bf R}'}
        \delta_{ij}`. However, this relation does not hold for Wannier
        functions! Therefore, if you use tight-binding model derived
        from this class in computing Berry-like objects that involve
        position operator such as Berry phase or Berry flux, you would
        not get the same result as if you computed those objects
        directly from the first-principles code! Nevertheless, this
        approximation does not affect other properties such as band
        structure dispersion.


    Examples
    --------
    Read Wannier90 from folder called *example_a*
    This assumes that that folder contains files "silicon.win" (and so on)

    >>> silicon = w90("example_a", "silicon")

    References
    ----------
    .. [1] "Wannier90 as a community code: new features and applications",
            G. Pizzi et al., J. Phys. Cond. Matt. 32,  165902 (2020).
    """

    def __init__(self, path, prefix):
        self.folder = Path(path).expanduser()
        if not self.folder.exists():
            raise FileNotFoundError(f"Wannier90 folder not found: {self.folder}")
        self.path = str(self.folder)
        self.prefix = prefix

        ds = load_w90_dataset(self.path, self.prefix)

        # stash raw win lines if you still need them for blocks
        self._win_lines = open(
            self.folder / f"{self.prefix}.win", "r", encoding="utf-8", errors="ignore"
        ).readlines()

        # adopt dataset fields (preserve your attribute names)
        self.lat = ds.lat_cart
        self.num_wan = ds.num_wan
        self.ham_r = {
            R: {"h": blk.h, "deg": blk.degeneracy} for R, blk in ds.ham_r.items()
        }
        self.xyz_cen = ds.centres_xyz
        self.red_cen = ds.centres_red
        self.lattice = Lattice(self.lat, self.red_cen, periodic_dirs=...)

        # check if for every non-zero R there is also -R
        self._validate_hr_symmetry()

        # caches (filled lazily)
        self._vecR_cache = {}
        self._dist_cache = {}

    def _validate_hr_symmetry(self):
        R_set = set(self.ham_r.keys())
        for R in R_set:
            if R != (0, 0, 0) and (-R[0], -R[1], -R[2]) not in R_set:
                raise ValueError(f"Did not find negative R for R = {R}!")

    @staticmethod
    def _wrap01(x: np.ndarray) -> np.ndarray:
        out = np.mod(x, 1.0)
        # snap 1.0 → 0.0 to avoid 2π glitches
        out[np.isclose(out, 1.0, atol=1e-12)] = 0.0
        return out

    def _get_vecR(self, R):
        """Cartesian vector for reduced lattice vector R, cached."""
        if not hasattr(self, "_vecR_cache"):
            self._vecR_cache = {}
        if R in self._vecR_cache:
            return self._vecR_cache[R]
        vecR = _red_to_cart((self.lat[0], self.lat[1], self.lat[2]), [R])[0]
        self._vecR_cache[R] = vecR
        return vecR

    def _get_dist_matrix(self, R):
        """Distance for reduced lattice vector R, cached."""
        if not hasattr(self, "_dist_cache"):
            self._dist_cache = {}
        if R in self._dist_cache:
            return self._dist_cache[R]
        vecR = self._get_vecR(R)
        delta = (-self.xyz_cen[:, None, :] + self.xyz_cen[None, :, :]) + vecR[
            None, None, :
        ]
        dist = np.linalg.norm(delta, axis=2)  # (num_wan, num_wan)
        self._dist_cache[R] = dist
        return dist

    def _precompute_distances(self):
        """Precompute distance matrices for all reduced lattice vectors."""
        if not hasattr(self, "_dist_cache"):
            self._dist_cache = {}
        for R in self.ham_r.keys():
            if R not in self._dist_cache:
                self._get_dist_matrix(R)


[docs]
    def model(
        self,
        zero_energy=0.0,
        min_hopping_norm=None,
        max_distance=None,
        ignorable_imaginary_part=None,
        *,
        onsite_imag_tol: float = 1e-9,
        fill_hermitian: bool = False,
    ) -> TBModel:
        r"""Get TBModel associated with this Wannier90 calculation.

        This function returns :class:`pythtb.TBModel` object that can
        be used to interpolate the band structure at arbitrary
        k-point, analyze the wavefunction character, etc.

        The tight-binding basis orbitals in the returned object are
        maximally localized Wannier functions as computed by
        Wannier90. Locations of the orbitals in the returned
        :class:`pythtb.TBModel` object are the centers of
        the Wannier functions computed by Wannier90.

        Parameters
        ----------

        zero_energy : float
            Sets the zero of the energy in the band structure.
            This value is typically set to the Fermi level
            computed by the density-functional code (or to the top of the valence band).
            Units are electron-volts.

        min_hopping_norm : float
            Hopping terms read from Wannier90 with complex norm less than
            *min_hopping_norm* will not be included in the returned
            tight-binding model. This parameters is specified in
            electron-volts. By default all terms regardless of their
            norm are included.

        max_distance : float
            Hopping terms from site *i* to site *j+R* will be ignored if
            the distance from orbital *i* to *j+R* is larger than
            *max_distance*. This parameter is given in Angstroms.
            By default all terms regardless of the distance are included.

        ignorable_imaginary_part : float
            The hopping term will be assumed to be exactly real if the
            absolute value of the imaginary part as computed by Wannier90
            is less than *ignorable_imaginary_part*. By default imaginary
            terms are not ignored. Units are again eV.

        Returns
        -------
        tb : :class:`pythtb.TBModel`
            The :class:`pythtb.TBModel` that can be used to
            interpolate Wannier90 band structure to an arbitrary k-point as well
            as to analyze the character of the wavefunctions.

        Notes
        -----
        - The character of the maximally localized Wannier functions is
          not exactly the same as that specified by the initial
          projections. The orbital character of the Wannier functions can be
          inferred either from the *projections* block in the *prefix*.win or
          from the *prefix*.nnkp file.

        - One way to ensure that the Wannier functions are as close to
          the initial projections as possible is to first choose a good set
          of initial projections (for these initial and final spread should
          not differ more than 20%) and then perform another Wannier90 run
          setting *num_iter=0* in the *prefix*.win file.

        - The tight-binding model returned by this function is only as good as
          the input from Wannier90. In particular, the choice of initial
          projections can have a significant impact on the quality of the
          resulting Wannier functions. It is recommended to experiment with
          different sets of initial projections and to carefully analyze the
          resulting Wannier functions to ensure that they are physically
          meaningful.

        - The number of spin components is always set to 1, even if the
          underlying DFT calculation includes spin.  Please refer to the
          *projections* block or the *prefix*.nnkp file to see which
          orbitals correspond to which spin.

        Examples
        --------
        Get `TBModel` with all hopping parameters

        >>> my_model = silicon.model()

        Simplified model that contains only hopping terms above 0.01 eV

        >>> my_model_simple = silicon.model(min_hopping_norm=0.01)
        >>> my_model_simple.display()

        """
        tb = TBModel(self.lattice)  # initialize the model object

        # remember that this model was computed from w90
        tb._from_w90 = True
        tb._assume_position_operator_diagonal = False

        # Onsites
        hr0 = self.ham_r[(0, 0, 0)]
        deg0 = float(hr0["deg"])
        # Divide by degeneracy only once and assert onsite is (numerically) real
        diag = np.diag(hr0["h"]) / deg0
        # sanity check: imaginary part should be tiny
        if np.max(np.abs(diag.imag)) > 1e-9:
            raise ValueError(f"Onsite terms should be real (|Im|>{onsite_imag_tol})")
        tb.set_onsite(diag.real - zero_energy)

        # Hoppings

        # Helper to decide if we should process an R (to avoid double counting)
        def _use_R(R):
            r1, r2, r3 = R
            if r1 != 0:
                return r1 > 0
            if r2 != 0:
                return r2 > 0
            return r3 > 0

        if max_distance is not None and not self._dist_cache:
            self._precompute_distances()

        amps_all, ii_all, jj_all, R_all = [], [], [], []
        num_wan = self.num_wan

        for R, blk in self.ham_r.items():
            # Onsite block already handled; keep only off-diagonal pairs here.
            if R == (0, 0, 0):
                use_this_R = True
            else:
                use_this_R = _use_R(R)
            if not use_this_R:
                continue

            # Start from allowed entries and avoid double counting
            if R == (0, 0, 0):
                keep = np.zeros((num_wan, num_wan), dtype=bool)
                iu = np.triu_indices(num_wan, k=1)
                keep[iu] = True
            else:
                keep = np.ones((num_wan, num_wan), dtype=bool)

            Hr = blk["h"]
            deg = float(blk["deg"])  # scalar

            # Divide by degeneracy once per block
            inv_deg = 1.0 / deg  # multiplying inverse is faster than dividing
            H = Hr * inv_deg  # (num_wan, num_wan)

            # Distance cutoff (use cached distances; compute lazily if needed)
            if max_distance is not None:
                dist = self._get_dist_matrix(R)
                keep &= dist <= max_distance
                if not np.any(keep):
                    continue

            # Apply min_hopping_norm filter
            if min_hopping_norm is not None:
                keep &= np.abs(H) >= min_hopping_norm
                if not np.any(keep):
                    continue

            # Optionally zero-out tiny imaginary parts before insertion
            if ignorable_imaginary_part is not None:
                sel = keep & (np.abs(H.imag) < ignorable_imaginary_part)
                if np.any(sel):
                    H = H.copy()
                    H.imag[sel] = 0.0

            ii, jj = np.nonzero(keep)
            if ii.size:
                amps = H[ii, jj]
                R_arr = np.repeat(np.array(R, dtype=int)[None, :], ii.size, axis=0)
                amps_all.append(amps)
                ii_all.append(ii)
                jj_all.append(jj)
                R_all.append(R_arr)

            if fill_hermitian and R != (0, 0, 0):
                # explicitly add the conjugate partners at -R
                Hc = H.conj().T
                ii2, jj2 = np.nonzero(keep.T)
                if ii2.size:
                    amps2 = Hc[ii2, jj2]
                    Rm = np.repeat((-np.array(R, dtype=int))[None, :], ii2.size, axis=0)
                    amps_all.append(amps2)
                    ii_all.append(ii2)
                    jj_all.append(jj2)
                    R_all.append(Rm)

        if amps_all:
            amps = np.concatenate(amps_all)
            ii = np.concatenate(ii_all)
            jj = np.concatenate(jj_all)
            R_arr = np.concatenate(R_all)
            tb._append_hops(amps, ii, jj, R_arr)

        return tb



[docs]
    def dist_hop(self):
        r"""Get distances and hopping terms of Hamiltonian in Wannier basis.

        This function returns all hopping terms (from orbital *i* to
        *j+R*) as well as the distances between the *i* and *j+R*
        orbitals. For well localized Wannier functions hopping term
        should decay exponentially with distance.

        Returns
        -------
        dist : np.ndarray
            Distances between Wannier function centers (*i* and *j+R*) in Angstroms.

        ham : np.ndarray
            Corresponding hopping terms in eV.

        Notes
        -----
        This function can be used to help determine the *min_hopping_norm*
        and *max_distance* parameters in the :func:`pythtb.w90.model` function
        call.

        Examples
        --------
        Get distances and hopping terms

        >>> (dist, ham) = silicon.dist_hop()

        Plot logarithm of the hopping term as a function of distance

        >>> import matplotlib.pyplot as plt
        >>> fig, ax = plt.subplots()
        >>> ax.scatter(dist, np.log(np.abs(ham)))
        >>> fig.savefig("localization.pdf")

        """

        ret_ham = []
        ret_dist = []
        num_wan = self.num_wan

        for R, blk in self.ham_r.items():
            Ham = blk["h"] / float(blk["deg"])
            dist = self._get_dist_matrix(R)  # (num_wan, num_wan)
            keep = np.ones((num_wan, num_wan), dtype=bool)

            if R == (0, 0, 0):
                np.fill_diagonal(keep, False)  # avoid diagonal terms

            ret_ham.append(Ham[keep])
            ret_dist.append(dist[keep])

        return (np.concatenate(ret_dist), np.concatenate(ret_ham))



[docs]
    def shells(self, num_digits=2):
        r"""Get all shells of distances between Wannier function centers.

        This is one of the diagnostic tools that can be used to help
        in determining *max_distance* parameter in
        :func:`pythtb.w90.model` function call.

        Parameters
        ----------
        num_digits : int
            Distances will be rounded up to these many digits. Default value is 2.

        Returns
        -------
        shells : list
            All distances between all Wannier function centers (*i* and *j+R*) in Angstroms.

        Examples
        --------
        Print all shells

        >>> print(silicon.shells())
        """

        shells = []
        for R in self.ham_r.keys():
            dist = self._get_dist_matrix(R)
            shells.extend(np.round(dist.ravel(), num_digits).tolist())

        # remove duplicates and sort
        shells = np.sort(list(set(shells)))

        return shells



[docs]
    @deprecated("use .bands_w90() instead (since v2.0).", category=FutureWarning)
    def w90_bands_consistency(self):
        """
        .. deprecated:: 2.0.0
            Use .bands_w90() instead.
        """
        return self.bands_w90()



[docs]
    def bands_w90(
        self,
        return_k_cart: bool = False,
        return_k_dist: bool = False,
        return_k_nodes: bool = False,
    ):
        r"""Read interpolated band structure from Wannier90 output files.

        The purpose of this function is to compare the interpolation
        in Wannier90 with that in PythTB. This function reads in band
        structure as interpolated by Wannier90.

        The code assumes that the following files were generated by
        Wannier90,

          - *prefix*\_band.kpt
          - *prefix*\_band.dat

        These files will be generated only if the *prefix*.win file
        contains the *kpoint_path* block.

        Parameters
        ----------
        return_k_cart : bool, optional
            If True, also return k-points in Cartesian coordinates.
        return_k_dist : bool, optional
            If True, also return the cumulative k-path distance (in inverse Angstroms).

            .. versionadded:: 2.0.0

        return_k_nodes : bool, optional
            If True, also return the k-point nodes and labels used in the path.
            Format is ``(k_nodes, k_labels)`` where ``k_nodes`` is an array
            of reduced coordinates and ``k_labels`` is a list of strings.

        Returns
        -------
        kpts : array
            k-points in reduced coordinates used in the
            interpolation in Wannier90 code. The expected format is
            the same as the input to
            :func:`pythtb.TBModel.solve_ham`.
        ene : array
            Energies interpolated by Wannier90 in eV. Format is ``ene[kpt,band]``.
        k_dist : array, optional
            Cumulative distances along the path (1/Å) as reported by Wannier90.
            Returned when ``return_k_dist=True``. Useful for plotting band structures.
        k_cart : array, optional
            k-points in Cartesian coordinates (1/Å).
            Returned when ``return_k_cart=True``.
        k_nodes : tuple[array, list[str]], optional
            Tuple ``(k_nodes, k_labels)`` containing the reduced coordinates
            of the k-point nodes and their labels.
            Returned when ``return_k_nodes=True``.

        Notes
        -----
        - The bands returned here are not the same as the band
          structure calculated by the underlying DFT code. The two will
          agree only on the coarse set of k-points that were used in
          Wannier90 generation.
        - If no terms were ignored in the call to :func:`pythtb.w90.model`
          then the two should be exactly the same (up to numerical precision).
          Otherwise one should expect deviations.
        - If one carefully chooses the cutoff parameters in :func:`pythtb.w90.model`
          it is likely that one could reproduce the full band-structure
          with only few dominant hopping terms.

        Examples
        --------
        Get band structure from `Wannier90`

        >>> w90_kpt, w90_evals, w90_k_dist, w90_k_nodes, w90_k_labels = silicon.bands_w90(
        ... return_k_dist=True, return_k_nodes=True)

        Get simplified model

        >>> my_model_simple = silicon.model(min_hopping_norm=0.01)

        Solve simplified model on the same k-path as in `Wannier90`

        >>> evals = my_model.solve_ham(w90_kpt)

        Now plot the comparison of the two

        >>> import matplotlib.pyplot as plt
        >>> fig, ax = plt.subplots()
        >>> for i in range(evals.shape[1]):
        >>>     ax.plot(range(evals.shape[1]), evals[i], "r-", zorder=-50)
        >>> for i in range(w90_evals.shape[0]):
        >>>     ax.plot(range(w90_evals.shape[1]), w90_evals[i], "k-", zorder=-100)
        >>> fig.savefig("comparison.pdf")
        """
        kpts, ene = read_bands_w90(self.folder, self.prefix, self.num_wan)

        B = self.lattice.recip_lat_vecs
        results = (kpts, ene)

        if return_k_dist:
            k_dist = kpath_distance(kpts, b1=B[0], b2=B[1], b3=B[2])
            results += (k_dist,)
        if return_k_cart:
            k_cart = kpts @ B
            results += (k_cart,)
        if return_k_nodes:
            k_nodes, k_labels = read_kpoint_path(self._win_lines, latex=True)
            results += (k_nodes, k_labels)
        return results



[docs]
    def bands_qe(self, return_k_cart=False, return_meta=False, return_kdist=False):
        """
        Read band structure from Quantum ESPRESSO output files.

        Reads in band structure as computed by Quantum ESPRESSO.
        The code assumes that the following file was generated by
        Quantum ESPRESSO, typically using the `bands.x` utility:

        - ``*prefix*_bands.dat``

        .. versionadded:: 2.0.0

        Parameters
        ----------
        return_k_cart : bool, optional
            If True, also return k-points in Cartesian coordinates.
        return_meta : bool, optional
            If True, return header metadata (nbnd, nks) when available.
        return_kdist : bool, optional
            If True, also return cumulative k-path distances (1/Å).

        Returns
        -------
        k_frac : array
            k-points in reduced coordinates used in the band structure.
        energies : array
            Energies computed by Quantum ESPRESSO in eV. Format is ``energies[kpt,band]``.
        k_dist : array, optional
            Cumulative distances along the path (1/Angstrom). Returned when
            ``return_kdist=True``. Useful for plotting band structures.
        k_cart : array, optional
            k-points in Cartesian coordinates (1/Angstrom). Returned when
            ``return_k_cart=True``.
        meta : dict, optional
            Metadata dictionary containing ``nbnd`` and ``nks`` when available.
            Returned when ``return_meta=True``.

        Notes
        -----
        - The purpose of this function is to read band structures
          computed by Quantum ESPRESSO for comparison with PythTB
          calculations based on Wannier90 tight-binding models.
        - The band structure from Quantum ESPRESSO can be compared
          with that from Wannier90 using the :func:`bands_w90` method.
        - Ensure that the k-point paths used in Quantum ESPRESSO
          and Wannier90 are consistent for meaningful comparisons.
        """
        k_markers, energies_rows, meta = read_bands_qe(self.folder, self.prefix)

        # Shape energies into (nks, nbnd)
        nks = meta.get("nks", len(energies_rows))
        nbnd = meta.get(
            "nbnd", max(len(r) for r in energies_rows) if energies_rows else 0
        )

        E = np.full((nks, nbnd), np.nan, dtype=float)
        for i in range(min(nks, len(energies_rows))):
            row = energies_rows[i]
            ncopy = min(nbnd, len(row))
            if ncopy:
                E[i, :ncopy] = row[:ncopy]

        # QE k markers are in units of (2π / alat); scale as your previous method
        k_cart = np.asarray(k_markers, float)
        alat = np.linalg.norm(self.lattice.lat_vecs[0])
        k_cart *= 2 * np.pi / alat

        # Convert to reduced coords using your reciprocal lattice
        B = self.lattice.recip_lat_vecs
        k_frac = k_cart @ np.linalg.inv(B)

        results = [k_frac, E]

        if return_kdist:
            k_dist = kpath_distance(k_frac, b1=B[0], b2=B[1], b3=B[2])
            results.append(k_dist)
        if return_k_cart:
            results.append(k_cart)
        if return_meta:
            results.append(meta)

        return tuple(results)