Source code for alogos._utilities.database_management

import csv as _csv
import os as _os
import sqlite3 as _sqlite3


[docs]class Sqlite3Wrapper:
    """Simple wrapper for easier usage of an SQLite3 database in Python3.

    References
    ----------
    - https://docs.python.org/3/library/sqlite3.html
    - https://www.sqlite.org
    - https://www.sqlite.org/limits.html

    """

[docs]    def __init__(self, database_location, try_to_use_wal=True):
        """Open a connection to an existing or new SQLite3 database.

        Parameters
        ----------
        database_location : string
            If the given string is ``":memory:"``, an in-memory database is created.
            Otherwise, a file-based database is created and the given string is
            used as filepath.
        try_to_use_wal : bool
            If True, it is attempted to use a Write-Ahead Log (WAL) instead of
            the default rollback journal, which can bring a significant speedup.
            This is supported since SQLite version 3.7.0 (2010-07-21) if the
            operating system supports it.

        References
        ----------
        - https://docs.python.org/3/library/sqlite3.html#sqlite3.connect

            - You can use ":memory:" to open a database connection to a
              database that resides in RAM instead of on disk.

        - https://www.sqlite.org/wal.html

            - On success, the pragma will return the string "wal"

        - https://www.sqlite.org/pragma.html#pragma_synchronous

            - The synchronous=NORMAL setting is a good choice for most
              applications running in WAL mode.

        """
        self.connection = _sqlite3.connect(database_location)

        #
        if try_to_use_wal:
            try:
                result = self.execute_query("PRAGMA journal_mode=WAL;")
                if result[0][0] == "wal":
                    self.execute_query("PRAGMA synchronous=NORMAL;")
            except Exception:
                pass

    def __del__(self):
        """Close the connection to the database.

        References
        ----------
        - https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.close

        """
        self.connection.close()

[docs]    def execute_query(self, query, record=None):
        """Execute an SQL statement, optionally with parameters.

        Parameters
        ----------
        query : str
            String with SQL query
        record : list or tuple
            The record needs to have one entry for each ``?`` placeholder in the query.

        Returns
        -------
        result : list of list of str
            The list of rows and columns returned by the cursor with ``fetchall``.

        References
        ----------
        - https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.execute
        - https://docs.python.org/3/library/sqlite3.html#using-the-connection-as-a-context-manager

        """
        with self.connection as con:
            if record is None:
                result = con.execute(query).fetchall()
            else:
                result = con.execute(query, record).fetchall()
        return result

[docs]    def execute_query_for_many_records(self, query, records):
        """Execute an SQL statement for many records.

        This method can only be used for data insertion (INSERT)
        and not for data retrieval (SELECT) because nothing is returned
        by the called ``executemany`` method.

        Parameters
        ----------
        query : str
            String with SQL query
        records : list of tuples
            The list can have an arbitrary number of tuples, each representing a record.
            The tuple needs to have one entry for each ``?`` placeholder in the query.

        References
        ----------
        - https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.executemany
        - https://docs.python.org/3/library/sqlite3.html#using-the-connection-as-a-context-manager

        """
        with self.connection as con:
            con.executemany(query, records)

[docs]    def execute_script(self, query_script):
        """Execute a script that can contain multiple SQL statements with a single transaction.

        This method can only be used for data insertion (INSERT)
        and not for data retrieval (SELECT) because nothing is returned
        by the called ``executescript`` method.

        Parameters
        ----------
        query_script : str
            String with multiple SQL queries separated by newlines.

        References
        ----------
        - https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.executescript
        - https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.executescript
        - https://docs.python.org/3/library/sqlite3.html#using-the-connection-as-a-context-manager

        """
        with self.connection as con:
            con.executescript(query_script)

[docs]    def get_version(self):
        """Get the version of the running SQLite library.

        Returns
        -------
        version_descriptor : str

        References
        ----------
        - https://sqlite.org/lang_corefunc.html#sqlite_version

        """
        result = self.execute_query("SELECT SQLITE_VERSION();")
        version = result[0][0]
        return "SQLite v{}".format(version)

[docs]    def get_table_names(self):
        """Get a list of tables contained in the database.

        Returns
        -------
        table_names : list of str

        References
        ----------
        - https://sqlite.org/fileformat.html#storage_of_the_sql_database_schema

        """
        result = self.execute_query(
            'SELECT name FROM sqlite_master WHERE type="table";'
        )
        table_names = [row[0] for row in result]
        return table_names

[docs]    def get_view_names(self):
        """Get a list of views contained in the database.

        Returns
        -------
        view_names : list of str

        References
        ----------
        - https://sqlite.org/fileformat.html#storage_of_the_sql_database_schema

        """
        result = self.execute_query('SELECT name FROM sqlite_master WHERE type="view";')
        view_names = [row[0] for row in result]
        return view_names

[docs]    def get_header_names(self, name):
        """Get the headers of a view or table.

        Parameters
        ----------
        name : string
            Name of a table or view.

        Returns
        -------
        header_names : list of str
            Header row of the chosen table or view.

        References
        ----------
        - https://stackoverflow.com/questions/947215/how-to-get-a-list-of-column-names-on-sqlite3-database

        """
        result = self.execute_query("SELECT name FROM PRAGMA_TABLE_INFO(?);", [name])
        header_names = [row[0] for row in result]
        return header_names

[docs]    def get_data_per_table(self):
        """Get data of each table in the database in form of a dictionary.

        Returns
        -------
        data : dict
            Each table name is a key in the dict.
            A table's entries form the corresponding value (list of list of str).

        """
        tables = self.get_table_names()
        data = dict()
        for table in tables:
            data[table] = self.execute_query("SELECT * FROM {};".format(table))
        return data

[docs]    def get_num_changes(self):
        """Get number of rows which have been modified, inserted, deleted since connection opened.

        Returns
        -------
        num_changes : int

        References
        ----------
        - https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.total_changes

        """
        return self.connection.total_changes

[docs]    def export_sql(self, filepath):
        """Export the entire database to a SQL file at a given filepath.

        Parameters
        ----------
        filepath : str

        Raises
        ------
        FileExistsError
            If there is already a file or directory at the given filepath.

        References
        ----------
        - https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.backup

        """
        # Argument processing
        self._check_filepath(filepath)

        # Export
        new_con = _sqlite3.connect(filepath)
        with new_con:
            self.connection.backup(new_con)
        new_con.close()

[docs]    def export_sql_text(self, filepath):
        """Export the entire database as text file with SQL statements at a given filepath.

        Parameters
        ----------
        filepath : str

        Raises
        ------
        FileExistsError
            If there is already a file or directory at the given filepath.

        References
        ----------
        - https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.iterdump

        """
        # Argument processing
        self._check_filepath(filepath)

        # Export
        with open(filepath, "w", encoding="utf-8") as file_handle:
            for line in self.connection.iterdump():
                file_handle.write("{}\n".format(line))

[docs]    def export_csv(self, filepath, name=None, include_header=True):
        """Export a chosen table or view of the database to a CSV file at a given filepath.

        Parameters
        ----------
        filepath : str
        name : str
            Name of a table or view that shall be exported.
            If `None`, each table of the database will be exported to a
            separate file. The filepath is constructed from the original
            filepath followed by the table name.

        Raises
        ------
        FileExistsError
            If there is already a file or directory at the given filepath.

        """
        if name is None:
            # Recursive call for each table name, if no name was provided by the user
            names = self.get_table_names()
            if filepath.endswith(".csv"):
                filepath = filepath[:-4]
            filepaths = ["{}_{}.csv".format(filepath, name) for name in names]
            for fp in filepaths:
                self._check_filepath(fp)
            for fp, name in zip(filepaths, names):
                self.export_csv(fp, name=name)
        else:
            # Argument processing
            self._check_filepath(filepath)
            table_names = self.get_table_names()
            view_names = self.get_view_names()
            if name not in table_names and name not in view_names:
                raise ValueError("The provided name is not a known table or view.")

            # Export
            with open(filepath, "w", encoding="utf-8") as csv_file:
                csv_writer = _csv.writer(csv_file, delimiter=",")
                # Header
                if include_header:
                    header = self.get_header_names(name)
                    csv_writer.writerow(header)
                # Rows
                rows = self.execute_query("select * from {};".format(name))
                csv_writer.writerows(rows)

    @staticmethod
    def _check_filepath(filepath):
        """Check if the given filepath exists.

        Raises
        ------
        FileExistsError
            If the given path is already taken by another file or directory.

        """
        if _os.path.exists(filepath):
            object_at_filepath = "file" if _os.path.isfile(filepath) else "directory"
            message = (
                'Cannot export data to the filepath "{}". '
                "There already is a {} with that path.".format(
                    filepath, object_at_filepath
                )
            )
            raise FileExistsError(message)