Skip to content

Loader Class

Loader classes.

SQLLoader

Class for loading and parsing SQL queries.

Source code in amee_utils/loader.py
class SQLLoader:
    """Load SQL query text and execute it through a SparkSession."""

    def __init__(self, spark: SparkSession) -> None:
        """Initialise the SQLLoader.

        Parameters
        ----------
        spark : SparkSession
            The SparkSession object to use for executing queries.
        """
        self.spark = spark

    @staticmethod
    def _read_file(path: Path) -> str:
        """Read the contents of a file and return it as a string.

        The file is decoded as UTF-8 explicitly so the result does not
        depend on the platform's default locale encoding.

        Parameters
        ----------
        path : Path
            The path to the file to read.

        Returns
        -------
        str
            The contents of the file as a string.

        Raises
        ------
        FileNotFoundError
            If the specified SQL file is not found. The original error is
            chained as the cause.
        """
        try:
            return path.read_text(encoding="utf-8")
        except FileNotFoundError as e:
            # Re-raise with a message that names the offending path.
            raise FileNotFoundError(f"SQL file not found: {path}. Please check the path and file name.") from e

    def parse(self, query: str) -> DataFrame:
        """Execute an SQL query using the SparkSession.

        The query string is passed unchanged to ``spark.sql`` and the
        resulting DataFrame is returned as-is; no de-duplication or other
        post-processing is applied here.

        Parameters
        ----------
        query : str
            The SQL query to execute.

        Returns
        -------
        DataFrame
            The result of the SQL query execution as a PySpark DataFrame.
        """
        return self.spark.sql(sqlQuery=query)

    def from_file(self, path: Path) -> DataFrame:
        """Load an SQL query from a file and execute it using the SparkSession.

        Parameters
        ----------
        path : Path
            The path to the file containing the SQL query.

        Returns
        -------
        DataFrame
            The result of the SQL query execution as a PySpark DataFrame.

        Raises
        ------
        FileNotFoundError
            If the file at ``path`` does not exist.
        """
        query = self._read_file(path=path)

        return self.parse(query)

from_file(path)

Load an SQL query from a file, parse it, and execute it using the SparkSession.

Returns the parsed query result as a PySpark DataFrame.

Parameters:

Name Type Description Default
path Path

The path to the file containing the SQL query.

required

Returns:

Type Description
DataFrame

The result of the SQL query execution as a PySpark DataFrame.

Source code in amee_utils/loader.py
def from_file(self, path: Path) -> DataFrame:
    """Read the SQL text stored at ``path`` and run it.

    The file contents are obtained via ``self._read_file`` and handed
    straight to ``self.parse``.

    Parameters
    ----------
    path : Path
        Location of the file holding the SQL query.

    Returns
    -------
    DataFrame
        Whatever ``self.parse`` returns for the loaded query text.
    """
    return self.parse(self._read_file(path=path))

parse(query)

Parse and execute an SQL query using the SparkSession.

Returns the result as a PySpark DataFrame. Note: the query result is returned as-is; duplicates are not dropped.

Parameters:

Name Type Description Default
query str

The SQL query to parse and execute.

required

Returns:

Type Description
DataFrame

The result of the SQL query execution as a PySpark DataFrame.

Source code in amee_utils/loader.py
def parse(self, query: str) -> DataFrame:
    """Run an SQL statement on the stored SparkSession.

    The query text is forwarded unchanged to ``self.spark.sql`` and its
    result is returned without further processing.

    Parameters
    ----------
    query : str
        The SQL query to execute.

    Returns
    -------
    DataFrame
        The object returned by ``self.spark.sql`` for this query.
    """
    session = self.spark
    result = session.sql(sqlQuery=query)
    return result