From a1809aef3cf02e070a4fb448ceea58ea0686baf5 Mon Sep 17 00:00:00 2001 From: aaronfriedman Date: Tue, 17 Mar 2026 11:04:51 -0400 Subject: [PATCH 1/8] Add snowflake client --- CHANGELOG.md | 5 + README.md | 1 + pyproject.toml | 6 +- src/nypl_py_utils/classes/snowflake_client.py | 99 ++++++++++++++++ src/nypl_py_utils/functions/config_helper.py | 69 +++++++---- src/nypl_py_utils/functions/log_helper.py | 39 ++++--- tests/test_config_helper.py | 107 +++++++++++++----- tests/test_log_helper.py | 3 +- tests/test_snowflake_client.py | 56 +++++++++ 9 files changed, 315 insertions(+), 70 deletions(-) create mode 100644 src/nypl_py_utils/classes/snowflake_client.py create mode 100644 tests/test_snowflake_client.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e1ac3a5..7a1c5ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,9 @@ # Changelog +## v1.10.0 3/17/26 +- Add Snowflake client +- Update config helper to allow loading config files without PLAINTEXT/ENCRYPTED structure +- Update structured log helper to include name of the logger by default + ## v1.9.1 3/11/26 - Add merge_contextvars to default structlog configuration diff --git a/README.md b/README.md index b15e643..d3a1bab 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ This package contains common Python utility classes and functions. * Connecting to and querying a MySQL database * Connecting to and querying a PostgreSQL database * Connecting to and querying Redshift +* Connecting to and querying Snowflake * Making requests to the Oauth2 authenticated APIs such as NYPL Platform API and Sierra * Interacting with vendor APIs such as cloudLibrary diff --git a/pyproject.toml b/pyproject.toml index ec7f11e..9eb8951 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "nypl_py_utils" -version = "1.9.1" +version = "1.10.0" authors = [ { name="Aaron Friedman", email="aaronfriedman@nypl.org" }, ] @@ -70,6 +70,10 @@ secrets-manager-client = [ "boto3>=1.26.5", "botocore>=1.29.5" ] +snowflake-client = [ + "nypl_py_utils[log-helper]", + "snowflake-connector-python>=4.3.0" +] sftp-client = [ "nypl_py_utils[log-helper]", "paramiko>=3.4.1" diff --git a/src/nypl_py_utils/classes/snowflake_client.py b/src/nypl_py_utils/classes/snowflake_client.py new file mode 100644 index 0000000..8cc6eb0 --- /dev/null +++ b/src/nypl_py_utils/classes/snowflake_client.py @@ -0,0 +1,99 @@ +import snowflake.connector as sc + +from nypl_py_utils.functions.log_helper import create_log + + +class SnowflakeClient: + """Client for managing connections to Snowflake""" + + def __init__(self, account, user, password, warehouse=None): + self.logger = create_log('snowflake_client') + self.conn = None + self.account = account + self.user = user + self.password = password + self.warehouse = warehouse + + def connect(self, **kwargs): + """ + Connects to a Snowflake database using the given credentials. If + warehouse parameter is None, uses the default warehouse for the user. + + Parameters + ---------- + kwargs: + All possible arguments (such as timeouts) can be found here: + https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#connect + """ + self.logger.info('Connecting to Snowflake') + try: + self.conn = sc.connect( + account=self.account, + user=self.user, + password=self.password, + warehouse=self.warehouse, + **kwargs) + except Exception as e: + raise SnowflakeClientError( + f'Error connecting to Snowflake: {e}') from None + + def execute_query(self, query, **kwargs): + """ + Executes an arbitrary query against the given database connection. + + Note that: + 1) All results will be fetched by default, so this method is not + suitable if you do not want to load all rows into memory + 2) AUTOCOMMIT is on by default, so this method is not suitable if + you want to execute multiple queries in a single transaction + 3) This method can be used for both read and write queries, but + it's not optimized for writing -- there is no parameter binding + or executemany support, and the return value for write queries + can be unpredictable. + + Parameters + ---------- + kwargs: + All possible arguments (such as timeouts) can be found here: + https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#execute + + Returns + ------- + sequence + A list of tuples + """ + self.logger.info('Querying database') + cursor = self.conn.cursor() + try: + try: + cursor.execute(query, **kwargs) + return cursor.fetchall() + except Exception: + raise + finally: + cursor.close() + except Exception as e: + # If there was an error, also close the database connection + self.close_connection() + + short_q = str(query) + if len(short_q) > 2500: + short_q = short_q[:2497] + "..." + raise SnowflakeClientError( + f'Error executing Snowflake query {short_q}: {e}', self.logger + ) from None + + def close_connection(self): + """Closes the connection""" + self.logger.info('Closing Snowflake connection') + self.conn.close() + + +class SnowflakeClientError(Exception): + def __init__(self, message='', logger=None): + self.message = message + if logger is not None: + logger.error(message) + + def __str__(self): + return self.message diff --git a/src/nypl_py_utils/functions/config_helper.py b/src/nypl_py_utils/functions/config_helper.py index 7edb5ea..425c65e 100644 --- a/src/nypl_py_utils/functions/config_helper.py +++ b/src/nypl_py_utils/functions/config_helper.py @@ -10,14 +10,13 @@ def load_env_file(run_type, file_string): """ - This method loads a YAML config file containing environment variables, - decrypts whichever are encrypted, and puts them all into os.environ as - strings. For a YAML variable containing a list of values, the list is - exported into os.environ as a json string and should be loaded as such. + This method reads a YAML config file containing environment variables and + loads them all into os.environ as strings. See _parse_yaml_dict for more. - It requires the YAML file to be split into a 'PLAINTEXT_VARIABLES' section - and an 'ENCRYPTED_VARIABLES' section. See config/sample.yaml for an example - config file. + If the config file is divided into 'PLAINTEXT_VARIABLES' and + 'ENCRYPTED_VARIABLES' sections (see config/sample.yaml for an exmaple), the + 'ENCRYPTED_VARIABLES' variables will be decrypted first. Otherwise, all + variables will be loaded as is. Parameters ---------- @@ -36,31 +35,53 @@ def load_env_file(run_type, file_string): try: env_dict = yaml.safe_load(env_stream) except yaml.YAMLError: - logger.error('Invalid YAML file: {}'.format(open_file)) raise ConfigHelperError( 'Invalid YAML file: {}'.format(open_file)) from None except FileNotFoundError: - logger.error('Could not find config file {}'.format(open_file)) raise ConfigHelperError( 'Could not find config file {}'.format(open_file)) from None if env_dict: - for key, value in env_dict.get('PLAINTEXT_VARIABLES', {}).items(): - if type(value) is list: - os.environ[key] = json.dumps(value) - else: - os.environ[key] = str(value) - - kms_client = KmsClient() - for key, value in env_dict.get('ENCRYPTED_VARIABLES', {}).items(): - if type(value) is list: - decrypted_list = [kms_client.decrypt(v) for v in value] - os.environ[key] = json.dumps(decrypted_list) - else: - os.environ[key] = kms_client.decrypt(value) - kms_client.close() + if ('PLAINTEXT_VARIABLES' in env_dict + or 'ENCRYPTED_VARIABLES' in env_dict): + _parse_yaml_dict(env_dict.get('PLAINTEXT_VARIABLES', {})) + + kms_client = KmsClient() + _parse_yaml_dict(env_dict.get( + 'ENCRYPTED_VARIABLES', {}), kms_client) + kms_client.close() + else: + _parse_yaml_dict(env_dict) + + +def _parse_yaml_dict(yaml_dict, kms_client=None): + """ + Loads YAML dict into os.environ. All values are stored as strings to match + how AWS Lambda environment variables are stored. For list variables, the + list is exported into os.environ as a json string. + + If kms_client is not empty, decrypts the variables first. + + Does not allow for sub-dictionaries. + """ + for key, value in yaml_dict.items(): + if type(value) is dict: + raise ConfigHelperError( + 'Found sub-dictionary in YAML config') from None + elif type(value) is list: + val = [kms_client.decrypt(v) + for v in value] if kms_client else value + os.environ[key] = json.dumps(val) + else: + val = kms_client.decrypt(value) if kms_client else value + os.environ[key] = str(val) class ConfigHelperError(Exception): - def __init__(self, message=None): + def __init__(self, message='', logger=None): self.message = message + if logger is not None: + logger.error(message) + + def __str__(self): + return self.message diff --git a/src/nypl_py_utils/functions/log_helper.py b/src/nypl_py_utils/functions/log_helper.py index 5d1a38b..5550ff9 100644 --- a/src/nypl_py_utils/functions/log_helper.py +++ b/src/nypl_py_utils/functions/log_helper.py @@ -12,30 +12,37 @@ } -# Configure structlog to be machine-readable first and foremost -# while still making it easy for humans to parse -# End result (without additional bindings) is JSON like this: -# { -# "logger": "module param", -# "message": "this is a test log event", -# "level": "info", -# "timestamp": "2023-11-01 18:50:47" -# } def get_structlog(module): - structlog.configure( + """ + Standard logging without additional bindings looks as follows: + { + "level": "info", + "timestamp": "2026-01-01T12:00:00.613719Z", + "logger": "module param", + "message": "this is a test log event" + } + + Note that: 1) using bind_contextvars will bind variables to *all* loggers + that have been created, and 2) you cannot use the same module name for a + structlog and for a standard logger + """ + logger = logging.getLogger(module) + logger.addHandler(logging.StreamHandler(sys.stdout)) + logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO').upper()) + logger.propagate = False # Prevents double logging + + return structlog.wrap_logger( + logger, processors=[ structlog.contextvars.merge_contextvars, structlog.processors.add_log_level, structlog.processors.TimeStamper(fmt='iso'), + structlog.stdlib.add_logger_name, structlog.processors.EventRenamer('message'), structlog.processors.JSONRenderer(), - ], - context_class=dict, - logger_factory=structlog.PrintLoggerFactory(), + ] ) - return structlog.get_logger(module) - def standard_logger(module): logger = logging.getLogger(module) @@ -58,7 +65,7 @@ def standard_logger(module): def create_log(module, json=False): - if (json): + if json: return get_structlog(module) else: return standard_logger(module) diff --git a/tests/test_config_helper.py b/tests/test_config_helper.py index feadfe5..8f1bbe8 100644 --- a/tests/test_config_helper.py +++ b/tests/test_config_helper.py @@ -2,13 +2,12 @@ import pytest from nypl_py_utils.functions.config_helper import ( - load_env_file, ConfigHelperError) + _parse_yaml_dict, + load_env_file, + ConfigHelperError +) -_TEST_VARIABLE_NAMES = [ - 'TEST_STRING', 'TEST_INT', 'TEST_LIST', 'TEST_ENCRYPTED_VARIABLE_1', - 'TEST_ENCRYPTED_VARIABLE_2', 'TEST_ENCRYPTED_LIST'] - -_TEST_CONFIG_CONTENTS = \ +_TEST_CONFIG_CONTENTS_STRUCTURED = \ '''--- PLAINTEXT_VARIABLES: TEST_STRING: string-variable @@ -24,40 +23,58 @@ - test-encryption-4 ...''' +_TEST_CONFIG_CONTENTS_UNSTRUCTURED = \ + '''--- +TEST_STRING: string-variable +TEST_INT: 1 +TEST_LIST: + - string-var + - 2 +TEST_ENCRYPTED_VARIABLE: test-encryption-1 +...''' + +_PLAINTEXT_DICT = { + 'TEST_STRING': 'string-variable', 'TEST_INT': 1, + 'TEST_LIST': ['string-var', 2]} + +_ENCRYPTED_DICT = { + 'TEST_ENCRYPTED_VARIABLE_1': 'test-encryption-1', + 'TEST_ENCRYPTED_VARIABLE_2': 'test-encryption-2', + 'TEST_ENCRYPTED_LIST': ['test-encryption-3', 'test-encryption-4']} + class TestConfigHelper: - def test_load_env_file(self, mocker): + def test_load_env_file_structured(self, mocker): + mock_parser = mocker.patch( + 'nypl_py_utils.functions.config_helper._parse_yaml_dict') + mock_file_open = mocker.patch( + 'builtins.open', mocker.mock_open( + read_data=_TEST_CONFIG_CONTENTS_STRUCTURED)) mock_kms_client = mocker.MagicMock() - mock_kms_client.decrypt.side_effect = [ - 'test-decryption-1', 'test-decryption-2', 'test-decryption-3', - 'test-decryption-4'] mocker.patch('nypl_py_utils.functions.config_helper.KmsClient', return_value=mock_kms_client) - mock_file_open = mocker.patch( - 'builtins.open', mocker.mock_open(read_data=_TEST_CONFIG_CONTENTS)) - for key in _TEST_VARIABLE_NAMES: - assert key not in os.environ load_env_file('test-env', 'test-path/{}.yaml') mock_file_open.assert_called_once_with('test-path/test-env.yaml', 'r') - mock_kms_client.decrypt.assert_has_calls([ - mocker.call('test-encryption-1'), mocker.call('test-encryption-2'), - mocker.call('test-encryption-3'), mocker.call('test-encryption-4')] - ) + mock_parser.assert_has_calls([ + mocker.call(_PLAINTEXT_DICT), + mocker.call(_ENCRYPTED_DICT, mock_kms_client)]) mock_kms_client.close.assert_called_once() - assert os.environ['TEST_STRING'] == 'string-variable' - assert os.environ['TEST_INT'] == '1' - assert os.environ['TEST_LIST'] == '["string-var", 2]' - assert os.environ['TEST_ENCRYPTED_VARIABLE_1'] == 'test-decryption-1' - assert os.environ['TEST_ENCRYPTED_VARIABLE_2'] == 'test-decryption-2' - assert os.environ['TEST_ENCRYPTED_LIST'] == \ - '["test-decryption-3", "test-decryption-4"]' - for key in _TEST_VARIABLE_NAMES: - if key in os.environ: - del os.environ[key] + def test_load_env_file_unstructured(self, mocker): + mock_parser = mocker.patch( + 'nypl_py_utils.functions.config_helper._parse_yaml_dict') + mock_file_open = mocker.patch( + 'builtins.open', mocker.mock_open( + read_data=_TEST_CONFIG_CONTENTS_UNSTRUCTURED)) + + load_env_file('test-env', 'test-path/{}.yaml') + + mock_file_open.assert_called_once_with('test-path/test-env.yaml', 'r') + mock_parser.assert_called_once_with( + _PLAINTEXT_DICT | {'TEST_ENCRYPTED_VARIABLE': 'test-encryption-1'}) def test_missing_file_error(self): with pytest.raises(ConfigHelperError): @@ -68,3 +85,37 @@ def test_bad_yaml(self, mocker): 'builtins.open', mocker.mock_open(read_data='bad yaml: [')) with pytest.raises(ConfigHelperError): load_env_file('test-env', 'test-path/{}.not_yaml') + + def test_parse_yaml_dict_raw(self, mocker): + _parse_yaml_dict(_PLAINTEXT_DICT) + + assert os.environ['TEST_STRING'] == 'string-variable' + assert os.environ['TEST_INT'] == '1' + assert os.environ['TEST_LIST'] == '["string-var", 2]' + + for key in _PLAINTEXT_DICT.keys(): + del os.environ[key] + + def test_parse_yaml_dict_encrypted(self, mocker): + mock_kms_client = mocker.MagicMock() + mock_kms_client.decrypt.side_effect = [ + 'test-decryption-1', 'test-decryption-2', 'test-decryption-3', + 'test-decryption-4'] + + _parse_yaml_dict(_ENCRYPTED_DICT, kms_client=mock_kms_client) + + mock_kms_client.decrypt.assert_has_calls([ + mocker.call('test-encryption-1'), mocker.call('test-encryption-2'), + mocker.call('test-encryption-3'), mocker.call('test-encryption-4')] + ) + assert os.environ['TEST_ENCRYPTED_VARIABLE_1'] == 'test-decryption-1' + assert os.environ['TEST_ENCRYPTED_VARIABLE_2'] == 'test-decryption-2' + assert os.environ['TEST_ENCRYPTED_LIST'] == \ + '["test-decryption-3", "test-decryption-4"]' + + for key in _ENCRYPTED_DICT.keys(): + del os.environ[key] + + def test_parse_yaml_dict_sub_dictionary(self, mocker): + with pytest.raises(ConfigHelperError): + _parse_yaml_dict({'1': {'2': '3'}}) diff --git a/tests/test_log_helper.py b/tests/test_log_helper.py index 6d07669..57d7bce 100644 --- a/tests/test_log_helper.py +++ b/tests/test_log_helper.py @@ -11,13 +11,14 @@ @freeze_time('2023-01-01 19:00:00') class TestLogHelper: def test_json_logging(self, capsys): - logger = create_log('test_log', json=True) + logger = create_log('test_structlog', json=True) logger.info('test', some="json") output = json.loads(capsys.readouterr().out) assert output.get("message") == 'test' assert output.get("some") == 'json' assert output.get('level') == 'info' assert output.get('timestamp') == '2023-01-01T19:00:00Z' + assert output.get('logger') == 'test_structlog' def test_default_logging(self, caplog): logger = create_log('test_log') diff --git a/tests/test_snowflake_client.py b/tests/test_snowflake_client.py new file mode 100644 index 0000000..377b94b --- /dev/null +++ b/tests/test_snowflake_client.py @@ -0,0 +1,56 @@ +import pytest + +from nypl_py_utils.classes.snowflake_client import ( + SnowflakeClient, SnowflakeClientError +) + + +class TestSnowflakeClient: + + @pytest.fixture + def mock_snowflake_conn(self, mocker): + return mocker.patch('snowflake.connector.connect') + + @pytest.fixture + def test_instance(self): + return SnowflakeClient('test_account', 'test_user', 'test_password') + + def test_connect(self, mock_snowflake_conn, test_instance): + test_instance.connect() + mock_snowflake_conn.assert_called_once_with( + account='test_account', + user='test_user', + password='test_password', + warehouse=None) + + def test_execute_query( + self, mock_snowflake_conn, test_instance, mocker): + test_instance.connect() + + mock_cursor = mocker.MagicMock() + mock_cursor.fetchall.return_value = [(1, 2, 3), ('a', 'b', 'c')] + test_instance.conn.cursor.return_value = mock_cursor + + assert test_instance.execute_query( + 'test query') == [(1, 2, 3), ('a', 'b', 'c')] + mock_cursor.execute.assert_called_once_with('test query') + mock_cursor.close.assert_called_once() + + def test_execute_query_with_exception( + self, mock_snowflake_conn, test_instance, mocker): + test_instance.connect() + + mock_cursor = mocker.MagicMock() + mock_cursor.execute.side_effect = Exception() + test_instance.conn.cursor.return_value = mock_cursor + + with pytest.raises(SnowflakeClientError): + test_instance.execute_query('test query') + + mock_cursor.close.assert_called() + test_instance.conn.close.assert_called_once() + + def test_close_connection(self, mock_snowflake_conn, test_instance): + test_instance.connect() + test_instance.close_connection() + test_instance.conn.close.assert_called_once() From 42e7d02827bc808a3f9a629c7f658f2a5bd6497d Mon Sep 17 00:00:00 2001 From: aaronfriedman Date: Tue, 17 Mar 2026 11:09:06 -0400 Subject: [PATCH 2/8] Update config logging --- src/nypl_py_utils/functions/config_helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nypl_py_utils/functions/config_helper.py b/src/nypl_py_utils/functions/config_helper.py index 425c65e..a77c97a 100644 --- a/src/nypl_py_utils/functions/config_helper.py +++ b/src/nypl_py_utils/functions/config_helper.py @@ -78,9 +78,9 @@ def _parse_yaml_dict(yaml_dict, kms_client=None): class ConfigHelperError(Exception): - def __init__(self, message='', logger=None): + def __init__(self, message=None): self.message = message - if logger is not None: + if message is not None: logger.error(message) def __str__(self): From bcff1809fe0d0946a9e6366da45cb9f811b6614e Mon Sep 17 00:00:00 2001 From: aaronfriedman Date: Tue, 17 Mar 2026 11:09:58 -0400 Subject: [PATCH 3/8] Remove unnecessary method override --- src/nypl_py_utils/functions/config_helper.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/nypl_py_utils/functions/config_helper.py b/src/nypl_py_utils/functions/config_helper.py index a77c97a..9a5d569 100644 --- a/src/nypl_py_utils/functions/config_helper.py +++ b/src/nypl_py_utils/functions/config_helper.py @@ -82,6 +82,3 @@ def __init__(self, message=None): self.message = message if message is not None: logger.error(message) - - def __str__(self): - return self.message From 2bf8fa2c8ba997a2ea16704518f7c55d01f68a2c Mon Sep 17 00:00:00 2001 From: aaronfriedman Date: Tue, 17 Mar 2026 11:13:51 -0400 Subject: [PATCH 4/8] Add snowflake requirements to devel requirements --- pyproject.toml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9eb8951..ef5276d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,14 +70,14 @@ secrets-manager-client = [ "boto3>=1.26.5", "botocore>=1.29.5" ] -snowflake-client = [ - "nypl_py_utils[log-helper]", - "snowflake-connector-python>=4.3.0" -] sftp-client = [ "nypl_py_utils[log-helper]", "paramiko>=3.4.1" ] +snowflake-client = [ + "nypl_py_utils[log-helper]", + "snowflake-connector-python>=4.3.0" +] config-helper = [ "nypl_py_utils[kms-client,log-helper]", "PyYAML>=6.0" @@ -97,7 +97,7 @@ research-catalog-identifier-helper = [ "requests>=2.28.1" ] development = [ - "nypl_py_utils[avro-client,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,redshift-client,s3-client,secrets-manager-client,sftp-client,config-helper,log-helper,obfuscation-helper,patron-data-helper,research-catalog-identifier-helper]", + "nypl_py_utils[avro-client,cloudlibrary-client,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,redshift-client,s3-client,secrets-manager-client,sftp-client,snowflake-client,config-helper,log-helper,obfuscation-helper,patron-data-helper,research-catalog-identifier-helper]", "flake8>=6.0.0", "freezegun>=1.2.2", "mock>=4.0.3", From 68b7644dc814881c0098cea461dcd693fba66693 Mon Sep 17 00:00:00 2001 From: aaronfriedman Date: Tue, 17 Mar 2026 11:22:58 -0400 Subject: [PATCH 5/8] Update package versions --- .github/workflows/deploy-production.yml | 4 ++-- .github/workflows/deploy-qa.yml | 4 ++-- .github/workflows/run-unit-tests.yml | 4 ++-- pyproject.toml | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/deploy-production.yml b/.github/workflows/deploy-production.yml index 11d8711..8dbba8f 100644 --- a/.github/workflows/deploy-production.yml +++ b/.github/workflows/deploy-production.yml @@ -32,10 +32,10 @@ jobs: - name: Checkout repo uses: actions/checkout@v6 - - name: Set up Python 3.9 + - name: Set up Python 3.13 uses: actions/setup-python@v6 with: - python-version: '3.9' + python-version: '3.13' cache: 'pip' cache-dependency-path: 'pyproject.toml' diff --git a/.github/workflows/deploy-qa.yml b/.github/workflows/deploy-qa.yml index 7bb6fd7..319cfe3 100644 --- a/.github/workflows/deploy-qa.yml +++ b/.github/workflows/deploy-qa.yml @@ -32,10 +32,10 @@ jobs: - name: Checkout repo uses: actions/checkout@v6 - - name: Set up Python 3.9 + - name: Set up Python 3.13 uses: actions/setup-python@v6 with: - python-version: '3.9' + python-version: '3.13' cache: 'pip' cache-dependency-path: 'pyproject.toml' diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/run-unit-tests.yml index 3d076df..2440ae0 100644 --- a/.github/workflows/run-unit-tests.yml +++ b/.github/workflows/run-unit-tests.yml @@ -17,10 +17,10 @@ jobs: - name: Checkout repo uses: actions/checkout@v6 - - name: Set up Python 3.9 + - name: Set up Python 3.13 uses: actions/setup-python@v6 with: - python-version: '3.9' + python-version: '3.13' cache: 'pip' cache-dependency-path: 'pyproject.toml' diff --git a/pyproject.toml b/pyproject.toml index ef5276d..a84c3b4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,7 +101,7 @@ development = [ "flake8>=6.0.0", "freezegun>=1.2.2", "mock>=4.0.3", - "pytest==8.0", + "pytest>=8.0.0", "pytest-mock>=3.10.0", "requests-mock>=1.10.0" ] From fb65e449fe7604e925b6a9ee1a6993a36fe3e7b6 Mon Sep 17 00:00:00 2001 From: aaronfriedman Date: Thu, 19 Mar 2026 12:38:14 -0400 Subject: [PATCH 6/8] Update log helper comment --- src/nypl_py_utils/functions/log_helper.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/nypl_py_utils/functions/log_helper.py b/src/nypl_py_utils/functions/log_helper.py index 5550ff9..f5fc16e 100644 --- a/src/nypl_py_utils/functions/log_helper.py +++ b/src/nypl_py_utils/functions/log_helper.py @@ -22,9 +22,10 @@ def get_structlog(module): "message": "this is a test log event" } - Note that: 1) using bind_contextvars will bind variables to *all* loggers - that have been created, and 2) you cannot use the same module name for a - structlog and for a standard logger + Note that: 1) you should *NOT* use the same module name for a structlog + and for a standard logger, and 2) using bind_contextvars will bind + variables to *all* loggers. To bind a context variable on one logger + without binding it to others, use `logger = logger.bind(contextvar=0)`. """ logger = logging.getLogger(module) logger.addHandler(logging.StreamHandler(sys.stdout)) From ad8b676c96d28d22156dbcdf3aa34b4437bf295b Mon Sep 17 00:00:00 2001 From: aaronfriedman Date: Thu, 19 Mar 2026 16:21:12 -0400 Subject: [PATCH 7/8] Allow connecting via private key or password --- src/nypl_py_utils/classes/snowflake_client.py | 73 ++++++++++++++----- tests/test_snowflake_client.py | 32 +++++++- 2 files changed, 81 insertions(+), 24 deletions(-) diff --git a/src/nypl_py_utils/classes/snowflake_client.py b/src/nypl_py_utils/classes/snowflake_client.py index 8cc6eb0..95f1306 100644 --- a/src/nypl_py_utils/classes/snowflake_client.py +++ b/src/nypl_py_utils/classes/snowflake_client.py @@ -6,40 +6,71 @@ class SnowflakeClient: """Client for managing connections to Snowflake""" - def __init__(self, account, user, password, warehouse=None): + def __init__(self, account, user, private_key=None, password=None): self.logger = create_log('snowflake_client') + if (password is None) == (private_key is None): + raise SnowflakeClientError( + 'Either password or private key must be set (but not both)', + self.logger + ) from None + self.conn = None self.account = account self.user = user + self.private_key = private_key self.password = password - self.warehouse = warehouse - def connect(self, **kwargs): + def connect(self, mfa_code=None, **kwargs): """ - Connects to a Snowflake database using the given credentials. If - warehouse parameter is None, uses the default warehouse for the user. + Connects to Snowflake using the given credentials. If you're connecting + locally, you should be using the password and mfa_code. If the + connection is for production code, a private_key should be set up. Parameters ---------- + mfa_code: str, optional + The six-digit MFA code. Only necessary for connecting as a human + user. kwargs: - All possible arguments (such as timeouts) can be found here: + All possible arguments (such as which warehouse to use or how + long to wait before timing out) can be found here: https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#connect """ self.logger.info('Connecting to Snowflake') - try: - self.conn = sc.connect( - account=self.account, - user=self.user, - password=self.password, - warehouse=self.warehouse, - **kwargs) - except Exception as e: - raise SnowflakeClientError( - f'Error connecting to Snowflake: {e}') from None + if self.private_key is not None: + try: + self.conn = sc.connect( + account=self.account, + user=self.user, + private_key=self.private_key, + **kwargs) + except Exception as e: + raise SnowflakeClientError( + f'Error connecting to Snowflake: {e}', self.logger + ) from None + else: + if mfa_code is None: + raise SnowflakeClientError( + 'When using a password, an MFA code must also be provided', + self.logger + ) from None + + pw = self.password + mfa_code + try: + self.conn = sc.connect( + account=self.account, + user=self.user, + password=pw, + passcode_in_password=True, + **kwargs) + except Exception as e: + raise SnowflakeClientError( + f'Error connecting to Snowflake: {e}', self.logger + ) from None def execute_query(self, query, **kwargs): """ - Executes an arbitrary query against the given database connection. + Executes an arbitrary query against the given connection. Note that: 1) All results will be fetched by default, so this method is not @@ -53,6 +84,8 @@ def execute_query(self, query, **kwargs): Parameters ---------- + query: str + The SQL query to execute kwargs: All possible arguments (such as timeouts) can be found here: https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#execute @@ -62,7 +95,7 @@ def execute_query(self, query, **kwargs): sequence A list of tuples """ - self.logger.info('Querying database') + self.logger.info('Querying Snowflake') cursor = self.conn.cursor() try: try: @@ -73,12 +106,12 @@ def execute_query(self, query, **kwargs): finally: cursor.close() except Exception as e: - # If there was an error, also close the database connection + # If there was an error, also close the connection self.close_connection() short_q = str(query) if len(short_q) > 2500: - short_q = short_q[:2497] + "..." + short_q = short_q[:2497] + '...' raise SnowflakeClientError( f'Error executing Snowflake query {short_q}: {e}', self.logger ) from None diff --git a/tests/test_snowflake_client.py b/tests/test_snowflake_client.py index 377b94b..89320e3 100644 --- a/tests/test_snowflake_client.py +++ b/tests/test_snowflake_client.py @@ -13,15 +13,39 @@ def mock_snowflake_conn(self, mocker): @pytest.fixture def test_instance(self): - return SnowflakeClient('test_account', 'test_user', 'test_password') + return SnowflakeClient( + 'test_account', 'test_user', private_key='test_pk') - def test_connect(self, mock_snowflake_conn, test_instance): + def test_init_no_pw(self): + with pytest.raises(SnowflakeClientError): + SnowflakeClient('test_account', 'test_user') + + def test_init_multiple_pw(self): + with pytest.raises(SnowflakeClientError): + SnowflakeClient('test_account', 'test_user', 'test_pk', 'test_pw') + + def test_connect_with_pk(self, mock_snowflake_conn, test_instance): test_instance.connect() mock_snowflake_conn.assert_called_once_with( account='test_account', user='test_user', - password='test_password', - warehouse=None) + private_key='test_pk') + + def test_connect_with_pw(self, mock_snowflake_conn): + test_instance = SnowflakeClient( + 'test_account', 'test_user', password='test_pw') + test_instance.connect('123456') + mock_snowflake_conn.assert_called_once_with( + account='test_account', + user='test_user', + password='test_pw123456', + passcode_in_password=True) + + def test_connect_no_mfa(self, mock_snowflake_conn): + test_instance = SnowflakeClient( + 'test_account', 'test_user', password='test_pw') + with pytest.raises(SnowflakeClientError): + test_instance.connect() def test_execute_query( self, mock_snowflake_conn, test_instance, mocker): From 8408520dcebd1a8a38fbf96ab57e6e153880ed88 Mon Sep 17 00:00:00 2001 From: aaronfriedman Date: Fri, 20 Mar 2026 09:45:48 -0400 Subject: [PATCH 8/8] Update test name --- tests/test_snowflake_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_snowflake_client.py b/tests/test_snowflake_client.py index 89320e3..35a2ec2 100644 --- a/tests/test_snowflake_client.py +++ b/tests/test_snowflake_client.py @@ -20,7 +20,7 @@ def test_init_no_pw(self): with pytest.raises(SnowflakeClientError): SnowflakeClient('test_account', 'test_user') - def test_init_multiple_pw(self): + def test_init_multiple_auth(self): with pytest.raises(SnowflakeClientError): SnowflakeClient('test_account', 'test_user', 'test_pk', 'test_pw')