Hi
We've just started using Databricks, so I'm a little naive about the file system, especially regarding Unity Catalog.
The issue is that we're creating a logger and want to write the files based on a queue handler/listener pattern. The pattern seems relatively standard, and the class seems to work with no errors. However, whilst stdout seems OK and the target file appears to resolve, the file cannot be read:
logging class
import sys
import queue
import logging
from logging.handlers import QueueHandler, QueueListener
from delta_engine.logging.logging_auto_queue_listener import AutoQueueLogListener
from databricks.sdk import WorkspaceClient
# ---------------------------------------------------------------------------------
#
# ---------------------------------------------------------------------------------
class AutoQueueLogListener(QueueListener):
    """
    A QueueListener that begins consuming records the moment it is built,
    so callers never need to remember to invoke start() themselves.

    # See: https://docs.python.org/3/library/logging.handlers.html#queuelistener
    """

    def __init__(self, queue, *handlers):
        # Let the stdlib listener wire up the queue and handlers, then
        # immediately spin up the background consumer thread.
        super().__init__(queue, *handlers)
        self.start()
# ---------------------------------------------------------------------------------
#
# ---------------------------------------------------------------------------------
class TestLogger(logging.Logger):
"""
Test logging class based on queue to write not only stream but also file
"""
def __init__(self, name: str = "test_logger", filepath: str = None , level: int = logging.DEBUG):
super().__init__(name, level)
formatter = logging.Formatter('%(asctime)s.%(msecs)03d|%(relativeCreated)06d|%(threadName)s|%(funcName)20s()|%(lineno)s|%(levelname)s|%(message)s')
self.log_queue = queue.SimpleQueue()
self.queue_handler = QueueHandler(self.log_queue)
super().addHandler(self.queue_handler)
# No filepath implies no file handling required
if filepath is not None:
filepath = f'{filepath}{name}.log'
else:
filepath = f'{name}.log'
print("Adding file handler for " + filepath)
self.file_handler = logging.FileHandler(filename=filepath, mode='w')
self.file_handler.setFormatter(formatter)
self.console_handler = logging.StreamHandler()
self.console_handler.setFormatter(formatter)
self.listener = AutoQueueLogListener(self.log_queue, self.console_handler, self.file_handler)
self.extra_info = None
if __name__ == "__main__":

    def foo():
        """
        Exercise exception logging: log a message, then deliberately raise
        ZeroDivisionError so the except branch below is reached.
        """
        # Fixed typo in the log message ("exceptionm" -> "exception").
        logger.info("In foo - throwing an exception")
        exception = 100 / 0  # Throw a divide by zero exception

    print("Executing __main__")
    logger = TestLogger(
        name="cdc_logger",
        filepath="/Volumes/develop_1449185084299107/topaz_cdc_feed/cdc_logging/",
        level=logging.DEBUG)
    try:
        # One message per level; NOTE foo() is commented out, so the
        # except branch below is currently unreachable.
        logger.debug('This is debug mode')
        logger.info('This is info mode')
        logger.warning('This is warning mode')
        logger.error('This is error mode')
        logger.critical('This is critical mode')
        #foo()
    except Exception as e:
        logger.exception(f'This is exception mode: {e.with_traceback(sys.exc_info()[2])}')  # Note the message here includes full stack trace
    finally:
        logger.info('This is finally mode')
    print("Completed")
Observations
- For some reason, unless the mode for the handler is "w", an exception (`OSError: [Errno 95] Operation not supported`)
- The volume allows full privileges to all users
- File is NOT shown in the browser
- After running the terminal shows the file but will not 'cat' it:
Welcome to the Databricks Web Terminal! Please note:
* You can use the Databricks CLI to interact with your workspace.
Run 'databricks --help' to get started.
/Workspace/Users/s.baker@ucas.ac.uk$ cd/Volumes/develop_1449185084299107/topaz_cdc_feed/cdc_logging//
/Volumes/develop_1449185084299107/topaz_cdc_feed/cdc_logging$ ls -latr
total 10
-rwxrwxrwx 1 nobody nogroup 14 Jul 14 10:18 cat.txt
-rwxrwxrwx 1 nobody nogroup 42 Jul 14 13:18 test.txt
-rwxrwxrwx 1 1003 nogroup 591 Jul 14 13:44 cdc_logger.log
drwxrwxrwx 2 nobody nogroup 4096 Jul 14 14:01 ..
drwxrwxrwx 2 nobody nogroup 4096 Jul 14 14:01 .
/Volumes/develop_1449185084299107/topaz_cdc_feed/cdc_logging$
/Volumes/develop_1449185084299107/topaz_cdc_feed/cdc_logging$ cat cdc_logger.log
cat: cdc_logger.log: No such file or directory
/Volumes/develop_1449185084299107/topaz_cdc_feed/cdc_logging$
- Attempting to access the file through a Notebook still refuses to acknowledge the file:
Notebook code:
# Read using os after listing files
import os

# Directory on the Unity Catalog volume under investigation.
file_path = "/Volumes/develop_1449185084299107/topaz_cdc_feed/cdc_logging"
text_to_write = "This is a sample text written to the file."

# Write a known-good control file first, then try to read back every
# file the directory listing reports — this demonstrates that the log
# file is listed but cannot be opened.
with open(f'{file_path}/test.txt', 'w') as file:
    file.write(text_to_write)
for file in os.listdir(file_path):
    print(f"filename:{file}")
    location = f"{file_path}/{file}"
    try:
        with open(location, "r") as content:
            print(f"------------------------------------------------------------------------")
            print(f"Reading: {location}")
            for line in content:
                print(f">>\t{line}")
    except Exception as e:
        # Any failure to open/read is reported rather than aborting the loop.
        print(f"Error reading file: {location}")
        print(e)
    finally:
        print("")
output:
filename:cat.txt
------------------------------------------------------------------------
Reading: /Volumes/develop_1449185084299107/topaz_cdc_feed/cdc_logging/cat.txt
>> fred was here
filename:cdc_logger.log
------------------------------------------------------------------------
Reading: /Volumes/develop_1449185084299107/topaz_cdc_feed/cdc_logging/cdc_logger.log
Error reading file: /Volumes/develop_1449185084299107/topaz_cdc_feed/cdc_logging/cdc_logger.log
[Errno 2] No such file or directory
filename:test.txt
------------------------------------------------------------------------
Reading: /Volumes/develop_1449185084299107/topaz_cdc_feed/cdc_logging/test.txt
>> This is a sample text written to the file.
Final note:
If the filepath to the logger is left blank, so it writes to the running directory - all seems ok. The file can be read through the Databricks browser window.
I'm pretty sure I'm missing something pretty fundamental.
Any help would be gratefully received
is thrown