I want to sync large files (>100 GB) from my local system to a DBX Volume. I see two options, each with a different problem. Do you have suggestions?
Option 1: has to read the whole file into memory first -> memory issues with files this size
import io

# workspace is an authenticated databricks.sdk.WorkspaceClient
with open(local_file_path, 'rb') as file:
    file_bytes = file.read()  # loads the entire file into memory
    binary_data = io.BytesIO(file_bytes)
    response = workspace.files.upload(dbx_file_path, binary_data, overwrite=True)
    if response:
        print(response)
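A variant of Option 1 I considered is passing the open file object straight to files.upload instead of reading it into a BytesIO first (the SDK type hints suggest any binary file-like object is accepted, but I'm not sure whether it actually streams or still buffers the whole file):

# Sketch only: assumes files.upload accepts a binary file-like object
# and streams it, rather than reading it fully into memory first.
with open(local_file_path, 'rb') as file:
    workspace.files.upload(dbx_file_path, file, overwrite=True)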
Option 2: the DBFS API only accepts 1 MB blocks on the Databricks side -> very slow for files this size
import base64
import os

from tqdm import tqdm

# Open a DBFS streaming handle, then append base64-encoded blocks
create_response = workspace.dbfs.create(dbx_file_path, overwrite=True)
handle = create_response.handle
file_size = os.path.getsize(local_file_path)

with open(local_file_path, 'rb') as file:
    with tqdm(total=file_size, unit='B', unit_scale=True, desc="Uploading") as pbar:
        while chunk := file.read(1024 * 1024):  # 1 MB, the maximum block size the DBFS API accepts
            encoded_chunk = base64.b64encode(chunk).decode('utf-8')
            response = workspace.dbfs.add_block(handle, encoded_chunk)
            if response:
                print("Add block response:", response)
            pbar.update(len(chunk))

close_response = workspace.dbfs.close(handle)
print("Close response:", close_response)