File objects represent individual files in Box. They can be used to download a file's contents, upload new versions, and perform other common file operations (move, copy, delete, etc.).
File objects represent individual files in Box. They can be used to download a file's contents, upload new versions, and perform other common file operations such as move, copy, and delete.
References to our documentation:
Create a files_init.py
file on the root of the project and execute the following code:
"""create sample content to box"""
import logging
from utils.box_client_oauth import ConfigOAuth, get_client_oauth
from workshops.files.create_samples import create_samples
logging.basicConfig(level=logging.INFO)
logging.getLogger("box_sdk_gen").setLevel(logging.CRITICAL)
conf = ConfigOAuth()
def main():
client = get_client_oauth(conf)
create_samples(client)
if __name__ == "__main__":
main()
Result:
INFO:root:Folder workshops with id: 223095001439
INFO:root:Folder files with id: 223097997181
Grab the id of the files
folder you'll need it later
Open your Box account and verify that the following content was created:
- All Files
- workshops
-files
Next, create a files.py
file on the root of the project that you will use to write your code.
Create a global constant named SAMPLE_FOLDER
and make it equal to the id of the files
folder.
"""Box Files workshop"""
import datetime
import logging
import os
from typing import List
import shutil
import json
from box_sdk_gen import BoxAPIError
from utils.box_client_oauth import ConfigOAuth, get_client_oauth
from box_sdk_gen.client import BoxClient as Client
from box_sdk_gen.schemas import File, Files
from box_sdk_gen.managers.files import CopyFileParent
from box_sdk_gen.managers.uploads import (
PreflightFileUploadCheckParent,
UploadFileAttributes,
UploadFileAttributesParentField,
)
from box_sdk_gen.managers.zip_downloads import CreateZipDownloadItems
from box_sdk_gen import ByteStream
logging.basicConfig(level=logging.INFO)
logging.getLogger("box_sdk_gen").setLevel(logging.CRITICAL)
SAMPLE_FOLDER = "223097997181"
def main():
conf = ConfigOAuth()
client = get_client_oauth(conf)
user = client.users.get_user_me()
print(f"\nHello, I'm {user.name} ({user.login}) [{user.id}]")
if __name__ == "__main__":
main()
Create a method named upload_file
that receives a client
and a file_path
and uploads the file to a specific folder.
def upload_file(client: Client, file_path: str, folder_id: str) -> File:
"""Upload a file to a Box folder"""
file_name = os.path.basename(file_path)
# upload new file
upload_arg = UploadFileAttributes(
file_name, UploadFileAttributesParentField(folder_id)
)
files: Files = client.uploads.upload_file(
upload_arg, file=open(file_path, "rb")
)
box_file = files.entries[0]
return box_file
Then upload the workshops/files/content_samples/sample_file.txt
file to the files
folder.
def main():
...
# make sure the folder exists
sample_folder = client.folders.get_folder_by_id(SAMPLE_FOLDER)
# New file upload
sample_file = upload_file(
client,
"workshops/files/content_samples/sample_file.txt",
sample_folder.id,
)
print(f"Uploaded {sample_file.name} to folder [{sample_folder.name}]")
Should result in something similar to:
Uploaded sample_file.txt to folder [files]
Files in Box have versions, you can upload a new version of a file by using the upload
method of the file object.
Modify the upload_file
method to upload a new file or a new version depending if the file exists or not.
def upload_file(client: Client, file_path: str, folder_id: str) -> File:
"""Upload a file to a Box folder"""
file_name = os.path.basename(file_path)
try:
# upload new file
upload_arg = UploadFileAttributes(
file_name, UploadFileAttributesParentField(folder_id)
)
files: Files = client.uploads.upload_file(
upload_arg, file=open(file_path, "rb")
)
box_file = files.entries[0]
except BoxAPIError as err:
if err.response_info.body.get("code", None) == "item_name_in_use":
logging.warning("File already exists, updating contents")
box_file_id = err.response_info.body["context_info"]["conflicts"][
"id"
]
try:
# upload new version
upload_arg = UploadFileAttributes(
file_name, UploadFileAttributesParentField(folder_id)
)
files: Files = client.uploads.upload_file_version(
box_file_id, upload_arg, file=open(file_path, "rb")
)
box_file = files.entries[0]
except BoxAPIError as err2:
logging.error("Failed to update %s: %s", box_file.name, err2)
raise err2
else:
raise err
return box_file
Then run your script again, it should result in something similar to:
WARNING:root:File already exists, updating contents
Uploaded sample_file.txt to folder [files]
There is another option to check if a file can be accepted by box before uploading it.
It typically checks for file name duplication and file size, in case you have exceeded your quota.
Modify the upload_file
method to use the preflight_check
method of the folder object:
def upload_file(client: Client, file_path: str, folder_id: str) -> File:
"""Upload a file to a Box folder"""
file_size = os.path.getsize(file_path)
file_name = os.path.basename(file_path)
try:
# pre-flight check
pre_flight_arg = PreflightFileUploadCheckParent(id=folder_id)
client.uploads.preflight_file_upload_check(name=file_name, size=file_size, parent=pre_flight_arg)
# upload new file
upload_arg = UploadFileAttributes(file_name, UploadFileAttributesParentField(folder_id))
files: Files = client.uploads.upload_file(upload_arg, file=open(file_path, "rb"))
box_file = files.entries[0]
except BoxAPIError as err:
if err.response_info.body.get("code", None) == "item_name_in_use":
logging.warning("File already exists, updating contents")
box_file_id = err.response_info.body["context_info"]["conflicts"]["id"]
try:
# upload new version
upload_arg = UploadFileAttributes(file_name, UploadFileAttributesParentField(folder_id))
files: Files = client.uploads.upload_file_version(
box_file_id, upload_arg, file=open(file_path, "rb")
)
box_file = files.entries[0]
except BoxAPIError as err2:
logging.error("Failed to update %s: %s", box_file.name, err2)
raise err2
else:
raise err
return box_file
Then run the script again.
Resulting in:
WARNING:root:File already exists, updating contents
Uploaded sample_file.txt to folder [files]
From a code perspective it isn't much different, so why should I use th preflight check?
The preflight check is a good way to validate if a file can be uploaded before actually uploading it.
Imagine running out of space quota after a long upload, it would be a waste of time and resources.
Also the preflight check uses an OPTIONS
http request which is faster than the POST
request used by the upload
method.
Now let's try to download the file we just uploaded.
Create a method named download_file
that receives a file
and a local_path
and downloads the file.
def download_file(client: Client, file_id: str, local_path_to_file: str):
"""Download a file from Box"""
file_stream: ByteStream = client.downloads.download_file(file_id)
with open(local_path_to_file, "wb") as file:
shutil.copyfileobj(file_stream, file)
Then download the sample_file.txt
file to the root of your project.
def main():
...
# Download file
download_file(client, sample_file.id, "./sample_file_downloaded.txt")
for local_file in os.listdir("./"):
if local_file.endswith(".txt"):
print(local_file)
Resulting in:
WARNING:root:File already exists, updating contents
Uploaded sample_file.txt to folder [files]
requirements.txt
sample_file_downloaded.txt
When you need to download multiple files and folders at once, you can use the download_zip
method of the client
object.
Create a method named download_zip
that receives a client
and a list of items
and downloads the folder as a zip file to a local_path
.
def download_zip(
client: Client,
local_path_to_zip: str,
items: List[CreateZipDownloadItems],
):
"""Download a zip file from Box"""
file_name = os.path.basename(local_path_to_zip)
zip_download = client.zip_downloads.create_zip_download(items, download_file_name=file_name)
file_stream: ByteStream = client.zip_downloads.get_zip_download_content(zip_download.download_url)
with open(local_path_to_zip, "wb") as file:
shutil.copyfileobj(file_stream, file)
Then lets zip the entire root
folder:
def main():
...
# Download zip
user_root = client.folders.get_folder_by_id(SAMPLE_FOLDER)
zip_items_arg = []
for item in client.folders.get_folder_items(user_root.id).entries:
item_arg = CreateZipDownloadItems(type=item.type, id=item.id)
zip_items_arg.append(item_arg)
print("Downloading zip")
download_zip(client, "./sample_zip_downloaded.zip", zip_items_arg)
for local_file in os.listdir("./"):
if local_file.endswith(".zip"):
print(local_file)
Resulting in:
Downloading zip
sample_zip_downloaded.zip
If you open the zip file you should see all content stored by your user.
Note that the items
list can be any combination of files
and folders
.
Now that we have some files to play with, let's explore the file object, as it has a tremendous amount of information.
The first thing is to get the file object, we can do that by using the file
method of the client
object.
Update the SAMPLE_FILE python constant with the id of the sample file inside your Box All Files/workshops/files/
folder.
In my case:
SAMPLE_FILE = "1289038683607"
Create a method that returns a file object based on the file id:
def file_to_json(client: Client, file_id: str) -> str:
"""Get a file from Box"""
file: File = client.files.get_file_by_id(file_id)
file_json = json.dumps(file.to_dict(), indent=2)
return file_json
Test this method on your main method:
def main():
...
file_json = file_to_json(client, SAMPLE_FILE)
print(file_json)
and you should get something like:
{
"id": "1289038683607",
"type": "file",
"etag": "13",
"sequence_id": "13",
"name": "sample_file.txt",
"sha1": "715a6fe7d575e27934e16e474c290048829ffc54",
"file_version": {
"id": "1480058958687",
"type": "file_version",
"sha1": "715a6fe7d575e27934e16e474c290048829ffc54"
},
"description": "This is a sample file",
"size": 42,
...
"item_status": "active"
}
There are many more properties you can explore, check the API Reference for more information.
Now that we have a file object, let's try to update it.
Create a method that updates the description of a file:
def file_update_description(
client: Client, file_id: str, description: str
) -> File:
return client.files.update_file_by_id(file_id, description=description)
Change the description of the previous file:
def main():
...
# Update a file
file = file_update_description(
client,
SAMPLE_FILE,
f"Updating the description at {datetime.datetime.now()}",
)
file = client.files.get_file_by_id(SAMPLE_FILE)
print(f"{file.id} {file.name} {file.description}")
Resulting in:
1289038683607 sample_file.txt
1289038683607 sample_file.txt Updating the description at 2023-11-02 17:21:05.377487
We need a method to list the contents of a folder.
Add this method to your files.py
file:
def folder_list_contents(client: Client, folder_id: str):
folder = client.folders.get_folder_by_id(folder_id)
items = client.folders.get_folder_items(folder_id)
print(f"\nFolder [{folder.name}] content:")
for item in items.entries:
print(f" {item.type.value} {item.id} {item.name}")
Now lets duplicate the file we just updated, and list the folder contents: We can do this directly in our main method:
def main():
...
# Copy a file
try:
file_copied = client.files.copy_file(
SAMPLE_FILE,
CopyFileParent(SAMPLE_FOLDER),
name="sample_file_copy.txt",
)
file_copied_id = file_copied.id
except BoxAPIError as err:
if err.response_info.body.get("code", None) == "item_name_in_use":
logging.warning("Duplicate File already exists")
file_copied_id = err.response_info.body["context_info"][
"conflicts"
]["id"]
else:
raise err
folder_list_contents(client, SAMPLE_FOLDER)
The try block is there to prevent the script from copying the file, if the file duplicate already exists, since we'll be running this script multiple times.
Resulting in:
Folder [files] content:
file 1289038683607 sample_file.txt
file 1351886955787 sample_file_copy.txt
Now lets move the file we just copied to the root of the box account:
def main():
...
# Move a file
try:
file_moved = client.files.update_file_by_id(
file_copied_id, parent=CopyFileParent("0")
)
file_moved_id = file_moved.id
except BoxAPIError as err:
if err.response_info.body.get("code", None) == "item_name_in_use":
logging.warning("File already exists, we'll use it")
file_moved_id = err.response_info.body["context_info"][
"conflicts"
]["id"]
else:
raise err
folder_list_contents(client, "0")
Again we need a try
block.
Resulting in:
Folder [All Files] content:
folder 216797257531 My Signed Documents
folder 221723756896 UIE Samples
folder 223095001439 workshops
file 1204688948039 Get Started with Box.pdf
file 1351892000645 sample_file_copy.txt
Finally we'll delete the file we just copied:
def main():
...
client.files.delete_file_by_id(file_moved_id)
folder_list_contents(client, "0")
Resulting in:
Folder [All Files] content:
folder 216797257531 My Signed Documents
folder 221723756896 UIE Samples
folder 223095001439 workshops
file 1204688948039 Get Started with Box.pdf
file 1351892000645 sample_file_copy.txt
There are many more methods you can try for the file object. Try them out and see what you can find:
- Create a method to rename a file.
- Create a method that returns the complete path (operating system style) of a file object
- Implement the chunked upload method
- Implement the manual upload method
File
objects are very powerful, they allow you to perform many operations on files. This workshop only scratched the surface of what you can do with them.
We'll explore more of them in the other workshops.