Skip to content

Commit

Permalink
Merge pull request #35098 from dimagi/dm/excel-files
Browse files Browse the repository at this point in the history
Improve Excel workbook file resource handling
  • Loading branch information
millerdev authored Sep 5, 2024
2 parents 741addb + bacca53 commit ac14686
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 9 deletions.
12 changes: 12 additions & 0 deletions corehq/util/tests/test_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,15 @@ def test_get_single_worksheet_missing(self):
formula_filepath = self.get_path('formula_sheet', 'xlsx')
with self.assertRaises(WorkbookJSONError):
get_single_worksheet(formula_filepath, title='NotASheet')

def test_get_workbook_with_file_path_closes_file(self):
filepath = self.get_path('formula_sheet', 'xlsx')
assert isinstance(filepath, str), filepath
workbook = WorkbookJSONReader(filepath)

# assert workbook zip file has been closed
self.assertIsNone(workbook.wb._archive.fp) # hack: deep reach

# should be able to get rows that were unconsumed before file closure
results = list(workbook.get_worksheet())
self.assertEqual(results, [{'name': 'ben', 'formula': 2}])
29 changes: 20 additions & 9 deletions corehq/util/workbook_json/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def set_field_value(cls, obj, field, value):
# try boolean
try:
field, nothing = field.split('?')
assert(nothing.strip() == '')
assert nothing.strip() == ''
except Exception:
pass
else:
Expand Down Expand Up @@ -245,14 +245,25 @@ def __init__(self, file_or_filename):
self.worksheets_by_title = {}
self.worksheets = []

for worksheet in self.wb.worksheets:
try:
ws = WorksheetJSONReader(worksheet, title=worksheet.title)
except IndexError:
raise JSONReaderError('This Excel file has unrecognised formatting. Please try downloading '
'the lookup table first, and then add data to it.')
self.worksheets_by_title[worksheet.title] = ws
self.worksheets.append(ws)
try:
for worksheet in self.wb.worksheets:
try:
ws = WorksheetJSONReader(worksheet, title=worksheet.title)
except IndexError:
raise JSONReaderError('This Excel file has unrecognised formatting. Please try downloading '
'the lookup table first, and then add data to it.')
self.worksheets_by_title[worksheet.title] = ws
self.worksheets.append(ws)
finally:
is_file_like = hasattr(file_or_filename, 'read')
if not is_file_like:
# This is safe because all sheets have been read by
# now. The underlying file is still open because sheets
# hold a reference to it, and ZipFile will not close it
# until all open file references have been closed.
# Additionally, openpyxl will not close the file if some
# rows are not consumed (possibly a bug in openpyxl).
self.wb._archive.close()

def get_worksheet(self, title=None, index=None):
if title is not None and index is not None:
Expand Down

0 comments on commit ac14686

Please sign in to comment.