diff --git a/kobocat-template/templates/show.html b/kobocat-template/templates/show.html index 8b073a9ae..3d30fe4e8 100644 --- a/kobocat-template/templates/show.html +++ b/kobocat-template/templates/show.html @@ -253,7 +253,6 @@

ZIP KML - Excel Analyser Advanced Downloads (beta) diff --git a/onadata/apps/viewer/models/export.py b/onadata/apps/viewer/models/export.py index 4f1628777..29f66e1a2 100644 --- a/onadata/apps/viewer/models/export.py +++ b/onadata/apps/viewer/models/export.py @@ -34,7 +34,6 @@ def __str__(self): CSV_ZIP_EXPORT = 'csv_zip' SAV_ZIP_EXPORT = 'sav_zip' SAV_EXPORT = 'sav' - ANALYSER_EXPORT = 'analyser' EXPORT_MIMES = { 'xls': 'vnd.ms-excel', @@ -55,8 +54,7 @@ def __str__(self): (KML_EXPORT, 'kml'), (CSV_ZIP_EXPORT, 'CSV ZIP'), (SAV_ZIP_EXPORT, 'SAV ZIP'), - (SAV_EXPORT, 'SAV'), - (ANALYSER_EXPORT, 'Analyser') + (SAV_EXPORT, 'SAV') ] EXPORT_TYPE_DICT = dict(export_type for export_type in EXPORT_TYPES) diff --git a/onadata/apps/viewer/tasks.py b/onadata/apps/viewer/tasks.py index 7da6ae9e3..8a5512301 100644 --- a/onadata/apps/viewer/tasks.py +++ b/onadata/apps/viewer/tasks.py @@ -70,8 +70,6 @@ def _create_export(xform, export_type): # start async export result = create_kml_export.apply_async( (), arguments, countdown=10) - elif export_type == Export.ANALYSER_EXPORT: - result = create_analyser_export.apply_async((), arguments, countdown=10) else: raise Export.ExportTypeError if result: @@ -126,45 +124,6 @@ def create_xls_export(username, id_string, export_id, query=None, return gen_export.id -@task() -def create_analyser_export(username, id_string, export_id, query=None): - # Mostly a serving of copy pasta based on the above `create_xls_export()`. Enjoy. - - # we re-query the db instead of passing model objects according to - # http://docs.celeryproject.org/en/latest/userguide/tasks.html#state - ext = 'xlsx' - - try: - export = Export.objects.get(id=export_id) - except Export.DoesNotExist: - # no export for this ID return None. - return None - - # though export is not available when for has 0 submissions, we - # catch this since it potentially stops celery - try: - gen_export = generate_export(Export.ANALYSER_EXPORT, ext, username, id_string, export_id, - query, group_delimiter='/', split_select_multiples=True, - binary_select_multiples=False) - except (Exception, NoRecordsFoundError) as e: - export.internal_status = Export.FAILED - export.save() - # mail admins - details = { - 'export_id': export_id, - 'username': username, - 'id_string': id_string - } - report_exception("Analyser Export Exception: Export ID - " - "%(export_id)s, /%(username)s/%(id_string)s" - % details, e, sys.exc_info()) - # Raise for now to let celery know we failed - # - doesnt seem to break celery` - raise - else: - return gen_export.id - - @task() def create_csv_export(username, id_string, export_id, query=None, group_delimiter='/', split_select_multiples=True, diff --git a/onadata/libs/utils/analyser_export.py b/onadata/libs/utils/analyser_export.py deleted file mode 100644 index 654e7411a..000000000 --- a/onadata/libs/utils/analyser_export.py +++ /dev/null @@ -1,233 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals, print_function, division, absolute_import -''' -Created on May 14, 2015 - -@author: esmail -''' - - -import tempfile -import io -import shutil -import copy -import os -from zipfile import ( - ZipFile, - ZIP_DEFLATED, -) - -import lxml.etree as etree -import xlrd -import openpyxl -from openpyxl.writer.excel import save_virtual_workbook - - -NAMESPACES= {'xmlns': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'} - - -def get_worksheet_indices(workbook_file): - workbook_etree= etree.parse(workbook_file) - worksheet_indices= dict() - for sheet_element in workbook_etree.findall('.//xmlns:sheet', NAMESPACES): - sheet_name= sheet_element.attrib['name'] - sheet_index= sheet_element.attrib['sheetId'] - worksheet_indices[sheet_name]= sheet_index - return worksheet_indices - - -# Adapted from http://stackoverflow.com/a/9919409/1877326 -def xls_as_xlsx(xls_file): - # first open using xlrd - source_workbook = xlrd.open_workbook(file_contents=xls_file.read()) - - # Create the destination workbook, deleting and auto-generated worksheets. - destination_workbook = openpyxl.Workbook() # TODO: Would like to figure out how to make appends work with a "write_only" workbook. - for wksht_nm in destination_workbook.get_sheet_names(): - worksheet= destination_workbook.get_sheet_by_name(wksht_nm) - destination_workbook.remove_sheet(worksheet) - - worksheet_names= ['survey', 'choices'] - for wksht_nm in source_workbook.sheet_names(): - source_worksheet= source_workbook.sheet_by_name(wksht_nm) - destination_worksheet= destination_workbook.create_sheet(title=wksht_nm) - - for row in xrange(source_worksheet.nrows): - destination_worksheet.append( [source_worksheet.cell_value(row, col) for col in xrange(source_worksheet.ncols)] ) - - return io.BytesIO(save_virtual_workbook(destination_workbook)) - -def copy_cells(source_worksheet_file, destination_worksheet_file_path, new_string_indices): - destination_worksheet= etree.parse(destination_worksheet_file_path) - - namespace_prefix= '{' + NAMESPACES['xmlns'] + '}' - for _, source_dimension in etree.iterparse(source_worksheet_file, tag=namespace_prefix+'dimension'): - destination_dimension= destination_worksheet.xpath('.//xmlns:dimension', namespaces=NAMESPACES)[0] - destination_dimension.attrib['ref']= source_dimension.attrib['ref'] - source_dimension.getroottree().getroot().clear() # FIXME: Necessary? Enough? - break - else: - raise ValueError('No "dimension" element found in source data file.') - source_worksheet_file.seek(0) - - # Copy the data over row by row, iterating through the (potentially large) - # source instead of loading at once. - destination_sheetData_element= destination_worksheet.xpath('//xmlns:sheetData', namespaces=NAMESPACES)[0] - for _, source_row in etree.iterparse(source_worksheet_file, tag=namespace_prefix+'row'): - # Create a new row element. - destination_row= etree.Element(namespace_prefix+'row', NAMESPACES) - destination_row.attrib.update(source_row.attrib) - - # Copy over the cells one by one. - for source_c in source_row: - destination_c = copy.deepcopy(source_c) - # Remap references to shared strings. - if destination_c.attrib['t'] == 's': - values = destination_c.xpath('.//xmlns:v', namespaces=NAMESPACES) - if values: - destination_v = values[0] - destination_v.text = unicode(new_string_indices[int(destination_v.text)]) - destination_row.append(destination_c) - - # Clean up elements of the source worksheet to save memory. - while source_row.getprevious(): - source_row.getprevious().clean() - print('Previous row cleaned') - source_row.clear() - - # Append in the copied row. - destination_sheetData_element.append(destination_row) - - # Save the changes. - with open(destination_worksheet_file_path, 'w') as destination_worksheet_file: - destination_worksheet.write(destination_worksheet_file, encoding='UTF-8') - -def splice_shared_strings(source_file, destination_etree, new_string_indices): - source_etree= etree.parse(source_file) - original_string_map= dict() - destination_root= destination_etree.getroot() - for i, t_element in enumerate(destination_root.iterfind('.//xmlns:t', NAMESPACES)): - original_string_map[t_element.text]= i - - uniqueCount= len(original_string_map) - for i, t_element in enumerate(source_etree.iterfind('//xmlns:t', NAMESPACES)): - text= t_element.text - if text in original_string_map: - new_string_indices[i]= original_string_map[text] - else: - # Copy the "si" element over to `destination_etree`. - si_element= t_element.getparent() - destination_root.append(si_element) - new_string_indices[i]= uniqueCount - uniqueCount+= 1 - - # Update the "uniqueCount" attribute. - destination_root.attrib['uniqueCount']= unicode(uniqueCount) - - -def insert_xlsform_worksheets(analyser_shared_strings, analyser_survey_worksheet_file_path, analyser_choices_worksheet_file_path, survey_file_xls): - # Create an XLSX copy of the survey file. - survey_file_xlsx= xls_as_xlsx(survey_file_xls) - - with ZipFile(survey_file_xlsx) as survey_zipfile: - # Splice over the shared strings. - with survey_zipfile.open('xl/sharedStrings.xml') as shared_strings_file: - new_string_indices= dict() - splice_shared_strings(shared_strings_file, analyser_shared_strings, new_string_indices) - # Identify the desired worksheet indices. - with survey_zipfile.open('xl/workbook.xml') as workbook_file: - worksheet_indices= get_worksheet_indices(workbook_file) - # Copy over the "survey" sheet. - survey_sheet_path= 'xl/worksheets/sheet' + worksheet_indices['survey'] + '.xml' - with survey_zipfile.open(survey_sheet_path) as source_survey_worksheet_file: - # Create a tempfile that supports seeking. - with tempfile.TemporaryFile('w+') as source_survey_worksheet_tempfile: - source_survey_worksheet_tempfile.write(source_survey_worksheet_file.read()) - source_survey_worksheet_tempfile.seek(0) - copy_cells(source_survey_worksheet_tempfile, analyser_survey_worksheet_file_path, new_string_indices) - # Copy over the "choices" sheet if any. - if 'choices' in worksheet_indices: - choices_sheet_path= 'xl/worksheets/sheet' + worksheet_indices['choices'] + '.xml' - with survey_zipfile.open(choices_sheet_path) as source_choices_worksheet_file: - # Create a tempfile that supports seeking. - with tempfile.TemporaryFile('w+') as source_choices_worksheet_tempfile: - source_choices_worksheet_tempfile.write(source_choices_worksheet_file.read()) - source_choices_worksheet_tempfile.seek(0) - copy_cells(source_choices_worksheet_tempfile, analyser_choices_worksheet_file_path, new_string_indices) - - -def insert_data_sheet(analyser_shared_strings, analyser_data_sheet_file_path, data_file_xlsx): - with ZipFile(data_file_xlsx) as data_zipfile: - with data_zipfile.open('xl/sharedStrings.xml') as shared_strings_file: - new_string_indices= dict() - splice_shared_strings(shared_strings_file, analyser_shared_strings, new_string_indices) - # FIXME: Not safe for multi-sheet data such for repeating groups. - with data_zipfile.open('xl/worksheets/sheet1.xml') as data_worksheet_file: - with tempfile.TemporaryFile('w+') as data_worksheet_tempfile: - data_worksheet_tempfile.write(data_worksheet_file.read()) - data_worksheet_tempfile.seek(0) - copy_cells(data_worksheet_tempfile, analyser_data_sheet_file_path, new_string_indices) - - -def generate_analyser(survey_file_xls, data_file_xlsx, analyser_file_xlsx=None): - ''' - Generate a KoBo Excel Data Analyser pre-populated with survey contents and - data. - - NOTE: Due to the Excel formulas used in the analyser template being incompatible with the - `openpyxl` package, this function relies on low-level manipulation of XLSX features such - as `sharedStrings.xml`, and consequently requires both the data to be provided in an XLSX - file. - - :param survey_file_xls: An XLS-formatted XLSForm containing the "survey" and - "choices" (if present) sheet to be inserted into the analyser. Expected - to be a file-like object that supports the :py:func:`read()` method. - :param data_file_xlsx: An XLSX file containing the data to be inserted into - the analyser's "uncleaned_data" sheet. Per the interface of - :py:class:`zipfile.ZipFile`, this can be either a local path string or a - file-like object. - :param analyser_file_xlsx: The analyser template file, pre-configured with - sheet names and reserved empty sheets. Per the interface of - :py:class:zipfile.ZipFile, this can be either a local path string or a - file-like object. - :rtype: io.BytesIO - ''' - # Use the default analyser template if none was provided. - if not analyser_file_xlsx: - this_scripts_directory= os.path.dirname(__file__) - analyser_filename= 'KoBoToolbox_Excel_Data_Analyser_1.23_TEMPLATE.xlsx' - analyser_file_xlsx= os.path.join(this_scripts_directory, analyser_filename) - - # Create a directory for temporary storage. - temp_directory_path= tempfile.mkdtemp(prefix='analyser_temp_') - try: - # Unzip the analyser in preparation for customization. - with ZipFile(analyser_file_xlsx) as analyser_zipfile: - analyser_zipfile.extractall(temp_directory_path) - zip_contents= [f.filename for f in analyser_zipfile.filelist] - - analyser_shared_strings= etree.parse(temp_directory_path + '/xl/sharedStrings.xml') - - # Copy over the XLSForm's "survey" and "choices" (if present) worksheets. - survey_worksheet_file_path= temp_directory_path + '/xl/worksheets/sheet9.xml' - choices_worksheet_file_path= temp_directory_path + '/xl/worksheets/sheet10.xml' - insert_xlsform_worksheets(analyser_shared_strings, survey_worksheet_file_path, choices_worksheet_file_path, survey_file_xls) - - # Copy the data over to the analyser. - analyser_data_sheet_file_path= temp_directory_path + '/xl/worksheets/sheet8.xml' - insert_data_sheet(analyser_shared_strings, analyser_data_sheet_file_path, data_file_xlsx) - - # Finalize the changes for export. - with open(temp_directory_path + '/xl/sharedStrings.xml', 'wb') as analyser_shared_strings_file: - analyser_shared_strings.write(analyser_shared_strings_file, encoding='UTF-8') - xlsx_out= io.BytesIO() - with ZipFile(xlsx_out, 'w', compression=ZIP_DEFLATED) as zipfile_out: - for file_path in zip_contents: - zipfile_out.write(temp_directory_path + '/' + file_path, file_path) - xlsx_out.seek(0) - - finally: - # Clean the temporary directory. - shutil.rmtree(temp_directory_path) - - return xlsx_out diff --git a/onadata/libs/utils/export_tools.py b/onadata/libs/utils/export_tools.py index d098f7b84..32367ab56 100644 --- a/onadata/libs/utils/export_tools.py +++ b/onadata/libs/utils/export_tools.py @@ -44,7 +44,6 @@ TAGS, NOTES ) -from .analyser_export import generate_analyser # this is Mongo Collection where we will store the parsed submissions @@ -579,29 +578,6 @@ def write_row(data, work_sheet, fields, work_sheet_titles): wb.save(filename=path) - def to_analyser_export(self, path, data, username, xform_id_string, *args): - # Get the XLSForm. - xform = XForm.objects.get(user__username__iexact=username, id_string__exact=xform_id_string) - xlsform_io= xform.to_xlsform() - - if xlsform_io is None: - raise RuntimeError('XLSForm `{}` for user `{}` could not be retrieved from storage.'. - format(xform_id_string, username)) - - prefix = slugify('analyser_data__{}__{}'.format(username, xform_id_string)) - with tempfile.NamedTemporaryFile('w+b', prefix=prefix, suffix='.xlsx',) as xls_data: - # Generate a new XLS export to work from. - self.to_xls_export(xls_data.name, data) - xls_data.file.seek(0) - - # Generate the analyser file. - analyser_io= generate_analyser(xlsform_io, xls_data) - - # Write the generated analyser file to the specified path - # ...which itself points to a temp file. - with open(path, 'wb') as analyser_file: - analyser_file.write(analyser_io.read()) - def to_flat_csv_export( self, path, data, username, id_string, filter_query): # TODO resolve circular import @@ -720,7 +696,6 @@ def generate_export(export_type, extension, username, id_string, Export.CSV_EXPORT: 'to_flat_csv_export', Export.CSV_ZIP_EXPORT: 'to_zipped_csv', Export.SAV_ZIP_EXPORT: 'to_zipped_sav', - Export.ANALYSER_EXPORT: 'to_analyser_export' } xform = XForm.objects.get( @@ -747,9 +722,6 @@ def generate_export(export_type, extension, username, id_string, # generate filename basename = "%s_%s" % ( id_string, datetime.now().strftime("%Y_%m_%d_%H_%M_%S")) - if export_type == Export.ANALYSER_EXPORT: - # Analyser exports should be distinguished by more than just their file extension. - basename= '{}_ANALYSER_{}'.format(id_string, datetime.now().strftime("%Y_%m_%d_%H_%M_%S")) filename = basename + "." + extension # check filename is unique