diff --git a/kobocat-template/templates/show.html b/kobocat-template/templates/show.html
index 8b073a9ae..3d30fe4e8 100644
--- a/kobocat-template/templates/show.html
+++ b/kobocat-template/templates/show.html
@@ -253,7 +253,6 @@
ZIP
KML
- Excel Analyser
Advanced Downloads (beta)
diff --git a/onadata/apps/viewer/models/export.py b/onadata/apps/viewer/models/export.py
index 4f1628777..29f66e1a2 100644
--- a/onadata/apps/viewer/models/export.py
+++ b/onadata/apps/viewer/models/export.py
@@ -34,7 +34,6 @@ def __str__(self):
CSV_ZIP_EXPORT = 'csv_zip'
SAV_ZIP_EXPORT = 'sav_zip'
SAV_EXPORT = 'sav'
- ANALYSER_EXPORT = 'analyser'
EXPORT_MIMES = {
'xls': 'vnd.ms-excel',
@@ -55,8 +54,7 @@ def __str__(self):
(KML_EXPORT, 'kml'),
(CSV_ZIP_EXPORT, 'CSV ZIP'),
(SAV_ZIP_EXPORT, 'SAV ZIP'),
- (SAV_EXPORT, 'SAV'),
- (ANALYSER_EXPORT, 'Analyser')
+ (SAV_EXPORT, 'SAV')
]
EXPORT_TYPE_DICT = dict(export_type for export_type in EXPORT_TYPES)
diff --git a/onadata/apps/viewer/tasks.py b/onadata/apps/viewer/tasks.py
index 7da6ae9e3..8a5512301 100644
--- a/onadata/apps/viewer/tasks.py
+++ b/onadata/apps/viewer/tasks.py
@@ -70,8 +70,6 @@ def _create_export(xform, export_type):
# start async export
result = create_kml_export.apply_async(
(), arguments, countdown=10)
- elif export_type == Export.ANALYSER_EXPORT:
- result = create_analyser_export.apply_async((), arguments, countdown=10)
else:
raise Export.ExportTypeError
if result:
@@ -126,45 +124,6 @@ def create_xls_export(username, id_string, export_id, query=None,
return gen_export.id
-@task()
-def create_analyser_export(username, id_string, export_id, query=None):
- # Mostly a serving of copy pasta based on the above `create_xls_export()`. Enjoy.
-
- # we re-query the db instead of passing model objects according to
- # http://docs.celeryproject.org/en/latest/userguide/tasks.html#state
- ext = 'xlsx'
-
- try:
- export = Export.objects.get(id=export_id)
- except Export.DoesNotExist:
- # no export for this ID return None.
- return None
-
- # though export is not available when for has 0 submissions, we
- # catch this since it potentially stops celery
- try:
- gen_export = generate_export(Export.ANALYSER_EXPORT, ext, username, id_string, export_id,
- query, group_delimiter='/', split_select_multiples=True,
- binary_select_multiples=False)
- except (Exception, NoRecordsFoundError) as e:
- export.internal_status = Export.FAILED
- export.save()
- # mail admins
- details = {
- 'export_id': export_id,
- 'username': username,
- 'id_string': id_string
- }
- report_exception("Analyser Export Exception: Export ID - "
- "%(export_id)s, /%(username)s/%(id_string)s"
- % details, e, sys.exc_info())
- # Raise for now to let celery know we failed
- # - doesnt seem to break celery`
- raise
- else:
- return gen_export.id
-
-
@task()
def create_csv_export(username, id_string, export_id, query=None,
group_delimiter='/', split_select_multiples=True,
diff --git a/onadata/libs/utils/analyser_export.py b/onadata/libs/utils/analyser_export.py
deleted file mode 100644
index 654e7411a..000000000
--- a/onadata/libs/utils/analyser_export.py
+++ /dev/null
@@ -1,233 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals, print_function, division, absolute_import
-'''
-Created on May 14, 2015
-
-@author: esmail
-'''
-
-
-import tempfile
-import io
-import shutil
-import copy
-import os
-from zipfile import (
- ZipFile,
- ZIP_DEFLATED,
-)
-
-import lxml.etree as etree
-import xlrd
-import openpyxl
-from openpyxl.writer.excel import save_virtual_workbook
-
-
-NAMESPACES= {'xmlns': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'}
-
-
-def get_worksheet_indices(workbook_file):
- workbook_etree= etree.parse(workbook_file)
- worksheet_indices= dict()
- for sheet_element in workbook_etree.findall('.//xmlns:sheet', NAMESPACES):
- sheet_name= sheet_element.attrib['name']
- sheet_index= sheet_element.attrib['sheetId']
- worksheet_indices[sheet_name]= sheet_index
- return worksheet_indices
-
-
-# Adapted from http://stackoverflow.com/a/9919409/1877326
-def xls_as_xlsx(xls_file):
- # first open using xlrd
- source_workbook = xlrd.open_workbook(file_contents=xls_file.read())
-
- # Create the destination workbook, deleting and auto-generated worksheets.
- destination_workbook = openpyxl.Workbook() # TODO: Would like to figure out how to make appends work with a "write_only" workbook.
- for wksht_nm in destination_workbook.get_sheet_names():
- worksheet= destination_workbook.get_sheet_by_name(wksht_nm)
- destination_workbook.remove_sheet(worksheet)
-
- worksheet_names= ['survey', 'choices']
- for wksht_nm in source_workbook.sheet_names():
- source_worksheet= source_workbook.sheet_by_name(wksht_nm)
- destination_worksheet= destination_workbook.create_sheet(title=wksht_nm)
-
- for row in xrange(source_worksheet.nrows):
- destination_worksheet.append( [source_worksheet.cell_value(row, col) for col in xrange(source_worksheet.ncols)] )
-
- return io.BytesIO(save_virtual_workbook(destination_workbook))
-
-def copy_cells(source_worksheet_file, destination_worksheet_file_path, new_string_indices):
- destination_worksheet= etree.parse(destination_worksheet_file_path)
-
- namespace_prefix= '{' + NAMESPACES['xmlns'] + '}'
- for _, source_dimension in etree.iterparse(source_worksheet_file, tag=namespace_prefix+'dimension'):
- destination_dimension= destination_worksheet.xpath('.//xmlns:dimension', namespaces=NAMESPACES)[0]
- destination_dimension.attrib['ref']= source_dimension.attrib['ref']
- source_dimension.getroottree().getroot().clear() # FIXME: Necessary? Enough?
- break
- else:
- raise ValueError('No "dimension" element found in source data file.')
- source_worksheet_file.seek(0)
-
- # Copy the data over row by row, iterating through the (potentially large)
- # source instead of loading at once.
- destination_sheetData_element= destination_worksheet.xpath('//xmlns:sheetData', namespaces=NAMESPACES)[0]
- for _, source_row in etree.iterparse(source_worksheet_file, tag=namespace_prefix+'row'):
- # Create a new row element.
- destination_row= etree.Element(namespace_prefix+'row', NAMESPACES)
- destination_row.attrib.update(source_row.attrib)
-
- # Copy over the cells one by one.
- for source_c in source_row:
- destination_c = copy.deepcopy(source_c)
- # Remap references to shared strings.
- if destination_c.attrib['t'] == 's':
- values = destination_c.xpath('.//xmlns:v', namespaces=NAMESPACES)
- if values:
- destination_v = values[0]
- destination_v.text = unicode(new_string_indices[int(destination_v.text)])
- destination_row.append(destination_c)
-
- # Clean up elements of the source worksheet to save memory.
- while source_row.getprevious():
- source_row.getprevious().clean()
- print('Previous row cleaned')
- source_row.clear()
-
- # Append in the copied row.
- destination_sheetData_element.append(destination_row)
-
- # Save the changes.
- with open(destination_worksheet_file_path, 'w') as destination_worksheet_file:
- destination_worksheet.write(destination_worksheet_file, encoding='UTF-8')
-
-def splice_shared_strings(source_file, destination_etree, new_string_indices):
- source_etree= etree.parse(source_file)
- original_string_map= dict()
- destination_root= destination_etree.getroot()
- for i, t_element in enumerate(destination_root.iterfind('.//xmlns:t', NAMESPACES)):
- original_string_map[t_element.text]= i
-
- uniqueCount= len(original_string_map)
- for i, t_element in enumerate(source_etree.iterfind('//xmlns:t', NAMESPACES)):
- text= t_element.text
- if text in original_string_map:
- new_string_indices[i]= original_string_map[text]
- else:
- # Copy the "si" element over to `destination_etree`.
- si_element= t_element.getparent()
- destination_root.append(si_element)
- new_string_indices[i]= uniqueCount
- uniqueCount+= 1
-
- # Update the "uniqueCount" attribute.
- destination_root.attrib['uniqueCount']= unicode(uniqueCount)
-
-
-def insert_xlsform_worksheets(analyser_shared_strings, analyser_survey_worksheet_file_path, analyser_choices_worksheet_file_path, survey_file_xls):
- # Create an XLSX copy of the survey file.
- survey_file_xlsx= xls_as_xlsx(survey_file_xls)
-
- with ZipFile(survey_file_xlsx) as survey_zipfile:
- # Splice over the shared strings.
- with survey_zipfile.open('xl/sharedStrings.xml') as shared_strings_file:
- new_string_indices= dict()
- splice_shared_strings(shared_strings_file, analyser_shared_strings, new_string_indices)
- # Identify the desired worksheet indices.
- with survey_zipfile.open('xl/workbook.xml') as workbook_file:
- worksheet_indices= get_worksheet_indices(workbook_file)
- # Copy over the "survey" sheet.
- survey_sheet_path= 'xl/worksheets/sheet' + worksheet_indices['survey'] + '.xml'
- with survey_zipfile.open(survey_sheet_path) as source_survey_worksheet_file:
- # Create a tempfile that supports seeking.
- with tempfile.TemporaryFile('w+') as source_survey_worksheet_tempfile:
- source_survey_worksheet_tempfile.write(source_survey_worksheet_file.read())
- source_survey_worksheet_tempfile.seek(0)
- copy_cells(source_survey_worksheet_tempfile, analyser_survey_worksheet_file_path, new_string_indices)
- # Copy over the "choices" sheet if any.
- if 'choices' in worksheet_indices:
- choices_sheet_path= 'xl/worksheets/sheet' + worksheet_indices['choices'] + '.xml'
- with survey_zipfile.open(choices_sheet_path) as source_choices_worksheet_file:
- # Create a tempfile that supports seeking.
- with tempfile.TemporaryFile('w+') as source_choices_worksheet_tempfile:
- source_choices_worksheet_tempfile.write(source_choices_worksheet_file.read())
- source_choices_worksheet_tempfile.seek(0)
- copy_cells(source_choices_worksheet_tempfile, analyser_choices_worksheet_file_path, new_string_indices)
-
-
-def insert_data_sheet(analyser_shared_strings, analyser_data_sheet_file_path, data_file_xlsx):
- with ZipFile(data_file_xlsx) as data_zipfile:
- with data_zipfile.open('xl/sharedStrings.xml') as shared_strings_file:
- new_string_indices= dict()
- splice_shared_strings(shared_strings_file, analyser_shared_strings, new_string_indices)
- # FIXME: Not safe for multi-sheet data such for repeating groups.
- with data_zipfile.open('xl/worksheets/sheet1.xml') as data_worksheet_file:
- with tempfile.TemporaryFile('w+') as data_worksheet_tempfile:
- data_worksheet_tempfile.write(data_worksheet_file.read())
- data_worksheet_tempfile.seek(0)
- copy_cells(data_worksheet_tempfile, analyser_data_sheet_file_path, new_string_indices)
-
-
-def generate_analyser(survey_file_xls, data_file_xlsx, analyser_file_xlsx=None):
- '''
- Generate a KoBo Excel Data Analyser pre-populated with survey contents and
- data.
-
- NOTE: Due to the Excel formulas used in the analyser template being incompatible with the
- `openpyxl` package, this function relies on low-level manipulation of XLSX features such
- as `sharedStrings.xml`, and consequently requires both the data to be provided in an XLSX
- file.
-
- :param survey_file_xls: An XLS-formatted XLSForm containing the "survey" and
- "choices" (if present) sheet to be inserted into the analyser. Expected
- to be a file-like object that supports the :py:func:`read()` method.
- :param data_file_xlsx: An XLSX file containing the data to be inserted into
- the analyser's "uncleaned_data" sheet. Per the interface of
- :py:class:`zipfile.ZipFile`, this can be either a local path string or a
- file-like object.
- :param analyser_file_xlsx: The analyser template file, pre-configured with
- sheet names and reserved empty sheets. Per the interface of
- :py:class:zipfile.ZipFile, this can be either a local path string or a
- file-like object.
- :rtype: io.BytesIO
- '''
- # Use the default analyser template if none was provided.
- if not analyser_file_xlsx:
- this_scripts_directory= os.path.dirname(__file__)
- analyser_filename= 'KoBoToolbox_Excel_Data_Analyser_1.23_TEMPLATE.xlsx'
- analyser_file_xlsx= os.path.join(this_scripts_directory, analyser_filename)
-
- # Create a directory for temporary storage.
- temp_directory_path= tempfile.mkdtemp(prefix='analyser_temp_')
- try:
- # Unzip the analyser in preparation for customization.
- with ZipFile(analyser_file_xlsx) as analyser_zipfile:
- analyser_zipfile.extractall(temp_directory_path)
- zip_contents= [f.filename for f in analyser_zipfile.filelist]
-
- analyser_shared_strings= etree.parse(temp_directory_path + '/xl/sharedStrings.xml')
-
- # Copy over the XLSForm's "survey" and "choices" (if present) worksheets.
- survey_worksheet_file_path= temp_directory_path + '/xl/worksheets/sheet9.xml'
- choices_worksheet_file_path= temp_directory_path + '/xl/worksheets/sheet10.xml'
- insert_xlsform_worksheets(analyser_shared_strings, survey_worksheet_file_path, choices_worksheet_file_path, survey_file_xls)
-
- # Copy the data over to the analyser.
- analyser_data_sheet_file_path= temp_directory_path + '/xl/worksheets/sheet8.xml'
- insert_data_sheet(analyser_shared_strings, analyser_data_sheet_file_path, data_file_xlsx)
-
- # Finalize the changes for export.
- with open(temp_directory_path + '/xl/sharedStrings.xml', 'wb') as analyser_shared_strings_file:
- analyser_shared_strings.write(analyser_shared_strings_file, encoding='UTF-8')
- xlsx_out= io.BytesIO()
- with ZipFile(xlsx_out, 'w', compression=ZIP_DEFLATED) as zipfile_out:
- for file_path in zip_contents:
- zipfile_out.write(temp_directory_path + '/' + file_path, file_path)
- xlsx_out.seek(0)
-
- finally:
- # Clean the temporary directory.
- shutil.rmtree(temp_directory_path)
-
- return xlsx_out
diff --git a/onadata/libs/utils/export_tools.py b/onadata/libs/utils/export_tools.py
index d098f7b84..32367ab56 100644
--- a/onadata/libs/utils/export_tools.py
+++ b/onadata/libs/utils/export_tools.py
@@ -44,7 +44,6 @@
TAGS,
NOTES
)
-from .analyser_export import generate_analyser
# this is Mongo Collection where we will store the parsed submissions
@@ -579,29 +578,6 @@ def write_row(data, work_sheet, fields, work_sheet_titles):
wb.save(filename=path)
- def to_analyser_export(self, path, data, username, xform_id_string, *args):
- # Get the XLSForm.
- xform = XForm.objects.get(user__username__iexact=username, id_string__exact=xform_id_string)
- xlsform_io= xform.to_xlsform()
-
- if xlsform_io is None:
- raise RuntimeError('XLSForm `{}` for user `{}` could not be retrieved from storage.'.
- format(xform_id_string, username))
-
- prefix = slugify('analyser_data__{}__{}'.format(username, xform_id_string))
- with tempfile.NamedTemporaryFile('w+b', prefix=prefix, suffix='.xlsx',) as xls_data:
- # Generate a new XLS export to work from.
- self.to_xls_export(xls_data.name, data)
- xls_data.file.seek(0)
-
- # Generate the analyser file.
- analyser_io= generate_analyser(xlsform_io, xls_data)
-
- # Write the generated analyser file to the specified path
- # ...which itself points to a temp file.
- with open(path, 'wb') as analyser_file:
- analyser_file.write(analyser_io.read())
-
def to_flat_csv_export(
self, path, data, username, id_string, filter_query):
# TODO resolve circular import
@@ -720,7 +696,6 @@ def generate_export(export_type, extension, username, id_string,
Export.CSV_EXPORT: 'to_flat_csv_export',
Export.CSV_ZIP_EXPORT: 'to_zipped_csv',
Export.SAV_ZIP_EXPORT: 'to_zipped_sav',
- Export.ANALYSER_EXPORT: 'to_analyser_export'
}
xform = XForm.objects.get(
@@ -747,9 +722,6 @@ def generate_export(export_type, extension, username, id_string,
# generate filename
basename = "%s_%s" % (
id_string, datetime.now().strftime("%Y_%m_%d_%H_%M_%S"))
- if export_type == Export.ANALYSER_EXPORT:
- # Analyser exports should be distinguished by more than just their file extension.
- basename= '{}_ANALYSER_{}'.format(id_string, datetime.now().strftime("%Y_%m_%d_%H_%M_%S"))
filename = basename + "." + extension
# check filename is unique