Source code for bag.spreadsheet.excel

"""Easily import an Excel spreadsheet with headers on the top row."""

from zipfile import BadZipFile
from openpyxl import load_workbook  # pip install openpyxl
from openpyxl.utils.exceptions import InvalidFileException
from bag.web.exceptions import Problem

try:
    from bag.web.pyramid import _
except ImportError:
    _ = str  # and i18n is disabled.
from . import (
    get_corresponding_variable_names,
    raise_if_missing_required_headers,
    raise_if_forbidden_headers,
)


[docs]def excel_reader( stream, worksheet_name=None, required_headers=[], forbidden_headers=[], ): """Read an XLSX file (from ``stream``) and yield objects. Objects? Yes, so you can access the values conveniently. You can pass in the ``worksheet_name`` to be read. If not passed in or not present in the file, the first worksheet will be read. In addition, you may pass a sequence of *required_headers*, and if they aren't all present, KeyError is raised. Let's see an example. Suppose you are reading some Excel file and all you know is it contains the columns "E-mail", "Full Name" and "Gender", not necessarily in that order:: reader = excel_reader( open('contacts.xlsx', mode='rb'), worksheet_name='Mailing', required_headers=['E-mail', 'Full Name', 'Gender']) for o in reader: print(o.full_name, o.e_mail, o.gender) """ try: wb = load_workbook(stream, data_only=True) except (BadZipFile, InvalidFileException, KeyError) as e: raise Problem( _("That is not an XLSX file."), error_title=_("Unable to read the XLSX file"), error_debug=str(e), ) # Grab either the worksheet named "Assets", or simply the first one if worksheet_name and worksheet_name in wb: sheet = wb[worksheet_name] else: sheet = wb[wb.sheetnames[0]] this_is_the_first_row = True for row in sheet.rows: if this_is_the_first_row: # Read and validate the headers this_is_the_first_row = False headers = [cell.value for cell in row] headers = [h.strip() if isinstance(h, str) else h for h in headers] raise_if_missing_required_headers(headers, required_headers) raise_if_forbidden_headers(headers, forbidden_headers) vars = get_corresponding_variable_names(headers, required_headers) index_of_var = {var: i for i, var in enumerate(vars)} class SpreadsheetRow: """A view on a spreadsheet row. You can access data as if they were instance variables. """ __slots__ = ("__cells",) def __init__(self, cells): self.__cells = cells def __getattr__(self, attr): content = self.__cells[index_of_var[attr]].value return content else: yield SpreadsheetRow(row)