Source code for bw2io.extractors.excel
import os
from openpyxl import load_workbook
[docs]def get_cell_value_handle_error(cell):
"""
Retrieve the value of a given cell and handle error types.
Parameters
----------
cell : openpyxl.cell.cell.Cell
The cell to get the value from.
Returns
--------
object
The value of the cell, or None if the cell has an error type.
Examples
---------
>>> from openpyxl import Workbook
>>> wb = Workbook()
>>> ws = wb.active
>>> ws["A1"] = "hello"
>>> assert get_cell_value_handle_error(ws["A1"]) == "hello"
>>> ws["B1"] = "=1/0"
>>> assert get_cell_value_handle_error(ws["B1"]) == None
"""
if cell.data_type == "e":
# Error type
return None
else:
return cell.value
[docs]class ExcelExtractor(object):
"""
A class used to extract data from an Excel file.
Parameters
----------
object : type
The parent object for the ExcelExtractor class.
Returns
-------
object
An instance of the class.
See Also
--------
openpyxl.load_workbook : Load a workbook from a file.
Notes
-----
This class requires the openpyxl package to be installed.
Raises
------
AssertionError
If the file at 'filepath' does not exist.
Parameters
----------
filepath : str
The path to the Excel file.
Returns
-------
list
A list of tuples containing the name of each sheet in the file and the data from each sheet.
Examples
--------
>>> extractor = ExcelExtractor()
>>> filepath = 'example.xlsx'
>>> data = extractor.extract(filepath)
"""
@classmethod
[docs] def extract(cls, filepath):
"""
Extract data from an Excel file.
Parameters
----------
filepath : str
The path to the Excel file.
Returns
-------
list
A list of tuples containing the name of each sheet in the file and the data from each sheet.
Raises
------
AssertionError
If the file at 'filepath' does not exist.
"""
assert os.path.exists(filepath), "Can't file file at path {}".format(filepath)
wb = load_workbook(filepath, data_only=True, read_only=True)
data = [(name, cls.extract_sheet(wb, name)) for name in wb.sheetnames]
wb.close()
return data
@classmethod
[docs] def extract_sheet(cls, wb, name, strip=True):
"""
Extract data from a single sheet in an Excel workbook.
Parameters
----------
wb : openpyxl.workbook.Workbook
The workbook object with the sheet to extract data from.
name : str
The name of the sheet to extract data from.
strip : bool, optional
If True, strip whitespace from cell values, by default True.
Returns
-------
list
A list of lists containing the data from the sheet.
Notes
-----
This method is called by the 'extract' method to extract the data from each sheet in the workbook.
Examples
--------
>>> wb = openpyxl.load_workbook('example.xlsx')
>>> name = 'Sheet1'
>>> data = ExcelExtractor.extract_sheet(wb, sheetname)
"""
ws = wb[name]
_ = lambda x: x.strip() if (strip and hasattr(x, "strip")) else x
return [
[_(get_cell_value_handle_error(cell)) for cell in row] for row in ws.rows
]