このサイトを参考にしつつ、pandasでxlsファイル読み込み時に起こるエラー対処法を記しておく。
import pandas as pd
df = pd.read_excel("WEOApr2019all.xls", sep='\t',
usecols=[1,2,3,4,6,48,49,50,51,52,53],
thousands=',',
na_values=['n/a', '--']
)
weo.head()
--------------------------------------------------------------------------- XLRDError Traceback (most recent call last) <ipython-input-4-974a9b6f19b2> in <module> 4 usecols=[1,2,3,4,6,48,49,50,51,52,53], 5 thousands=',', ----> 6 na_values=['n/a', '--'] 7 ) 8 ~/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs) 186 else: 187 kwargs[new_arg_name] = new_arg_value --> 188 return func(*args, **kwargs) 189 return wrapper 190 return _deprecate_kwarg ~/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs) 186 else: 187 kwargs[new_arg_name] = new_arg_value --> 188 return func(*args, **kwargs) 189 return wrapper 190 return _deprecate_kwarg ~/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/pandas/io/excel.py in read_excel(io, sheet_name, header, names, index_col, parse_cols, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, verbose, parse_dates, date_parser, thousands, comment, skip_footer, skipfooter, convert_float, mangle_dupe_cols, **kwds) 348 349 if not isinstance(io, ExcelFile): --> 350 io = ExcelFile(io, engine=engine) 351 352 return io.parse( ~/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/pandas/io/excel.py in __init__(self, io, engine) 651 self._io = _stringify_path(io) 652 --> 653 self._reader = self._engines[engine](self._io) 654 655 def __fspath__(self): ~/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/pandas/io/excel.py in __init__(self, filepath_or_buffer) 422 self.book = xlrd.open_workbook(file_contents=data) 423 elif isinstance(filepath_or_buffer, compat.string_types): --> 424 self.book = xlrd.open_workbook(filepath_or_buffer) 425 else: 426 raise ValueError('Must explicitly set engine if not passing in' ~/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/xlrd/__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows) 155 formatting_info=formatting_info, 156 on_demand=on_demand, --> 157 ragged_rows=ragged_rows, 158 ) 159 return bk ~/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/xlrd/book.py in open_workbook_xls(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows) 90 t1 = perf_counter() 91 bk.load_time_stage_1 = t1 - t0 ---> 92 biff_version = bk.getbof(XL_WORKBOOK_GLOBALS) 93 if not biff_version: 94 raise XLRDError("Can't determine file's BIFF version") ~/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/xlrd/book.py in getbof(self, rqd_stream) 1276 bof_error('Expected BOF record; met end of file') 1277 if opcode not in bofcodes: -> 1278 bof_error('Expected BOF record; found %r' % self.mem[savpos:savpos+8]) 1279 length = self.get2bytes() 1280 if length == MY_EOF: ~/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/xlrd/book.py in bof_error(msg) 1270 1271 def bof_error(msg): -> 1272 raise XLRDError('Unsupported format, or corrupt file: ' + msg) 1273 savpos = self._position 1274 opcode = self.get2bytes() XLRDError: Unsupported format, or corrupt file: Expected BOF record; found b'WEO Coun'
上のエラーを前出のサイトを参考にして以下のようにして解決する。
スポンサーリンク
xlwtでxlsを再構築¶
エラー解決には、以下のようにxlsファイルを再構築する必要がある。
# Changing the data types of all strings in the module at once
from __future__ import unicode_literals
# Used to save the file as excel workbook
# Need to install this library
from xlwt import Workbook
# Used to open to corrupt excel file
import io
filename = 'WEOApr2019all.xls'
# Opening the file using 'utf-16' encoding
file1 = io.open(filename, "r", encoding="latin1")
data = file1.readlines()
# Creating a workbook object
xldoc = Workbook()
# Adding a sheet to the workbook object
sheet = xldoc.add_sheet("Sheet1", cell_overwrite_ok=True)
# Iterating and saving the data to sheet
for i, row in enumerate(data):
# Two things are done here
# Removeing the '\n' which comes while reading the file using io.open
# Getting the values after splitting using '\t'
for j, val in enumerate(row.replace('\n', '').split('\t')):
sheet.write(i, j, val)
# Saving the file as an excel file
xldoc.save('myexcel.xls')
import pandas as pd
df = pd.read_excel("myexcel.xls", sep='\t',
usecols=[1,2,3,4,6,48,49,50,51,52,53],
thousands=',',
na_values=['n/a', '--']
)
df.head()
| ISO | WEO Subject Code | Country | Subject Descriptor | Units | 2019 | 2020 | 2021 | 2022 | 2023 | 2024 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AFG | NGDP_R | Afghanistan | “Gross domestic product, constant prices” | National currency | 533.394 | 552.063 | 574.127 | 599.933 | 629.88 | 664.452 |
| 1 | AFG | NGDP_RPCH | Afghanistan | “Gross domestic product, constant prices” | Percent change | 3 | 3.5 | 3.997 | 4.495 | 4.992 | 5.489 |
| 2 | AFG | NGDP | Afghanistan | “Gross domestic product, current prices” | National currency | “1,488.86” | “1,595.05” | “1,733.54” | “1,902.11” | “2,096.95” | “2,322.65” |
| 3 | AFG | NGDPD | Afghanistan | “Gross domestic product, current prices” | U.S. dollars | 19.99 | 20.682 | 21.928 | 23.577 | 25.45 | 27.608 |
| 4 | AFG | PPPGDP | Afghanistan | “Gross domestic product, current prices” | Purchasing power parity; international dollars | 76.158 | 80.47 | 85.426 | 91.113 | 97.643 | 105.158 |
df[df['Country']=='Japan']
| ISO | WEO Subject Code | Country | Subject Descriptor | Units | 2019 | 2020 | 2021 | 2022 | 2023 | 2024 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 3780 | JPN | NGDP_R | Japan | “Gross domestic product, constant prices” | National currency | “539,630.51” | “542,060.88” | “544,974.61” | “547,739.42” | “550,621.78” | “553,488.85” |
| 3781 | JPN | NGDP_RPCH | Japan | “Gross domestic product, constant prices” | Percent change | 0.977 | 0.45 | 0.538 | 0.507 | 0.526 | 0.521 |
| 3782 | JPN | NGDP | Japan | “Gross domestic product, current prices” | National currency | “557,821.98” | “567,275.01” | “572,865.67” | “578,293.76” | “584,636.48” | “591,508.09” |
| 3783 | JPN | NGDPD | Japan | “Gross domestic product, current prices” | U.S. dollars | “5,176.21” | “5,495.42” | “5,807.52” | “6,133.86” | “6,475.89” | “6,848.81” |
| 3784 | JPN | PPPGDP | Japan | “Gross domestic product, current prices” | Purchasing power parity; international dollars | “5,749.55” | “5,896.04” | “6,050.97” | “6,207.51” | “6,369.49” | “6,536.65” |
| 3785 | JPN | NGDP_D | Japan | “Gross domestic product, deflator” | Index | 103.371 | 104.652 | 105.118 | 105.578 | 106.178 | 106.869 |
| 3786 | JPN | NGDPRPC | Japan | “Gross domestic product per capita, constant p… | National currency | “4,276,553.37” | “4,310,588.66” | “4,350,171.98” | “4,390,220.80” | “4,433,282.25” | “4,477,706.51” |
| 3787 | JPN | NGDPRPPPPC | Japan | “Gross domestic product per capita, constant p… | Purchasing power parity; 2011 international do… | “39,795.01” | “40,111.72” | “40,480.06” | “40,852.73” | “41,253.44” | “41,666.82” |
| 3788 | JPN | NGDPPC | Japan | “Gross domestic product per capita, current pr… | National currency | “4,420,720.16” | “4,511,097.01” | “4,572,807.91” | “4,635,118.80” | “4,707,148.53” | “4,785,280.96” |
| 3789 | JPN | NGDPDPC | Japan | “Gross domestic product per capita, current pr… | U.S. dollars | “41,021.25” | “43,700.82” | “46,357.59” | “49,163.89” | “52,140.07” | “55,406.63” |
| 3790 | JPN | PPPPC | Japan | “Gross domestic product per capita, current pr… | Purchasing power parity; international dollars | “45,564.98” | “46,886.66” | “48,300.86” | “49,754.20” | “51,283.39” | “52,881.29” |
| 3791 | JPN | NGAP_NPGDP | Japan | Output gap in percent of potential GDP | Percent of potential GDP | -0.161 | -0.186 | -0.222 | -0.208 | -0.157 | 0.078 |
| 3792 | JPN | PPPSH | Japan | Gross domestic product based on purchasing-pow… | Percent | 4.048 | 3.926 | 3.811 | 3.699 | 3.589 | 3.483 |
| 3793 | JPN | PPPEX | Japan | Implied PPP conversion rate | National currency per current international do… | 97.02 | 96.213 | 94.673 | 93.16 | 91.787 | 90.491 |
| 3794 | JPN | NID_NGDP | Japan | Total investment | Percent of GDP | 24.889 | 25.047 | 24.964 | 25.036 | 25.122 | 25.203 |
| 3795 | JPN | NGSD_NGDP | Japan | Gross national savings | Percent of GDP | 28.374 | 28.623 | 28.452 | 28.506 | 28.582 | 28.677 |
| 3796 | JPN | PCPI | Japan | “Inflation, average consumer prices” | Index | 102.414 | 103.995 | 105.17 | 106.438 | 107.855 | 109.373 |
| 3797 | JPN | PCPIPCH | Japan | “Inflation, average consumer prices” | Percent change | 1.067 | 1.543 | 1.13 | 1.205 | 1.332 | 1.407 |
| 3798 | JPN | PCPIE | Japan | “Inflation, end of period consumer prices” | Index | 103.348 | 104.385 | 105.643 | 106.917 | 108.421 | 109.947 |
| 3799 | JPN | PCPIEPCH | Japan | “Inflation, end of period consumer prices” | Percent change | 1.708 | 1.004 | 1.205 | 1.205 | 1.407 | 1.407 |
| 3800 | JPN | FLIBOR6 | Japan | Six-month London interbank offered rate (LIBOR) | Percent | -0.02 | -0.031 | NaN | NaN | NaN | NaN |
| 3801 | JPN | TM_RPCH | Japan | Volume of imports of goods and services | Percent change | 2.235 | 1.678 | 2.854 | 2.454 | 2.336 | 2.268 |
| 3802 | JPN | TMG_RPCH | Japan | Volume of Imports of goods | Percent change | 2.235 | 1.678 | 2.854 | 2.454 | 2.336 | 2.268 |
| 3803 | JPN | TX_RPCH | Japan | Volume of exports of goods and services | Percent change | 2.138 | 2.51 | 3.046 | 2.763 | 2.683 | 2.641 |
| 3804 | JPN | TXG_RPCH | Japan | Volume of exports of goods | Percent change | 2.138 | 2.51 | 3.046 | 2.763 | 2.683 | 2.641 |
| 3805 | JPN | LUR | Japan | Unemployment rate | Percent of total labor force | 2.433 | 2.433 | 2.433 | 2.433 | 2.433 | 2.433 |
| 3806 | JPN | LE | Japan | Employment | Persons | 67.418 | 67.638 | NaN | NaN | NaN | NaN |
| 3807 | JPN | LP | Japan | Population | Persons | 126.184 | 125.751 | 125.277 | 124.764 | 124.202 | 123.61 |
| 3808 | JPN | GGR | Japan | General government revenue | National currency | “189,764.31” | “196,401.40” | “198,513.37” | “200,426.24” | “202,598.58” | “204,987.15” |
| 3809 | JPN | GGR_NGDP | Japan | General government revenue | Percent of GDP | 34.019 | 34.622 | 34.653 | 34.658 | 34.654 | 34.655 |
| 3810 | JPN | GGX | Japan | General government total expenditure | National currency | “205,629.37” | “208,500.82” | “209,215.80” | “210,754.02” | “213,591.36” | “217,555.52” |
| 3811 | JPN | GGX_NGDP | Japan | General government total expenditure | Percent of GDP | 36.863 | 36.755 | 36.521 | 36.444 | 36.534 | 36.78 |
| 3812 | JPN | GGXCNL | Japan | General government net lending/borrowing | National currency | “-15,865.06” | “-12,099.42” | “-10,702.43” | “-10,327.78” | “-10,992.78” | “-12,568.38” |
| 3813 | JPN | GGXCNL_NGDP | Japan | General government net lending/borrowing | Percent of GDP | -2.844 | -2.133 | -1.868 | -1.786 | -1.88 | -2.125 |
| 3814 | JPN | GGSB | Japan | General government structural balance | National currency | “-15,694.77” | “-11,892.91” | “-10,453.54” | “-10,091.91” | “-10,812.41” | “-12,658.82” |
| 3815 | JPN | GGSB_NPGDP | Japan | General government structural balance | Percent of potential GDP | -2.809 | -2.093 | -1.821 | -1.741 | -1.847 | -2.142 |
| 3816 | JPN | GGXONLB | Japan | General government primary net lending/borrowing | National currency | “-15,335.57” | “-12,022.21” | “-10,925.58” | “-10,738.19” | “-11,377.72” | “-12,409.77” |
| 3817 | JPN | GGXONLB_NGDP | Japan | General government primary net lending/borrowing | Percent of GDP | -2.749 | -2.119 | -1.907 | -1.857 | -1.946 | -2.098 |
| 3818 | JPN | GGXWDN | Japan | General government net debt | National currency | “856,902.98” | “869,002.40” | “879,704.83” | “890,032.61” | “901,025.39” | “913,593.77” |
| 3819 | JPN | GGXWDN_NGDP | Japan | General government net debt | Percent of GDP | 153.616 | 153.189 | 153.562 | 153.907 | 154.117 | 154.452 |
| 3820 | JPN | GGXWDG | Japan | General government gross debt | National currency | “1,325,057.19” | “1,344,686.31” | “1,360,076.75” | “1,374,956.20” | “1,391,267.62” | “1,409,598.14” |
| 3821 | JPN | GGXWDG_NGDP | Japan | General government gross debt | Percent of GDP | 237.541 | 237.043 | 237.416 | 237.761 | 237.971 | 238.306 |
| 3822 | JPN | NGDP_FY | Japan | “Gross domestic product corresponding to fisca… | National currency | “557,821.98” | “567,275.01” | “572,865.67” | “578,293.76” | “584,636.48” | “591,508.09” |
| 3823 | JPN | BCA | Japan | Current account balance | U.S. dollars | 180.401 | 196.557 | 202.565 | 212.827 | 224.054 | 237.95 |
| 3824 | JPN | BCA_NGDPD | Japan | Current account balance | Percent of GDP | 3.485 | 3.577 | 3.488 | 3.47 | 3.46 | 3.474 |
スポンサーリンク
