Closed
Description
```python
from pandas import *
df = read_csv(open('mydata.csv.gz', 'r'), compression='gzip', index_col=False)
df = df[(df.land != 1)]
print df
#
# Int64Index: 977579 entries, 0 to 1100398
# Data columns (total 89 columns):
#
# sample 100,000 rows, only use some of the columns
rows = np.random.choice(df.index.values, 100000)
keep_cols = ['sq_ft', 'zip', 'year', 'bathrooms', 'bedrooms', 'floors']
sampled_df = df.ix[rows, keep_cols]
sampled_df.loc[sampled_df.year.notnull()].year  # works fine
sampled_df.loc[sampled_df.year.notnull(),['year']]  # MemoryError
```

```
---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
 in ()
      1 #sampled_df.loc[sampled_df['year'].notnull(),['year']]
      2 sampled_df.loc[sampled_df.year.notnull()].year
----> 3 sampled_df.loc[sampled_df.year.notnull(),['year']]

/home/jprior/Scratch/VENV1/lib/python2.7/site-packages/pandas/core/indexing.pyc in __getitem__(self, key)
    695     def __getitem__(self, key):
    696         if type(key) is tuple:
--> 697             return self._getitem_tuple(key)
    698         else:
    699             return self._getitem_axis(key, axis=0)

/home/jprior/Scratch/VENV1/lib/python2.7/site-packages/pandas/core/indexing.pyc in _getitem_tuple(self, tup)
    260         # ugly hack for GH #836
    261         if self._multi_take_opportunity(tup):
--> 262             return self._multi_take(tup)
    263
    264         # no shortcut needed

/home/jprior/Scratch/VENV1/lib/python2.7/site-packages/pandas/core/indexing.pyc in _multi_take(self, tup)
    300             index = self._convert_for_reindex(tup[0], axis=0)
    301             columns = self._convert_for_reindex(tup[1], axis=1)
--> 302             return self.obj.reindex(index=index, columns=columns)
    303         elif isinstance(self.obj, Panel4D):
    304             conv = [self._convert_for_reindex(x, axis=i)

/home/jprior/Scratch/VENV1/lib/python2.7/site-packages/pandas/core/frame.pyc in reindex(self, index, columns, method, level, fill_value, limit, copy, takeable)
   2623         if index is not None:
   2624             frame = frame._reindex_index(index, method, copy, level,
-> 2625                                          fill_value, limit, takeable)
   2626
   2627         return frame

/home/jprior/Scratch/VENV1/lib/python2.7/site-packages/pandas/core/frame.pyc in _reindex_index(self, new_index, method, copy, level, fill_value, limit, takeable)
   2703         new_index, indexer = self.index.reindex(new_index, method, level,
   2704                                                 limit=limit, copy_if_needed=True,
-> 2705                                                 takeable=takeable)
   2706         return self._reindex_with_indexers(new_index, indexer, None, None,
   2707                                            copy, fill_value)

/home/jprior/Scratch/VENV1/lib/python2.7/site-packages/pandas/core/index.pyc in reindex(self, target, method, level, limit, copy_if_needed, takeable)
    930                 raise ValueError("cannot reindex a non-unique index "
    931                                  "with a method or limit")
--> 932             indexer, _ = self.get_indexer_non_unique(target)
    933
    934         return target, indexer

/home/jprior/Scratch/VENV1/lib/python2.7/site-packages/pandas/core/index.pyc in get_indexer_non_unique(self, target, **kwargs)
    843             tgt_values = target.values
    844
--> 845         indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
    846         return Index(indexer), missing
    847

/home/jprior/Scratch/VENV1/lib/python2.7/site-packages/pandas/index.so in pandas.index.IndexEngine.get_indexer_non_unique (pandas/index.c:5049)()

MemoryError:
```
Sorry, I haven't figured out how to reproduce the error with a toy example.