Skip to content

MemoryError when using .loc or .ix #4280

Closed
@BAM-BAM-BAM

Description

@BAM-BAM-BAM
from pandas import *
df = read_csv(open('mydata.csv.gz', 'r'), compression='gzip', index_col=False)
df = df[(df.land != 1)]
print df
# <class 'pandas.core.frame.DataFrame'>
# Int64Index: 977579 entries, 0 to 1100398
# Data columns (total 89 columns):
# 

# sample 100,000 rows, only use some of the columns
rows = np.random.choice(df.index.values, 100000)
keep_cols = ['sq_ft', 'zip', 'year', 'bathrooms', 'bedrooms', 'floors']
sampled_df = df.ix[rows, keep_cols]

sampled_df.loc[sampled_df.year.notnull()].year        # works fine
sampled_df.loc[sampled_df.year.notnull(),['year']]    # MemoryError

---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input> in <module>()
      1 #sampled_df.loc[sampled_df['year'].notnull(),['year']]
      2 sampled_df.loc[sampled_df.year.notnull()].year
----> 3 sampled_df.loc[sampled_df.year.notnull(),['year']]

/home/jprior/Scratch/VENV1/lib/python2.7/site-packages/pandas/core/indexing.pyc in __getitem__(self, key)
    695     def __getitem__(self, key):
    696         if type(key) is tuple:
--> 697             return self._getitem_tuple(key)
    698         else:
    699             return self._getitem_axis(key, axis=0)

/home/jprior/Scratch/VENV1/lib/python2.7/site-packages/pandas/core/indexing.pyc in _getitem_tuple(self, tup)
    260         # ugly hack for GH #836
    261         if self._multi_take_opportunity(tup):
--> 262             return self._multi_take(tup)
    263 
    264         # no shortcut needed

/home/jprior/Scratch/VENV1/lib/python2.7/site-packages/pandas/core/indexing.pyc in _multi_take(self, tup)
    300             index = self._convert_for_reindex(tup[0], axis=0)
    301             columns = self._convert_for_reindex(tup[1], axis=1)
--> 302             return self.obj.reindex(index=index, columns=columns)
    303         elif isinstance(self.obj, Panel4D):
    304             conv = [self._convert_for_reindex(x, axis=i)

/home/jprior/Scratch/VENV1/lib/python2.7/site-packages/pandas/core/frame.pyc in reindex(self, index, columns, method, level, fill_value, limit, copy, takeable)
   2623         if index is not None:
   2624             frame = frame._reindex_index(index, method, copy, level,
-> 2625                                          fill_value, limit, takeable)
   2626 
   2627         return frame

/home/jprior/Scratch/VENV1/lib/python2.7/site-packages/pandas/core/frame.pyc in _reindex_index(self, new_index, method, copy, level, fill_value, limit, takeable)
   2703         new_index, indexer = self.index.reindex(new_index, method, level,
   2704                                                 limit=limit, copy_if_needed=True,
-> 2705                                                 takeable=takeable)
   2706         return self._reindex_with_indexers(new_index, indexer, None, None,
   2707                                            copy, fill_value)

/home/jprior/Scratch/VENV1/lib/python2.7/site-packages/pandas/core/index.pyc in reindex(self, target, method, level, limit, copy_if_needed, takeable)
    930                         raise ValueError("cannot reindex a non-unique index "
    931                                          "with a method or limit")
--> 932                     indexer, _ = self.get_indexer_non_unique(target)
    933 
    934         return target, indexer

/home/jprior/Scratch/VENV1/lib/python2.7/site-packages/pandas/core/index.pyc in get_indexer_non_unique(self, target, **kwargs)
    843             tgt_values = target.values
    844 
--> 845         indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
    846         return Index(indexer), missing
    847 

/home/jprior/Scratch/VENV1/lib/python2.7/site-packages/pandas/index.so in pandas.index.IndexEngine.get_indexer_non_unique (pandas/index.c:5049)()

MemoryError: 

Sorry, I haven't figured out how to reproduce the error with a toy example.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug; Indexing (Related to indexing on series/frames, not to indexes themselves)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions

        翻译: