# Reference for pandas boolean operations: https://pandas.pydata.org/pandas-docs/stable/user_guide/boolean.html | Reference for logical XOR (^): https://stackoverflow.com/questions/432842/how-do-you-get-the-logical-xor-of-two-variables-in-python
# and, not, or -> boolean operators; <, ==, >, != -> comparison operators; &, |, ~ -> bitwise operators
# Pandas, Python Boolean Operation
#
# Reference for pandas:
#   https://pandas.pydata.org/pandas-docs/stable/user_guide/boolean.html
# Reference for logical XOR (^):
#   https://stackoverflow.com/questions/432842/how-do-you-get-the-logical-xor-of-two-variables-in-python
#
#   and, not, or    -> boolean operators
#   <, ==, >, !=    -> comparison operators
#   &, |, ~         -> bitwise operators
#
# Bare expressions below are notebook-style "display" cells; the trailing
# comment shows the value each one evaluates to.

import numpy as np
import pandas as pd

True and False  # False
False or True  # True
True ^ False  # True
True ^ True  # False
False ^ False  # False


def logical_xor(a, b):
    """Return True when exactly one of *a* and *b* is truthy.

    Both arguments are reduced with ``bool()`` first, so arbitrary objects
    are compared by truthiness rather than by value.
    """
    return bool(a) != bool(b)


logical_xor(True, False), True ^ False  # (True, True)
logical_xor(False, False), False ^ False  # (False, False)

not True, not False  # (False, True)
(not True) or (True)  # True

# Pitfall: ``~`` is the *bitwise* complement, and True behaves as the integer
# 1, so ``~True`` is -2 (truthy), not False.  Use ``not`` for plain Python
# booleans.  (Python 3.12+ also emits a DeprecationWarning for ``~`` on bool.)
[int(not True), int(~True)]  # [0, -2]
[True == False, (not True) == False, ~True == False]  # [False, True, False]
[True | False, True or False, True and False, True & False, not True, ~True]
# -> [True, True, False, False, False, -2]

# --- Row-wise boolean operations on a DataFrame -----------------------------
input_columns = ["a", "b", "c"]
truth_table = {
    "a": [True, True, False, False],
    "b": [True, True, True, True],
    "c": [False, True, False, True],
}
df = pd.DataFrame(truth_table)
df.astype("bool")
df

# Multiplying by 1 turns the booleans into 0/1 integers so that arithmetic
# reductions can emulate the logical ones.
df = df * 1
df

# AND: the product of 0/1 values is 1 only when every input is 1.
df["and&_operation"] = df[input_columns].product(axis=1, skipna=True)
df.astype(bool)

# OR: the sum of 0/1 values is >= 1 when at least one input is 1.  Only the
# input columns are summed so the derived AND column does not leak in.
df["or|_operation"] = df[input_columns].sum(axis=1) >= 1
df = df.astype(bool)
df
df["or|_operation"]

# The built-in reductions give the same answers directly.
df["all_operation"] = df[input_columns].all(axis=1)
df["any_operation"] = df[input_columns].any(axis=1)
df

df[df["all_operation"]]  # rows where a, b and c are all True
df[~df["all_operation"]]  # ``~`` inverts a boolean Series element-wise

# --- add empty rows ----------------------------------------------------------
# Three ways to add an all-NaN row.  ``DataFrame.append`` was removed in
# pandas 2.0, so the first variant uses ``reindex`` instead.
df = df.reindex(range(len(df) + 1))
df = pd.concat(
    [df, pd.DataFrame([[np.nan] * df.shape[1]], columns=df.columns)],
    ignore_index=True,
)
# Setting a missing label enlarges the frame.  NOTE: ``len(df) + 1`` skips
# the label ``len(df)``, so the index ends up with a gap.
df.loc[len(df) + 1, df.columns[1]] = np.nan
df

# Flag the rows in which every column is null.
df_null_mask = pd.isnull(df)
df_null_mask["null"] = df_null_mask.all(axis=1)
df_null_mask["null"]
df_null_mask

df_null = df_null_mask[df_null_mask["null"]]
null_index = df_null.index
null_index

not_null_index = df.index.difference(null_index)
df.loc[not_null_index].astype("bool")

# --- filter empty row --------------------------------------------------------
df_null_mask = pd.isnull(df)
df_null_mask["null"] = df_null_mask.all(axis=1)
df_null = df_null_mask[df_null_mask["null"]]
# Keep only the rows that are not entirely null.  ``df`` and ``df_null`` come
# from the preceding cells.
null_index = df_null.index
not_null_index = df.index.difference(null_index)
df.loc[not_null_index]
list(null_index)

# --- Boolean masks and useless-column detection on the IMDB sample ----------
import numpy as np
import pandas as pd

# Load the CSV itself rather than scraping the rendered GitHub "blob" page
# with ``pd.read_html(...)[0]``, which depends on GitHub's HTML layout.
# NOTE(review): raw URL derived from the original blob URL - confirm it resolves.
df_movie = pd.read_csv(
    "https://raw.githubusercontent.com/Amangitcode/DataSink/master/imdb_1000.csv"
)
df_movie
df_movie.columns

# Attribute access and item access build the same boolean mask.
df_movie.genre == "Crime"
df_movie["genre"] == "Crime"

mask_crime = df_movie["genre"] == "Crime"
df_movie[mask_crime]

# Combine conditions with ``&`` and parentheses in a single indexing step.
# Chaining ``df[mask1][mask2]`` applies a mask built on the full frame to an
# already filtered frame, so it is avoided here.
df_movie[mask_crime & (df_movie["duration"] >= 160)]
df_movie[(df_movie.genre == "Crime") & (df_movie["duration"] >= 160)]

# Add three deliberately uninformative columns: all-NaN, all empty string,
# and a single constant value.
df_movie["empty_column1"], df_movie["empty_column2"], df_movie["useless_column3"] = [
    np.nan,
    "",
    "AAA",
]
df_movie

# Approach 1: all-null columns plus columns with at most one distinct value.
empty_columns = [col for col in df_movie.columns if df_movie[col].isnull().all()]
empty_columns

# NaN compares unequal to itself (see the ``np.nan == np.nan`` check further
# down), so ``set()`` may not collapse an all-NaN column to one element; that
# is why the all-null check above is kept as a separate step.
list_mask = [len(set(df_movie[a])) <= 1 for a in df_movie.columns.to_list()]
list_mask
len(set(df_movie["duration"]))

index1 = df_movie.columns[list_mask]
index1

list_dont_want = list(index1) + empty_columns
list_dont_want
list_want = [column for column in df_movie.columns if column not in list_dont_want]
list_want
df_movie[list_want]

list_try = ["ad", "a", 7]
8 not in list_try  # True

# Approach 2: explicit loop that reports why each column is discarded.
useless_cloumns = []
useful_cloumns = []
for column in df_movie.columns:
    distinct_values = set(df_movie[column])  # computed once per column
    if len(distinct_values) <= 1:
        print(f"single value is{distinct_values} @ {column}")
        useless_cloumns.append(column)
    elif df_movie[column].isnull().all():
        print(f"empty column @ {column}")
        useless_cloumns.append(column)
    else:
        useful_cloumns.append(column)
print(useless_cloumns)
print(useful_cloumns)
df_movie[useful_cloumns]
df_movie[useless_cloumns]

# Approach 3: select column names with the boolean list computed above
# (``df_movie`` has not changed since, so the mask is reused as is).
df_movie.columns.to_list()
list_mask
[b for a, b in zip(list_mask, df_movie.columns.to_list()) if a]
[b for a, b in zip(list_mask, df_movie.columns.to_list()) if not a]

# NOTE: despite its name, from here on ``useless_cloumns`` holds the inverted
# mask, i.e. True for columns with more than one distinct value.  The name is
# kept because later cells may refer to it.
useless_cloumns = [not ele for ele in list_mask]
[b for a, b in zip(useless_cloumns, df_movie.columns.to_list()) if a]

# NaN is never equal to NaN under ``==``.
list_nan = [np.nan, np.nan, np.nan]
list_nan
list_nan[0] == list_nan[1]  # False
np.nan == np.nan  # False

# Approach 4 (most concise): ``nunique`` ignores NaN by default, so all-NaN
# columns count 0 distinct values and constant columns count 1.
index2 = df_movie.columns[df_movie.nunique() <= 1]
index2
index3 = df_movie.columns
index4 = index3.difference(index2)
index4
df_movie[index4]