%%html
<script src="https://bits.csb.pitt.edu/preamble.js"></script>

f = open('./Hospital_Inpatient_Discharges__SPARCS_De-Identified___2014.csv')
i = 0
for line in f:
    row = line.split(',')
    print(len(row))
    i += 1
    if i > 10:
        break

37
38
37
38
38
40
37
37
38
37
37

import csv
f = open('./Hospital_Inpatient_Discharges__SPARCS_De-Identified___2014.csv')
reader = csv.reader(f)
i = 0
for row in reader:
    print(len(row))
    i += 1
    if i > 10:
        break

37
37
37
37
37
37
37
37
37
37
37

#usecols only extracts those columns - much faster
import pandas as pd
data = pd.read_csv('./Hospital_Inpatient_Discharges__SPARCS_De-Identified___2014.csv.gz', usecols=[3,7,10,14,15,16,17,29,30,35])

/var/folders/s6/r0hv0w_j5pnbjrpqcsn9sq8m0000gp/T/ipykernel_32823/2813859563.py:3: DtypeWarning: Columns (10) have mixed types. Specify dtype option on import or set low_memory=False.
  data = pd.read_csv('./Hospital_Inpatient_Discharges__SPARCS_De-Identified___2014.csv.gz', usecols=[3,7,10,14,15,16,17,29,30,35])

data

data.to_numpy()

array([[37.0, 'F', '8', ..., 90335341.0, nan, '$9546.85'],
       [37.0, 'F', '3', ..., 90335341.0, nan, '$11462.75'],
       [37.0, 'M', '1', ..., 90335341.0, 167816.0, '$1609.40'],
       ...,
       [943.0, 'F', '1', ..., 267443.0, nan, '$11673.00'],
       [943.0, 'F', '1', ..., 267443.0, 249597.0, '$16722.00'],
       [943.0, 'M', '1', ..., 251080.0, nan, '$11637.00']], dtype=object)

data.columns

Index(['Facility ID', 'Gender', 'Length of Stay', 'CCS Diagnosis Code',
       'CCS Diagnosis Description', 'CCS Procedure Code',
       'CCS Procedure Description', 'Attending Provider License Number',
       'Operating Provider License Number', 'Total Charges'],
      dtype='object')

data.Gender  #get's gender column

0          F
1          F
2          M
3          F
4          F
          ..
2367278    F
2367279    M
2367280    F
2367281    F
2367282    M
Name: Gender, Length: 2367283, dtype: object

data["CCS Diagnosis Code"][:3] # can't use dot syntax if column name has spaces

0    197
1    146
2     50
Name: CCS Diagnosis Code, dtype: int64

data[:1]

data[0]

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /opt/homebrew/Caskroom/mambaforge/base/lib/python3.10/site-packages/pandas/core/indexes/base.py:3790, in Index.get_loc(self, key)
   3789 try:
-> 3790     return self._engine.get_loc(casted_key)
   3791 except KeyError as err:

File index.pyx:152, in pandas._libs.index.IndexEngine.get_loc()

File index.pyx:181, in pandas._libs.index.IndexEngine.get_loc()

File pandas/_libs/hashtable_class_helper.pxi:7080, in pandas._libs.hashtable.PyObjectHashTable.get_item()

File pandas/_libs/hashtable_class_helper.pxi:7088, in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 0

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
Cell In[12], line 1
----> 1 data[0] 

File /opt/homebrew/Caskroom/mambaforge/base/lib/python3.10/site-packages/pandas/core/frame.py:3896, in DataFrame.__getitem__(self, key)
   3894 if self.columns.nlevels > 1:
   3895     return self._getitem_multilevel(key)
-> 3896 indexer = self.columns.get_loc(key)
   3897 if is_integer(indexer):
   3898     indexer = [indexer]

File /opt/homebrew/Caskroom/mambaforge/base/lib/python3.10/site-packages/pandas/core/indexes/base.py:3797, in Index.get_loc(self, key)
   3792     if isinstance(casted_key, slice) or (
   3793         isinstance(casted_key, abc.Iterable)
   3794         and any(isinstance(x, slice) for x in casted_key)
   3795     ):
   3796         raise InvalidIndexError(key)
-> 3797     raise KeyError(key) from err
   3798 except TypeError:
   3799     # If we have a listlike key, _check_indexing_error will raise
   3800     #  InvalidIndexError. Otherwise we fall through and re-raise
   3801     #  the TypeError.
   3802     self._check_indexing_error(key)

KeyError: 0

data.iloc[3,1]

'F'

data.iloc[0,:]

Facility ID                                                             37.0
Gender                                                                     F
Length of Stay                                                             8
CCS Diagnosis Code                                                       197
CCS Diagnosis Description            Skin and subcutaneous tissue infections
CCS Procedure Code                                                         0
CCS Procedure Description                                            NO PROC
Attending Provider License Number                                 90335341.0
Operating Provider License Number                                        NaN
Total Charges                                                       $9546.85
Name: 0, dtype: object

%%html
<div id="pdiloc" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#pdiloc';
	jQuery(divid).asker({
	    id: divid,
	    question: "What does data.iloc[0] return?",
		answers: ["First row",'First column','37','Facility ID','Error'],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>

data.iloc[0]

Facility ID                                                             37.0
Gender                                                                     F
Length of Stay                                                             8
CCS Diagnosis Code                                                       197
CCS Diagnosis Description            Skin and subcutaneous tissue infections
CCS Procedure Code                                                         0
CCS Procedure Description                                            NO PROC
Attending Provider License Number                                 90335341.0
Operating Provider License Number                                        NaN
Total Charges                                                       $9546.85
Name: 0, dtype: object

data.iloc[0]

Facility ID                                                             37.0
Gender                                                                     F
Length of Stay                                                             8
CCS Diagnosis Code                                                       197
CCS Diagnosis Description            Skin and subcutaneous tissue infections
CCS Procedure Code                                                         0
CCS Procedure Description                                            NO PROC
Attending Provider License Number                                 90335341.0
Operating Provider License Number                                        NaN
Total Charges                                                       $9546.85
Name: 0, dtype: object

data.loc[:3,'Gender']

0    F
1    F
2    M
3    F
Name: Gender, dtype: object

data.loc[[0,3,5],['CCS Diagnosis Description','CCS Diagnosis Code']]

data[10:20]

data[10:20].loc[[0,3,5]]

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In[21], line 1
----> 1 data[10:20].loc[[0,3,5]]

File /opt/homebrew/Caskroom/mambaforge/base/lib/python3.10/site-packages/pandas/core/indexing.py:1153, in _LocationIndexer.__getitem__(self, key)
   1150 axis = self.axis or 0
   1152 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1153 return self._getitem_axis(maybe_callable, axis=axis)

File /opt/homebrew/Caskroom/mambaforge/base/lib/python3.10/site-packages/pandas/core/indexing.py:1382, in _LocIndexer._getitem_axis(self, key, axis)
   1379     if hasattr(key, "ndim") and key.ndim > 1:
   1380         raise ValueError("Cannot index with multidimensional key")
-> 1382     return self._getitem_iterable(key, axis=axis)
   1384 # nested tuple slicing
   1385 if is_nested_tuple(key, labels):

File /opt/homebrew/Caskroom/mambaforge/base/lib/python3.10/site-packages/pandas/core/indexing.py:1322, in _LocIndexer._getitem_iterable(self, key, axis)
   1319 self._validate_key(key, axis)
   1321 # A collection of keys
-> 1322 keyarr, indexer = self._get_listlike_indexer(key, axis)
   1323 return self.obj._reindex_with_indexers(
   1324     {axis: [keyarr, indexer]}, copy=True, allow_dups=True
   1325 )

File /opt/homebrew/Caskroom/mambaforge/base/lib/python3.10/site-packages/pandas/core/indexing.py:1520, in _LocIndexer._get_listlike_indexer(self, key, axis)
   1517 ax = self.obj._get_axis(axis)
   1518 axis_name = self.obj._get_axis_name(axis)
-> 1520 keyarr, indexer = ax._get_indexer_strict(key, axis_name)
   1522 return keyarr, indexer

File /opt/homebrew/Caskroom/mambaforge/base/lib/python3.10/site-packages/pandas/core/indexes/base.py:6114, in Index._get_indexer_strict(self, key, axis_name)
   6111 else:
   6112     keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
-> 6114 self._raise_if_missing(keyarr, indexer, axis_name)
   6116 keyarr = self.take(indexer)
   6117 if isinstance(key, Index):
   6118     # GH 42790 - Preserve name from an Index

File /opt/homebrew/Caskroom/mambaforge/base/lib/python3.10/site-packages/pandas/core/indexes/base.py:6175, in Index._raise_if_missing(self, key, indexer, axis_name)
   6173     if use_interval_msg:
   6174         key = list(key)
-> 6175     raise KeyError(f"None of [{key}] are in the [{axis_name}]")
   6177 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
   6178 raise KeyError(f"{not_found} not in index")

KeyError: "None of [Index([0, 3, 5], dtype='int64')] are in the [index]"

data[10:20].iloc[[0,3,5]]

data[data.Gender == 'M'][:3]

data.iloc[:,[0,3,5]][:3]

data.sort_values(by="Total Charges").head(3)

data.sort_values(by="Total Charges",ascending=False).head(3)

%%html
<div id="mostexp" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#mostexp';
	jQuery(divid).asker({
	    id: divid,
	    question: "Is $99999.65 the most expensive record?",
		answers: ["Yes",'Yes','No','No'],
        extra: ["It's at the top of the sorted records, don't make me second-guess myself.","I'm not very familiar with the American health care system so this already seems like an insanely high cost","And I can tell you why not","Because you wouldn't ask if Yes was the answer"],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>

data['Total Charges'] = data['Total Charges'].str.lstrip('$').astype(float)

data.sort_values(by="Total Charges",ascending=False).head(3)

# setting errors='coerce' below will result in NaN values for invalid input
data['Length of Stay'] = pd.to_numeric(data['Length of Stay'], errors='coerce')
data['Length of Stay'].fillna(120,inplace=True)

data['Charge per day'] = data['Total Charges']/data['Length of Stay']

data

pd.to_numeric('120 +',errors='coerce')

nan

pd.to_numeric('120 +',errors='ignore')

'120 +'

pd.to_numeric('120 +')

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
File lib.pyx:2368, in pandas._libs.lib.maybe_convert_numeric()

ValueError: Unable to parse string "120 +"

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
Cell In[35], line 1
----> 1 pd.to_numeric('120 +')

File /opt/homebrew/Caskroom/mambaforge/base/lib/python3.10/site-packages/pandas/core/tools/numeric.py:222, in to_numeric(arg, errors, downcast, dtype_backend)
    220 coerce_numeric = errors not in ("ignore", "raise")
    221 try:
--> 222     values, new_mask = lib.maybe_convert_numeric(  # type: ignore[call-overload]  # noqa: E501
    223         values,
    224         set(),
    225         coerce_numeric=coerce_numeric,
    226         convert_to_masked_nullable=dtype_backend is not lib.no_default
    227         or isinstance(values_dtype, StringDtype),
    228     )
    229 except (ValueError, TypeError):
    230     if errors == "raise":

File lib.pyx:2410, in pandas._libs.lib.maybe_convert_numeric()

ValueError: Unable to parse string "120 +" at position 0

data.groupby('Facility ID')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x2ecbe84c0>

#note: make sure mean() is only applied to columns with numerical values, or you will get an error
data.groupby('Facility ID').mean(numeric_only=True).sort_values(by='Total Charges')

data.groupby('Gender').mean(numeric_only=True).sort_values(by='Total Charges').loc[:,['Total Charges']]

data.groupby('Gender').mean(numeric_only=True).sort_values(by='Total Charges').reset_index().loc[:,['Gender','Total Charges']]

data.groupby(['CCS Procedure Description','Gender'])['Total Charges'].mean()

CCS Procedure Description  Gender
ABDOMINAL PARACENTESIS     F         56996.536195
                           M         58532.559231
ABORTION (TERM OF PREG)    F         26512.622947
ALCO/DRUG REHAB/DETOX      F         18954.578164
                           M         18187.073614
                                         ...     
UPPER GI X-RAY             M         31221.460246
URETERAL CATHETERIZATN     F         36143.329481
                           M         34182.121658
VARI VEIN STRIP;LOW LMB    F         42519.938571
                           M         65763.081875
Name: Total Charges, Length: 446, dtype: float64

gendercosts = data.groupby(['CCS Procedure Description','Gender'])['Total Charges'].mean().unstack()
gendercosts

genderdiffs = gendercosts.F - gendercosts.M
genderdiffs

CCS Procedure Description
ABDOMINAL PARACENTESIS     -1536.023036
ABORTION (TERM OF PREG)             NaN
ALCO/DRUG REHAB/DETOX        767.504550
AMPUTATE LOWER EXTRMITY     8105.310748
AORTIC RESECTION; REPL     29809.145110
                               ...     
UNGROUPABLE               -37709.761136
UP GASTRO ENDOSC/BIOPSY     -714.553239
UPPER GI X-RAY             -2881.806172
URETERAL CATHETERIZATN      1961.207823
VARI VEIN STRIP;LOW LMB   -23243.143304
Length: 233, dtype: float64

genderdiffs.dropna(inplace=True)  #remove rows with NaN, modify genderdiffs in place
genderdiffs

CCS Procedure Description
ABDOMINAL PARACENTESIS     -1536.023036
ALCO/DRUG REHAB/DETOX        767.504550
AMPUTATE LOWER EXTRMITY     8105.310748
AORTIC RESECTION; REPL     29809.145110
APPENDECTOMY                 209.527573
                               ...     
UNGROUPABLE               -37709.761136
UP GASTRO ENDOSC/BIOPSY     -714.553239
UPPER GI X-RAY             -2881.806172
URETERAL CATHETERIZATN      1961.207823
VARI VEIN STRIP;LOW LMB   -23243.143304
Length: 202, dtype: float64

genderdiffs.sort_values().head(5)

CCS Procedure Description
EXTRA CIRC AUX OPEN HRT   -69453.636115
DX PRCS ON EYE            -63462.345439
SWAN-GANZ CATH MONITOR    -46738.846837
PRCS ON SPLEEN            -46219.301962
CORNEAL TRANSPLANT        -45028.501148
dtype: float64

genderdiffs.sort_values().tail(5)

CCS Procedure Description
LENS & CATARACT PRCS       35378.240602
OT INTRAOCULAR THER PRC    35907.146236
BONE MARROW TRANSPLANT     37066.859998
DES LES RETINA/CHOROID     52461.621223
CORONARY THROMBOLYSIS      87870.550000
dtype: float64

df1 = pd.DataFrame(
    {
        "A": ["A0", "A1", "A2", "A3"],
        "B": ["B0", "B1", "B2", "B3"],
        "C": ["C0", "C1", "C2", "C3"],
        "D": ["D0", "D1", "D2", "D3"],
    },
    index=[0, 1, 2, 3],
)

df2 = pd.DataFrame(
    {
        "B": ["B2", "B3", "B6", "B7"],
        "D": ["D2", "D3", "D6", "D7"],
        "F": ["F2", "F3", "F6", "F7"],
    },
    index=[2, 3, 6, 7],
)

df1

df2

pd.concat([df1,df2])

left = pd.DataFrame(
    {
        "key": ["K0", "K1", "K2", "K3"],
        "A": ["A0", "A1", "A2", "A3"],
        "B": ["B0", "B1", "B2", "B3"],
    }
)


right = pd.DataFrame(
    {
        "key": [ "K1", "K2", "K4"],
        "C": ["C1", "C2", "C4"],
        "D": ["D1", "D2", "D4"],
    }
)

left

right

pd.merge(left,right,on='key',how='inner')

pd.merge(left,right,on='key',how='outer')

pd.merge(left,right,on='key',how='left')

	Facility ID	Gender	Length of Stay	CCS Diagnosis Code	CCS Diagnosis Description	CCS Procedure Code	CCS Procedure Description	Attending Provider License Number	Operating Provider License Number	Total Charges
1426998	1439.0	M	1	108	Congestive heart failure; nonhypertensive	0	NO PROC	251948.0	NaN	$0.50
780065	989.0	M	2	657	Mood disorders	218	PSYCHO/PSYCHI EVAL/THER	199003.0	154247.0	$1.00
781531	989.0	F	1	661	Substance-related disorders	0	NO PROC	145418.0	NaN	$1.00

	Facility ID	Gender	Length of Stay	CCS Diagnosis Code	CCS Diagnosis Description	CCS Procedure Code	CCS Procedure Description	Attending Provider License Number	Operating Provider License Number	Total Charges
77722	213.0	F	10	100	Acute myocardial infarction	44	COR ARTERY BYP GRF-CABG	224769.0	224769.0	$99999.65
451015	635.0	F	1	115	Aortic; peripheral; and visceral artery aneurysms	52	AORTIC RESECTION; REPL	232988.0	232988.0	$99999.46
1579047	1456.0	M	12	2	Septicemia (except in labor)	157	AMPUTATE LOWER EXTRMITY	258717.0	265448.0	$99998.45

	Facility ID	Gender	Length of Stay	CCS Diagnosis Code	CCS Diagnosis Description	CCS Procedure Code	CCS Procedure Description	Attending Provider License Number	Operating Provider License Number	Total Charges
967254	1169.0	M	120 +	63	Diseases of white blood cells	64	BONE MARROW TRANSPLANT	198304.0	229870.0	8593455.88
1560003	1456.0	F	120 +	143	Abdominal hernia	86	OTHER HERNIA REPAIR	165181.0	165181.0	6272871.31
957685	1169.0	F	120 +	6	Hepatitis	176	OT ORGAN TRANSPLANTATN	236414.0	183253.0	5745201.42

	Length of Stay	CCS Diagnosis Code	CCS Procedure Code	Attending Provider License Number	Operating Provider License Number	Total Charges	Charge per day
Facility ID
377.0	2.670732	121.170732	202.439024	4.664694e+07	4.826434e+07	4060.734024	1614.759785
111.0	3.611111	159.777778	0.000000	1.929986e+05	NaN	4492.587778	1473.003327
37.0	3.162791	123.906977	54.779070	8.298531e+07	7.666251e+06	4939.879651	1628.579552
9250.0	2.203456	205.840173	137.955076	1.082429e+07	1.264187e+07	5618.928860	2497.869519
165.0	4.597884	180.206349	181.920635	2.051072e+05	4.744077e+07	6376.906455	1580.321499
...	...	...	...	...	...	...	...
563.0	6.279619	129.367643	78.085210	2.363745e+05	6.581608e+05	89385.986450	20937.033431
1446.0	5.633545	214.859847	157.190406	3.497396e+05	3.955800e+05	124204.521077	36290.348526
1139.0	8.937250	205.031446	87.044577	3.341266e+05	3.847957e+05	131992.454991	17629.688410
1138.0	38.527005	215.072013	213.108020	1.963923e+05	1.966056e+05	175997.805859	4195.405483
1486.0	61.821429	134.039683	194.801587	1.863599e+05	1.921974e+05	195780.272579	2486.885826

	Total Charges
Gender
U	9858.198939
F	36867.585805
M	44419.344760

Electronic Medical Records with Pandas¶

09/28/2023¶

The Data¶

Possible Fix¶

`pandas`¶

Data Access¶

Data Access¶

`iloc`: Position indexing¶

`.loc`: Label indexing¶

Boolean Indexing¶

Sorting¶

String Methods¶

Correct sorting¶

Creating New Columns¶

Group by¶

Example¶

Example¶

Combining DataFrames¶

Join¶

Inner Join¶

Outer Join¶

Left Join¶

Questions¶

	Facility ID	Gender	Length of Stay	CCS Diagnosis Code	CCS Diagnosis Description	CCS Procedure Code	CCS Procedure Description	Attending Provider License Number	Operating Provider License Number	Total Charges
0	37.0	F	8	197	Skin and subcutaneous tissue infections	0	NO PROC	90335341.0	NaN	$9546.85
1	37.0	F	3	146	Diverticulosis and diverticulitis	0	NO PROC	90335341.0	NaN	$11462.75
2	37.0	M	1	50	Diabetes mellitus with complications	202	ELECTROCARDIOGRAM	90335341.0	167816.0	$1609.40
3	37.0	F	1	154	Noninfectious gastroenteritis	202	ELECTROCARDIOGRAM	90335341.0	167816.0	$2638.75
4	37.0	F	3	124	Acute and chronic tonsillitis	0	NO PROC	90335341.0	NaN	$3538.25
...	...	...	...	...	...	...	...	...	...	...
2367278	943.0	F	1	245	Syncope	0	NO PROC	156102.0	NaN	$10074.00
2367279	943.0	M	1	149	Biliary tract disease	0	NO PROC	267443.0	NaN	$21252.00
2367280	943.0	F	1	102	Nonspecific chest pain	0	NO PROC	267443.0	NaN	$11673.00
2367281	943.0	F	1	660	Alcohol-related disorders	171	SUTURE SKIN/SUBCUT TISS	267443.0	249597.0	$16722.00
2367282	943.0	M	1	2	Septicemia (except in labor)	0	NO PROC	251080.0	NaN	$11637.00

	CCS Diagnosis Description	CCS Diagnosis Code
0	Skin and subcutaneous tissue infections	197
3	Noninfectious gastroenteritis	154
5	Influenza	123

Gender	F	M	U
CCS Procedure Description
ABDOMINAL PARACENTESIS	56996.536195	58532.559231	NaN
ABORTION (TERM OF PREG)	26512.622947	NaN	NaN
ALCO/DRUG REHAB/DETOX	18954.578164	18187.073614	NaN
AMPUTATE LOWER EXTRMITY	100926.639344	92821.328596	NaN
AORTIC RESECTION; REPL	144019.629159	114210.484050	NaN
...	...	...	...
UNGROUPABLE	68101.627500	105811.388636	NaN
UP GASTRO ENDOSC/BIOPSY	42625.453678	43340.006917	NaN
UPPER GI X-RAY	28339.654074	31221.460246	NaN
URETERAL CATHETERIZATN	36143.329481	34182.121658	NaN
VARI VEIN STRIP;LOW LMB	42519.938571	65763.081875	NaN

	A	B	C	D	F
0	A0	B0	C0	D0	NaN
1	A1	B1	C1	D1	NaN
2	A2	B2	C2	D2	NaN
3	A3	B3	C3	D3	NaN
2	NaN	B2	NaN	D2	F2
3	NaN	B3	NaN	D3	F3
6	NaN	B6	NaN	D6	F6
7	NaN	B7	NaN	D7	F7

	key	A	B	C	D
0	K0	A0	B0	NaN	NaN
1	K1	A1	B1	C1	D1
2	K2	A2	B2	C2	D2
3	K3	A3	B3	NaN	NaN
4	K4	NaN	NaN	C4	D4

	A	B	C	D	F
0	A0	B0	C0	D0	NaN
1	A1	B1	C1	D1	NaN
2	A2	B2	C2	D2	NaN
3	A3	B3	C3	D3	NaN
2	NaN	B2	NaN	D2	F2
3	NaN	B3	NaN	D3	F3
6	NaN	B6	NaN	D6	F6
7	NaN	B7	NaN	D7	F7

	key	A	B	C	D
0	K0	A0	B0	NaN	NaN
1	K1	A1	B1	C1	D1
2	K2	A2	B2	C2	D2
3	K3	A3	B3	NaN	NaN
4	K4	NaN	NaN	C4	D4

Electronic Medical Records with Pandas¶

09/28/2023¶

The Data¶

Possible Fix¶

pandas¶

Data Access¶

Data Access¶

iloc: Position indexing¶

.loc: Label indexing¶

Boolean Indexing¶

Sorting¶

String Methods¶

Correct sorting¶

Creating New Columns¶

Group by¶

Example¶

Example¶

Combining DataFrames¶

Join¶

Inner Join¶

Outer Join¶

Left Join¶

Questions¶

`pandas`¶

`iloc`: Position indexing¶

`.loc`: Label indexing¶

	A	B	C	D	F
0	A0	B0	C0	D0	NaN
1	A1	B1	C1	D1	NaN
2	A2	B2	C2	D2	NaN
3	A3	B3	C3	D3	NaN
2	NaN	B2	NaN	D2	F2
3	NaN	B3	NaN	D3	F3
6	NaN	B6	NaN	D6	F6
7	NaN	B7	NaN	D7	F7

	key	A	B	C	D
0	K0	A0	B0	NaN	NaN
1	K1	A1	B1	C1	D1
2	K2	A2	B2	C2	D2
3	K3	A3	B3	NaN	NaN
4	K4	NaN	NaN	C4	D4