-
Notifications
You must be signed in to change notification settings - Fork 0
/
cv_parser.py
40 lines (25 loc) · 899 Bytes
/
cv_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import json
import pandas as pd
# Worksheets of data.xlsx that are converted; each becomes a top-level key
# in the JSON output.
SHEETS = ['Professional Experience', 'Volunteer Activity', 'Education']

# Maps each sheet name to its list of grouped records.
results = {}

# Load the data. Passing a list of sheet names makes read_excel return a dict
# of DataFrames keyed by sheet name, so the workbook is opened and parsed only
# once instead of once per sheet.
frames = pd.read_excel('data.xlsx', sheet_name=SHEETS)

for sheet in SHEETS:
    # Drop rows where all values are NaN.
    df = frames[sheet].dropna(how='all')

    # Forward-fill the identifying columns so that rows with a blank value
    # inherit it from the nearest filled row above.
    # NOTE(review): presumably the spreadsheet only fills these cells on the
    # first row of each entry -- confirm against data.xlsx.
    fill_columns = ['When', 'Where', 'Organization']
    if sheet != 'Education':
        # The 'Education' sheet is handled without a 'Role' column.
        fill_columns.append('Role')
    for column in fill_columns:
        df[column] = df[column].ffill()

    # Replace every remaining NaN with an empty string before grouping.
    df = df.fillna('')

    # Columns that together identify one entry; rows sharing all of them are
    # merged into a single record.
    groups = ['When', 'Where', 'Organization']
    if sheet == 'Education':
        groups.append('Degree Program')
        groups.append('Specialization')
    else:
        groups.append('Role')

    # Every non-key column is collected into a list of the values from the
    # rows that belong to the group.
    grouped_df = df.groupby(groups).agg(list).reset_index()
    result = grouped_df.to_dict(orient="records")
    results[sheet] = result

# Use a context manager so the output file is flushed and closed
# deterministically rather than whenever the file object is garbage-collected.
with open('data.json', 'w') as output_file:
    json.dump(results, output_file, indent=2)