In [462]:
import numpy as np
import pandas as pd

Read the Table

In [463]:
df = pd.read_csv("stat.csv")
In [464]:
df
Out[464]:
StartDate EndDate Status IPAddress Progress Duration (in seconds) Finished RecordedDate ResponseId RecipientLastName ... Q15 Q17_First Click Q17_Last Click Q17_Page Submit Q17_Click Count Q25_First Click Q25_Last Click Q25_Page Submit Q25_Click Count Q23
0 Start Date End Date Response Type IP Address Progress Duration (in seconds) Finished Recorded Date Response ID Recipient Last Name ... An ogive is a graph that plots the cumulative ... Timing - First Click Timing - Last Click Timing - Page Submit Timing - Click Count Timing - First Click Timing - Last Click Timing - Page Submit Timing - Click Count The following image was taken from poll result...
1 {"ImportId":"startDate","timeZone":"America/De... {"ImportId":"endDate","timeZone":"America/Denv... {"ImportId":"status"} {"ImportId":"ipAddress"} {"ImportId":"progress"} {"ImportId":"duration"} {"ImportId":"finished"} {"ImportId":"recordedDate","timeZone":"America... {"ImportId":"_recordId"} {"ImportId":"recipientLastName"} ... {"ImportId":"QID15"} {"ImportId":"QID17_FIRST_CLICK"} {"ImportId":"QID17_LAST_CLICK"} {"ImportId":"QID17_PAGE_SUBMIT"} {"ImportId":"QID17_CLICK_COUNT"} {"ImportId":"QID25_FIRST_CLICK"} {"ImportId":"QID25_LAST_CLICK"} {"ImportId":"QID25_PAGE_SUBMIT"} {"ImportId":"QID25_CLICK_COUNT"} {"ImportId":"QID23"}
2 2020-01-23 13:13:50 2020-01-23 13:19:35 IP Address 167.96.33.58 100 345 True 2020-01-23 13:19:36 R_3Gj6c2T6hE38jMz NaN ... True 3.054 23.902 25.606 2 2.746 22.481 25.459 2 c. 67% is an example of a sample statistic.
3 2020-01-23 13:12:11 2020-01-23 13:27:14 IP Address 167.96.125.116 100 903 True 2020-01-23 13:27:15 R_239lqf5UVpvuyLJ NaN ... True 19.347 19.347 25.22 1 69.054 95.34 105.475 3 Concluding that 67% of Louisiana approves of B...
4 2020-01-23 13:12:49 2020-01-23 13:28:13 IP Address 167.96.25.95 100 923 True 2020-01-23 13:28:13 R_1o6lFA2ZLYLrVwT NaN ... True 623.445 659.009 659.962 5 9.262 9.262 15.835 1 c. 67% is an example of a sample statistic.
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
90 2020-01-23 14:40:09 2020-01-23 15:10:09 IP Address 167.96.32.160 25 1800 False 2020-01-24 11:01:42 R_1pKk2Mh4uRtmDkS NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
91 2020-01-23 13:14:22 2020-01-24 09:43:15 IP Address 167.96.13.30 31 73733 False 2020-01-24 11:02:06 R_1KkjkaxiAtpEtRQ NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
92 2020-01-23 13:14:12 2020-01-23 13:48:49 IP Address 167.96.20.216 69 2077 False 2020-01-24 11:02:07 R_338TLHkD16LEnrB NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
93 2020-01-24 01:07:28 2020-01-24 01:46:55 IP Address 167.96.102.195 13 2367 False 2020-01-24 11:02:07 R_RJGeSLnj1i0fKaR NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
94 2020-01-23 14:38:08 2020-01-23 15:15:21 IP Address 167.96.109.168 88 2232 False 2020-01-24 11:02:16 R_2OT5H9sXduyiGla NaN ... True 5.997 67.566 68.647 2 NaN NaN NaN NaN NaN

95 rows × 44 columns

In [465]:
df = df.drop(df.index[1])
df = df.drop(df.index[0])
df = df.reset_index(drop = True)
In [466]:
df
Out[466]:
StartDate EndDate Status IPAddress Progress Duration (in seconds) Finished RecordedDate ResponseId RecipientLastName ... Q15 Q17_First Click Q17_Last Click Q17_Page Submit Q17_Click Count Q25_First Click Q25_Last Click Q25_Page Submit Q25_Click Count Q23
0 2020-01-23 13:13:50 2020-01-23 13:19:35 IP Address 167.96.33.58 100 345 True 2020-01-23 13:19:36 R_3Gj6c2T6hE38jMz NaN ... True 3.054 23.902 25.606 2 2.746 22.481 25.459 2 c. 67% is an example of a sample statistic.
1 2020-01-23 13:12:11 2020-01-23 13:27:14 IP Address 167.96.125.116 100 903 True 2020-01-23 13:27:15 R_239lqf5UVpvuyLJ NaN ... True 19.347 19.347 25.22 1 69.054 95.34 105.475 3 Concluding that 67% of Louisiana approves of B...
2 2020-01-23 13:12:49 2020-01-23 13:28:13 IP Address 167.96.25.95 100 923 True 2020-01-23 13:28:13 R_1o6lFA2ZLYLrVwT NaN ... True 623.445 659.009 659.962 5 9.262 9.262 15.835 1 c. 67% is an example of a sample statistic.
3 2020-01-23 13:12:51 2020-01-23 13:28:20 IP Address 167.96.16.19 100 929 True 2020-01-23 13:28:20 R_2R2sokeJV1mxjzm NaN ... True 48.963 48.963 52.428 1 9.848 9.848 20.606 1 c. 67% is an example of a sample statistic.
4 2020-01-23 13:13:02 2020-01-23 13:32:25 IP Address 167.96.72.188 100 1163 True 2020-01-23 13:32:25 R_1daaC1AYLusFgGF NaN ... True 9.449 9.449 11.571 1 37.499 37.499 39.122 1 c. 67% is an example of a sample statistic.
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
88 2020-01-23 14:40:09 2020-01-23 15:10:09 IP Address 167.96.32.160 25 1800 False 2020-01-24 11:01:42 R_1pKk2Mh4uRtmDkS NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
89 2020-01-23 13:14:22 2020-01-24 09:43:15 IP Address 167.96.13.30 31 73733 False 2020-01-24 11:02:06 R_1KkjkaxiAtpEtRQ NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
90 2020-01-23 13:14:12 2020-01-23 13:48:49 IP Address 167.96.20.216 69 2077 False 2020-01-24 11:02:07 R_338TLHkD16LEnrB NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
91 2020-01-24 01:07:28 2020-01-24 01:46:55 IP Address 167.96.102.195 13 2367 False 2020-01-24 11:02:07 R_RJGeSLnj1i0fKaR NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
92 2020-01-23 14:38:08 2020-01-23 15:15:21 IP Address 167.96.109.168 88 2232 False 2020-01-24 11:02:16 R_2OT5H9sXduyiGla NaN ... True 5.997 67.566 68.647 2 NaN NaN NaN NaN NaN

93 rows × 44 columns

Comparing the answer with key

In [542]:
def comparison(text1 , text2):
    text1 = text1.lower()
    text1 = text1.split(" ")
    

    text2 = text2.lower()
    text2 = text2.split(" ")
    

    k = 0 
    if len(text1) == len(text2):
        for i in range(0, len(text1)):
            if text1[i] == text2[i]:
                k +=1

        if k == len(text2):
            return 1
        else:
            return 0

    else:
        return 0
In [468]:
cols = df.columns
In [469]:
cols=[c for c in cols if 'Q' in c] 
In [470]:
cols = [c for c in cols if '_' not in c] 
In [471]:
len(cols)
Out[471]:
7
In [472]:
df1 = pd.DataFrame(columns = cols)
for i in df.index:
    for j in cols:
        df1.loc[i,j]= df.loc[i,j]
In [473]:
df1
Out[473]:
Q1 Q16 Q7 Q4 Q13 Q15 Q23
0 Jack Fuqua 4 0.30 Nominal, ordinal, interval, and ratio data positively skewed True c. 67% is an example of a sample statistic.
1 Mackenzie Toussel 4 0.60 Nominal and interval data only negatively skewed True Concluding that 67% of Louisiana approves of B...
2 Claire Hammond 4 0.60 Nominal, ordinal, interval, and ratio data positively skewed True c. 67% is an example of a sample statistic.
3 Alexis DeFoe 4 0.60 Nominal, ordinal, interval, and ratio data positively skewed True c. 67% is an example of a sample statistic.
4 Camryn Meaux 4 0.60 Nominal, ordinal, interval, and ratio data positively skewed True c. 67% is an example of a sample statistic.
... ... ... ... ... ... ... ...
88 abigail Besselman 5 0.60 NaN NaN NaN NaN
89 Colin Chapman 4 0.60 NaN NaN NaN NaN
90 Eduardo Lara 4 1 Nominal, ordinal, interval, and ratio data positively skewed NaN NaN
91 Enricco Ledet 5 NaN NaN NaN NaN NaN
92 Alvaro Lacayo 5 0.60 Nominal, ordinal, interval, and ratio data positively skewed True NaN

93 rows × 7 columns

In [474]:
df1 = df1.fillna(0)
df1
Out[474]:
Q1 Q16 Q7 Q4 Q13 Q15 Q23
0 Jack Fuqua 4 0.30 Nominal, ordinal, interval, and ratio data positively skewed True c. 67% is an example of a sample statistic.
1 Mackenzie Toussel 4 0.60 Nominal and interval data only negatively skewed True Concluding that 67% of Louisiana approves of B...
2 Claire Hammond 4 0.60 Nominal, ordinal, interval, and ratio data positively skewed True c. 67% is an example of a sample statistic.
3 Alexis DeFoe 4 0.60 Nominal, ordinal, interval, and ratio data positively skewed True c. 67% is an example of a sample statistic.
4 Camryn Meaux 4 0.60 Nominal, ordinal, interval, and ratio data positively skewed True c. 67% is an example of a sample statistic.
... ... ... ... ... ... ... ...
88 abigail Besselman 5 0.60 0 0 0 0
89 Colin Chapman 4 0.60 0 0 0 0
90 Eduardo Lara 4 1 Nominal, ordinal, interval, and ratio data positively skewed 0 0
91 Enricco Ledet 5 0 0 0 0 0
92 Alvaro Lacayo 5 0.60 Nominal, ordinal, interval, and ratio data positively skewed True 0

93 rows × 7 columns

In [475]:
list1 = df1[df1.columns[1]].unique()
if len(list1) == 2:
    del df1[df1.columns[1]]
    cols2 = cols[2:]
else:
    cols2 = cols[1:]
In [476]:
#df1["Q7"].astype("str")
for i in df1.columns:
    df1[i] = df1[i].astype(str)

Answer Key

In [477]:
key = pd.read_csv("key.csv")
key["Key"] = key["Key"].astype(str)
key_list = list(key["Key"])
In [478]:
key_list
Out[478]:
['0.6',
 'Nominal, ordinal, interval, and ratio data',
 'positively skewed',
 'TRUE',
 'c. 67% is an example of a sample statistic.']

Grading

In [479]:
for i in df1.index:
    k = 0
    for j in cols2:
        df1.loc[i, j] = comparison(df1.loc[i, j], key_list[k])
        k +=1
In [480]:
df1
Out[480]:
Q1 Q7 Q4 Q13 Q15 Q23
0 Jack Fuqua 0 1 1 1 1
1 Mackenzie Toussel 0 0 0 1 0
2 Claire Hammond 0 1 1 1 1
3 Alexis DeFoe 0 1 1 1 1
4 Camryn Meaux 0 1 1 1 1
... ... ... ... ... ... ...
88 abigail Besselman 0 0 0 0 0
89 Colin Chapman 0 0 0 0 0
90 Eduardo Lara 0 1 1 0 0
91 Enricco Ledet 0 0 0 0 0
92 Alvaro Lacayo 0 1 1 1 0

93 rows × 6 columns

In [481]:
for i in df1.index:
    temp  = 0
    for j in cols2:
        temp = temp + pd.to_numeric(df1.loc[i, j])
    df1.loc[i, "grade"] = temp
In [482]:
df1
Out[482]:
Q1 Q7 Q4 Q13 Q15 Q23 grade
0 Jack Fuqua 0 1 1 1 1 4.0
1 Mackenzie Toussel 0 0 0 1 0 1.0
2 Claire Hammond 0 1 1 1 1 4.0
3 Alexis DeFoe 0 1 1 1 1 4.0
4 Camryn Meaux 0 1 1 1 1 4.0
... ... ... ... ... ... ... ...
88 abigail Besselman 0 0 0 0 0 0.0
89 Colin Chapman 0 0 0 0 0 0.0
90 Eduardo Lara 0 1 1 0 0 2.0
91 Enricco Ledet 0 0 0 0 0 0.0
92 Alvaro Lacayo 0 1 1 1 0 3.0

93 rows × 7 columns

Save the Result

In [483]:
from datetime import datetime

date = datetime.today().strftime('%Y-%m-%d')
In [484]:
text = "Grade_" + date + ".csv"
df1.to_csv(text, index = False)
In [485]:
print("Done")
Done
In [546]:
grade_list= pd.read_csv("total.csv") 
grade_list
Out[546]:
id grade
0 Carlos Ledezma 3
1 Haley Norton 0
2 Colin Chapman 2
3 johnny Sims 0
4 Jack fuqua 1
... ... ...
59 j 0
60 ava arcemont 1
61 Sydney Fant 0
62 Claire Barbera 1
63 bryson gonzalez 1

64 rows × 2 columns

In [547]:
y = 0
text = "Grade_" + date
for i in grade_list.index:
    y= 0
    id2 = grade_list.loc[i, "id"]
    for j in df1.index:
        id1 = df1.loc[j, df1.columns[0]]
        if comparison(id1, id2) == 1:
            grade_list.loc[i, "grade"] = df1.loc[j, "grade"] + grade_list.loc[i, "grade"]
            grade_list.loc[i, text] = df1.loc[j, "grade"]
            j = df1.index -1
            y = 1
            
    if y == 0:
        grade_list.loc[i, text] = "a"
            
In [548]:
grade_list.to_csv("total.csv", index = False)
Out[548]:
id grade Grade1_2020-06-23
0 Carlos Ledezma 3.0 a
1 Haley Norton 3.0 3
2 Colin Chapman 2.0 0
3 johnny Sims 0.0 a
4 Jack fuqua 5.0 4
... ... ... ...
59 j 0.0 a
60 ava arcemont 1.0 a
61 Sydney Fant 0.0 a
62 Claire Barbera 1.0 a
63 bryson gonzalez 1.0 a

64 rows × 3 columns

In [ ]: