# ---------------------------------------------------------------------------
# File: 01-CalculateSpecialBonus.py  (new file, mode 100644, index 0000000..bf5671d)
# Problem 1: Calculate Special Bonus
# (https://leetcode.com/problems/calculate-special-bonus/)
# ---------------------------------------------------------------------------

import pandas as pd


# Solution 1: iterate over the rows and build the result frame by hand.
def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
    """Return each employee's bonus, ordered by ``employee_id``.

    The bonus equals the salary when ``employee_id`` is odd and ``name`` does
    not start with ``'M'``; otherwise it is 0.

    Args:
        employees: Frame with ``employee_id``, ``name`` and ``salary`` columns.

    Returns:
        A new frame with columns ``employee_id`` and ``bonus``.
    """
    rows = []
    # zip() walks the columns positionally, so this works for any index
    # (``employees[col][i]`` is a label lookup and breaks on non-default ones).
    for emp_id, name, salary in zip(
        employees['employee_id'], employees['name'], employees['salary']
    ):
        # startswith() is safe on an empty name, unlike name[0].
        is_eligible = emp_id % 2 != 0 and not name.startswith('M')
        rows.append([emp_id, salary if is_eligible else 0])
    result = pd.DataFrame(rows, columns=['employee_id', 'bonus'])
    return result.sort_values(by=['employee_id'])


# Solution 2: same loop, but the bonus column is added to the original frame.
def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
    """Add a ``bonus`` column to ``employees`` and return the bonus report.

    Side effect: ``employees`` gains (or has overwritten) a ``bonus`` column.

    Args:
        employees: Frame with ``employee_id``, ``name`` and ``salary`` columns.

    Returns:
        The ``employee_id`` and ``bonus`` columns, sorted by ``employee_id``.
    """
    bonuses = []
    for emp_id, name, salary in zip(
        employees['employee_id'], employees['name'], employees['salary']
    ):
        is_eligible = emp_id % 2 != 0 and not name.startswith('M')
        bonuses.append(salary if is_eligible else 0)
    # Assign the whole column at once: element-wise chained assignment
    # (employees['bonus'][i] = ...) may write to a temporary copy.
    employees['bonus'] = bonuses
    return employees[['employee_id', 'bonus']].sort_values(by=['employee_id'])


# Solution 3: row-wise computation using DataFrame.apply().
# NOTE: apply(axis=1) still calls a Python function once per row, so it is
# more compact than an explicit loop but not truly vectorized.
def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
    """Add a ``bonus`` column computed row by row and return the bonus report.

    Side effect: ``employees`` gains (or has overwritten) a ``bonus`` column.

    Args:
        employees: Frame with ``employee_id``, ``name`` and ``salary`` columns.

    Returns:
        The ``employee_id`` and ``bonus`` columns, sorted by ``employee_id``.
    """
    def row_bonus(row):
        is_eligible = (
            row['employee_id'] % 2 != 0 and not row['name'].startswith('M')
        )
        return row['salary'] if is_eligible else 0

    if employees.empty:
        # apply(axis=1) on a frame without rows hands back a DataFrame
        # instead of a Series, which cannot be assigned to a single column.
        employees['bonus'] = []
    else:
        employees['bonus'] = employees.apply(row_bonus, axis=1)
    return employees[['employee_id', 'bonus']].sort_values(by='employee_id')


# Solution 4: fully vectorized approach using a boolean mask and Series.where().
def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
    """Add a ``bonus`` column using vectorized operations and return the report.

    Side effect: ``employees`` gains (or has overwritten) a ``bonus`` column.

    Args:
        employees: Frame with ``employee_id``, ``name`` and ``salary`` columns.

    Returns:
        The ``employee_id`` and ``bonus`` columns, sorted by ``employee_id``.
    """
    # na=False keeps the mask strictly boolean even if a name is missing.
    starts_with_m = employees['name'].str.startswith('M', na=False)
    has_odd_id = employees['employee_id'] % 2 == 1
    # where() keeps the salary where the mask is True and puts 0 elsewhere.
    employees['bonus'] = employees['salary'].where(~starts_with_m & has_odd_id, 0)
    return employees[['employee_id', 'bonus']].sort_values('employee_id')


# Note:
#
# .str is used for vectorized string operations on a Series
# (e.g., employees['name'].str.startswith('M')).
# In apply with axis=1, row['name'] is a scalar string, so the native
# startswith() works without .str.
#
# The .where() method replaces values where the condition is False:
# it keeps the original values where the condition is True and replaces
# them with 0 where the condition is False.


# ---------------------------------------------------------------------------
# File: 02-FixNamesInATable.py  (new file, mode 100644, index 0000000..6c6c5ec)
# Problem 2: Fix Names in a Table
# (https://leetcode.com/problems/fix-names-in-a-table/)
# ---------------------------------------------------------------------------

import pandas as pd


# Solution 1: manual implementation.
def fix_names(users: pd.DataFrame) -> pd.DataFrame:
    """Capitalize the first letter of every name and lower-case the rest.

    Side effect: the ``name`` column of ``users`` is rewritten in place.

    Args:
        users: Frame with ``user_id`` and ``name`` columns.

    Returns:
        The frame sorted by ``user_id``.
    """
    names = users['name']
    # .str[:1] (a slice) yields '' for an empty name, whereas .str[0] yields
    # NaN and would turn the whole concatenated name into NaN.
    users['name'] = names.str[:1].str.upper() + names.str[1:].str.lower()
    return users.sort_values(by=['user_id'])


# Solution 2: built-in method.
def fix_names(users: pd.DataFrame) -> pd.DataFrame:
    """Capitalize every name with ``Series.str.capitalize()``.

    Side effect: the ``name`` column of ``users`` is rewritten in place.

    Args:
        users: Frame with ``user_id`` and ``name`` columns.

    Returns:
        The frame sorted by ``user_id``.
    """
    users['name'] = users['name'].str.capitalize()
    return users.sort_values(by=['user_id'])


# Note: there is also .str.title(), but it capitalizes the first letter of
# each word, not just the first letter of the whole string.


# ---------------------------------------------------------------------------
# File: 03-PatientsWithACondition.py  (new file, mode 100644, index 0000000..3a00fc1)
# Problem 3: Patients with a Condition
# (https://leetcode.com/problems/patients-with-a-condition/)
# ---------------------------------------------------------------------------

import pandas as pd


# Solution 1: explicit scan over the rows.
def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
    """Return patients having a condition code that starts with ``DIAB1``.

    Args:
        patients: Frame with ``patient_id``, ``patient_name`` and
            ``conditions`` columns; ``conditions`` holds whitespace-separated
            condition codes.

    Returns:
        A new frame with the matching rows in their original order.
    """
    # zip() iterates positionally, so a non-default index is not a problem.
    matches = [
        [patient_id, patient_name, conditions]
        for patient_id, patient_name, conditions in zip(
            patients['patient_id'],
            patients['patient_name'],
            patients['conditions'],
        )
        if any(code.startswith('DIAB1') for code in conditions.split())
    ]
    return pd.DataFrame(
        matches, columns=['patient_id', 'patient_name', 'conditions']
    )


# Solution 2: vectorized string tests without a regular expression.
def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
    """Return patients having a condition code that starts with ``DIAB1``.

    A code matches when it is the first one in ``conditions`` or is preceded
    by a space. Rows with a missing ``conditions`` value never match.

    Args:
        patients: Frame with a ``conditions`` column of space-separated codes.

    Returns:
        The matching rows of ``patients``.
    """
    conditions = patients['conditions']
    is_first_code = conditions.str.startswith('DIAB1', na=False)
    # regex=False: ' DIAB1' is a literal, not a pattern.
    is_later_code = conditions.str.contains(' DIAB1', regex=False, na=False)
    return patients[is_first_code | is_later_code]


# Solution 3: vectorized regular-expression match.
def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
    """Return patients having a condition code that starts with ``DIAB1``.

    Rows with a missing ``conditions`` value never match.

    Args:
        patients: Frame with a ``conditions`` column of whitespace-separated
            codes.

    Returns:
        The matching rows of ``patients``.
    """
    # Non-capturing group: a capturing group makes str.contains() emit a
    # "pattern has match groups" UserWarning.
    has_diab1 = patients['conditions'].str.contains(
        r"(?:^|\s)DIAB1", regex=True, na=False
    )
    return patients[has_diab1]


# Note: isin matches the whole string and is not suitable for this case.
# r      : raw string, so the backslash reaches the regex engine untouched
# (?: )  : non-capturing group
# ^      : matches the start of the string
# \s     : matches any whitespace character (space, tab, newline)
# DIAB1  : the condition prefix we are looking for
# The regex pattern r"(?:^|\s)DIAB1" ensures that "DIAB1" is either at the
# start of the string or preceded by a whitespace character.