# Reconstructed from a two-commit patch series ("Create Invalid Tweets" and
# "Create Article Views I"); both solutions are kept together in this module.
import pandas as pd


def invalid_tweets(tweets: pd.DataFrame) -> pd.DataFrame:
    """Return the ids of tweets whose content is longer than 15 characters.

    Args:
        tweets: Frame with at least the columns ``tweet_id`` and ``content``.

    Returns:
        A single-column frame (``tweet_id``) containing the rows of *tweets*
        whose ``content`` length is strictly greater than 15. Row order and
        index labels of the input are preserved.
    """
    # The mask marks the rows to report (the over-long tweets), so it is
    # named for what it selects rather than for validity.
    too_long = tweets['content'].str.len() > 15
    return tweets.loc[too_long, ['tweet_id']]


def article_views(views: pd.DataFrame) -> pd.DataFrame:
    """Return the distinct authors who viewed their own article, sorted by id.

    Args:
        views: Frame with at least the columns ``author_id`` and
            ``viewer_id``.

    Returns:
        A single-column frame (``id``) listing, once each and in ascending
        order, every ``author_id`` that appears in a row where it equals
        ``viewer_id``. The index is not reset after sorting.
    """
    self_viewed = views.loc[views['author_id'] == views['viewer_id'], 'author_id']
    # Series.unique() yields a plain array, so wrap it back into a frame
    # under the output column name before sorting.
    result = pd.DataFrame(self_viewed.unique(), columns=['id'])
    return result.sort_values(by='id', ascending=True)