# --- Article Views I.py ---
import pandas as pd


def article_views(views: pd.DataFrame) -> pd.DataFrame:
    """Return the ids of authors who viewed at least one of their own articles.

    Args:
        views: Frame with at least the columns ``author_id`` and
            ``viewer_id``; a row counts as a self-view when both are equal.

    Returns:
        A single-column frame named ``id`` holding each qualifying author
        once, sorted in ascending order.
    """
    is_self_view = views["author_id"] == views["viewer_id"]
    # unique() keeps first-occurrence order, so sort explicitly afterwards.
    self_viewers = views.loc[is_self_view, "author_id"].unique()
    return pd.DataFrame(self_viewers, columns=["id"]).sort_values(by=["id"])


# --- Invalid Tweets.py ---
def invalid_tweets(tweets: pd.DataFrame) -> pd.DataFrame:
    """Return the ids of tweets whose content is longer than 15 characters.

    Args:
        tweets: Frame with at least the columns ``tweet_id`` and ``content``
            (string data).

    Returns:
        A single-column frame ``tweet_id`` with the rows whose ``content``
        length is strictly greater than 15, in their original order.
    """
    too_long = tweets["content"].str.len() > 15
    return tweets.loc[too_long, ["tweet_id"]]