diff --git a/bag.ipynb b/bag.ipynb
index 663baf4b..b3717af7 100644
--- a/bag.ipynb
+++ b/bag.ipynb
@@ -138,6 +138,35 @@
     "b.count().compute() # Count total number of records"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# groupby: count people in each occupation (groupby shuffles the whole dataset, so it is slow on large data)\n",
+    "(b.groupby(lambda record: record[\"occupation\"])\n",
+    "  .map(lambda kv: (kv[0], len(kv[1])))\n",
+    "  .compute()[:10])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# foldby: count people living in each city\n",
+    "city_counts = b.foldby(\n",
+    "    key=lambda record: record[\"address\"][\"city\"],\n",
+    "    binop=lambda acc, record: acc + 1,  # within a partition: one more record for this key\n",
+    "    initial=0,\n",
+    "    combine=lambda left, right: left + right,  # across partitions: add the partial counts (default would reuse binop and add 1)\n",
+    "    combine_initial=0,\n",
+    ")\n",
+    "city_counts.take(10)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},