Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions module2/webcrawl-rayon/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Manifest for the parallel Wikipedia page-fetching example.
[package]
name = "webcrawl-rayon"
version = "0.1.0"
edition = "2021"

[dependencies]
# Client library used by src/main.rs to look up pages by title.
wikipedia = "0.3.4"
# Supplies the parallel iterators (`par_iter`) used by src/main.rs.
rayon = "1.7.0"
38 changes: 38 additions & 0 deletions module2/webcrawl-rayon/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
SHELL := /bin/bash
# None of the targets in this Makefile produce a file of the same name, so
# declare them all phony. Otherwise a stray file or directory called, say,
# `test` or `build` would make that target look up to date and its recipe
# would be skipped.
.PHONY: help clean build run test lint format release all bump

# Print every target that carries a trailing `## description` annotation,
# sorted alphabetically, with the target name highlighted.
help:
	@grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'

# Thin wrappers around cargo. The text after `##` on each target line is what
# `make help` prints, so every user-facing target needs one.

clean: ## Clean the project using cargo
	cargo clean

build: ## Build the project using cargo
	cargo build

run: ## Run the project using cargo
	cargo run

test: ## Run the tests using cargo
	cargo test

lint: ## Run the linter using cargo
	@rustup component add clippy 2> /dev/null
	cargo clippy

format: ## Format the code using cargo
	@rustup component add rustfmt 2> /dev/null
	cargo fmt

release: ## Build an optimized release binary using cargo
	cargo build --release

all: format lint test run ## Format, lint, test, and run the project

# Interactively rewrite the `version = "..."` line in Cargo.toml.
#
# The whole recipe is one shell invocation so the variables survive between
# steps. `sed -i.bak` (suffix attached to the flag) is the spelling that both
# GNU and BSD sed accept; the backup it leaves behind is removed right after.
# The input is checked against a semver-shaped pattern before it is spliced
# into the sed expression, so characters such as `/` or `&` cannot corrupt it.
bump: ## Bump the version of the project
	@echo "Current version is $$(cargo pkgid | cut -d# -f2)"; \
	read -r -p "Enter the new version: " version; \
	semver_re='^[0-9]+\.[0-9]+\.[0-9]+([-+][0-9A-Za-z.-]+)?$$'; \
	if [[ ! "$$version" =~ $$semver_re ]]; then \
		echo "Invalid version '$$version' (expected e.g. 1.2.3)" >&2; \
		exit 1; \
	fi; \
	sed -i.bak -E "s/^version = .*/version = \"$$version\"/" Cargo.toml \
		&& rm -f Cargo.toml.bak \
		&& echo "Version bumped to $$(cargo pkgid | cut -d# -f2)"
78 changes: 78 additions & 0 deletions module2/webcrawl-rayon/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*

* Uses wikipedia crate to fetch pages

* Processes page content

* Collects timing metrics

* Concurrent page processing

* Shows crate usage and concurrency in Rust
*/

use rayon::prelude::*;
use wikipedia::http::default::Client;
use wikipedia::Page;
use wikipedia::Wikipedia;

/// Title and text content retrieved for a single page.
///
/// The standard traits are derived so values can be logged with `{:?}`,
/// duplicated, and compared in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ProcessedPage {
    /// Title reported for the page.
    title: String,
    /// Text content of the page.
    data: String,
}

/// Titles of the pages that `main` looks up, in the order they are reported.
const PAGES: [&str; 9] = [
    "Giannis Antetokounmpo",
    "James Harden",
    "Russell Westbrook",
    "Stephen Curry",
    "Kevin Durant",
    "LeBron James",
    "Kobe Bryant",
    "Michael Jordan",
    "Shaquille O'Neal",
];

/// Retrieves the title and text content of `page` and bundles them into a
/// [`ProcessedPage`].
///
/// # Panics
///
/// Panics if either the title or the content lookup returns an error. The
/// signature has no error channel, so the failure is surfaced as a panic whose
/// message says which lookup failed and, for the content, which page was
/// involved.
fn process_page(page: &Page<Client>) -> ProcessedPage {
    let title = page
        .get_title()
        .unwrap_or_else(|err| panic!("failed to resolve page title: {:?}", err));
    let data = page
        .get_content()
        .unwrap_or_else(|err| panic!("failed to fetch content for page {:?}: {:?}", title, err));
    ProcessedPage { title, data }
}

//times how long it takes to process the pages and total time
fn main() {
//start timer
let start = std::time::Instant::now();
let wikipedia = Wikipedia::<Client>::default();
let pages: Vec<_> = PAGES
.par_iter() //parallel iterator
.map(|&p| wikipedia.page_from_title(p.to_string()))
.collect();

let processed_pages: Vec<ProcessedPage> = pages.par_iter().map(process_page).collect();
for page in processed_pages {
//time how long it takes to process each page
let start_page = std::time::Instant::now();

println!("Title: {}", page.title.as_str());
//grab first sentence of the page
let first_sentence = page.data.split('.').next().unwrap();
println!("First sentence: {}", first_sentence);
//count the number of words in the page
let word_count = page.data.split_whitespace().count();
println!("Word count: {}", word_count);
//prints time it took to process each page
println!("Page time: {:?}", start_page.elapsed());
}
//descriptive statistics of: total time, average time per page, and total number of pages, as well as the number of threads used
println!("Total time: {:?}", start.elapsed());
println!(
"Average time per page: {:?}",
start.elapsed() / PAGES.len() as u32
);
println!("Total number of pages: {}", PAGES.len());
println!("Number of threads: {}", rayon::current_num_threads());
}
Loading