From dcff73ed78c210bdfe14fb8c601dc7d790fff058 Mon Sep 17 00:00:00 2001 From: mikehquan19 Date: Fri, 5 Dec 2025 18:44:36 -0600 Subject: [PATCH 1/6] Rewrite the comet calendar scraper --- parser/cometCalendarParser.go | 2 + scrapers/cometCalendar.go | 204 ++++++++++++++++++---------- 2 files changed, 110 insertions(+), 96 deletions(-) diff --git a/parser/cometCalendarParser.go b/parser/cometCalendarParser.go index fd23056..e01b2ce 100644 --- a/parser/cometCalendarParser.go +++ b/parser/cometCalendarParser.go @@ -13,6 +13,8 @@ import ( "github.com/UTDNebula/nebula-api/api/schema" ) +// TODO: FIND A WAY TO DYNAMICALLY RETRIEVE THIS + // Some events have only the building name, not the abbreviation // Maps building names to their abbreviations var buildingAbbreviations = map[string]string{ diff --git a/scrapers/cometCalendar.go b/scrapers/cometCalendar.go index 26e42af..4bafbb7 100644 --- a/scrapers/cometCalendar.go +++ b/scrapers/cometCalendar.go @@ -21,7 +21,7 @@ import ( // RawEvent mirrors the nested event payload returned by the calendar API. type RawEvent struct { - Event map[string]interface{} `json:"event"` + Event map[string]any `json:"event"` } // APICalendarResponse models the calendar API pagination envelope. 
@@ -37,92 +37,42 @@ func ScrapeCometCalendar(outDir string) { if err != nil { panic(err) } - cli := http.Client{Timeout: 15 * time.Second} + client := http.Client{Timeout: 15 * time.Second} var calendarData APICalendarResponse // Get the total number of pages log.Printf("Getting the number of pages...") - if err := scrapeAndUnmarshal(&cli, 0, &calendarData); err != nil { + if err := scrapeAndUnmarshal(&client, 0, &calendarData); err != nil { panic(err) } numPages := calendarData.Page["total"] log.Printf("The number of pages is %d!\n\n", numPages) - var events []schema.Event + var calendarEvents []schema.Event for page := range numPages { log.Printf("Scraping events of page %d...", page+1) - if err := scrapeAndUnmarshal(&cli, page+1, &calendarData); err != nil { + if err := scrapeAndUnmarshal(&client, page+1, &calendarData); err != nil { panic(err) } for _, rawEvent := range calendarData.Events { - // Parse the time - eventInstance := toMap(toMap(toSlice(rawEvent.Event["event_instances"])[0])["event_instance"]) - startTime := parseTime(toString(eventInstance["start"])) - endTime := startTime - if toString(eventInstance["end"]) != "" { - endTime = parseTime(toString(eventInstance["end"])) - } - - // Parse location - location := strings.Trim(fmt.Sprintf("%s, %s", toString(rawEvent.Event["location_name"]), toString(rawEvent.Event["room_number"])), " ,") - - // Parse the event types, event topic, and event target audience - filters := toMap(rawEvent.Event["filters"]) - eventTypes := []string{} - eventTopics := []string{} - targetAudiences := []string{} - - rawTypes := toSlice(filters["event_types"]) - for _, rawType := range rawTypes { - eventTypes = append(eventTypes, toString(toMap(rawType)["name"])) - } - - rawAudiences := toSlice(filters["event_target_audience"]) - for _, audience := range rawAudiences { - targetAudiences = append(targetAudiences, toString(toMap(audience)["name"])) - } - - rawTopics := toSlice(filters["event_topic"]) - for _, topic := range 
rawTopics { - eventTopics = append(eventTopics, toString(toMap(topic)["name"])) - } - - // Parse the event departments, and tags - departments := []string{} - tags := []string{} - - rawTags := toSlice(rawEvent.Event["tags"]) - for _, tag := range rawTags { - tags = append(tags, tag.(string)) - } - - rawDeparments := toSlice(rawEvent.Event["departments"]) - for _, deparment := range rawDeparments { - departments = append(departments, toMap(deparment)["name"].(string)) - } - - // Parse the contact info, =ote that some events won't have contact phone number - rawContactInfo := toMap(rawEvent.Event["custom_fields"]) - contactInfo := [3]string{} - for i, infoField := range []string{ - "contact_information_name", "contact_information_email", "contact_information_phone", - } { - contactInfo[i] = toString(rawContactInfo[infoField]) - } - - events = append(events, schema.Event{ + startTime, endTime := parseStartAndEndTime(rawEvent) + eventTypes, targetAudiences, eventTopics := parseFilters(rawEvent) + departments, tags := parseDepartmentsAndTags(rawEvent) + contactInfo := parseContactInfo(rawEvent) + + calendarEvents = append(calendarEvents, schema.Event{ Id: primitive.NewObjectID(), - Summary: toString(rawEvent.Event["title"]), - Location: location, + Summary: convert[string](rawEvent.Event["title"]), + Location: parseEventLocation(rawEvent), StartTime: startTime, EndTime: endTime, - Description: toString(rawEvent.Event["description_text"]), + Description: convert[string](rawEvent.Event["description_text"]), EventType: eventTypes, TargetAudience: targetAudiences, Topic: eventTopics, EventTags: tags, - EventWebsite: toString(rawEvent.Event["url"]), + EventWebsite: convert[string](rawEvent.Event["url"]), Department: departments, ContactName: contactInfo[0], ContactEmail: contactInfo[1], @@ -132,71 +82,133 @@ func ScrapeCometCalendar(outDir string) { log.Printf("Scraped events of page %d successfully!\n", page+1) } - if err := 
utils.WriteJSON(fmt.Sprintf("%s/cometCalendarScraped.json", outDir), events); err != nil { + writePath := fmt.Sprintf("%s/cometCalendarScraped.json", outDir) + if err := utils.WriteJSON(writePath, calendarEvents); err != nil { panic(err) } - log.Printf("Finished scraping %d events successfully!\n\n", len(events)) + log.Printf("Finished scraping %d events successfully!\n\n", len(calendarEvents)) } // scrapeAndUnmarshal fetches a calendar page and decodes it into data. func scrapeAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) error { // Call API to get the byte data calendarUrl := fmt.Sprintf("https://calendar.utdallas.edu/api/2/events?days=365&pp=100&page=%d", page) - req, err := http.NewRequest("GET", calendarUrl, nil) + request, err := http.NewRequest("GET", calendarUrl, nil) if err != nil { return err } - res, err := client.Do(req) + response, err := client.Do(request) if err != nil { return err } - if res != nil && res.StatusCode != 200 { - return fmt.Errorf("ERROR: Non-200 status is returned, %s", res.Status) + if response != nil && response.StatusCode != 200 { + return fmt.Errorf("ERROR: Non-200 status is returned, %s", response.Status) } // Unmarshal bytes to the response data buffer := bytes.Buffer{} - if _, err = buffer.ReadFrom(res.Body); err != nil { + if _, err = buffer.ReadFrom(response.Body); err != nil { return err } - res.Body.Close() + response.Body.Close() if err = json.Unmarshal(buffer.Bytes(), &data); err != nil { return err } return nil } -// toSlice attempts to convert data into a slice of interface{}. 
-func toSlice(data interface{}) []interface{} { - if array, ok := data.([]interface{}); ok { - return array +// parseStartAndEndTime parses the start and end time of the event +func parseStartAndEndTime(event RawEvent) (time.Time, time.Time) { + instance := convert[map[string]any]( + convert[map[string]any](convert[[]any](event.Event["event_instances"])[0])["event_instance"], + ) + + // Converts RFC3339 timestamp string to time.Time + startTime, err := time.Parse(time.RFC3339, convert[string](instance["start"])) + if err != nil { + panic(err) } - return nil + var endTime time.Time + if convert[string](instance["end"]) != "" { + endTime, err = time.Parse(time.RFC3339, convert[string](instance["end"])) + if err != nil { + panic(err) + } + } else { + endTime = startTime + } + return startTime, endTime +} + +func parseEventLocation(event RawEvent) string { + building := convert[string](event.Event["location_name"]) + room_num := convert[string](event.Event["room_number"]) + location := strings.Trim(fmt.Sprintf("%s, %s", building, room_num), " ,") + return location } -// toMap attempts to convert data into a map keyed by string. 
-func toMap(data interface{}) map[string]interface{} { - if dataMap, ok := data.(map[string]interface{}); ok { - return dataMap +// Parse the event types, event topic, and event target audience +func parseFilters(event RawEvent) ([]string, []string, []string) { + eventTypes := []string{} + targetAudiences := []string{} + eventTopics := []string{} + + filters := convert[map[string]any](event.Event["filters"]) + + rawTypes := convert[[]any](filters["event_types"]) + for _, rawType := range rawTypes { + eventTypes = append(eventTypes, convert[string](convert[map[string]any](rawType)["name"])) } - return nil + + rawAudiences := convert[[]any](filters["event_target_audience"]) + for _, audience := range rawAudiences { + targetAudiences = append(targetAudiences, convert[string](convert[map[string]any](audience)["name"])) + } + + rawTopics := convert[[]any](filters["event_topic"]) + for _, topic := range rawTopics { + eventTopics = append(eventTopics, convert[string](convert[map[string]any](topic)["name"])) + } + return eventTypes, targetAudiences, eventTopics } -// toString returns the string form of data or empty string when nil. -func toString(data interface{}) string { - if data != nil { - if dataString, ok := data.(string); ok { - return dataString - } +// Parse the event departments, and event tags +func parseDepartmentsAndTags(event RawEvent) ([]string, []string) { + departments := []string{} + tags := []string{} + + rawTags := convert[[]any](event.Event["tags"]) + for _, tag := range rawTags { + tags = append(tags, convert[string](tag)) } - return "" + + rawDeparments := convert[[]any](event.Event["departments"]) + for _, deparment := range rawDeparments { + departments = append(departments, convert[string](convert[map[string]any](deparment)["name"])) + } + return departments, tags } -// parseTime converts an RFC3339 timestamp string to a time.Time. 
-func parseTime(stringTime string) time.Time { - parsedTime, err := time.Parse(time.RFC3339, stringTime) - if err != nil { - panic(err) +// Parse the contact info. +func parseContactInfo(event RawEvent) [3]string { + // Note that some events won't have contact phone number + rawContactInfo := convert[map[string]any](event.Event["custom_fields"]) + contactInfo := [3]string{} + for i, infoField := range []string{ + "contact_information_name", + "contact_information_email", + "contact_information_phone", + } { + contactInfo[i] = convert[string](rawContactInfo[infoField]) + } + return contactInfo +} + +// convert() attempts to convert data into types for this scraper +func convert[T []any | map[string]any | string](data any) T { + if newTypeData, ok := data.(T); ok { + return newTypeData } - return parsedTime + var zeroValue T + return zeroValue } From 9a0c0582144c41ff6270884fdc06bd3dc7bf77ee Mon Sep 17 00:00:00 2001 From: mikehquan19 Date: Fri, 5 Dec 2025 23:15:59 -0600 Subject: [PATCH 2/6] Dynamically retrieve valid abbrs --- parser/cometCalendarParser.go | 40 ++++++++++++++++- scrapers/cometCalendar.go | 84 +++++++++++++++++++++-------------- 2 files changed, 88 insertions(+), 36 deletions(-) diff --git a/parser/cometCalendarParser.go b/parser/cometCalendarParser.go index e01b2ce..2321604 100644 --- a/parser/cometCalendarParser.go +++ b/parser/cometCalendarParser.go @@ -1,3 +1,7 @@ +/* + This file contains the code for the comet calendar events parser. 
+*/ + package parser import ( @@ -17,7 +21,7 @@ import ( // Some events have only the building name, not the abbreviation // Maps building names to their abbreviations -var buildingAbbreviations = map[string]string{ +var DefaultBuildings = map[string]string{ "Activity Center": "AB", "Activity Center Bookstore": "ACB", "Administration": "AD", @@ -76,7 +80,7 @@ var buildingAbbreviations = map[string]string{ } // Valid building abreviations for checking -var validAbbreviations []string = []string{ +var DefaultValid []string = []string{ "AB", "ACB", "AD", @@ -148,6 +152,7 @@ func ParseCometCalendar(inDir string, outDir string) { } multiBuildingMap := make(map[string]map[string]map[string][]schema.Event) + buildingAbbreviations, validAbbreviations := getAbbreviations(inDir) for _, event := range allEvents { @@ -241,3 +246,34 @@ func ParseCometCalendar(inDir string, outDir string) { utils.WriteJSON(fmt.Sprintf("%s/cometCalendar.json", outDir), result) } + +// getAbbreviations dynamically retrieves the all of the locations abbreviations +func getAbbreviations(inDir string) (map[string]string, []string) { + // Get the locations from the map scraper + mapFile, err := os.ReadFile(inDir + "/mapLocations.json") + if err != nil { + // Fall back if we haven't scraped the locations yet + return DefaultBuildings, DefaultValid + } + var locations []map[string]any + if err = json.Unmarshal(mapFile, &locations); err != nil { + panic(err) + } + + // Process the abbreviations + buildingsAbbrs := make(map[string]string, 0) + validAbbrs := make([]string, 0) + + for _, location := range locations { + name := *utils.ConvertFromInterface[string](location["name"]) + acronym := *utils.ConvertFromInterface[string](location["acronym"]) + + // Trim the tailing acronym in the name + trimmedName := strings.Split(name, " (")[0] + buildingsAbbrs[trimmedName] = acronym + + validAbbrs = append(validAbbrs, acronym) + } + + return buildingsAbbrs, validAbbrs +} diff --git a/scrapers/cometCalendar.go 
b/scrapers/cometCalendar.go index 4bafbb7..0207248 100644 --- a/scrapers/cometCalendar.go +++ b/scrapers/cometCalendar.go @@ -1,5 +1,5 @@ /* - This file contains the code for the events scraper. + This file contains the code for the comet calendar events scraper. */ package scrapers @@ -19,6 +19,8 @@ import ( "go.mongodb.org/mongo-driver/bson/primitive" ) +const BASE_CAL_URL string = "https://calendar.utdallas.edu/api/2/events" + // RawEvent mirrors the nested event payload returned by the calendar API. type RawEvent struct { Event map[string]any `json:"event"` @@ -31,7 +33,7 @@ type APICalendarResponse struct { Date map[string]string `json:"date"` } -// ScrapeCometCalendar retrieves calendar events through the API and writes normalized JSON output. +// ScrapeCometCalendar retrieves calendar events through the API func ScrapeCometCalendar(outDir string) { err := os.MkdirAll(outDir, 0777) if err != nil { @@ -42,7 +44,7 @@ func ScrapeCometCalendar(outDir string) { // Get the total number of pages log.Printf("Getting the number of pages...") - if err := scrapeAndUnmarshal(&client, 0, &calendarData); err != nil { + if err := callAPIAndUnmarshal(&client, 0, &calendarData); err != nil { panic(err) } numPages := calendarData.Page["total"] @@ -51,20 +53,20 @@ func ScrapeCometCalendar(outDir string) { var calendarEvents []schema.Event for page := range numPages { log.Printf("Scraping events of page %d...", page+1) - if err := scrapeAndUnmarshal(&client, page+1, &calendarData); err != nil { + if err := callAPIAndUnmarshal(&client, page+1, &calendarData); err != nil { panic(err) } - for _, rawEvent := range calendarData.Events { - startTime, endTime := parseStartAndEndTime(rawEvent) - eventTypes, targetAudiences, eventTopics := parseFilters(rawEvent) - departments, tags := parseDepartmentsAndTags(rawEvent) - contactInfo := parseContactInfo(rawEvent) + // Parse all necessary info + startTime, endTime := getTime(rawEvent) + eventTypes, targetAudiences, eventTopics := 
getFilters(rawEvent) + departments, tags := getDepartmentsAndTags(rawEvent) + contactInfo := getContactInfo(rawEvent) calendarEvents = append(calendarEvents, schema.Event{ Id: primitive.NewObjectID(), Summary: convert[string](rawEvent.Event["title"]), - Location: parseEventLocation(rawEvent), + Location: getEventLocation(rawEvent), StartTime: startTime, EndTime: endTime, Description: convert[string](rawEvent.Event["description_text"]), @@ -90,13 +92,18 @@ func ScrapeCometCalendar(outDir string) { } // scrapeAndUnmarshal fetches a calendar page and decodes it into data. -func scrapeAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) error { +func callAPIAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) error { // Call API to get the byte data - calendarUrl := fmt.Sprintf("https://calendar.utdallas.edu/api/2/events?days=365&pp=100&page=%d", page) + calendarUrl := fmt.Sprintf("%s?days=365&pp=100&page=%d", BASE_CAL_URL, page) request, err := http.NewRequest("GET", calendarUrl, nil) if err != nil { return err } + request.Header = http.Header{ + "Content-type": {"application/json"}, + "Accept": {"application/json"}, + } + response, err := client.Do(request) if err != nil { return err @@ -104,30 +111,32 @@ func scrapeAndUnmarshal(client *http.Client, page int, data *APICalendarResponse if response != nil && response.StatusCode != 200 { return fmt.Errorf("ERROR: Non-200 status is returned, %s", response.Status) } + defer response.Body.Close() // Unmarshal bytes to the response data buffer := bytes.Buffer{} if _, err = buffer.ReadFrom(response.Body); err != nil { return err } - response.Body.Close() if err = json.Unmarshal(buffer.Bytes(), &data); err != nil { return err } + return nil } -// parseStartAndEndTime parses the start and end time of the event -func parseStartAndEndTime(event RawEvent) (time.Time, time.Time) { +// getTime parses the start and end time of the event +func getTime(event RawEvent) (time.Time, time.Time) { 
instance := convert[map[string]any]( - convert[map[string]any](convert[[]any](event.Event["event_instances"])[0])["event_instance"], - ) + convert[map[string]any]( + convert[[]any](event.Event["event_instances"])[0])["event_instance"]) // Converts RFC3339 timestamp string to time.Time startTime, err := time.Parse(time.RFC3339, convert[string](instance["start"])) if err != nil { panic(err) } + var endTime time.Time if convert[string](instance["end"]) != "" { endTime, err = time.Parse(time.RFC3339, convert[string](instance["end"])) @@ -137,27 +146,30 @@ func parseStartAndEndTime(event RawEvent) (time.Time, time.Time) { } else { endTime = startTime } + return startTime, endTime } -func parseEventLocation(event RawEvent) string { +// getEventLocation parses the location of the event +func getEventLocation(event RawEvent) string { building := convert[string](event.Event["location_name"]) - room_num := convert[string](event.Event["room_number"]) - location := strings.Trim(fmt.Sprintf("%s, %s", building, room_num), " ,") + room := convert[string](event.Event["room_number"]) + location := strings.Trim(fmt.Sprintf("%s, %s", building, room), " ,") + return location } -// Parse the event types, event topic, and event target audience -func parseFilters(event RawEvent) ([]string, []string, []string) { - eventTypes := []string{} +// getFilters parses the types, topics, and target audiences +func getFilters(event RawEvent) ([]string, []string, []string) { + types := []string{} targetAudiences := []string{} - eventTopics := []string{} + topics := []string{} filters := convert[map[string]any](event.Event["filters"]) rawTypes := convert[[]any](filters["event_types"]) for _, rawType := range rawTypes { - eventTypes = append(eventTypes, convert[string](convert[map[string]any](rawType)["name"])) + types = append(types, convert[string](convert[map[string]any](rawType)["name"])) } rawAudiences := convert[[]any](filters["event_target_audience"]) @@ -167,13 +179,14 @@ func 
parseFilters(event RawEvent) ([]string, []string, []string) { rawTopics := convert[[]any](filters["event_topic"]) for _, topic := range rawTopics { - eventTopics = append(eventTopics, convert[string](convert[map[string]any](topic)["name"])) + topics = append(topics, convert[string](convert[map[string]any](topic)["name"])) } - return eventTypes, targetAudiences, eventTopics + + return types, targetAudiences, topics } -// Parse the event departments, and event tags -func parseDepartmentsAndTags(event RawEvent) ([]string, []string) { +// getDepartmentsAndTags parses the departments, and tags +func getDepartmentsAndTags(event RawEvent) ([]string, []string) { departments := []string{} tags := []string{} @@ -186,14 +199,16 @@ func parseDepartmentsAndTags(event RawEvent) ([]string, []string) { for _, deparment := range rawDeparments { departments = append(departments, convert[string](convert[map[string]any](deparment)["name"])) } + return departments, tags } -// Parse the contact info. -func parseContactInfo(event RawEvent) [3]string { +// getContactInfo parses the contact info. 
+func getContactInfo(event RawEvent) [3]string { // Note that some events won't have contact phone number - rawContactInfo := convert[map[string]any](event.Event["custom_fields"]) contactInfo := [3]string{} + + rawContactInfo := convert[map[string]any](event.Event["custom_fields"]) for i, infoField := range []string{ "contact_information_name", "contact_information_email", @@ -201,13 +216,14 @@ func parseContactInfo(event RawEvent) [3]string { contactInfo[i] = convert[string](rawContactInfo[infoField]) } + return contactInfo } // convert() attempts to convert data into types for this scraper func convert[T []any | map[string]any | string](data any) T { - if newTypeData, ok := data.(T); ok { - return newTypeData + if newTypedData, ok := data.(T); ok { + return newTypedData } var zeroValue T return zeroValue From e289632755fe48192ce9a1ba00ee115a40f3fe22 Mon Sep 17 00:00:00 2001 From: mikehquan19 Date: Sat, 6 Dec 2025 00:05:57 -0600 Subject: [PATCH 3/6] Dynamic retrieval working --- parser/cometCalendarParser.go | 47 ++++++++++++++++++++++------------- scrapers/cometCalendar.go | 19 ++++++++------ 2 files changed, 41 insertions(+), 25 deletions(-) diff --git a/parser/cometCalendarParser.go b/parser/cometCalendarParser.go index 2321604..c50bc11 100644 --- a/parser/cometCalendarParser.go +++ b/parser/cometCalendarParser.go @@ -12,13 +12,13 @@ import ( "regexp" "slices" "strings" + "time" + "github.com/UTDNebula/api-tools/scrapers" "github.com/UTDNebula/api-tools/utils" "github.com/UTDNebula/nebula-api/api/schema" ) -// TODO: FIND A WAY TO DYNAMICALLY RETRIEVE THIS - // Some events have only the building name, not the abbreviation // Maps building names to their abbreviations var DefaultBuildings = map[string]string{ @@ -152,7 +152,7 @@ func ParseCometCalendar(inDir string, outDir string) { } multiBuildingMap := make(map[string]map[string]map[string][]schema.Event) - buildingAbbreviations, validAbbreviations := getAbbreviations(inDir) + buildingAbbreviations, 
validAbbreviations := getLocationAbbreviations(inDir) for _, event := range allEvents { @@ -248,32 +248,45 @@ func ParseCometCalendar(inDir string, outDir string) { } // getAbbreviations dynamically retrieves the all of the locations abbreviations -func getAbbreviations(inDir string) (map[string]string, []string) { +func getLocationAbbreviations(inDir string) (map[string]string, []string) { // Get the locations from the map scraper + var mapFile []byte + mapFile, err := os.ReadFile(inDir + "/mapLocations.json") if err != nil { - // Fall back if we haven't scraped the locations yet - return DefaultBuildings, DefaultValid + if os.IsNotExist(err) { + // Scrape the data if the it doesn't exist yet and then get the map file + scrapers.ScrapeMapLocations(inDir) + time.Sleep(2 * time.Second) + ParseMapLocations(inDir, inDir) + + // If fail to get the locations again, not because unscraped + mapFile, _ = os.ReadFile(inDir + "/mapLocations.json") + } else { + panic(err) + } } - var locations []map[string]any + var locations []schema.MapBuilding if err = json.Unmarshal(mapFile, &locations); err != nil { panic(err) } // Process the abbreviations - buildingsAbbrs := make(map[string]string, 0) - validAbbrs := make([]string, 0) + buildingsAbbreviations := make(map[string]string, 0) + validAbbreviations := make([]string, 0) for _, location := range locations { - name := *utils.ConvertFromInterface[string](location["name"]) - acronym := *utils.ConvertFromInterface[string](location["acronym"]) - - // Trim the tailing acronym in the name - trimmedName := strings.Split(name, " (")[0] - buildingsAbbrs[trimmedName] = acronym + // Trim the following acronym in the name + trimmedName := strings.Split(*location.Name, " (")[0] + // Fallback on the locations that have no acronyms + acronym := "" + if location.Acronym != nil { + acronym = *location.Acronym + } - validAbbrs = append(validAbbrs, acronym) + buildingsAbbreviations[trimmedName] = acronym + validAbbreviations = 
append(validAbbreviations, acronym) } - return buildingsAbbrs, validAbbrs + return buildingsAbbreviations, validAbbreviations } diff --git a/scrapers/cometCalendar.go b/scrapers/cometCalendar.go index 0207248..3a4f613 100644 --- a/scrapers/cometCalendar.go +++ b/scrapers/cometCalendar.go @@ -19,7 +19,7 @@ import ( "go.mongodb.org/mongo-driver/bson/primitive" ) -const BASE_CAL_URL string = "https://calendar.utdallas.edu/api/2/events" +const CAL_URL string = "https://calendar.utdallas.edu/api/2/events" // RawEvent mirrors the nested event payload returned by the calendar API. type RawEvent struct { @@ -44,7 +44,8 @@ func ScrapeCometCalendar(outDir string) { // Get the total number of pages log.Printf("Getting the number of pages...") - if err := callAPIAndUnmarshal(&client, 0, &calendarData); err != nil { + + if err := callAndUnmarshal(&client, 0, &calendarData); err != nil { panic(err) } numPages := calendarData.Page["total"] @@ -53,7 +54,7 @@ func ScrapeCometCalendar(outDir string) { var calendarEvents []schema.Event for page := range numPages { log.Printf("Scraping events of page %d...", page+1) - if err := callAPIAndUnmarshal(&client, page+1, &calendarData); err != nil { + if err := callAndUnmarshal(&client, page+1, &calendarData); err != nil { panic(err) } for _, rawEvent := range calendarData.Events { @@ -81,6 +82,7 @@ func ScrapeCometCalendar(outDir string) { ContactPhoneNumber: contactInfo[2], }) } + log.Printf("Scraped events of page %d successfully!\n", page+1) } @@ -88,13 +90,14 @@ func ScrapeCometCalendar(outDir string) { if err := utils.WriteJSON(writePath, calendarEvents); err != nil { panic(err) } + log.Printf("Finished scraping %d events successfully!\n\n", len(calendarEvents)) } // scrapeAndUnmarshal fetches a calendar page and decodes it into data. 
-func callAPIAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) error { +func callAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) error { // Call API to get the byte data - calendarUrl := fmt.Sprintf("%s?days=365&pp=100&page=%d", BASE_CAL_URL, page) + calendarUrl := fmt.Sprintf("%s?days=365&pp=100&page=%d", CAL_URL, page) request, err := http.NewRequest("GET", calendarUrl, nil) if err != nil { return err @@ -162,7 +165,7 @@ func getEventLocation(event RawEvent) string { // getFilters parses the types, topics, and target audiences func getFilters(event RawEvent) ([]string, []string, []string) { types := []string{} - targetAudiences := []string{} + audiences := []string{} topics := []string{} filters := convert[map[string]any](event.Event["filters"]) @@ -174,7 +177,7 @@ func getFilters(event RawEvent) ([]string, []string, []string) { rawAudiences := convert[[]any](filters["event_target_audience"]) for _, audience := range rawAudiences { - targetAudiences = append(targetAudiences, convert[string](convert[map[string]any](audience)["name"])) + audiences = append(audiences, convert[string](convert[map[string]any](audience)["name"])) } rawTopics := convert[[]any](filters["event_topic"]) @@ -182,7 +185,7 @@ func getFilters(event RawEvent) ([]string, []string, []string) { topics = append(topics, convert[string](convert[map[string]any](topic)["name"])) } - return types, targetAudiences, topics + return types, audiences, topics } // getDepartmentsAndTags parses the departments, and tags From f355915edec6a123fd6b8b4fade5b6152c8f2ab3 Mon Sep 17 00:00:00 2001 From: mikehquan19 Date: Sat, 6 Dec 2025 00:14:24 -0600 Subject: [PATCH 4/6] Add the sleep (not sure if it's worth it) --- parser/cometCalendarParser.go | 1 + 1 file changed, 1 insertion(+) diff --git a/parser/cometCalendarParser.go b/parser/cometCalendarParser.go index c50bc11..05a6694 100644 --- a/parser/cometCalendarParser.go +++ b/parser/cometCalendarParser.go @@ -259,6 +259,7 
@@ func getLocationAbbreviations(inDir string) (map[string]string, []string) { scrapers.ScrapeMapLocations(inDir) time.Sleep(2 * time.Second) ParseMapLocations(inDir, inDir) + time.Sleep(2 * time.Second) // Probably a good idea to let it sleep before moving on? // If fail to get the locations again, not because unscraped mapFile, _ = os.ReadFile(inDir + "/mapLocations.json") From 2395774d5da814d0e41a14617a98068e43a1f171 Mon Sep 17 00:00:00 2001 From: mikehquan19 Date: Sat, 6 Dec 2025 00:23:02 -0600 Subject: [PATCH 5/6] I like it that way --- parser/cometCalendarParser.go | 17 ++++++++++------- scrapers/cometCalendar.go | 17 ++++++++--------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/parser/cometCalendarParser.go b/parser/cometCalendarParser.go index 05a6694..7db635b 100644 --- a/parser/cometCalendarParser.go +++ b/parser/cometCalendarParser.go @@ -259,10 +259,13 @@ func getLocationAbbreviations(inDir string) (map[string]string, []string) { scrapers.ScrapeMapLocations(inDir) time.Sleep(2 * time.Second) ParseMapLocations(inDir, inDir) - time.Sleep(2 * time.Second) // Probably a good idea to let it sleep before moving on? 
+ time.Sleep(2 * time.Second) - // If fail to get the locations again, not because unscraped - mapFile, _ = os.ReadFile(inDir + "/mapLocations.json") + // If fail to get the locations again, it's not because location is unscraped + mapFile, err = os.ReadFile(inDir + "/mapLocations.json") + if err != nil { + panic(err) + } } else { panic(err) } @@ -280,13 +283,13 @@ func getLocationAbbreviations(inDir string) (map[string]string, []string) { // Trim the following acronym in the name trimmedName := strings.Split(*location.Name, " (")[0] // Fallback on the locations that have no acronyms - acronym := "" + abbreviation := "" if location.Acronym != nil { - acronym = *location.Acronym + abbreviation = *location.Acronym } - buildingsAbbreviations[trimmedName] = acronym - validAbbreviations = append(validAbbreviations, acronym) + buildingsAbbreviations[trimmedName] = abbreviation + validAbbreviations = append(validAbbreviations, abbreviation) } return buildingsAbbreviations, validAbbreviations diff --git a/scrapers/cometCalendar.go b/scrapers/cometCalendar.go index 3a4f613..0a1857f 100644 --- a/scrapers/cometCalendar.go +++ b/scrapers/cometCalendar.go @@ -131,8 +131,7 @@ func callAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) // getTime parses the start and end time of the event func getTime(event RawEvent) (time.Time, time.Time) { instance := convert[map[string]any]( - convert[map[string]any]( - convert[[]any](event.Event["event_instances"])[0])["event_instance"]) + convert[map[string]any](convert[[]any](event.Event["event_instances"])[0])["event_instance"]) // Converts RFC3339 timestamp string to time.Time startTime, err := time.Parse(time.RFC3339, convert[string](instance["start"])) @@ -164,28 +163,28 @@ func getEventLocation(event RawEvent) string { // getFilters parses the types, topics, and target audiences func getFilters(event RawEvent) ([]string, []string, []string) { - types := []string{} - audiences := []string{} - topics := []string{} + 
eventTypes := []string{} + targetAudiences := []string{} + eventTopics := []string{} filters := convert[map[string]any](event.Event["filters"]) rawTypes := convert[[]any](filters["event_types"]) for _, rawType := range rawTypes { - types = append(types, convert[string](convert[map[string]any](rawType)["name"])) + eventTypes = append(eventTypes, convert[string](convert[map[string]any](rawType)["name"])) } rawAudiences := convert[[]any](filters["event_target_audience"]) for _, audience := range rawAudiences { - audiences = append(audiences, convert[string](convert[map[string]any](audience)["name"])) + targetAudiences = append(targetAudiences, convert[string](convert[map[string]any](audience)["name"])) } rawTopics := convert[[]any](filters["event_topic"]) for _, topic := range rawTopics { - topics = append(topics, convert[string](convert[map[string]any](topic)["name"])) + eventTopics = append(eventTopics, convert[string](convert[map[string]any](topic)["name"])) } - return types, audiences, topics + return eventTypes, targetAudiences, eventTopics } // getDepartmentsAndTags parses the departments, and tags From f8fbb07b8fa789a4025fa52a534888a15504d94a Mon Sep 17 00:00:00 2001 From: mikehquan19 Date: Sat, 6 Dec 2025 01:08:20 -0600 Subject: [PATCH 6/6] Add go-style error handling --- parser/cometCalendarParser.go | 21 +++++++++++++-------- scrapers/cometCalendar.go | 17 +++++++++-------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/parser/cometCalendarParser.go b/parser/cometCalendarParser.go index 7db635b..4575b6c 100644 --- a/parser/cometCalendarParser.go +++ b/parser/cometCalendarParser.go @@ -152,7 +152,11 @@ func ParseCometCalendar(inDir string, outDir string) { } multiBuildingMap := make(map[string]map[string]map[string][]schema.Event) - buildingAbbreviations, validAbbreviations := getLocationAbbreviations(inDir) + // Some events have only the building name, not the abbreviation + buildingAbbreviations, validAbbreviations, err := 
getLocationAbbreviations(inDir)
+	if err != nil {
+		panic(err)
+	}
 
 	for _, event := range allEvents {
 
@@ -248,7 +252,7 @@ func ParseCometCalendar(inDir string, outDir string) {
 }
 
 // getAbbreviations dynamically retrieves the all of the locations abbreviations
-func getLocationAbbreviations(inDir string) (map[string]string, []string) {
+func getLocationAbbreviations(inDir string) (map[string]string, []string, error) {
 
 	// Get the locations from the map scraper
 	var mapFile []byte
@@ -264,20 +268,21 @@ func getLocationAbbreviations(inDir string) (map[string]string, []string) {
 		// If fail to get the locations again, it's not because location is unscraped
 		mapFile, err = os.ReadFile(inDir + "/mapLocations.json")
 		if err != nil {
-			panic(err)
+			return nil, nil, err
 		}
 	} else {
-		panic(err)
+		return nil, nil, err
 	}
 }
+
 	var locations []schema.MapBuilding
 	if err = json.Unmarshal(mapFile, &locations); err != nil {
-		panic(err)
+		return nil, nil, err
 	}
 
 	// Process the abbreviations
-	buildingsAbbreviations := make(map[string]string, 0)
-	validAbbreviations := make([]string, 0)
+	buildingsAbbreviations := make(map[string]string, 0) // Maps building names to their abbreviations
+	validAbbreviations := make([]string, 0)               // Valid building abbreviations for checking
 
 	for _, location := range locations {
 		// Trim the following acronym in the name
@@ -292,5 +297,5 @@ func getLocationAbbreviations(inDir string) (map[string]string, []string) {
 		validAbbreviations = append(validAbbreviations, abbreviation)
 	}
 
-	return buildingsAbbreviations, validAbbreviations
+	return buildingsAbbreviations, validAbbreviations, nil
 }
 diff --git a/scrapers/cometCalendar.go b/scrapers/cometCalendar.go
index 0a1857f..3a4f613 100644
--- a/scrapers/cometCalendar.go
+++ b/scrapers/cometCalendar.go
@@ -131,7 +131,8 @@ func callAndUnmarshal(client *http.Client, page int, data *APICalendarResponse)
 // getTime parses the start and end time of the event
 func getTime(event RawEvent) (time.Time, time.Time) {
 	instance := 
convert[map[string]any]( - convert[map[string]any](convert[[]any](event.Event["event_instances"])[0])["event_instance"]) + convert[map[string]any]( + convert[[]any](event.Event["event_instances"])[0])["event_instance"]) // Converts RFC3339 timestamp string to time.Time startTime, err := time.Parse(time.RFC3339, convert[string](instance["start"])) @@ -163,28 +164,28 @@ func getEventLocation(event RawEvent) string { // getFilters parses the types, topics, and target audiences func getFilters(event RawEvent) ([]string, []string, []string) { - eventTypes := []string{} - targetAudiences := []string{} - eventTopics := []string{} + types := []string{} + audiences := []string{} + topics := []string{} filters := convert[map[string]any](event.Event["filters"]) rawTypes := convert[[]any](filters["event_types"]) for _, rawType := range rawTypes { - eventTypes = append(eventTypes, convert[string](convert[map[string]any](rawType)["name"])) + types = append(types, convert[string](convert[map[string]any](rawType)["name"])) } rawAudiences := convert[[]any](filters["event_target_audience"]) for _, audience := range rawAudiences { - targetAudiences = append(targetAudiences, convert[string](convert[map[string]any](audience)["name"])) + audiences = append(audiences, convert[string](convert[map[string]any](audience)["name"])) } rawTopics := convert[[]any](filters["event_topic"]) for _, topic := range rawTopics { - eventTopics = append(eventTopics, convert[string](convert[map[string]any](topic)["name"])) + topics = append(topics, convert[string](convert[map[string]any](topic)["name"])) } - return eventTypes, targetAudiences, eventTopics + return types, audiences, topics } // getDepartmentsAndTags parses the departments, and tags