diff options
| author | Serguey Parkhomovsky <xindigo@gmail.com> | 2025-12-08 18:21:46 -0800 |
|---|---|---|
| committer | Serguey Parkhomovsky <xindigo@gmail.com> | 2025-12-08 18:22:05 -0800 |
| commit | 4df3b2195e9a0a4204bc9c0829420e267c678f5a (patch) | |
| tree | a495bcdead77c14c1732b09c8133e594d8f1ec1f | |
| parent | 542116a0dd3fc9e7556e9800dec7663cd4d401f6 (diff) | |
add year capability
| -rw-r--r-- | main.go | 134 |
1 file changed, 70 insertions, 64 deletions
@@ -94,6 +94,7 @@ func isValidXMLByte(b byte) bool { } func main() { + fmt.Println("Program started") // Create SQLite database db, err := sql.Open("sqlite3", "weeklybeats.db") if err != nil { @@ -131,90 +132,95 @@ func main() { } defer insertStmt.Close() - page := 1 + year := 2012 totalItems := 0 + for year <= 2024 { + page := 1 - for { - fmt.Printf("Fetching page %d...\n", page) + for { + fmt.Printf("Fetching page %d for year %d...\n", page, year) - // Construct URL with page parameter - url := fmt.Sprintf("https://weeklybeats.com/music/rss?limit=1000&page=%d", page) + // Construct URL with page parameter + url := fmt.Sprintf("https://weeklybeats.com/music/rss?limit=1000&year=%d&page=%d", year, page) - // Fetch RSS feed - resp, err := http.Get(url) - if err != nil { - log.Printf("Failed to fetch page %d: %v", page, err) - break - } - - if resp.StatusCode != 200 { - fmt.Printf("Received status %d for page %d, stopping\n", resp.StatusCode, page) - resp.Body.Close() - break - } - - body, err := io.ReadAll(resp.Body) - resp.Body.Close() - - if err != nil { - log.Printf("Failed to read response body for page %d: %v", page, err) - break - } + // Fetch RSS feed + resp, err := http.Get(url) + if err != nil { + log.Printf("Failed to fetch page %d: %v", page, err) + break + } - cleanedBody := filterInvalidXMLBytes(body) + if resp.StatusCode != 200 { + fmt.Printf("Received status %d for page %d, stopping\n", resp.StatusCode, page) + resp.Body.Close() + break + } - // Parse XML - var rss RSS - err = xml.Unmarshal(cleanedBody, &rss) - if err != nil { - log.Printf("Failed to parse XML for page %d: %v", page, err) - break - } + body, err := io.ReadAll(resp.Body) + resp.Body.Close() - // Check if we got any items - if len(rss.Channel.Items) == 0 { - fmt.Printf("No items found on page %d, stopping\n", page) - break - } + if err != nil { + log.Printf("Failed to read response body for page %d: %v", page, err) + break + } - fmt.Printf("Found %d items on page %d\n", 
len(rss.Channel.Items), page) + cleanedBody := filterInvalidXMLBytes(body) - // Insert items into database - pageItems := 0 - for _, item := range rss.Channel.Items { - titleAuthor, err := parseTitle(item.Title) + // Parse XML + var rss RSS + err = xml.Unmarshal(cleanedBody, &rss) if err != nil { - log.Printf("Failed to parse title for item '%s': %v", item.Title, err) - continue + log.Printf("Failed to parse XML for page %d: %v", page, err) + break } - yearWeek, err := parseYearWeek(item.Category) - if err != nil { - log.Printf("Failed to parse date for item '%s': %v", item.Title, err) - continue + // Check if we got any items + if len(rss.Channel.Items) == 0 { + fmt.Printf("No items found on page %d, stopping\n", page) + break } - _, err = insertStmt.Exec(titleAuthor.Title, item.Link, titleAuthor.Author, yearWeek.Week, yearWeek.Year, item.Enclosure.URL) - if err != nil { - log.Printf("Failed to insert item '%s': %v", item.Title, err) - continue + fmt.Printf("Found %d items on page %d\n", len(rss.Channel.Items), page) + + // Insert items into database + pageItems := 0 + for _, item := range rss.Channel.Items { + titleAuthor, err := parseTitle(item.Title) + if err != nil { + log.Printf("Failed to parse title for item '%s': %v", item.Title, err) + continue + } + + yearWeek, err := parseYearWeek(item.Category) + if err != nil { + log.Printf("Failed to parse date for item '%s': %v", item.Title, err) + continue + } + + _, err = insertStmt.Exec(titleAuthor.Title, item.Link, titleAuthor.Author, yearWeek.Week, yearWeek.Year, item.Enclosure.URL) + if err != nil { + log.Printf("Failed to insert item '%s': %v", item.Title, err) + continue + } + pageItems++ } - pageItems++ - } - fmt.Printf("Inserted %d new items from page %d\n", pageItems, page) - totalItems += pageItems + fmt.Printf("Inserted %d new items from page %d\n", pageItems, page) + totalItems += pageItems - // Be respectful to the server - time.Sleep(500 * time.Millisecond) + // Be respectful to the server + 
time.Sleep(500 * time.Millisecond) - page++ + page++ - // Safety check to prevent infinite loops - if page > 1000 { - fmt.Println("Reached maximum page limit (1000), stopping") - break + // Safety check to prevent infinite loops + if page > 1000 { + fmt.Println("Reached maximum page limit (1000), stopping") + break + } } + + year += 2 } // Print summary |
