Initial commit: scraped data and chart.json

This commit is contained in:
Tyler Hallada 2020-12-14 13:25:37 -05:00
commit 0a0cb55ba6
7 changed files with 1507 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
/target
.env
tags

1235
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

16
Cargo.toml Normal file
View File

@ -0,0 +1,16 @@
[package]
name = "lastfm-stats"
version = "0.1.0"
authors = ["Tyler Hallada <tyler@hallada.net>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0"
dotenv = "0.15"
reqwest = { version = "0.10", features = ["json"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tokio = { version = "0.2", features = ["full"] }
urlencoding = "1.1"

1
artists.json Normal file

File diff suppressed because one or more lines are too long

81
chart.json Normal file
View File

@ -0,0 +1,81 @@
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.0.2.json",
"data": {
"url": "tags.json"
},
"transform": [
{
"window": [
{
"op": "rank",
"as": "rank"
}
],
"sort": [
{
"field": "play_count",
"order": "descending"
}
]
},
{
"filter": {
"not": {
"field": "name",
"oneOf": [
"seen live",
"All",
"USA",
"american",
"electronica",
"electro",
"british",
"UK",
"10s",
"00s",
"beautiful"
]
}
}
},
{
"filter": "datum.rank <= 40"
}
],
"description": "Top genre tags played this year",
"encoding": {
"x": {
"field": "play_count",
"type": "quantitative",
"axis": {
"title": "play count",
"titleFontSize": 18,
"labelFontSize": 14
}
},
"y": {
"field": "name",
"type": "ordinal",
"sort": {
"op": "sum",
"field": "play_count",
"order": "descending"
},
"axis": {
"labelExpr": "lower(datum.value)",
"title": "genre",
"titleFontSize": 18,
"labelFontSize": 14
}
}
},
"mark": "bar",
"title": {
"text": "Top Genre Tags Played in 2020",
"fontSize": 24,
"offset": 20
},
"height": {
"step": 26
}
}

170
src/main.rs Normal file
View File

@ -0,0 +1,170 @@
use anyhow::{anyhow, Context, Result};
use dotenv::dotenv;
use reqwest::Client;
use serde::de::{self, Deserializer};
use serde::{Deserialize, Serialize, Serializer};
use serde_json::Value;
use std::collections::HashMap;
use std::env;
use std::fmt::Display;
use std::fs::File;
use std::path::Path;
use std::str::FromStr;
use tokio::prelude::*;
use tokio::time::{delay_for, Duration};
/// One artist entry, deserialized from the `artist` array of a
/// `user.gettopartists` response and also written to / read back from
/// `artists.json`.
#[derive(Debug, Serialize, Deserialize)]
struct Artist {
name: String,
// Carried as a JSON string rather than a number: `from_str` parses it into a
// `u32` when reading and `to_str` writes it back out as a string.
#[serde(deserialize_with = "from_str", serialize_with = "to_str")]
playcount: u32,
url: String,
}
fn from_str<'de, T, D>(deserializer: D) -> Result<T, D::Error>
where
T: FromStr,
T::Err: Display,
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
T::from_str(&s).map_err(de::Error::custom)
}
/// Serde `serialize_with` helper: writes a `u32` as its decimal string form
/// instead of as a number.
fn to_str<S>(x: &u32, s: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    // `collect_str` serializes the value's `Display` output as a string.
    s.collect_str(x)
}
/// One entry of the `tag` array in an `artist.gettoptags` response.
///
/// Only `name` is read by the code in this file; `count` and `url` are
/// deserialized but not otherwise used here.
#[derive(Debug, Serialize, Deserialize)]
struct Tag {
name: String,
// NOTE(review): presumably Last.fm's relative weight for the tag — confirm against the API docs.
count: u32,
url: String,
}
/// A tag name paired with an aggregated play count; this is the record
/// written to and read from `tags.json`.
#[derive(Debug, Serialize, Deserialize)]
struct TopTag {
name: String,
// Sum of `playcount` over every artist whose top tags include this tag
// (accumulated in `load_top_tags`).
play_count: u32,
}
async fn get_top_artist(
client: &Client,
api_key: &str,
user: &str,
start_page: u64,
) -> Result<Vec<Artist>> {
let mut artists = Vec::new();
let mut current_page = start_page;
loop {
let url = format!("https://ws.audioscrobbler.com/2.0/?method=user.gettopartists&user={}&api_key={}&page={}&format=json", user, api_key, current_page);
dbg!(&url);
let mut response = client.get(&url).send().await?.json::<Value>().await?;
let mut topartists = response
.get_mut("topartists")
.context("topartists key not found")?
.take();
let mut attr = topartists
.get_mut("@attr")
.context("@attr key not found")?
.take();
let page: u64 = attr
.get_mut("page")
.context("page key not found")?
.take()
.as_str()
.context("page value is not a str")?
.parse()?;
let total_pages: u64 = attr
.get_mut("totalPages")
.context("totalPages key not found")?
.take()
.as_str()
.context("totalPages value is not a str")?
.parse()?;
artists.append(&mut serde_json::from_value(
topartists
.get_mut("artist")
.context("artist key not found")?
.take(),
)?);
if page >= total_pages {
break;
}
delay_for(Duration::from_secs(1)).await;
current_page = page + 1;
}
Ok(artists)
}
/// Requests the top tags for `artist` from the Last.fm `artist.gettoptags`
/// endpoint and returns the entries found under `toptags.tag`.
///
/// # Errors
///
/// Fails if the request cannot be sent, the HTTP status is not a success
/// (`"Bad status: ..."`), the body is not JSON, the `toptags` or `tag` key is
/// missing, or an entry does not match [`Tag`].
async fn get_artist_top_tags(client: &Client, api_key: &str, artist: &str) -> Result<Vec<Tag>> {
    let url = format!("https://ws.audioscrobbler.com/2.0/?method=artist.gettoptags&artist={}&api_key={}&period=12month&format=json", urlencoding::encode(artist), api_key);
    dbg!(&url);

    let response = client.get(&url).send().await?;
    let status = response.status();
    if !status.is_success() {
        return Err(anyhow!("Bad status: {}", status));
    }

    let mut body = response.json::<Value>().await?;
    let mut toptags = body
        .get_mut("toptags")
        .context("toptags key not found")?
        .take();
    let tag_list = toptags
        .get_mut("tag")
        .context("tag key not found")?
        .take();
    let tags = serde_json::from_value(tag_list)?;
    Ok(tags)
}
/// Returns the user's top artists, using `artists.json` in the current
/// directory as a cache.
///
/// If the file exists it is parsed and returned without touching the network.
/// Otherwise the artists are fetched with `get_top_artist` (starting at page
/// 1), written to `artists.json`, and returned.
///
/// # Errors
///
/// Fails if the cache file cannot be read or parsed, if fetching fails, or if
/// the cache file cannot be written.
async fn load_artists(client: &Client, api_key: &str, user: &str) -> Result<Vec<Artist>> {
    let artists_path = Path::new("artists.json");
    if artists_path.exists() {
        // Read the whole file and parse from memory; `from_reader` on an
        // unbuffered `File` issues many tiny reads.
        let bytes = std::fs::read(artists_path)?;
        return Ok(serde_json::from_slice(&bytes)?);
    }

    let artists = get_top_artist(client, api_key, user, 1).await?;
    // Serialize in memory and write once, so the file is only created after
    // serialization has succeeded.
    std::fs::write(artists_path, serde_json::to_vec(&artists)?)?;
    Ok(artists)
}
/// Returns play counts aggregated per tag, using `tags.json` in the current
/// directory as a cache.
///
/// If the file exists it is parsed and returned as-is. Otherwise the top tags
/// of every artist are fetched (sleeping one second before each request), and
/// each artist's `playcount` is added to the total of every tag it carries.
/// The result is sorted by ascending play count, with ties ordered by tag
/// name so the output is the same on every run. It is then written to
/// `tags.json`.
///
/// An artist whose tags cannot be fetched is skipped; the failure and its
/// cause are reported on stderr.
///
/// # Errors
///
/// Fails if the cache file cannot be read, parsed, or written.
async fn load_top_tags(client: &Client, api_key: &str, artists: &[Artist]) -> Result<Vec<TopTag>> {
    let tags_path = Path::new("tags.json");
    if tags_path.exists() {
        // Read the whole file and parse from memory; `from_reader` on an
        // unbuffered `File` issues many tiny reads.
        let bytes = std::fs::read(tags_path)?;
        return Ok(serde_json::from_slice(&bytes)?);
    }

    let mut play_counts: HashMap<String, u32> = HashMap::new();
    for artist in artists {
        // Pause before every request to the API.
        delay_for(Duration::from_secs(1)).await;
        match get_artist_top_tags(client, api_key, &artist.name).await {
            Ok(tags) => {
                // The tags are owned here, so move each name into the map.
                for tag in tags {
                    *play_counts.entry(tag.name).or_insert(0) += artist.playcount;
                }
            }
            // Best effort: keep going, but say why this artist was skipped.
            Err(err) => eprintln!(
                "Could not get top tags for artist: {} ({:#})",
                artist.name, err
            ),
        }
    }

    let mut top_tags: Vec<TopTag> = play_counts
        .into_iter()
        .map(|(name, play_count)| TopTag { name, play_count })
        .collect();
    // Tag names are unique map keys, so the tie-breaker makes this a total
    // order regardless of HashMap iteration order.
    top_tags.sort_unstable_by(|a, b| {
        a.play_count
            .cmp(&b.play_count)
            .then_with(|| a.name.cmp(&b.name))
    });

    // Serialize in memory and write once, so the file is only created after
    // serialization has succeeded.
    std::fs::write(tags_path, serde_json::to_vec(&top_tags)?)?;
    Ok(top_tags)
}
#[tokio::main]
async fn main() {
dotenv().ok();
let user = env::var("LASTFM_USER").expect("LASTFM_USER is defined in .env file");
let api_key = env::var("LASTFM_API_KEY").expect("LASTFM_API_KEY is defined in .env file");
let client = Client::new();
let artists = load_artists(&client, &api_key, &user).await.unwrap();
dbg!(artists.len());
let top_tags = load_top_tags(&client, &api_key, &artists).await.unwrap();
dbg!(&top_tags.len());
}

1
tags.json Normal file

File diff suppressed because one or more lines are too long