import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os

Guía ¶

Primer vistazo a los datos
Referencias


# Load the dataset; the file is decoded with the 'latin' codec.
df = pd.read_csv("top10s.csv", encoding='latin')
# Discard the "Unnamed: 0" column (presumably an exported row index - confirm).
df.pop("Unnamed: 0")
# An entry with everything set to 0 was causing problems: drop every row whose
# bpm is 0. Selecting the whole matching index (instead of its first element)
# makes this a no-op rather than an IndexError when no such row exists.
df = df.drop(index=df.loc[df["bpm"] == 0].index)

# Title and artist of the ten most popular songs, most popular first.
top_10 = np.array(df.sort_values(by = "pop", ascending = False).head(10)[["title", "artist"]])
# Iterate over the rows actually present so a dataset with fewer than ten
# songs does not overrun the array.
for rank, (title, artist) in enumerate(top_10, start=1):
    print(f"{rank}º {title} - {artist}")

1º Memories - Maroon 5
2º Lose You To Love Me - Selena Gomez
3º Someone You Loved - Lewis Capaldi
4º Señorita - Shawn Mendes
5º How Do You Sleep? - Sam Smith
6º Trampoline (with ZAYN) - SHAED
7º South of the Border (feat. Camila Cabello & Cardi B) - Ed Sheeran
8º Truth Hurts - Lizzo
9º Good as Hell (feat. Ariana Grande) - Remix - Lizzo
10º Happier - Marshmello


# Share of songs per genre, largest share first.
genre_counts = df["top genre"].value_counts()
genre_share = (genre_counts / genre_counts.sum()).sort_values(ascending = False)

# Keep the five largest genres and fold everything else into one "others" entry.
leading_genres = genre_share.iloc[:5]
remaining_share = genre_share.iloc[5:].sum()
top_genres_5 = pd.concat([leading_genres, pd.Series(remaining_share, index = ["others"])])


# Pie chart of the genre shares held in top_genres_5.
fig, ax = plt.subplots(figsize=(8, 8))
# Pull only the last slice ("others") away from the centre. The offsets are
# sized from the data so the call keeps working if the slice count changes.
explode = [0] * (len(top_genres_5) - 1) + [0.07]
# Draw on the axes created above; `fig` keeps referring to the Figure instead
# of being overwritten by the pie's return value.
ax.pie(top_genres_5, labels = top_genres_5.index, autopct='%.1f%%', explode=explode)
plt.show()


# 3x3 grid of scatter plots: popularity on the x axis, one feature per panel.
fig, axs = plt.subplots(3, 3)

fig.suptitle("Características en función de la popularidad de la canción (Sobre 100)")
fig.set_dpi(200)
fig.set_size_inches(20, 10)

# (column, panel title) pairs listed in row-major panel order.
panels = [
    ("bpm", "bpm - Beats por minuto"),
    ("nrgy", "nrgy - Energía"),
    ("dnce", "dnce - Bailabilidad"),
    ("dB", "dB - decibelios"),
    ("live", "live - Vitalidad"),
    ("val", "val - Positividad"),
    ("dur", "dur - Duración"),
    ("acous", "acous - Acústica"),
    ("spch", "spch - Expresión"),
]
for panel_ax, (column, title) in zip(axs.flat, panels):
    panel_ax.scatter(df["pop"], df[column])
    panel_ax.set_title(title)

plt.show()


# Feature columns from "bpm" through "spch", leaving out the duration column.
muestra = df.loc[:, "bpm":"spch"].drop(columns = "dur")
# Summary statistics rounded to two decimals.
muestra.describe().round(2)


# Standard deviation relative to the mean of each feature, as an absolute
# percentage rounded to two decimals.
relative_spread = muestra.std() / muestra.mean()
variance = (relative_spread * 100).abs().round(2)

# Horizontal bar chart with the value written next to each bar.
fig, ax = plt.subplots()
bars = ax.barh(variance.index, variance, color="blue")
ax.bar_label(bars)

# Hide the frame around the plot.
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)

plt.show()


# Skewness of each feature, computed as the nonparametric skew
# (mean - median) / std and expressed as a percentage. The difference is
# parenthesised so that it is the difference, not just the mean, that gets
# divided by the standard deviation. Positive values indicate a longer right tail.
asimetry = ((muestra.mean() - muestra.median()) / muestra.std() * 100).round(decimals = 2)

# Horizontal bar chart with the value written next to each bar.
fig, ax = plt.subplots()
bars = ax.barh(asimetry.index, asimetry, color="blue")

ax.bar_label(bars)

# Hide the frame around the plot.
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)

plt.show()


# Correlation heatmap of the columns from "year" through "spch".
# NOTE(review): only the rows with index label <= 300 are used here, not the
# whole dataset - confirm this subset is intentional.
Y = df.loc[:300, "year":"spch"]
# Compute the correlation matrix once and reuse it for the image and the labels.
corr = Y.corr()

fig, ax = plt.subplots(figsize = (8, 8))
# Draw on the axes created above so `fig` keeps referring to the Figure.
ax.imshow(corr, cmap = "YlOrRd")

# One tick per column, sized from the matrix rather than hard-coded.
ticks = range(len(corr.columns))
ax.set_xticks(ticks)
ax.set_xticklabels(corr.columns)
ax.set_yticks(ticks)
ax.set_yticklabels(corr.columns)

# Write each correlation (as a percentage, two decimals) in its cell;
# x is the column position and y the row position.
for (row, col), label in np.ndenumerate((corr * 100).round(2)):
    ax.text(col, row, label, ha='center', va='center')
plt.show()


from sklearn.decomposition import PCA

# Min-max scale every column from "year" through "spch" into [0, 1].
raw_features = df.loc[:, "year":"spch"].astype(float)
column_min = raw_features.min()
column_range = raw_features.max() - column_min
normalized_df = (raw_features - column_min) / column_range

# Popularity of each song; also read by myplot to colour the scatter points.
y = df["pop"].to_numpy(copy=True)

# Fit a PCA that keeps all components, then project the scaled data onto them.
# NOTE(review): scikit-learn documents the y argument of PCA.fit as ignored.
pca = PCA()
pca.fit(normalized_df, y)
new_df = pca.transform(normalized_df)

def myplot(score, coeff, labels=None, colors=None):
    """Draw a PCA biplot on the current matplotlib figure.

    The first two columns of ``score`` are drawn as a scatter plot, and every
    row of ``coeff`` is drawn as an arrow from the origin to
    ``(coeff[i, 0], coeff[i, 1])`` with a text label slightly beyond its tip.

    Args:
        score: 2-D array; columns 0 and 1 are used as x and y coordinates.
        coeff: 2-D array with one row per arrow; columns 0 and 1 are used.
        labels: Optional sequence with one label per row of ``coeff``. When
            omitted, arrows are labelled "Var1", "Var2", ... because each
            arrow stands for an input variable, not a principal component.
        colors: Optional values passed to ``plt.scatter`` as ``c``. When
            omitted, the module-level ``y`` is used, as before.
    """
    if colors is None:
        colors = y  # backward-compatible fallback to the global popularity vector

    xs = score[:, 0]
    ys = score[:, 1]

    plt.scatter(xs, ys, c=colors)  # scores are plotted without scaling
    for i in range(coeff.shape[0]):
        plt.arrow(0, 0, coeff[i, 0], coeff[i, 1], color='r', alpha=0.5)
        label = f"Var{i + 1}" if labels is None else labels[i]
        # Place the text 15% beyond the arrow tip so it does not overlap it.
        plt.text(coeff[i, 0] * 1.15, coeff[i, 1] * 1.15, label, color='g', ha='center', va='center')

# Axis titles and grid for the biplot of the first two principal components.
plt.xlabel("PC{}".format(1))
plt.ylabel("PC{}".format(2))
plt.grid()

# Call the function with the first two score columns and the transposed
# components (one row per input feature). The feature names are passed as
# labels so each arrow is identified by the variable it represents.
myplot(new_df[:,0:2], pca.components_.T, labels=normalized_df.columns)
plt.show()


# Table of the PCA components (x100, two decimals): one row per principal
# component, one column per input feature. The row labels are sized from the
# fitted components instead of a hard-coded count.
pd.DataFrame(
    data    = pca.components_ * 100,
    columns = normalized_df.columns,
    index   = [f"PC{i}" for i in range(1, pca.components_.shape[0] + 1)],
).round(decimals = 2)


# Our df is ordered by year, so we need to shuffle its positions
randomized_df = df.sample(frac = 1)

# Split by POSITION, not by label: after shuffling, the original index labels
# are in random order, so a label slice such as .loc[:300] would cut at
# wherever label 300 happened to land and would put that row in both sets.
n_train = 300
train_rows = randomized_df.iloc[:n_train]
valid_rows = randomized_df.iloc[n_train:]

# Inputs are the columns from "year" through "spch"; the target is "pop".
X_train = train_rows.loc[:, "year":"spch"]
X_valid = valid_rows.loc[:, "year":"spch"]
y_train = train_rows["pop"]
y_valid = valid_rows["pop"]


from tensorflow import keras
from tensorflow.keras import layers

# Fully connected regressor: 10 inputs, eleven ReLU layers of 512 units each,
# and a single linear output unit.
hidden_units = 512
hidden_layer_count = 11

# The first hidden layer also declares the input shape.
network_layers = [layers.Dense(hidden_units, activation='relu', input_shape=[10])]
network_layers.extend(
    layers.Dense(hidden_units, activation='relu')
    for _ in range(hidden_layer_count - 1)
)
network_layers.append(layers.Dense(1))

model = keras.Sequential(network_layers)


# Train with the Adam optimizer against mean absolute error.
model.compile(loss='mae', optimizer='adam')


# Short trial run: 10 epochs in batches of 256 rows, evaluating on the
# validation split after every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=256,
    epochs=10,
    validation_data=(X_valid, y_valid),
)

Epoch 1/10
2/2 [==============================] - 1s 243ms/step - loss: 64.7548 - val_loss: 52.4095
Epoch 2/10
2/2 [==============================] - 0s 77ms/step - loss: 48.6241 - val_loss: 9.9740
Epoch 3/10
2/2 [==============================] - 0s 74ms/step - loss: 32.9567 - val_loss: 10.6641
Epoch 4/10
2/2 [==============================] - 0s 77ms/step - loss: 16.2248 - val_loss: 27.3055
Epoch 5/10
2/2 [==============================] - 0s 75ms/step - loss: 22.9471 - val_loss: 33.4117
Epoch 6/10
2/2 [==============================] - 0s 78ms/step - loss: 28.3121 - val_loss: 21.9123
Epoch 7/10
2/2 [==============================] - 0s 76ms/step - loss: 24.4084 - val_loss: 29.9317
Epoch 8/10
2/2 [==============================] - 0s 79ms/step - loss: 27.2406 - val_loss: 9.8840
Epoch 9/10
2/2 [==============================] - 0s 73ms/step - loss: 15.9930 - val_loss: 15.4821
Epoch 10/10
2/2 [==============================] - 0s 77ms/step - loss: 15.9951 - val_loss: 24.2204


# Collect the per-epoch metrics recorded during training into a DataFrame.
history_df = pd.DataFrame(data=history.history)
# Plot the training loss with the pandas plotting helper.
history_df.loc[:, 'loss'].plot()

<AxesSubplot:>


# Long training run: 10000 epochs in batches of 512 rows, continuing from the
# weights left by the previous fit and evaluating on the validation split.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=512,
    epochs=10000,
    validation_data=(X_valid, y_valid),
)


# Persist the trained model to disk.
model.save("trained_models/model1.h5")


# Collect the per-epoch metrics recorded during training into a DataFrame.
history_df = pd.DataFrame(data=history.history)
# Plot the training loss with the pandas plotting helper and display it.
history_df.loc[:, 'loss'].plot()
plt.show()


# Predict the popularity of one hand-written song; the ten values follow the
# training column order "year" through "spch".
model.predict([
    [2013, 60, 60, 70, -4, 60, 80, 200, 50, 50],
])

1/1 [==============================] - 0s 100ms/step

array([[67.28367]], dtype=float32)


# Reload the model saved earlier.
model_copy = keras.models.load_model("trained_models/model1.h5")


# Predict with the reloaded model on one hand-written song (ten feature values).
model_copy.predict([
    [2017, 178, 82, 65, -4, 10, 82, 229, 23, 17],
])

1/1 [==============================] - 0s 51ms/step

array([[77.615974]], dtype=float32)


import ipywidgets as widgets

	bpm	nrgy	dnce	dB	live	val	acous	spch
count	602.00	602.00	602.00	602.00	602.00	602.00	602.00	602.00
mean	118.74	70.62	64.49	-5.49	17.80	52.31	14.35	8.37
std	24.34	16.07	13.13	1.70	13.09	22.43	20.78	7.48
min	43.00	4.00	23.00	-15.00	2.00	4.00	0.00	3.00
25%	100.00	61.00	57.00	-6.00	9.00	35.00	2.00	4.00
50%	120.00	74.00	66.00	-5.00	12.00	52.00	6.00	5.00
75%	129.00	82.00	73.00	-4.00	24.00	69.00	17.00	9.00
max	206.00	98.00	97.00	-2.00	74.00	98.00	99.00	48.00

	year	bpm	nrgy	dnce	dB	live	val	dur	acous	spch
PC1	62.35	-5.81	-37.15	-17.58	-22.34	-12.22	-48.26	2.10	37.28	-4.24
PC2	-72.07	6.26	-15.50	-36.31	-9.89	8.16	-45.63	17.71	25.25	-3.19
PC3	-22.10	-19.97	-33.29	29.04	-13.37	-32.19	49.14	-3.50	59.24	-3.10
PC4	9.86	6.39	-0.97	-9.07	-9.09	67.62	21.32	5.01	30.24	61.07
PC5	3.50	40.38	15.81	-44.99	9.17	-58.42	14.55	-9.06	6.06	47.49
PC6	-10.89	-35.79	-15.25	33.33	-28.16	-25.07	-23.62	21.69	-37.06	58.60
PC7	-3.68	74.39	-40.59	26.00	-37.94	6.68	7.01	1.53	-22.93	-8.23
PC8	-3.88	29.66	26.26	59.85	37.82	-4.68	-41.94	-12.22	34.87	16.03
PC9	13.16	12.60	12.06	4.14	14.68	-6.63	11.57	94.55	9.11	-9.42
PC10	0.70	1.93	65.62	6.02	-71.96	-4.86	-2.85	-0.65	17.68	-11.42

Guía ¶

Análisis de las canciones más escuchadas entre 2010 y 2019¶

1. Primer vistazo a los datos¶

2. PCA (Análisis de Componentes Principales)¶

3. Análisis mediante inteligencia artificial¶

99. Referencias¶

Análisis de las canciones más escuchadas entre 2010 y 2019¶

Guía¶

1. Primer vistazo a los datos¶

2. PCA (Análisis de Componentes Principales)¶

3. Análisis mediante inteligencia artificial¶

99. Referencias¶

Guía ¶