Grafik Problemi (matplotlib)

import numpy as np

import matplotlib.pyplot as plt

import pandas as pd

from sklearn.preprocessing import RobustScaler

plt.style.use("bmh")

import ta

from datetime import timedelta

from keras.models import Sequential

from keras.layers import LSTM, Dense, Dropout

# ---- Load and prepare the USD/TRY daily price data ----
df = pd.read_csv("USDTRY=X.csv")

# Parse dates and make them the index
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date')

# Remove rows with missing values
df = df.dropna()

# Enrich the frame with every technical indicator `ta` provides
df = ta.add_all_ta_features(df, open="Open", high="High", low="Low", close="Close", volume="Volume", fillna=True)

# Keep only 'Close' and the generated indicators
df = df.drop(columns=['Open', 'High', 'Low', 'Adj Close', 'Volume'])

# Inspect the enriched frame
print(df.shape)

# Restrict to the most recent 1000 trading days for a view of the
# current market climate
df = df.iloc[-1000:]

# A dedicated scaler fitted on 'Close' alone, so predictions can be
# inverse-transformed back to real prices later
close_scaler = RobustScaler()
close_scaler.fit(df[['Close']])

# Scale every column for training
scaler = RobustScaler()
df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns, index=df.index)

# Plot the scaled closing prices
df['Close'].plot(figsize=(16, 5))
plt.title("Satis Fiyati")
plt.ylabel("Fiyat(olcekli)")
# plt.show()

def split_sequence(seq, n_steps_in, n_steps_out):
    """
    Split a multivariate time sequence into supervised-learning samples.

    Parameters
    ----------
    seq : np.ndarray
        2-D array where column 0 holds the target (close price) and the
        remaining columns hold the indicator features.
    n_steps_in : int
        Number of past time steps per input window.
    n_steps_out : int
        Number of future time steps per target window.

    Returns
    -------
    (np.ndarray, np.ndarray)
        X with shape (samples, n_steps_in, n_features) and
        y with shape (samples, n_steps_out). Both are empty when `seq`
        is shorter than n_steps_in + n_steps_out.
    """
    X, y = [], []

    # Iterate only over start positions whose full in/out window fits,
    # instead of scanning the whole sequence and breaking early.
    last_start = len(seq) - n_steps_in - n_steps_out
    for i in range(last_start + 1):
        end = i + n_steps_in
        out_end = end + n_steps_out

        # x = past prices and indicators, y = target prices ahead (column 0)
        X.append(seq[i:end, :])
        y.append(seq[end:out_end, 0])

    return np.array(X), np.array(y)

def visualize_training_results(results):
    """
    Plot the training/validation loss and accuracy curves from a Keras
    History object (one figure per metric pair).
    """
    history = results.history

    # (validation key, training key, title, x label, y label)
    panels = [
        ('val_loss', 'loss', 'Loss', 'Epochs', 'Loss'),
        ('val_accuracy', 'accuracy', 'Dogruluk', 'Okuma', 'Dogruluk'),
    ]
    for val_key, train_key, title, xlabel, ylabel in panels:
        plt.figure(figsize=(16, 5))
        plt.plot(history[val_key])
        plt.plot(history[train_key])
        plt.legend([val_key, train_key])
        plt.title(title)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.show()

def layer_maker(n_layers, n_nodes, activation, drop=None, d_rate=.5):
    """
    Add a specified number of hidden LSTM layers to the global `model`.

    Parameters
    ----------
    n_layers : int
        Number of hidden LSTM layers to add.
    n_nodes : int
        Units per LSTM layer.
    activation : str
        Activation function for each layer.
    drop : int or None, optional
        Add a Dropout layer after every `drop`-th hidden layer.
        None (or 0) disables dropout entirely.
    d_rate : float, optional
        Dropout rate used when `drop` is given.
    """
    for x in range(1, n_layers + 1):
        model.add(LSTM(n_nodes, activation=activation, return_sequences=True))
        # Explicit check instead of the original bare try/except, which
        # silently swallowed *every* error (it existed only to tolerate
        # drop=None). `drop and ...` also guards against drop == 0.
        if drop and x % drop == 0:
            model.add(Dropout(d_rate))

def validater(n_per_in, n_per_out):
    """
    Back-test the trained model with rolling windows over the scaled DF.

    Walks backwards through the module-level `df` in steps of
    `n_per_out`, predicts each window with the module-level `model`,
    inverse-transforms the outputs with `close_scaler`, and collects
    them in a DataFrame aligned to `df`'s index.

    Parameters
    ----------
    n_per_in : int
        Number of past periods fed to the model per prediction.
    n_per_out : int
        Number of periods each prediction covers (loop stride).

    Returns
    -------
    pd.DataFrame
        One 'Close' column of predicted prices; rows for which no
        prediction was produced remain NaN.

    NOTE(review): depends on the globals `df`, `model`, `close_scaler`
    and `n_features` being defined before this is called.
    """
    # Empty frame, indexed like df, to be filled with predictions
    predictions = pd.DataFrame(index=df.index, columns=[df.columns[0]])

    for i in range(1, len(df)-n_per_in, n_per_out):
        # Rolling window taken from the END of the frame backwards:
        # the n_per_in rows ending i rows before the last row
        x = df[-i - n_per_in:-i]

        # Model expects one batch of shape (1, n_per_in, n_features)
        yhat = model.predict(np.array(x).reshape(1, n_per_in, n_features))

        # Back to real price scale; [0] drops the batch dimension
        yhat = close_scaler.inverse_transform(yhat)[0]

        # Predicted rows dated from the day after the window's last date,
        # on a business-day frequency
        pred_df = pd.DataFrame(yhat, 
                               index=pd.date_range(start=x.index[-1]+timedelta(days=1), 
                                                   periods=len(yhat), 
                                                   freq="B"),
                               columns=[x.columns[0]])

        # Merge into the aggregate frame (only matching index labels update)
        predictions.update(pred_df)
        
    return predictions

def val_rmse(df1, df2):
    """
    Return the root-mean-square error between the 'Close' columns of
    two DataFrames, aligned on their indices.

    Parameters
    ----------
    df1 : pd.DataFrame
        Frame with a 'Close' column (e.g. the actual prices).
    df2 : pd.DataFrame
        Frame with a 'Close' column (e.g. the predictions).

    Returns
    -------
    float
        RMSE over the rows both frames share; rows missing from either
        side are dropped before the error is computed.
    """
    df = df1.copy()

    # Align the second frame's closing prices on df1's index
    df['close2'] = df2.Close

    # Rows present in only one frame produce NaNs -- drop them
    df.dropna(inplace=True)

    # Difference between the two closing-price series
    diff = df.Close - df.close2

    # Work on the Series directly: float() on a one-element DataFrame
    # (the original df[['diff']] form) is deprecated in modern pandas.
    # Returning the square root of the mean squared difference.
    return float(np.sqrt((diff ** 2).mean()))

# How many past periods the model sees per sample (lookback window)
n_per_in  = 30

# How many future periods the model predicts per sample
n_per_out = 10

# Number of input features (scaled Close + all technical indicators)
n_features = df.shape[1]

# Build the supervised-learning samples from the scaled frame
X, y = split_sequence(df.to_numpy(), n_per_in, n_per_out)

# Sequential LSTM model; `layer_maker` adds hidden layers to this global
model = Sequential()

# Activation used by every LSTM layer
activ = "tanh"

# Input layer
model.add(LSTM(90, 
               activation=activ, 
               return_sequences=True, 
               input_shape=(n_per_in, n_features)))

# Hidden layers: 2 x LSTM(30), a Dropout(0.1) after each (drop=1)
layer_maker(n_layers=2, 
            n_nodes=30, 
            activation=activ,
            drop=1,
            d_rate=.1)

# Final hidden layer (no return_sequences: collapses the sequence)
model.add(LSTM(90, activation=activ))

# Output layer: one unit per predicted period
model.add(Dense(n_per_out))

# Model summary
model.summary()

# Compiling the model.
# NOTE(review): 'accuracy' is a classification metric and is not
# meaningful for this regression task ('mae' would be more informative),
# but visualize_training_results reads history['val_accuracy'], so it is
# kept here -- change both together if you switch metrics.
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

res = model.fit(X, y, epochs=2, batch_size=32, validation_split=0.1)

visualize_training_results(res)

# Inverse-transform the scaled 'Close' column back to actual prices
actual = pd.DataFrame(close_scaler.inverse_transform(df[["Close"]]), 
                      index=df.index, 
                      columns=[df.columns[0]])

# Rolling back-test predictions to compare against `actual`
predictions = validater(n_per_in, n_per_out)

# Report the validation error in original price units
print("RMSE:", val_rmse(actual, predictions))

    

# Plot the back-tested predictions against the actual prices
plt.figure(figsize=(16,6))

# Plotting those predictions
plt.plot(predictions, label='Tahmin edilen')

# Plotting the actual values
# (label was mojibake'd to 'Ger├žek' in the pasted source -- repaired)
plt.plot(actual, label='Gerçek')

# Plain string: the original f-prefix had no placeholders
plt.title("Tahmin ve Gercek Fiyat")
plt.ylabel("Fiyat")
plt.legend()
plt.show()

# Predict the next n_per_out periods from the most recent lookback window
yhat = model.predict(np.array(df.tail(n_per_in)).reshape(1, n_per_in, n_features))

# Transform the predicted values back to real prices; [0] drops the batch dim
yhat = close_scaler.inverse_transform(yhat)[0]

# DataFrame of the predicted prices, dated forward on business days
preds = pd.DataFrame(yhat, 
                     index=pd.date_range(start=df.index[-1]+timedelta(days=1), 
                                         periods=len(yhat), 
                                         freq="B"), 
                     columns=[df.columns[0]])

# Number of periods back to plot the actual values
pers = n_per_in

# Transforming the actual values to their original price.
# The first predicted row is stitched onto `actual` purely so the two
# plotted lines connect; if the forecast starts far from the last real
# price this shows up as an artificial dip at the end of `actual` --
# drop the `preds.head(1)` part below to remove that artifact.
# (`DataFrame.append` was removed in pandas 2.0, so pd.concat is used.)
actual = pd.concat([
    pd.DataFrame(close_scaler.inverse_transform(df[["Close"]].tail(pers)),
                 index=df.Close.tail(pers).index,
                 columns=[df.columns[0]]),
    preds.head(1),
])

# Printing the predicted prices
print(preds)

# Plotting
plt.figure(figsize=(15,6))
plt.plot(preds, label="Tahmini Fiyat")
plt.plot(actual, label="Gercek Fiyat")
plt.ylabel("Fiyat")
plt.xlabel("Tarihler")
plt.title(f"Onumuzdeki {len(yhat)} gun icin")
plt.legend()
plt.show()

Merhabalar,
Elimde bu şekilde bir kod var. Kodun mantığını kabaca anladım. Fakat gelecek günkü tahminlerde grafikte bozulma oluyor (Screenshot by Lightshot). Dolar ile alakası yok. BTC, EUR filan hepsinde oluyor aynı şey. Kur geçmişini Yahoo Finance'den alıyorum.
Kodun orijinal hali burada (Price-Forecaster/Stock-RNN-Deep-Learning-TechIndicators.ipynb at master · marcosan93/Price-Forecaster · GitHub)

Bu sayfadaki "Validating the Model" kısmından hata verdiği için plt.xlim('2018-05', '2020-05') burayı çıkarttım ve çıkarttıktan sonra hata vermemeye başladı. Bu hatayı verdiği yer "tahmin ve gerçek fiyat" grafiği, "gelecek günkü tahmin" grafiği ile alakası yok gibi, ama sizce grafikteki problem burayı çıkarttığım için mi oldu? Sizce grafikte bozulma neden oluyor ve nasıl düzeltebilirim?

Merhaba, bozulmadan kastınız nedir tam olarak, nasıl bir grafik beklemekteydiniz?

usd-try olsun btc-usd olsun, tahmini değerin başlangıcından önce gerçek fiyat grafikte düşüş yaşıyor. Verilerimde bu şekilde bir veri yok halbuki Screenshot by Lightshot

En sondaki preds ve actual için,

print(preds) ve print(actual)'ı paylaşabilir misiniz?

preds

               Close
2021-05-06  7.056682
2021-05-07  6.994374
2021-05-10  6.947605
2021-05-11  6.982694
2021-05-12  6.995717
2021-05-13  6.988362
2021-05-14  6.955207
2021-05-17  6.906001
2021-05-18  6.916530
2021-05-19  6.963354
2021-05-20  7.007742
2021-05-21  7.021664
2021-05-24  7.016685
2021-05-25  7.009973
2021-05-26  7.128006
2021-05-27  7.152076
2021-05-28  7.158733
2021-05-31  7.181411
2021-06-01  7.203786
2021-06-02  7.286486
2021-06-03  7.226585
2021-06-04  7.307771
2021-06-07  7.291287
2021-06-08  7.306860
2021-06-09  7.241479
2021-06-10  7.314354
2021-06-11  7.315176
2021-06-14  7.315245
2021-06-15  7.307352
2021-06-16  7.380007

actual

               Close
2020-12-31  7.373730
2021-01-01  7.433800
2021-01-04  7.433420
2021-01-05  7.421500
2021-01-06  7.383790
...              ...
2021-04-30  8.184880
2021-05-03  8.274550
2021-05-04  8.259000
2021-05-05  8.321720
2021-05-06  7.056682

[91 rows x 1 columns]

Sonda birden iniş yaşıyor gibi. Sizce ne yapmalıyım?

Evet, sebebi de

bu kısım. actual'ın sonuna ilk tahmin değeri ekleniyor. Galiba görsel açıdan arada kopukluk olmasın diye yapılmış. Bu .append'i kaldırırsanız düzelir diye tahmin ediyorum.

Uğraştığınız için teşekkürler. Benim anlamadığım başka bir şey ise, tahmini değerler her dövizde gerçek değerin çok altında başlıyor.

Burada adamın yaptığında tahmini değer gerçek değerin devamı niteliğinde, fakat bende sürekli ve her dövizde gerçek değerin aşağısından başlıyor. Sizce bu "epochs" miktarının azlığından mı kaynaklı?

Olabilir evet, belki başka nedenler de olabilir. visualize_training_results sizde nasıl grafikler veriyor?

![Figure_3|690x276]

"epochs" değerini 1000 yaptım. Gözlemlerime göre epochs'u ne kadar arttırırsam, gelecek günkü tahmin arttırdığım miktara oranla o kadar aşağıda başlıyor.

Hâlâ anlayabilmiş değilim. Bu grafik adamın grafiği ve gelecek günkü tahmini asıl grafik ile uyum halinde. Bende ise gelecek tahmini grafiği gerçek grafikten aşağıda başlıyor. : (

Acaba burayı çıkarttığım için mi oldu? Benim çalıştırdığım kod ile adamın kodunun tek farkı burası.
Çalıştırdığım koda adamın yaptığı gibi bunu ekleyince plt.xlim('2018-05', '2020-05') şu hatayı veriyor:

matplotlib.units.ConversionError: Failed to convert value(s) to axis units: '2005-05'

Yardımcı olabilirseniz çok çok sevinirim. 2 gündür bu sorunu çözmeye çalışıyorum :slight_smile:

Yok, o x-ekseninin limitlerini ayarlamaya çalışıyor; sizde zaten kırmızının bittiği yerden mavi başlıyor, bir sorun yok diye düşünüyorum. Yine de yapmak isterseniz o değerleri datetime'a çevirmeniz gerekebilir veya diğer elemanların formatını takip etmek gerekebilir (2018-05 yerine %Y-%m-%d, mesela 2018-05-01).

Validation loss bir yerden sonra artmaya başlamış ve dolayısıyla overfitting'e uğramış model. Eğer eğitildiği veri üzerindeki tahminini çizdirirseniz herhalde neredeyse birebir takip ediyordur. Ama ertesi günler için performans veremiyor haliyle. (Bu arada accuracy böylesi bir regresyon task'ı için uygun bir metrik değil, o daha çok sınıflandırmada kullanılıyor.)

Şunları deneyebilirsiniz denemediyseniz:

  • epoch'u 100-400 arası tutmak iyi olabilir
  • optimizer'ın learning rate'i ile oynanabilir; default 0.01 galiba, belki 0.05 ve suları denenebilir
  • batch_size da değiştirilebilir; ne kadar veri var bilmiyorum ama 64 veya 16 da denenebilir
  • Katman sayısı 1'e indirilebilir; hidden unit sayısı da 20'ye çekilebilir
  • Scaler Robust değil de düz MinMax veya StandardScaler olarak değiştirilebilir
  • Early stopping koyulabilir epoch'u kısıtlamadan; epoch 1_000 olabilir o durumda, kendi dursun. (Ama duracağı metrik accuracy olmasın, pek makul değil.)
  • Önemli bir parametre de kaç gün geriye baktığı. 30 çok olabilir.

Bunların hepsi varsayım; denemeden "şunu yapın, kesin daha iyi olur" diyebilecek bir yetkinliğim yok. Bunları deneyip de yine benzeri sonuçla karşılaşabilirsiniz. Model şöyle ya da böyle sihirli bir değnek görevi göremeyip orada tıkanabilir.

2 Beğeni

Teşekkürler, deneyeceğim.

Hepsini denedim ama olmadı. Saldım artık bu adamın kodunu : ). Kendim derin öğrenmenin mantığını anlayarak yapmaya çalışacağım.

1 Beğeni