import tensorflow as tf import numpy as np import matplotlib.pyplot as plt from sklearn.model_selection import train_test_split def bulk_quantile_loss(): def loss(y_true, y_pred): q = np.array([.01, .02, .05, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95, .98, .99]) e = y_true - y_pred return tf.reduce_mean(tf.maximum(q * e, (q - 1) * e), axis=-1) return loss # Define the custom quantile loss function def quantile_loss(q): def loss(y_true, y_pred): e = y_true - y_pred return tf.reduce_mean(tf.maximum(q * e, (q - 1) * e)) return loss def true_func(x): # Y = 2 + 1.5 * X + epsilon # Linear relationship with variance increasing # Y = 1 + np.exp(X / 4) + epsilon # Y = 1 + np.exp(X / 4) + np.sin((2*np.pi/5)*X) return 1 + np.exp(x / 4) + 3*np.sin((2*np.pi/5)*x) # Generate synthetic dataset def make_data(num_points=1000): np.random.seed(42) X = np.random.rand(num_points, 1) * 10 # epsilon = np.random.normal(0, X/2, size=(num_points, 1)) # Noise increasing with X epsilon = np.random.normal(0, 0.25 + X/5, size=(num_points, 1)) Y = true_func(X) Y_eps = Y + epsilon # Split into training and test sets X_train, X_test, Y_train, Y_test = train_test_split(X, Y_eps, test_size=0.2, random_state=42) return X_train, X_test, Y_train, Y_test, X, Y def build_model(loss=tf.keras.losses.MeanSquaredError()): model = tf.keras.models.Sequential([ tf.keras.layers.InputLayer(shape=(1,)), tf.keras.layers.Dense(64, activation='relu'), tf.keras.layers.Dense(64, activation='relu'), tf.keras.layers.Dense(64, activation='relu'), tf.keras.layers.Dense(1) # Output layer ]) model.compile(optimizer='adam', loss=loss) return model def run(num_points=1000, num_plot_pts=200): # Define quantiles quantiles = [0.05, 0.5, 0.95] models = {} X_train, X_test, Y_train, Y_test, X, Y = make_data(num_points=num_points) print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape) # Train a model for each quantile for q in quantiles: print(f"Training quantile {q} model...") models[q] = build_model(loss=quantile_loss(q)) models[q].fit(X_train, Y_train, epochs=100, batch_size=32, verbose=0) # Generate test data predictions X_range = np.linspace(X.min(), X.max(), num_plot_pts).reshape(-1, 1) predictions = {q: models[q].predict(X_range) for q in quantiles} model = build_model(loss=tf.keras.losses.MeanAbsoluteError()) print(f"Training MAE model...") model.fit(X_train, Y_train, epochs=100, batch_size=32, verbose=0) mae_predictions = model.predict(X_range) model = build_model() print(f"Training MSE model...") model.fit(X_train, Y_train, epochs=100, batch_size=32, verbose=0) mse_predictions = model.predict(X_range) model = build_model(loss=bulk_quantile_loss()) print(f"Training bulk quantile model...") model.fit(X_train, Y_train, epochs=100, batch_size=32, verbose=0) bulk_predictions = model.predict(X_range) # Plot the results plt.figure(figsize=(8, 6)) plt.scatter(X_test[::4, 0], Y_test[::4, 0], alpha=0.3, label="Test Data") plt.plot(X_range, predictions[0.05], label="Quantile 0.05", color='red') plt.plot(X_range, predictions[0.5], label="Quantile 0.5 (Median)", color='green') plt.plot(X_range, predictions[0.95], label="Quantile 0.95", color='blue') plt.plot(X_range, mae_predictions, label="MAE", color='magenta') plt.plot(X_range, mse_predictions, label="MSE", color='cyan') plt.plot(X_range, bulk_predictions, label="Bulk Quantile Model (Wimmers)", color='orange') plt.plot(X_range, true_func(X_range), label="True Function", color='black') plt.xlabel("X") plt.ylabel("Y") plt.legend() plt.title("Quantile Regression") plt.show()