From aff0373a445d344be1c88016a136de1f24fc3679 Mon Sep 17 00:00:00 2001
From: Jeremy Vachier <89128100+jvachier@users.noreply.github.com>
Date: Sun, 4 May 2025 12:57:38 +0200
Subject: [PATCH 1/9] Adding Optuna for Transformer.

---
 src/modules/optuna_transformer.py | 145 ++++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)
 create mode 100644 src/modules/optuna_transformer.py

diff --git a/src/modules/optuna_transformer.py b/src/modules/optuna_transformer.py
new file mode 100644
index 0000000..4383d2d
--- /dev/null
+++ b/src/modules/optuna_transformer.py
@@ -0,0 +1,145 @@
+import optuna
+import tensorflow as tf
+from modules.data_processor import DatasetProcessor, TextPreprocessor
+from modules.transformer_components import (
+    PositionalEmbedding,
+    TransformerEncoder,
+    TransformerDecoder,
+    evaluate_bleu,
+)
+from modules.utils import ModelPaths
+from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
+import logging
+import os
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+
+
+def build_transformer_model(trial, preprocessor):
+    """
+    Build a Transformer model with hyperparameters suggested by Optuna.
+
+    Args:
+        trial (optuna.trial.Trial): The trial object for hyperparameter optimization.
+        preprocessor (TextPreprocessor): Preprocessor object containing sequence length and vocabulary size.
+
+    Returns:
+        tf.keras.Model: The compiled Transformer model.
+    """
+    # Hyperparameters to optimize
+    embed_dim = trial.suggest_categorical("embed_dim", [64, 128, 256])
+    dense_dim = trial.suggest_int("dense_dim", 512, 2048, step=512)
+    num_heads = trial.suggest_categorical("num_heads", [2, 4, 8])
+    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)
+
+    sequence_length = preprocessor.sequence_length
+    vocab_size = preprocessor.vocab_size
+
+    # Build the Transformer model
+    encoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="english")
+    x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs)
+    encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(x)
+
+    decoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="french")
+    x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs)
+    x = TransformerDecoder(embed_dim, dense_dim, num_heads)(x, encoder_outputs)
+    x = tf.keras.layers.Dropout(dropout_rate)(x)
+    decoder_outputs = tf.keras.layers.Dense(vocab_size, activation="softmax")(x)
+
+    transformer = tf.keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
+
+    # Compile the model
+    transformer.compile(
+        optimizer=tf.keras.optimizers.Adam(),
+        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+        metrics=["accuracy"],
+    )
+
+    return transformer
+
+
+def objective(trial):
+    """
+    Objective function for Optuna to optimize the Transformer model using BLEU score.
+
+    Args:
+        trial (optuna.trial.Trial): The trial object for hyperparameter optimization.
+
+    Returns:
+        float: BLEU score of the model on the validation dataset.
+    """
+    # Load and preprocess the dataset
+    processor = DatasetProcessor(file_path="src/data/en-fr.parquet")
+    processor.load_data()
+    processor.process_data()
+    data_splits = processor.shuffle_and_split()
+    train_df, val_df = data_splits["train"], data_splits["validation"]
+
+    preprocessor = TextPreprocessor()
+    preprocessor.adapt(train_df)
+
+    train_ds = preprocessor.make_dataset(train_df)
+    val_ds = preprocessor.make_dataset(val_df)
+
+    # Build the model
+    model = build_transformer_model(trial, preprocessor)
+
+    # Define callbacks
+    callbacks = [
+        EarlyStopping(
+            monitor="val_loss",
+            patience=2,
+            mode="min",
+            verbose=1,
+            restore_best_weights=True,
+        ),
+        ReduceLROnPlateau(
+            monitor="val_loss",
+            factor=0.5,
+            patience=3,
+            mode="min",
+            verbose=1,
+        ),
+    ]
+
+    # Train the model
+    model.fit(
+        train_ds,
+        validation_data=val_ds,
+        epochs=5,  # Use fewer epochs for faster optimization
+        verbose=1,
+        callbacks=callbacks,
+    )
+
+    # Calculate BLEU score on the validation dataset
+    bleu_score = evaluate_bleu(model, val_ds, preprocessor)
+    return bleu_score
+
+
+def main():
+    """
+    Main function to run the Optuna optimization.
+    """
+    study = optuna.create_study(direction="maximize")
+    study.optimize(objective, n_trials=20)
+
+    logging.info("Best trial:")
+    logging.info(f"  Value (BLEU Score): {study.best_trial.value}")
+    logging.info("  Params:")
+    for key, value in study.best_trial.params.items():
+        logging.info(f"    {key}: {value}")
+
+    # Save the best hyperparameters
+    best_params = study.best_trial.params
+    with open("src/models/optuna_best_params.json", "w") as f:
+        import json
+
+        json.dump(best_params, f, indent=4)
+
+
+if __name__ == "__main__":
+    main()
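The objective above returns the value of evaluate_bleu, which is imported from modules.transformer_components but whose body never appears in this series. As a rough sketch only -- assuming the dataset yields (inputs, targets) pairs, that padding uses token id 0, and that NLTK is installed; the project's actual helper may decode differently -- such a function could look like:

    import numpy as np
    from nltk.translate.bleu_score import SmoothingFunction, corpus_bleu


    def evaluate_bleu(model, dataset, preprocessor, max_batches=10):
        """Token-level corpus BLEU over greedy (argmax) predictions (sketch)."""
        # preprocessor is kept for signature parity; a real version would use it
        # to map token ids back to words before scoring.
        references, hypotheses = [], []
        for inputs, targets in dataset.take(max_batches):
            # (batch, seq_len, vocab) probabilities -> (batch, seq_len) token ids
            predicted_ids = np.argmax(model.predict(inputs, verbose=0), axis=-1)
            for pred, target in zip(predicted_ids, targets.numpy()):
                hypotheses.append([str(t) for t in pred if t != 0])  # drop padding
                references.append([[str(t) for t in target if t != 0]])
        # Smoothing avoids hard zeros on the short sequences of early trials
        return corpus_bleu(
            references, hypotheses, smoothing_function=SmoothingFunction().method1
        )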
""" study = optuna.create_study(direction="maximize") - study.optimize(objective, n_trials=20) + study.optimize(objective, n_trials=5) logging.info("Best trial:") - logging.info(f" Value (BLEU Score): {study.best_trial.value}") - logging.info(" Params:") + logging.info(f"Value (BLEU Score): {study.best_trial.value}") + logging.info("Params:") for key, value in study.best_trial.params.items(): logging.info(f" {key}: {value}") # Save the best hyperparameters best_params = study.best_trial.params - with open("src/models/optuna_best_params.json", "w") as f: - import json - + with open("src/models/optuna_transformer_best_params.json", "w") as f: json.dump(best_params, f, indent=4) From dd77851feaba9493269d854515bb943aac5ac011 Mon Sep 17 00:00:00 2001 From: Jeremy Vachier <89128100+jvachier@users.noreply.github.com> Date: Sun, 4 May 2025 15:40:28 +0200 Subject: [PATCH 3/9] update. --- src/modules/optuna_transformer.py | 4 ++-- src/translation_french_english.py | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/modules/optuna_transformer.py b/src/modules/optuna_transformer.py index 377b095..5778491 100644 --- a/src/modules/optuna_transformer.py +++ b/src/modules/optuna_transformer.py @@ -30,7 +30,7 @@ def build_transformer_model(trial, preprocessor): tf.keras.Model: The compiled Transformer model. """ # Hyperparameters to optimize - embed_dim = trial.suggest_categorical("embed_dim", [64, 128, 256]) + embed_dim = trial.suggest_categorical("embed_dim", [64, 128]) dense_dim = trial.suggest_int("dense_dim", 512, 2048, step=512) num_heads = trial.suggest_categorical("num_heads", [2, 4, 8]) dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1) @@ -110,7 +110,7 @@ def objective(trial): model.fit( train_ds, validation_data=val_ds, - epochs=5, # Use fewer epochs for faster optimization + epochs=3, # Use fewer epochs for faster optimization verbose=1, callbacks=callbacks, ) diff --git a/src/translation_french_english.py b/src/translation_french_english.py index a19f451..e4b97a4 100644 --- a/src/translation_french_english.py +++ b/src/translation_french_english.py @@ -54,9 +54,10 @@ def transformer_model( }, ) # Define model parameters - embed_dim = 128 - dense_dim = 2048 - num_heads = 8 + embed_dim = 64 + dense_dim = 1536 + num_heads = 2 + dropout_rate = 0.4 sequence_length = preprocessor.sequence_length vocab_size = preprocessor.vocab_size @@ -68,7 +69,7 @@ def transformer_model( decoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="french") x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs) x = TransformerDecoder(embed_dim, dense_dim, num_heads)(x, encoder_outputs) - x = tf.keras.layers.Dropout(0.5)(x) + x = tf.keras.layers.Dropout(dropout_rate)(x) decoder_outputs = tf.keras.layers.Dense(vocab_size, activation="softmax")(x) transformer = tf.keras.Model([encoder_inputs, decoder_inputs], decoder_outputs) From 7ee7d40fd40fe665c82816af80210cd235ec799c Mon Sep 17 00:00:00 2001 From: Jeremy Vachier <89128100+jvachier@users.noreply.github.com> Date: Sun, 4 May 2025 16:11:19 +0200 Subject: [PATCH 4/9] Addressing comment 1. 
From 7ee7d40fd40fe665c82816af80210cd235ec799c Mon Sep 17 00:00:00 2001
From: Jeremy Vachier <89128100+jvachier@users.noreply.github.com>
Date: Sun, 4 May 2025 16:11:19 +0200
Subject: [PATCH 4/9] Addressing comment 1.

---
 src/modules/optuna_transformer.py | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/src/modules/optuna_transformer.py b/src/modules/optuna_transformer.py
index 5778491..8a68f61 100644
--- a/src/modules/optuna_transformer.py
+++ b/src/modules/optuna_transformer.py
@@ -40,16 +40,26 @@ def build_transformer_model(trial, preprocessor):
 
     # Build the Transformer model
     encoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="english")
-    x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs)
-    encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(x)
+    encoder_embeddings = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(
+        encoder_inputs
+    )
+    encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(
+        encoder_embeddings
+    )
 
     decoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="french")
-    x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs)
-    x = TransformerDecoder(embed_dim, dense_dim, num_heads)(x, encoder_outputs)
-    x = tf.keras.layers.Dropout(dropout_rate)(x)
-    decoder_outputs = tf.keras.layers.Dense(vocab_size, activation="softmax")(x)
+    decoder_embeddings = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(
+        decoder_inputs
+    )
+    decoder_outputs = TransformerDecoder(embed_dim, dense_dim, num_heads)(
+        decoder_embeddings, encoder_outputs
+    )
+    dropout_outputs = tf.keras.layers.Dropout(dropout_rate)(decoder_outputs)
+    final_outputs = tf.keras.layers.Dense(vocab_size, activation="softmax")(
+        dropout_outputs
+    )
 
-    transformer = tf.keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
+    transformer = tf.keras.Model([encoder_inputs, decoder_inputs], final_outputs)
 
     # Compile the model
     transformer.compile(
@@ -106,7 +116,8 @@ def objective(trial):
     ]
 
     # Train the model
-    with tf.device("/GPU:0"):
+    device = "/GPU:0" if tf.config.list_physical_devices("GPU") else "/CPU:0"
+    with tf.device(device):
         model.fit(
             train_ds,
             validation_data=val_ds,
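The device fallback added here picks /CPU:0 when no GPU is visible. A common companion when a study rebuilds a model for every trial -- sketched here as an assumption, not something this series adds -- is to enable memory growth so TensorFlow does not reserve the whole GPU up front:

    import tensorflow as tf

    # Must run before any GPU has been initialized, i.e., at import time,
    # ahead of the first model build or dataset prefetch onto the device.
    for gpu in tf.config.list_physical_devices("GPU"):
        tf.config.experimental.set_memory_growth(gpu, True)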
From 2e688f5eee298d5947b85d15a1d4d656a79ceee7 Mon Sep 17 00:00:00 2001
From: Jeremy Vachier <89128100+jvachier@users.noreply.github.com>
Date: Sun, 4 May 2025 16:14:53 +0200
Subject: [PATCH 5/9] Addressing comment 2.

---
 src/translation_french_english.py | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/src/translation_french_english.py b/src/translation_french_english.py
index e4b97a4..9c65b3b 100644
--- a/src/translation_french_english.py
+++ b/src/translation_french_english.py
@@ -34,6 +34,7 @@ def transformer_model(
     provided training and validation datasets.
 
     Args:
+        transformer_model_path (str): Path to the saved Transformer model.
         preprocessor (TextPreprocessor): Preprocessor object containing sequence
             length and vocabulary size information.
         train_ds (tf.data.Dataset): Training dataset.
@@ -46,13 +47,14 @@
         # Load the saved model
         logging.info("Loading the saved Transformer model.")
         return tf.keras.models.load_model(
-            "src/models/transformer_best_model.keras",
+            transformer_model_path,
             custom_objects={
                 "PositionalEmbedding": PositionalEmbedding,
                 "TransformerEncoder": TransformerEncoder,
                 "TransformerDecoder": TransformerDecoder,
             },
         )
+
     # Define model parameters
     embed_dim = 64
     dense_dim = 1536
@@ -63,16 +65,26 @@ def transformer_model(
 
     # Build the Transformer model
     encoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="english")
-    x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs)
-    encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(x)
+    encoder_embeddings = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(
+        encoder_inputs
+    )
+    encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(
+        encoder_embeddings
+    )
 
     decoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="french")
-    x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs)
-    x = TransformerDecoder(embed_dim, dense_dim, num_heads)(x, encoder_outputs)
-    x = tf.keras.layers.Dropout(dropout_rate)(x)
-    decoder_outputs = tf.keras.layers.Dense(vocab_size, activation="softmax")(x)
+    decoder_embeddings = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(
+        decoder_inputs
+    )
+    decoder_outputs = TransformerDecoder(embed_dim, dense_dim, num_heads)(
+        decoder_embeddings, encoder_outputs
+    )
+    dropout_outputs = tf.keras.layers.Dropout(dropout_rate)(decoder_outputs)
+    final_outputs = tf.keras.layers.Dense(vocab_size, activation="softmax")(
+        dropout_outputs
+    )
 
-    transformer = tf.keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
+    transformer = tf.keras.Model([encoder_inputs, decoder_inputs], final_outputs)
 
     # Compile the model
     transformer.compile(
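With this change, transformer_model either loads the model at transformer_model_path or builds and trains a fresh one. Judging by the tests added in the next patch, a call site would look like the following (argument order inferred from those tests, so treat it as an assumption):

    from translation_french_english import transformer_model

    # Hypothetical usage; preprocessor, train_ds, and val_ds are assumed to be
    # built the same way objective() builds them in optuna_transformer.py.
    model = transformer_model(
        "src/models/transformer_best_model.keras",  # loaded from disk when present
        preprocessor,
        train_ds,
        val_ds,
    )
    model.summary()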
From b2c10bf1fae4b452a03ffd0ea2fd21a7f8ff238c Mon Sep 17 00:00:00 2001
From: Jeremy Vachier <89128100+jvachier@users.noreply.github.com>
Date: Sun, 4 May 2025 16:27:12 +0200
Subject: [PATCH 6/9] Adding tests.

---
 tests/test_transformer_model.py | 138 ++++++++++++++++++++++++++++++++
 1 file changed, 138 insertions(+)
 create mode 100644 tests/test_transformer_model.py

diff --git a/tests/test_transformer_model.py b/tests/test_transformer_model.py
new file mode 100644
index 0000000..2e3ceaf
--- /dev/null
+++ b/tests/test_transformer_model.py
@@ -0,0 +1,138 @@
+import pytest
+import tensorflow as tf
+from modules.data_processor import DatasetProcessor, TextPreprocessor
+from modules.transformer_components import (
+    PositionalEmbedding,
+    TransformerEncoder,
+    TransformerDecoder,
+    evaluate_bleu,
+)
+from translation_french_english import transformer_model
+from modules.utils import ModelPaths
+import os
+
+
+@pytest.fixture
+def setup_data():
+    """
+    Fixture to set up a smaller dataset and preprocessor for testing.
+    """
+    processor = DatasetProcessor(file_path="src/data/en-fr.parquet")
+    processor.load_data()
+    processor.process_data()
+    data_splits = processor.shuffle_and_split()
+    train_df = data_splits["train"].sample(n=100)  # Use only 100 samples for training
+    val_df = data_splits["validation"].sample(
+        n=50
+    )  # Use only 50 samples for validation
+    test_df = data_splits["test"].sample(n=50)  # Use only 50 samples for testing
+
+    preprocessor = TextPreprocessor()
+    preprocessor.adapt(train_df)
+
+    train_ds = preprocessor.make_dataset(train_df)
+    val_ds = preprocessor.make_dataset(val_df)
+    test_ds = preprocessor.make_dataset(test_df)
+
+    return preprocessor, train_ds, val_ds, test_ds
+
+
+def test_transformer_model_build(setup_data):
+    """
+    Test if the Transformer model is built correctly.
+    """
+    preprocessor, train_ds, val_ds, _ = setup_data
+    transformer_model_path = "src/models/test_transformer_model.keras"
+
+    # Build the model
+    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)
+
+    # Check if the model is compiled
+    assert model.optimizer is not None, "Model is not compiled."
+    assert model.loss is not None, "Loss function is not defined."
+    assert model.metrics is not None, "Metrics are not defined."
+
+
+def test_transformer_model_training(setup_data):
+    """
+    Test if the Transformer model can be trained without errors.
+    """
+    preprocessor, train_ds, val_ds, _ = setup_data
+    transformer_model_path = "src/models/test_transformer_model.keras"
+
+    # Build the model
+    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)
+
+    # Train the model for 1 epoch
+    history = model.fit(
+        train_ds,
+        validation_data=val_ds,
+        epochs=1,
+        verbose=0,
+    )
+
+    # Check if training history is returned
+    assert "loss" in history.history, "Training loss is not recorded."
+    assert "val_loss" in history.history, "Validation loss is not recorded."
+
+
+def test_transformer_model_evaluation(setup_data):
+    """
+    Test if the Transformer model can be evaluated without errors.
+    """
+    preprocessor, train_ds, val_ds, test_ds = setup_data
+    transformer_model_path = "src/models/test_transformer_model.keras"
+
+    # Build the model
+    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)
+
+    # Evaluate the model
+    results = model.evaluate(test_ds, verbose=0)
+
+    # Check if evaluation results are returned
+    assert len(results) == 2, "Evaluation did not return loss and accuracy."
+    assert results[0] >= 0, "Test loss is invalid."
+    assert 0 <= results[1] <= 1, "Test accuracy is invalid."
+
+
+def test_transformer_model_bleu_score(setup_data):
+    """
+    Test if the BLEU score can be calculated for the Transformer model.
+    """
+    preprocessor, train_ds, val_ds, test_ds = setup_data
+    transformer_model_path = "src/models/test_transformer_model.keras"
+
+    # Build the model
+    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)
+
+    # Calculate BLEU score
+    bleu_score = evaluate_bleu(model, test_ds, preprocessor)
+
+    # Check if BLEU score is valid
+    assert 0 <= bleu_score <= 1, "BLEU score is invalid."
+
+
+def test_transformer_model_loading(setup_data):
+    """
+    Test if the Transformer model can be loaded from a saved file.
+    """
+    preprocessor, train_ds, val_ds, _ = setup_data
+    transformer_model_path = "src/models/test_transformer_model.keras"
+
+    # Build and save the model
+    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)
+    model.save(transformer_model_path)
+
+    # Load the model
+    loaded_model = tf.keras.models.load_model(
+        transformer_model_path,
+        custom_objects={
+            "PositionalEmbedding": PositionalEmbedding,
+            "TransformerEncoder": TransformerEncoder,
+            "TransformerDecoder": TransformerDecoder,
+        },
+    )
+
+    # Check if the loaded model is valid
+    assert loaded_model is not None, "Failed to load the Transformer model."
+    assert loaded_model.optimizer is not None, "Loaded model is not compiled."
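These tests all write src/models/test_transformer_model.keras into the real models directory. A sketch of an alternative fixture using pytest's built-in tmp_path -- an assumption, not part of this series -- would keep test artifacts in a throwaway directory instead:

    import pytest


    @pytest.fixture
    def model_path(tmp_path):
        """A disposable .keras path, removed along with pytest's temp directory."""
        return str(tmp_path / "test_transformer_model.keras")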
From e248e2488a0f8aa0e47fa10e7e57d9c7298d885a Mon Sep 17 00:00:00 2001
From: Jeremy Vachier <89128100+jvachier@users.noreply.github.com>
Date: Sun, 4 May 2025 16:30:11 +0200
Subject: [PATCH 7/9] Updating workflow.

---
 .github/workflows/test.yaml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 2806bdc..beba1b0 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -5,14 +5,18 @@ on:
     branches:
       - main
     paths:
-      - "src/modules/model.py" # Trigger only if this file is modified
+      - "src/modules/model_sentiment_analysis.py" # Trigger only if this file is modified
       - "src/modules/data_processor.py" # Trigger only if this file is modified
+      - "src/modules/transformer_component.py"
+      - "src/translation_french_english.py"
   pull_request:
     branches:
       - main
     paths:
-      - "src/modules/model.py" # Trigger only if this file is modified
+      - "src/modules/model_sentiment_analysis.py" # Trigger only if this file is modified
       - "src/modules/data_processor.py" # Trigger only if this file is modified
+      - "src/modules/transformer_component.py"
+      - "src/translation_french_english.py"
 
 jobs:
   test:

From ae636e9d22037e5b1fc7103b69e09ca8e8722915 Mon Sep 17 00:00:00 2001
From: Jeremy Vachier <89128100+jvachier@users.noreply.github.com>
Date: Sun, 4 May 2025 16:32:31 +0200
Subject: [PATCH 8/9] Fixing typo.

---
 .github/workflows/test.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index beba1b0..c5f5134 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -7,7 +7,7 @@ on:
     paths:
       - "src/modules/model_sentiment_analysis.py" # Trigger only if this file is modified
       - "src/modules/data_processor.py" # Trigger only if this file is modified
-      - "src/modules/transformer_component.py"
+      - "src/modules/transformer_components.py"
       - "src/translation_french_english.py"
   pull_request:
     branches:
@@ -15,7 +15,7 @@ on:
     paths:
      - "src/modules/model_sentiment_analysis.py" # Trigger only if this file is modified
       - "src/modules/data_processor.py" # Trigger only if this file is modified
-      - "src/modules/transformer_component.py"
+      - "src/modules/transformer_components.py"
       - "src/translation_french_english.py"
 
 jobs:
From 69a25f492078b3019ec9656e20fcf5d85c2239cb Mon Sep 17 00:00:00 2001
From: Jeremy Vachier <89128100+jvachier@users.noreply.github.com>
Date: Sun, 4 May 2025 16:36:54 +0200
Subject: [PATCH 9/9] Creating mock data.

---
 tests/test_transformer_model.py | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/tests/test_transformer_model.py b/tests/test_transformer_model.py
index 2e3ceaf..363f42d 100644
--- a/tests/test_transformer_model.py
+++ b/tests/test_transformer_model.py
@@ -15,21 +15,27 @@
 @pytest.fixture
 def setup_data():
     """
-    Fixture to set up a smaller dataset and preprocessor for testing.
+    Fixture to set up a mocked dataset and preprocessor for testing.
     """
-    processor = DatasetProcessor(file_path="src/data/en-fr.parquet")
-    processor.load_data()
-    processor.process_data()
-    data_splits = processor.shuffle_and_split()
-    train_df = data_splits["train"].sample(n=100)  # Use only 100 samples for training
-    val_df = data_splits["validation"].sample(
-        n=50
-    )  # Use only 50 samples for validation
-    test_df = data_splits["test"].sample(n=50)  # Use only 50 samples for testing
+    import pandas as pd
+
+    # Create a small mock dataset
+    mock_data = {
+        "en": ["hello", "how are you", "good morning", "thank you", "goodbye"],
+        "fr": ["bonjour", "comment ça va", "bon matin", "merci", "au revoir"],
+    }
+    mock_df = pd.DataFrame(mock_data)
+
+    # Split the mock dataset
+    train_df = mock_df.sample(frac=0.6, random_state=42)
+    val_df = mock_df.drop(train_df.index).sample(frac=0.5, random_state=42)
+    test_df = mock_df.drop(train_df.index).drop(val_df.index)
 
+    # Initialize the preprocessor
     preprocessor = TextPreprocessor()
     preprocessor.adapt(train_df)
 
+    # Create TensorFlow datasets
     train_ds = preprocessor.make_dataset(train_df)
     val_ds = preprocessor.make_dataset(val_df)
     test_ds = preprocessor.make_dataset(test_df)
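With the trial count cut to 5 and training to 3 epochs for speed, a pruner is the other common lever for shortening a study. This final sketch is an assumption on top of this series, not project code, and the Keras callback requires the optuna-integration package:

    import optuna
    from optuna.integration import TFKerasPruningCallback

    from modules.optuna_transformer import objective

    # MedianPruner stops trials whose intermediate val_accuracy falls below the
    # running median; one warmup epoch avoids pruning on the noisy first epoch.
    study = optuna.create_study(
        direction="maximize",
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=1),
    )
    # Inside objective(trial), the model.fit callbacks list would also need:
    #     callbacks.append(TFKerasPruningCallback(trial, "val_accuracy"))
    study.optimize(objective, n_trials=5)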