Transformer Hyperparameter Tuning #13
Merged

Changes from all commits (9 commits, all by jvachier):
- aff0373 Adding Optuna for Transformer.
- 29e0b65 Enabling GPU for Optuna.
- dd77851 Update.
- 7ee7d40 Addressing comment 1.
- 2e688f5 Addressing comment 2.
- b2c10bf Adding tests.
- e248e24 Updating workflow.
- ae636e9 Fixing typo.
- 69a25f4 Creating mock data.
New file (154 lines): the Optuna hyperparameter-tuning script for the Transformer.

```python
import optuna
import tensorflow as tf
from modules.data_processor import DatasetProcessor, TextPreprocessor
from modules.transformer_components import (
    PositionalEmbedding,
    TransformerEncoder,
    TransformerDecoder,
    evaluate_bleu,
)
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import logging
import json

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)


def build_transformer_model(trial, preprocessor):
    """
    Build a Transformer model with hyperparameters suggested by Optuna.

    Args:
        trial (optuna.trial.Trial): The trial object for hyperparameter optimization.
        preprocessor (TextPreprocessor): Preprocessor object containing sequence length and vocabulary size.

    Returns:
        tf.keras.Model: The compiled Transformer model.
    """
    # Hyperparameters to optimize
    embed_dim = trial.suggest_categorical("embed_dim", [64, 128])
    dense_dim = trial.suggest_int("dense_dim", 512, 2048, step=512)
    num_heads = trial.suggest_categorical("num_heads", [2, 4, 8])
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)

    sequence_length = preprocessor.sequence_length
    vocab_size = preprocessor.vocab_size

    # Build the Transformer model
    encoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="english")
    encoder_embeddings = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(
        encoder_inputs
    )
    encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(
        encoder_embeddings
    )

    decoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="french")
    decoder_embeddings = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(
        decoder_inputs
    )
    decoder_outputs = TransformerDecoder(embed_dim, dense_dim, num_heads)(
        decoder_embeddings, encoder_outputs
    )
    dropout_outputs = tf.keras.layers.Dropout(dropout_rate)(decoder_outputs)
    final_outputs = tf.keras.layers.Dense(vocab_size, activation="softmax")(
        dropout_outputs
    )

    transformer = tf.keras.Model([encoder_inputs, decoder_inputs], final_outputs)

    # Compile the model
    transformer.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"],
    )

    return transformer


def objective(trial):
    """
    Objective function for Optuna to optimize the Transformer model using BLEU score.

    Args:
        trial (optuna.trial.Trial): The trial object for hyperparameter optimization.

    Returns:
        float: BLEU score of the model on the validation dataset.
    """
    # Load and preprocess the dataset
    processor = DatasetProcessor(file_path="src/data/en-fr.parquet")
    processor.load_data()
    processor.process_data()
    data_splits = processor.shuffle_and_split()
    train_df, val_df = data_splits["train"], data_splits["validation"]

    preprocessor = TextPreprocessor()
    preprocessor.adapt(train_df)

    train_ds = preprocessor.make_dataset(train_df)
    val_ds = preprocessor.make_dataset(val_df)

    # Build the model
    model = build_transformer_model(trial, preprocessor)

    # Define callbacks
    callbacks = [
        EarlyStopping(
            monitor="val_loss",
            patience=2,
            mode="min",
            verbose=1,
            restore_best_weights=True,
        ),
        ReduceLROnPlateau(
            monitor="val_loss",
            factor=0.5,
            patience=3,
            mode="min",
            verbose=1,
        ),
    ]

    # Train the model
    device = "/GPU:0" if tf.config.list_physical_devices("GPU") else "/CPU:0"
    with tf.device(device):
        model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=3,  # Use fewer epochs for faster optimization
            verbose=1,
            callbacks=callbacks,
        )

    # Calculate BLEU score on the validation dataset
    bleu_score = evaluate_bleu(model, val_ds, preprocessor)
    return bleu_score


def main():
    """
    Main function to run the Optuna optimization.
    """
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=5)

    logging.info("Best trial:")
    logging.info(f"Value (BLEU Score): {study.best_trial.value}")
    logging.info("Params:")
    for key, value in study.best_trial.params.items():
        logging.info(f"  {key}: {value}")

    # Save the best hyperparameters
    best_params = study.best_trial.params
    with open("src/models/optuna_transformer_best_params.json", "w") as f:
        json.dump(best_params, f, indent=4)


if __name__ == "__main__":
    main()
```
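One way the saved JSON could be consumed later (a sketch, not part of this PR): Optuna's `FixedTrial` answers each `trial.suggest_*` call with a stored value, so the final model is rebuilt by the exact code path that was tuned. This assumes `build_transformer_model` is importable from the script above and `preprocessor` is an adapted `TextPreprocessor`, as in `objective`.

```python
import json

import optuna

# Sketch: reload the best parameters written by main() and replay them
# through the tuned builder via a FixedTrial.
with open("src/models/optuna_transformer_best_params.json") as f:
    best_params = json.load(f)

# FixedTrial returns best_params["embed_dim"], best_params["dense_dim"], etc.
# for the corresponding suggest_* calls inside build_transformer_model.
model = build_transformer_model(optuna.trial.FixedTrial(best_params), preprocessor)
```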
New file (144 lines): the pytest suite covering model build, training, evaluation, BLEU scoring, and loading.
```python
import pytest
import tensorflow as tf
from modules.data_processor import DatasetProcessor, TextPreprocessor
from modules.transformer_components import (
    PositionalEmbedding,
    TransformerEncoder,
    TransformerDecoder,
    evaluate_bleu,
)
from translation_french_english import transformer_model
from modules.utils import ModelPaths
import os


@pytest.fixture
def setup_data():
    """
    Fixture to set up a mocked dataset and preprocessor for testing.
    """
    import pandas as pd

    # Create a small mock dataset
    mock_data = {
        "en": ["hello", "how are you", "good morning", "thank you", "goodbye"],
        "fr": ["bonjour", "comment ça va", "bon matin", "merci", "au revoir"],
    }
    mock_df = pd.DataFrame(mock_data)

    # Split the mock dataset
    train_df = mock_df.sample(frac=0.6, random_state=42)
    val_df = mock_df.drop(train_df.index).sample(frac=0.5, random_state=42)
    test_df = mock_df.drop(train_df.index).drop(val_df.index)

    # Initialize the preprocessor
    preprocessor = TextPreprocessor()
    preprocessor.adapt(train_df)

    # Create TensorFlow datasets
    train_ds = preprocessor.make_dataset(train_df)
    val_ds = preprocessor.make_dataset(val_df)
    test_ds = preprocessor.make_dataset(test_df)

    return preprocessor, train_ds, val_ds, test_ds


def test_transformer_model_build(setup_data):
    """
    Test if the Transformer model is built correctly.
    """
    preprocessor, train_ds, val_ds, _ = setup_data
    transformer_model_path = "src/models/test_transformer_model.keras"

    # Build the model
    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)

    # Check if the model is compiled
    assert model.optimizer is not None, "Model is not compiled."
    assert model.loss is not None, "Loss function is not defined."
    assert model.metrics is not None, "Metrics are not defined."


def test_transformer_model_training(setup_data):
    """
    Test if the Transformer model can be trained without errors.
    """
    preprocessor, train_ds, val_ds, _ = setup_data
    transformer_model_path = "src/models/test_transformer_model.keras"

    # Build the model
    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)

    # Train the model for 1 epoch
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=1,
        verbose=0,
    )

    # Check if training history is returned
    assert "loss" in history.history, "Training loss is not recorded."
    assert "val_loss" in history.history, "Validation loss is not recorded."


def test_transformer_model_evaluation(setup_data):
    """
    Test if the Transformer model can be evaluated without errors.
    """
    preprocessor, train_ds, val_ds, test_ds = setup_data
    transformer_model_path = "src/models/test_transformer_model.keras"

    # Build the model
    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)

    # Evaluate the model
    results = model.evaluate(test_ds, verbose=0)

    # Check if evaluation results are returned
    assert len(results) == 2, "Evaluation did not return loss and accuracy."
    assert results[0] >= 0, "Test loss is invalid."
    assert 0 <= results[1] <= 1, "Test accuracy is invalid."


def test_transformer_model_bleu_score(setup_data):
    """
    Test if the BLEU score can be calculated for the Transformer model.
    """
    preprocessor, train_ds, val_ds, test_ds = setup_data
    transformer_model_path = "src/models/test_transformer_model.keras"

    # Build the model
    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)

    # Calculate BLEU score
    bleu_score = evaluate_bleu(model, test_ds, preprocessor)

    # Check if BLEU score is valid
    assert 0 <= bleu_score <= 1, "BLEU score is invalid."


def test_transformer_model_loading(setup_data):
    """
    Test if the Transformer model can be loaded from a saved file.
    """
    preprocessor, train_ds, val_ds, _ = setup_data
    transformer_model_path = "src/models/test_transformer_model.keras"

    # Build and save the model
    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)
    model.save(transformer_model_path)

    # Load the model
    loaded_model = tf.keras.models.load_model(
        transformer_model_path,
        custom_objects={
            "PositionalEmbedding": PositionalEmbedding,
            "TransformerEncoder": TransformerEncoder,
            "TransformerDecoder": TransformerDecoder,
        },
    )

    # Check if the loaded model is valid
    assert loaded_model is not None, "Failed to load the Transformer model."
    assert loaded_model.optimizer is not None, "Loaded model is not compiled."
```
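One detail a reviewer might note (a sketch under assumptions, not part of this PR): every test hardcodes `src/models/test_transformer_model.keras`, so each run leaves a model file in the source tree. Pytest's built-in `tmp_path` fixture would keep that artifact in a per-test temporary directory; the tests would then accept `transformer_model_path` as a fixture argument instead of assigning it locally.

```python
import pytest


@pytest.fixture
def transformer_model_path(tmp_path):
    # tmp_path is pytest's built-in per-test temporary directory, so the
    # saved .keras file never accumulates under src/models/.
    return str(tmp_path / "test_transformer_model.keras")
```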
Review comment: [nitpick] Consider adding a comment or updating the docstring to explain the rationale behind the chosen hyperparameter values (e.g., embed_dim, dense_dim, and num_heads) for improved clarity and maintainability.
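A sketch of what such a comment could look like in `build_transformer_model`; the rationale text is illustrative, not taken from the PR:

```python
# Search-space rationale (illustrative):
# - embed_dim: 64 or 128 keeps trials fast, and both values divide evenly
#   by every candidate num_heads, a common convention for multi-head
#   attention.
# - dense_dim: 512 to 2048 in steps of 512 spans a broad range of
#   feed-forward widths without blowing up short tuning runs.
# - num_heads: 2, 4, or 8 trades per-head capacity against the number of
#   attention patterns the layer can learn.
embed_dim = trial.suggest_categorical("embed_dim", [64, 128])
dense_dim = trial.suggest_int("dense_dim", 512, 2048, step=512)
num_heads = trial.suggest_categorical("num_heads", [2, 4, 8])
```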