From a311ad997a09031420768c8390b6ef01d0a73882 Mon Sep 17 00:00:00 2001 From: Chris Morandi Date: Sun, 28 Dec 2025 17:41:21 +0100 Subject: [PATCH] If applied, this commit will add an Extraction strategy to the ArtifactGenerator copy operations which will extract the content of matched tarball files to the output artifact. This is in response to the issue detailed here https://github.com/fluxcd/source-watcher/issues/301 Signed-off-by: Chris Morandi Address comments --- api/v1beta1/artifactgenerator_types.go | 6 +- ...tensions.fluxcd.io_artifactgenerators.yaml | 5 + docs/spec/v1beta1/artifactgenerators.md | 33 +- internal/builder/builder.go | 80 +- internal/builder/extract.go | 86 ++ internal/builder/extract_test.go | 765 ++++++++++++++++++ 6 files changed, 960 insertions(+), 15 deletions(-) create mode 100644 internal/builder/extract.go create mode 100644 internal/builder/extract_test.go diff --git a/api/v1beta1/artifactgenerator_types.go b/api/v1beta1/artifactgenerator_types.go index 4a39db4..eeb7f5b 100644 --- a/api/v1beta1/artifactgenerator_types.go +++ b/api/v1beta1/artifactgenerator_types.go @@ -37,6 +37,7 @@ const ( SourceFetchFailedReason = "SourceFetchFailed" OverwriteStrategy = "Overwrite" MergeStrategy = "Merge" + ExtractStrategy = "Extract" EnabledValue = "enabled" DisabledValue = "disabled" ) @@ -149,9 +150,12 @@ type CopyOperation struct { // Strategy specifies the copy strategy to use. // 'Overwrite' will overwrite existing files in the destination. // 'Merge' is for merging YAML files using Helm values merge strategy. + // 'Extract' is for extracting the contents of tarball archives (.tar.gz, .tgz) + // built with flux build artifact or helm package. When using glob patterns, non-tarball files are silently skipped. + // For single file sources, the file must be a tarball or an error is returned. Directories are not supported. // If not specified, defaults to 'Overwrite'. 
// +optional - // +kubebuilder:validation:Enum=Overwrite;Merge + // +kubebuilder:validation:Enum=Overwrite;Merge;Extract Strategy string `json:"strategy,omitempty"` } diff --git a/config/crd/bases/source.extensions.fluxcd.io_artifactgenerators.yaml b/config/crd/bases/source.extensions.fluxcd.io_artifactgenerators.yaml index bb755a9..5240fb9 100644 --- a/config/crd/bases/source.extensions.fluxcd.io_artifactgenerators.yaml +++ b/config/crd/bases/source.extensions.fluxcd.io_artifactgenerators.yaml @@ -85,10 +85,15 @@ spec: Strategy specifies the copy strategy to use. 'Overwrite' will overwrite existing files in the destination. 'Merge' is for merging YAML files using Helm values merge strategy. + 'Extract' is for extracting the contents of tarball archives (.tar.gz, .tgz) + built with flux build artifact or helm package. When using glob patterns, + non-tarball files are silently skipped. For single file sources, the file + must be a tarball or an error is returned. Directories are not supported. If not specified, defaults to 'Overwrite'. enum: - Overwrite - Merge + - Extract type: string to: description: |- diff --git a/docs/spec/v1beta1/artifactgenerators.md b/docs/spec/v1beta1/artifactgenerators.md index 304b2f2..8fd6dd4 100644 --- a/docs/spec/v1beta1/artifactgenerators.md +++ b/docs/spec/v1beta1/artifactgenerators.md @@ -268,8 +268,8 @@ Each copy operation specifies how to copy files from sources into the generated the root of the generated artifact and `path` is the relative path to a file or directory. - `exclude` (optional): A list of glob patterns to filter out from the source selection. Any file matched by `from` that also matches an exclude pattern will be ignored. -- `strategy` (optional): Defines how to handle existing files at the destination, - either `Overwrite` (default) or `Merge` (for YAML files only). 
+- `strategy` (optional): Defines how to handle files during copy operations: + `Overwrite` (default), `Merge` (for YAML files), or `Extract` (for tarball archives). Copy operations use `cp`-like semantics: @@ -327,6 +327,35 @@ Example of copy with `Merge` strategy: **Note** that the merge strategy will replace _arrays_ entirely, the behavior is identical to how Helm merges `values.yaml` files when using multiple `--values` flags. +##### Extract Strategy + +The `Extract` strategy is used for extracting the contents of tarball archives (`.tar.gz`, `.tgz`) +built with `flux build artifact` or `helm package`. The tarball contents are extracted +to the destination while preserving their internal directory structure. + +Example of copy with `Extract` strategy: + +```yaml +# Extract a Helm chart tarball built with `helm package` +- from: "@oci/podinfo-6.7.0.tgz" + to: "@artifact/" + strategy: Extract + +# Extract multiple tarballs using glob patterns +- from: "@source/charts/*.tgz" + to: "@artifact/charts/" + strategy: Extract + +# Extract tarballs recursively from nested directories +- from: "@source/releases/**/*.tgz" + to: "@artifact/" + strategy: Extract +``` + +**Note** that when using glob patterns (including recursive `**` patterns) with the `Extract` +strategy, non-tarball files are silently skipped. For single file sources, the file must have +a `.tar.gz` or `.tgz` extension. Directories are not supported with this strategy. 
+ ## Working with ArtifactGenerators ### Suspend and Resume Reconciliation diff --git a/internal/builder/builder.go b/internal/builder/builder.go index a50b2de..e28131e 100644 --- a/internal/builder/builder.go +++ b/internal/builder/builder.go @@ -127,6 +127,18 @@ func applyCopyOperations(ctx context.Context, return nil } +// getGlobMatchingEntries returns the entries of srcRoot matching srcPattern. If the copy operation uses the Extract strategy, +// it uses doublestar.Glob as we do not need to walk the whole tree, otherwise we use std fs.Glob. +func getGlobMatchingEntries(op swapi.CopyOperation, srcRoot *os.Root, srcPattern string) ([]string, error) { + if op.Strategy == swapi.ExtractStrategy { + // Use doublestar.Glob for recursive and advanced glob patterns (e.g., **/*.tar.gz) + return doublestar.Glob(srcRoot.FS(), srcPattern) + } else { + // Use fs.Glob for simple, non-recursive glob patterns + return fs.Glob(srcRoot.FS(), srcPattern) + } +} + // applyCopyOperation applies a single copy operation from the sources to the staging directory. // This function implements cp-like semantics by first analyzing the source pattern to determine // if it's a glob, direct file/directory reference, or wildcard pattern, then making copy decisions @@ -175,11 +187,11 @@ func applyCopyOperation(ctx context.Context, if !isGlobPattern { // Direct path reference - check what it actually is first (cp-like behavior) - return applySingleSourceCopy(ctx, op, srcRoot, srcPattern, stagingRoot, destRelPath, destEndsWithSlash) + return applySingleSourceCopy(ctx, op, srcRoot, srcPattern, stagingRoot, stagingDir, destRelPath, destEndsWithSlash) } - // Glob pattern - find all matches and copy each - matches, err := fs.Glob(srcRoot.FS(), srcPattern) + matches, err := getGlobMatchingEntries(op, srcRoot, srcPattern) + if err != nil { return fmt.Errorf("invalid glob pattern '%s': %w", srcPattern, err) } @@ -188,12 +200,19 @@ func applyCopyOperation(ctx context.Context, return fmt.Errorf("no files match pattern '%s' in source '%s'", srcPattern, srcAlias) } - // Filter out 
excluded files + // Filter out excluded files and special directory entries filteredMatches := make([]string, 0, len(matches)) for _, match := range matches { - if !shouldExclude(match, op.Exclude) { - filteredMatches = append(filteredMatches, match) + // Skip current directory and parent directory references + // doublestar.Glob returns "." for patterns like "**" which would + // cause the entire source to be copied, bypassing per-file strategies + if match == "." || match == ".." { + continue + } + if shouldExclude(match, op.Exclude) { + continue } + filteredMatches = append(filteredMatches, match) } if len(filteredMatches) == 0 { @@ -206,10 +225,22 @@ func applyCopyOperation(ctx context.Context, return err } - // Calculate destination path based on glob pattern type - destFile := calculateGlobDestination(srcPattern, match, destRelPath) - if err := copyFileWithRoots(ctx, op, srcRoot, match, stagingRoot, destFile); err != nil { - return fmt.Errorf("failed to copy file '%s' to '%s': %w", match, destFile, err) + // Handle Extract strategy for tarballs + if op.Strategy == swapi.ExtractStrategy { + if !isTarball(match) { + // Ignore files that are not tarball archives and directories + continue + } + if err := extractTarball(ctx, srcRoot, match, stagingDir, destRelPath); err != nil { + return fmt.Errorf("failed to extract tarball '%s' to '%s': %w", match, destRelPath, err) + } + } else { + // Calculate destination path based on glob pattern type + destFile := calculateGlobDestination(srcPattern, match, destRelPath) + + if err := copyFileWithRoots(ctx, op, srcRoot, match, stagingRoot, destFile); err != nil { + return fmt.Errorf("failed to copy file '%s' to '%s': %w", match, destFile, err) + } } } @@ -223,6 +254,7 @@ func applySingleSourceCopy(ctx context.Context, srcRoot *os.Root, srcPath string, stagingRoot *os.Root, + stagingDir string, destPath string, destEndsWithSlash bool) error { // Clean the source path to handle trailing slashes @@ -238,10 +270,14 @@ func 
applySingleSourceCopy(ctx context.Context, } if srcInfo.IsDir() { + // Extract strategy is not supported for directories + if op.Strategy == swapi.ExtractStrategy { + return fmt.Errorf("extract strategy is not supported for directories, got '%s'", srcPath) + } return applySingleDirectoryCopy(ctx, op, srcRoot, srcPath, stagingRoot, destPath) - } else { - return applySingleFileCopy(ctx, op, srcRoot, srcPath, stagingRoot, destPath, destEndsWithSlash) } + + return applySingleFileCopy(ctx, op, srcRoot, srcPath, stagingRoot, stagingDir, destPath, destEndsWithSlash) } // applySingleFileCopy handles copying a single file using cp-like semantics: @@ -252,12 +288,22 @@ func applySingleFileCopy(ctx context.Context, srcRoot *os.Root, srcPath string, stagingRoot *os.Root, + stagingDir string, destPath string, destEndsWithSlash bool) error { // Check if the file should be excluded if shouldExclude(srcPath, op.Exclude) { return nil // Skip excluded file } + + // Handle Extract strategy for tarballs + if op.Strategy == swapi.ExtractStrategy { + if !isTarball(srcPath) { + return fmt.Errorf("extract strategy requires tarball file (.tar.gz or .tgz), got '%s'", srcPath) + } + return extractTarball(ctx, srcRoot, srcPath, stagingDir, destPath) + } + var finalDestPath string if destEndsWithSlash { @@ -303,6 +349,7 @@ func containsGlobChars(path string) bool { // - dir/** patterns strip the directory prefix (like cp -r dir/** dest/) // - other patterns preserve the full match path func calculateGlobDestination(pattern, match, destPath string) string { + // Check if pattern ends with /** (recursive contents pattern) if strings.HasSuffix(pattern, "/**") { // Extract the directory prefix from pattern (everything before /**) @@ -545,12 +592,21 @@ func shouldExclude(filePath string, excludePatterns []string) bool { return false } + fileName := filepath.Base(filePath) + for _, pattern := range excludePatterns { // We validate the patterns when parsing the copy operation, // so it's safe to use 
MatchUnvalidated here. if doublestar.MatchUnvalidated(pattern, filePath) { return true } + // For simple patterns without path separators (e.g., "*.md"), + // also match against just the filename. This provides a more + // intuitive user experience where "*.md" excludes all markdown + // files regardless of their directory depth. + if !strings.Contains(pattern, "/") && doublestar.MatchUnvalidated(pattern, fileName) { + return true + } } return false diff --git a/internal/builder/extract.go b/internal/builder/extract.go new file mode 100644 index 0000000..8e54189 --- /dev/null +++ b/internal/builder/extract.go @@ -0,0 +1,86 @@ +/* +Copyright 2025 The Flux authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package builder + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/fluxcd/pkg/tar" +) + +// tarballExtensions defines the recognized tarball file extensions. +// These are the formats produced by: +// - flux build artifact +// - helm package +// +// Currently supported: .tar.gz and .tgz (gzip-compressed tar archives) +var tarballExtensions = []string{".tar.gz", ".tgz"} + +// isTarball checks if a file path has a recognized tarball extension. +// The check is case-insensitive to handle variations like .TGZ or .Tar.Gz. 
+func isTarball(path string) bool { + lowerPath := strings.ToLower(path) + for _, ext := range tarballExtensions { + if strings.HasSuffix(lowerPath, ext) { + return true + } + } + return false +} + +// extractTarball extracts a tarball archive to the destination directory. +// It uses fluxcd/pkg/tar.Untar for secure extraction which provides: +// - Automatic gzip decompression +// - Path traversal attack prevention +// - Symlink security validation +// - File permission preservation +// +// The tarball contents are extracted maintaining their internal directory structure. +// If the destination directory doesn't exist, it will be created with 0755 permissions. +func extractTarball(ctx context.Context, + srcRoot *os.Root, + srcPath string, + stagingDir string, + destPath string) error { + if err := ctx.Err(); err != nil { + return err + } + + // Open the tarball through the source root for secure file access + srcFile, err := srcRoot.Open(srcPath) + if err != nil { + return fmt.Errorf("failed to open tarball %q: %w", srcPath, err) + } + defer srcFile.Close() + + // Create the full destination path + fullDestPath := filepath.Join(stagingDir, destPath) + if err := os.MkdirAll(fullDestPath, 0o755); err != nil { + return fmt.Errorf("failed to create destination directory %q: %w", fullDestPath, err) + } + + // Use fluxcd/pkg/tar.Untar for secure extraction + if err := tar.Untar(srcFile, fullDestPath); err != nil { + return fmt.Errorf("failed to extract tarball %q to %q: %w", srcPath, fullDestPath, err) + } + + return nil +} diff --git a/internal/builder/extract_test.go b/internal/builder/extract_test.go new file mode 100644 index 0000000..549ff51 --- /dev/null +++ b/internal/builder/extract_test.go @@ -0,0 +1,765 @@ +/* +Copyright 2025 The Flux authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package builder_test + +import ( + "archive/tar" + "compress/gzip" + "context" + "os" + "path/filepath" + "testing" + + . "github.com/onsi/gomega" + + gotkmeta "github.com/fluxcd/pkg/apis/meta" + + swapi "github.com/fluxcd/source-watcher/api/v2/v1beta1" +) + +func TestBuild_ExtractStrategy(t *testing.T) { + tests := []struct { + name string + setupFunc func(t *testing.T) (*swapi.OutputArtifact, map[string]string, string) + validateFunc func(t *testing.T, artifact *gotkmeta.Artifact, workspaceDir string) + expectedError string + }{ + { + name: "Extract single referenced archive successfully", + setupFunc: func(t *testing.T) (*swapi.OutputArtifact, map[string]string, string) { + tmpDir := t.TempDir() + sourceDir := filepath.Join(tmpDir, "source") + workspaceDir := filepath.Join(tmpDir, "workspace") + + setupDirs(t, sourceDir, workspaceDir) + + // create a simple archive file with a single file + tarballlPath := filepath.Join(sourceDir, "manifests.tgz") + createTestTarball(tarballlPath) + + spec := &swapi.OutputArtifact{ + Name: "extract-simple-archive", + Copy: []swapi.CopyOperation{ + { + From: "@source/manifests.tgz", + To: "@artifact/manifests", + Strategy: swapi.ExtractStrategy, + }, + }, + } + sources := map[string]string{ + "source": sourceDir, + } + return spec, sources, workspaceDir + }, + validateFunc: func(t *testing.T, artifact *gotkmeta.Artifact, workspaceDir string) { + g := NewWithT(t) + g.Expect(artifact).ToNot(BeNil()) + + // Read the extracted config from staging directory + stagingDir := filepath.Join(workspaceDir, "extract-simple-archive") + + 
configPath := filepath.Join(stagingDir, "manifests", "config.yaml") + + configContent, err := os.ReadFile(configPath) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(configContent).ToNot(BeEmpty()) + + // Verify the extracted YAML contains expected content + g.Expect(configContent).To(MatchYAML(` +name: app +ports: [8080] +labels: + env: dev + keep: me +version: 1.0.1 +replicas: 2 +env: development +`)) + configPath = filepath.Join(stagingDir, "manifests", "prod", "config.yaml") + configContent, err = os.ReadFile(configPath) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(configContent).ToNot(BeEmpty()) + + // Verify the extracted YAML contains expected content + g.Expect(configContent).To(MatchYAML(` +name: app +ports: [8081] +labels: + env: prod + keep: me +version: 1.0.0 +replicas: 5 +env: production +`)) + }, + expectedError: "", + }, + { + name: "Extract multiple referenced archives using same source pattern", + setupFunc: func(t *testing.T) (*swapi.OutputArtifact, map[string]string, string) { + tmpDir := t.TempDir() + sourceDir := filepath.Join(tmpDir, "source") + workspaceDir := filepath.Join(tmpDir, "workspace") + + setupDirs(t, sourceDir, workspaceDir) + + tarballlPath := filepath.Join(sourceDir, "manifests1.tgz") + createTestTarball(tarballlPath) + + tarballlPath2 := filepath.Join(sourceDir, "manifests2.tgz") + createTestTarballForInt(tarballlPath2) + + spec := &swapi.OutputArtifact{ + Name: "extract-simple-archive", + Copy: []swapi.CopyOperation{ + { + From: "@source/*.tg*", + To: "@artifact/", + Strategy: swapi.ExtractStrategy, + }, + }, + } + sources := map[string]string{ + "source": sourceDir, + } + return spec, sources, workspaceDir + }, + validateFunc: func(t *testing.T, artifact *gotkmeta.Artifact, workspaceDir string) { + g := NewWithT(t) + g.Expect(artifact).ToNot(BeNil()) + + // Read the extracted config from staging directory + stagingDir := filepath.Join(workspaceDir, "extract-simple-archive") + configPath := filepath.Join(stagingDir, 
"config.yaml") + + configContent, err := os.ReadFile(configPath) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(configContent).ToNot(BeEmpty()) + + // Verify the extracted YAML contains expected content + g.Expect(configContent).To(MatchYAML(` +name: app +ports: [8080] +labels: + env: dev + keep: me +version: 1.0.1 +replicas: 2 +env: development +`)) + configPath = filepath.Join(stagingDir, "prod", "config.yaml") + configContent, err = os.ReadFile(configPath) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(configContent).ToNot(BeEmpty()) + + // Verify the extracted YAML contains expected content + g.Expect(configContent).To(MatchYAML(` +name: app +ports: [8081] +labels: + env: prod + keep: me +version: 1.0.0 +replicas: 5 +env: production +`)) + + // Verify files from second tarball in int/ subdirectory + // manifests2.tgz should extract to int/ preserving directory structure + intConfigPath := filepath.Join(stagingDir, "int", "config.yaml") + intConfigContent, err := os.ReadFile(intConfigPath) + g.Expect(err).ToNot(HaveOccurred(), "int/config.yaml should exist from manifests2.tgz") + g.Expect(intConfigContent).ToNot(BeEmpty()) + // Verify the extracted YAML contains expected content + g.Expect(intConfigContent).To(MatchYAML(` +name: app +ports: [8082] +labels: + env: int + keep: me +version: 1.0.0 +replicas: 3 +env: int +`)) + }, + expectedError: "", + }, + { + name: "Extract multiple referenced archives using recursive glob pattent matching", + setupFunc: func(t *testing.T) (*swapi.OutputArtifact, map[string]string, string) { + tmpDir := t.TempDir() + sourceDir := filepath.Join(tmpDir, "source") + tarballDir1 := filepath.Join(sourceDir, "prod") + tarballDir2 := filepath.Join(sourceDir, "int") + workspaceDir := filepath.Join(tmpDir, "workspace") + setupDirs(t, tarballDir1, tarballDir2, workspaceDir) + + tarballlPath := filepath.Join(tarballDir1, "manifests.tgz") + createTestTarball(tarballlPath) + + tarballlPath2 := filepath.Join(tarballDir2, "manifests.tgz") + 
createTestTarballForInt(tarballlPath2) + + spec := &swapi.OutputArtifact{ + Name: "extract-simple-archive", + Copy: []swapi.CopyOperation{ + { + From: "@source/**", + To: "@artifact/", + Strategy: swapi.ExtractStrategy, + }, + }, + } + sources := map[string]string{ + "source": sourceDir, + } + return spec, sources, workspaceDir + }, + validateFunc: func(t *testing.T, artifact *gotkmeta.Artifact, workspaceDir string) { + g := NewWithT(t) + g.Expect(artifact).ToNot(BeNil()) + + // Read the extracted config from staging directory + stagingDir := filepath.Join(workspaceDir, "extract-simple-archive") + configPath := filepath.Join(stagingDir, "config.yaml") + + configContent, err := os.ReadFile(configPath) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(configContent).ToNot(BeEmpty()) + + // Verify the extracted YAML contains expected content + g.Expect(configContent).To(MatchYAML(` +name: app +ports: [8080] +labels: + env: dev + keep: me +version: 1.0.1 +replicas: 2 +env: development +`)) + configPath = filepath.Join(stagingDir, "prod", "config.yaml") + configContent, err = os.ReadFile(configPath) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(configContent).ToNot(BeEmpty()) + + // Verify the extracted YAML contains expected content + g.Expect(configContent).To(MatchYAML(` +name: app +ports: [8081] +labels: + env: prod + keep: me +version: 1.0.0 +replicas: 5 +env: production +`)) + + // Verify files from second tarball in int/ subdirectory + // The int/manifests.tgz should extract to int/ preserving directory structure + intConfigPath := filepath.Join(stagingDir, "int", "config.yaml") + intConfigContent, err := os.ReadFile(intConfigPath) + g.Expect(err).ToNot(HaveOccurred(), "int/config.yaml should exist from manifests2.tgz") + g.Expect(intConfigContent).ToNot(BeEmpty()) + // Verify the extracted YAML contains expected content + g.Expect(intConfigContent).To(MatchYAML(` +name: app +ports: [8082] +labels: + env: int + keep: me +version: 1.0.0 +replicas: 3 +env: int +`)) + }, 
+ expectedError: "", + }, + { + name: "Extract with invalid tarball fails gracefully", + setupFunc: func(t *testing.T) (*swapi.OutputArtifact, map[string]string, string) { + tmpDir := t.TempDir() + sourceDir := filepath.Join(tmpDir, "source") + workspaceDir := filepath.Join(tmpDir, "workspace") + setupDirs(t, sourceDir, workspaceDir) + + // Create an invalid tarball (text file with .tgz extension) + createFile(t, sourceDir, "invalid.tgz", "this is not a tarball") + + spec := &swapi.OutputArtifact{ + Name: "extract-invalid-tarball", + Copy: []swapi.CopyOperation{ + { + From: "@source/invalid.tgz", + To: "@artifact/", + Strategy: swapi.ExtractStrategy, + }, + }, + } + sources := map[string]string{"source": sourceDir} + return spec, sources, workspaceDir + }, + validateFunc: nil, + expectedError: "failed to extract tarball", + }, + { + name: "Extract to subdirectory", + setupFunc: func(t *testing.T) (*swapi.OutputArtifact, map[string]string, string) { + tmpDir := t.TempDir() + sourceDir := filepath.Join(tmpDir, "source") + workspaceDir := filepath.Join(tmpDir, "workspace") + setupDirs(t, sourceDir, workspaceDir) + + createTestTarball(filepath.Join(sourceDir, "manifests.tgz")) + + spec := &swapi.OutputArtifact{ + Name: "extract-to-subdir", + Copy: []swapi.CopyOperation{ + { + From: "@source/manifests.tgz", + To: "@artifact/extracted/configs/", + Strategy: swapi.ExtractStrategy, + }, + }, + } + sources := map[string]string{"source": sourceDir} + return spec, sources, workspaceDir + }, + validateFunc: func(t *testing.T, artifact *gotkmeta.Artifact, workspaceDir string) { + g := NewWithT(t) + stagingDir := filepath.Join(workspaceDir, "extract-to-subdir") + + // Files should be in the nested directory + g.Expect(filepath.Join(stagingDir, "extracted", "configs", "config.yaml")).To(BeAnExistingFile()) + g.Expect(filepath.Join(stagingDir, "extracted", "configs", "prod", "config.yaml")).To(BeAnExistingFile()) + }, + expectedError: "", + }, + { + name: "Extract with .tar.gz 
extension", + setupFunc: func(t *testing.T) (*swapi.OutputArtifact, map[string]string, string) { + tmpDir := t.TempDir() + sourceDir := filepath.Join(tmpDir, "source") + workspaceDir := filepath.Join(tmpDir, "workspace") + setupDirs(t, sourceDir, workspaceDir) + + // Create tarball with .tar.gz extension instead of .tgz + createTestTarball(filepath.Join(sourceDir, "manifests.tar.gz")) + + spec := &swapi.OutputArtifact{ + Name: "extract-tar-gz", + Copy: []swapi.CopyOperation{ + { + From: "@source/manifests.tar.gz", + To: "@artifact/", + Strategy: swapi.ExtractStrategy, + }, + }, + } + sources := map[string]string{"source": sourceDir} + return spec, sources, workspaceDir + }, + validateFunc: func(t *testing.T, artifact *gotkmeta.Artifact, workspaceDir string) { + g := NewWithT(t) + stagingDir := filepath.Join(workspaceDir, "extract-tar-gz") + g.Expect(filepath.Join(stagingDir, "config.yaml")).To(BeAnExistingFile()) + g.Expect(filepath.Join(stagingDir, "prod", "config.yaml")).To(BeAnExistingFile()) + }, + expectedError: "", + }, + { + name: "Extract with exclude patterns", + setupFunc: func(t *testing.T) (*swapi.OutputArtifact, map[string]string, string) { + tmpDir := t.TempDir() + sourceDir := filepath.Join(tmpDir, "source") + workspaceDir := filepath.Join(tmpDir, "workspace") + setupDirs(t, sourceDir, workspaceDir) + + createTestTarball(filepath.Join(sourceDir, "app.tgz")) + createTestTarballForInt(filepath.Join(sourceDir, "int.tgz")) + + spec := &swapi.OutputArtifact{ + Name: "extract-with-exclude", + Copy: []swapi.CopyOperation{ + { + From: "@source/*.tgz", + To: "@artifact/", + Strategy: swapi.ExtractStrategy, + Exclude: []string{"int.tgz"}, + }, + }, + } + sources := map[string]string{"source": sourceDir} + return spec, sources, workspaceDir + }, + validateFunc: func(t *testing.T, artifact *gotkmeta.Artifact, workspaceDir string) { + g := NewWithT(t) + stagingDir := filepath.Join(workspaceDir, "extract-with-exclude") + + // app.tgz contents should exist + 
g.Expect(filepath.Join(stagingDir, "config.yaml")).To(BeAnExistingFile()) + + // int.tgz contents should NOT exist (was excluded) + g.Expect(filepath.Join(stagingDir, "int", "config.yaml")).ToNot(BeAnExistingFile()) + }, + expectedError: "", + }, + { + name: "Extract pattern matches no tarballs", + setupFunc: func(t *testing.T) (*swapi.OutputArtifact, map[string]string, string) { + tmpDir := t.TempDir() + sourceDir := filepath.Join(tmpDir, "source") + workspaceDir := filepath.Join(tmpDir, "workspace") + setupDirs(t, sourceDir, workspaceDir) + + // Create only non-tarball files + createFile(t, sourceDir, "config.yaml", "key: value") + + spec := &swapi.OutputArtifact{ + Name: "extract-no-tarballs", + Copy: []swapi.CopyOperation{ + { + From: "@source/*.tgz", + To: "@artifact/", + Strategy: swapi.ExtractStrategy, + }, + }, + } + sources := map[string]string{"source": sourceDir} + return spec, sources, workspaceDir + }, + validateFunc: nil, + expectedError: "no files match pattern", + }, + { + name: "Extract overwrites existing files from previous extract", + setupFunc: func(t *testing.T) (*swapi.OutputArtifact, map[string]string, string) { + tmpDir := t.TempDir() + sourceDir := filepath.Join(tmpDir, "source") + workspaceDir := filepath.Join(tmpDir, "workspace") + setupDirs(t, sourceDir, workspaceDir) + + // Create two tarballs with different content to prove overwrite works + createTestTarballWithContent(filepath.Join(sourceDir, "first.tgz"), ` +name: first-app +version: 1.0.0 +env: first +`) + createTestTarballWithContent(filepath.Join(sourceDir, "second.tgz"), ` +name: second-app +version: 2.0.0 +env: second +`) + + spec := &swapi.OutputArtifact{ + Name: "extract-overwrite", + Copy: []swapi.CopyOperation{ + { + From: "@source/first.tgz", + To: "@artifact/", + Strategy: swapi.ExtractStrategy, + }, + { + From: "@source/second.tgz", + To: "@artifact/", + Strategy: swapi.ExtractStrategy, + }, + }, + } + sources := map[string]string{"source": sourceDir} + return spec, 
sources, workspaceDir + }, + validateFunc: func(t *testing.T, artifact *gotkmeta.Artifact, workspaceDir string) { + g := NewWithT(t) + stagingDir := filepath.Join(workspaceDir, "extract-overwrite") + + // config.yaml should have content from second.tgz (overwrote first) + configContent, err := os.ReadFile(filepath.Join(stagingDir, "config.yaml")) + g.Expect(err).ToNot(HaveOccurred()) + // Should have second tarball's content, NOT first + g.Expect(string(configContent)).To(ContainSubstring("name: second-app")) + g.Expect(string(configContent)).To(ContainSubstring("version: 2.0.0")) + g.Expect(string(configContent)).To(ContainSubstring("env: second")) + // Should NOT have first tarball's content + g.Expect(string(configContent)).ToNot(ContainSubstring("name: first-app")) + g.Expect(string(configContent)).ToNot(ContainSubstring("env: first")) + }, + expectedError: "", + }, + { + name: "Extract skips non-tarball files in glob pattern", + setupFunc: func(t *testing.T) (*swapi.OutputArtifact, map[string]string, string) { + tmpDir := t.TempDir() + sourceDir := filepath.Join(tmpDir, "source") + workspaceDir := filepath.Join(tmpDir, "workspace") + setupDirs(t, sourceDir, workspaceDir) + + // Create a regular file and a tarball + createFile(t, sourceDir, "readme.txt", "This is a readme") + createTestTarball(filepath.Join(sourceDir, "archive.tgz")) + + spec := &swapi.OutputArtifact{ + Name: "extract-skip-non-tarball", + Copy: []swapi.CopyOperation{ + { + From: "@source/*", + To: "@artifact/", + Strategy: swapi.ExtractStrategy, + }, + }, + } + sources := map[string]string{"source": sourceDir} + return spec, sources, workspaceDir + }, + validateFunc: func(t *testing.T, artifact *gotkmeta.Artifact, workspaceDir string) { + g := NewWithT(t) + stagingDir := filepath.Join(workspaceDir, "extract-skip-non-tarball") + + // Tarball contents should be extracted + g.Expect(filepath.Join(stagingDir, "config.yaml")).To(BeAnExistingFile()) + g.Expect(filepath.Join(stagingDir, "prod", 
"config.yaml")).To(BeAnExistingFile()) + + // readme.txt should NOT exist (non-tarball files are skipped with Extract strategy) + g.Expect(filepath.Join(stagingDir, "readme.txt")).ToNot(BeAnExistingFile()) + }, + expectedError: "", + }, + { + name: "Extract strategy fails for directory source", + setupFunc: func(t *testing.T) (*swapi.OutputArtifact, map[string]string, string) { + tmpDir := t.TempDir() + sourceDir := filepath.Join(tmpDir, "source") + workspaceDir := filepath.Join(tmpDir, "workspace") + setupDirs(t, sourceDir, workspaceDir) + + // Create a directory (not a tarball) + createDir(t, sourceDir, "manifests") + createFile(t, sourceDir, "manifests/config.yaml", "key: value") + + spec := &swapi.OutputArtifact{ + Name: "extract-directory-error", + Copy: []swapi.CopyOperation{ + { + From: "@source/manifests", + To: "@artifact/", + Strategy: swapi.ExtractStrategy, + }, + }, + } + sources := map[string]string{"source": sourceDir} + return spec, sources, workspaceDir + }, + validateFunc: nil, + expectedError: "extract strategy is not supported for directories", + }, + { + name: "Extract single file requires tarball extension", + setupFunc: func(t *testing.T) (*swapi.OutputArtifact, map[string]string, string) { + tmpDir := t.TempDir() + sourceDir := filepath.Join(tmpDir, "source") + workspaceDir := filepath.Join(tmpDir, "workspace") + setupDirs(t, sourceDir, workspaceDir) + + // Create a non-tarball file + createFile(t, sourceDir, "config.yaml", "key: value") + + spec := &swapi.OutputArtifact{ + Name: "extract-non-tarball-error", + Copy: []swapi.CopyOperation{ + { + From: "@source/config.yaml", + To: "@artifact/", + Strategy: swapi.ExtractStrategy, + }, + }, + } + sources := map[string]string{"source": sourceDir} + return spec, sources, workspaceDir + }, + validateFunc: nil, + expectedError: "extract strategy requires tarball file", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + spec, sources, workspaceDir := 
// testTarballEntry describes a single member of an archive produced by
// writeTestTarball.
type testTarballEntry struct {
	// name is the path stored in the tar header; directory names end in "/".
	name string
	// content is the file payload. It is ignored for directory entries.
	content string
	// isDir marks the entry as a directory (mode 0o755, no payload).
	isDir bool
}

// closeTestResource closes c and stores the close error in *err unless an
// earlier error has already been recorded there.
func closeTestResource(c interface{ Close() error }, err *error) {
	if cerr := c.Close(); cerr != nil && *err == nil {
		*err = cerr
	}
}

// writeTestTarball creates a gzip-compressed tar archive at path containing
// the given entries, in order. Regular files are written with mode 0o644 and
// directories with mode 0o755.
//
// It returns the first error encountered while creating the file, writing an
// entry, or closing any of the writers. Close errors matter here: the tar
// trailer and the gzip footer are only flushed on Close, so ignoring them
// could leave a truncated archive behind while reporting success.
func writeTestTarball(path string, entries []testTarballEntry) (err error) {
	file, err := os.Create(path)
	if err != nil {
		return err
	}
	// Deferred closes run in reverse order: tar trailer first, then the gzip
	// footer, then the underlying file.
	defer closeTestResource(file, &err)

	gzWriter := gzip.NewWriter(file)
	defer closeTestResource(gzWriter, &err)

	tarWriter := tar.NewWriter(gzWriter)
	defer closeTestResource(tarWriter, &err)

	for _, entry := range entries {
		header := &tar.Header{
			Name: entry.name,
			Mode: 0o644,
			Size: int64(len(entry.content)),
		}
		if entry.isDir {
			header = &tar.Header{
				Name:     entry.name,
				Mode:     0o755,
				Typeflag: tar.TypeDir,
			}
		}
		if err := tarWriter.WriteHeader(header); err != nil {
			return err
		}
		if entry.isDir {
			continue
		}
		if _, err := tarWriter.Write([]byte(entry.content)); err != nil {
			return err
		}
	}

	return nil
}

// createTestTarball writes a sample .tgz to path containing config.yaml,
// the prod/ directory and prod/config.yaml.
func createTestTarball(path string) error {
	return writeTestTarball(path, []testTarballEntry{
		{
			name: "config.yaml",
			content: `
name: app
ports: [8080]
labels:
  env: dev
  keep: me
version: 1.0.1
replicas: 2
env: development
`,
		},
		{name: "prod/", isDir: true},
		{
			name: "prod/config.yaml",
			content: `
name: app
ports: [8081]
labels:
  env: prod
  keep: me
version: 1.0.0
replicas: 5
env: production
`,
		},
	})
}

// createTestTarballForInt writes a sample .tgz to path containing the int/
// directory and int/config.yaml.
func createTestTarballForInt(path string) error {
	return writeTestTarball(path, []testTarballEntry{
		{name: "int/", isDir: true},
		{
			name: "int/config.yaml",
			content: `
name: app
ports: [8082]
labels:
  env: int
  keep: me
version: 1.0.0
replicas: 3
env: int
`,
		},
	})
}

// createTestTarballWithContent writes a .tgz to path whose only member is
// config.yaml holding configContent verbatim.
func createTestTarballWithContent(path string, configContent string) error {
	return writeTestTarball(path, []testTarballEntry{
		{name: "config.yaml", content: configContent},
	})
}