From e8ffc2ecdd92b29f1278982ca9b4729cf94fdb93 Mon Sep 17 00:00:00 2001 From: Norman Huth Date: Tue, 23 Dec 2025 20:13:06 +0100 Subject: [PATCH] feat: add command to merge duplicate ingredients by name_slug and country_id - Implements `data-maintenance:merge-duplicate-ingredients` command - Supports `--dry-run` to display changes without applying them - Handles duplicate group merging, pivot entries, and cleanup operations --- .../MergeDuplicateIngredientsCommand.php | 258 ++++++++++++++++++ .../Support/Markdown/FluxRendererTest.php | 100 +++---- 2 files changed, 308 insertions(+), 50 deletions(-) create mode 100644 app/Console/Commands/DataMaintenance/MergeDuplicateIngredientsCommand.php diff --git a/app/Console/Commands/DataMaintenance/MergeDuplicateIngredientsCommand.php b/app/Console/Commands/DataMaintenance/MergeDuplicateIngredientsCommand.php new file mode 100644 index 00000000..24d33b93 --- /dev/null +++ b/app/Console/Commands/DataMaintenance/MergeDuplicateIngredientsCommand.php @@ -0,0 +1,258 @@ +option('dry-run'); + + if ($dryRun) { + $this->components->warn('DRY RUN - No changes will be made'); + } + + $duplicates = $this->findDuplicates(); + + if ($duplicates === []) { + $this->components->info('No duplicate ingredients found.'); + + return self::SUCCESS; + } + + $this->components->info(sprintf('Found %d duplicate groups to merge.', count($duplicates))); + + $totalMerged = 0; + $totalDeleted = 0; + $totalPivotsMoved = 0; + $totalDuplicatePivotsRemoved = 0; + + foreach ($duplicates as $duplicate) { + $result = $this->mergeDuplicateGroup($duplicate, $dryRun); + $totalMerged++; + $totalDeleted += $result['deleted']; + $totalPivotsMoved += $result['pivots_moved']; + $totalDuplicatePivotsRemoved += $result['duplicate_pivots_removed']; + } + + $this->newLine(); + $this->components->info('Summary:'); + $this->table( + ['Metric', 'Count'], + [ + ['Duplicate groups merged', $totalMerged], + ['Ingredients deleted', $totalDeleted], + ['Pivot entries moved', $totalPivotsMoved], + ['Duplicate pivots removed', $totalDuplicatePivotsRemoved], + ] + ); + + if ($dryRun) { + $this->newLine(); + $this->components->warn('DRY RUN - No changes were made. Run without --dry-run to apply.'); + } + + return self::SUCCESS; + } + + /** + * Find all duplicate ingredient groups. + * + * @return array, count: int}> + */ + protected function findDuplicates(): array + { + return DB::table('ingredients') + ->select([ + 'name_slug', + 'country_id', + DB::raw('ARRAY_AGG(id ORDER BY id) as ids'), + DB::raw('COUNT(*) as count'), + ]) + ->groupBy(['name_slug', 'country_id']) + ->havingRaw('COUNT(*) > 1') + ->orderByDesc('count') + ->get() + ->map(function (object $row): array { + // Parse PostgreSQL array format {1,2,3} to PHP array + $ids = trim((string) $row->ids, '{}'); + + return [ + 'name_slug' => (string) $row->name_slug, + 'country_id' => (int) $row->country_id, + 'ids' => array_map(intval(...), explode(',', $ids)), + 'count' => (int) $row->count, + ]; + }) + ->all(); + } + + /** + * Merge a single duplicate group. + * + * @param array{name_slug: string, country_id: int, ids: non-empty-list, count: int} $duplicate + * @return array{deleted: int, pivots_moved: int, duplicate_pivots_removed: int} + */ + protected function mergeDuplicateGroup(array $duplicate, bool $dryRun): array + { + $keepId = $duplicate['ids'][0]; // Keep the lowest ID + $deleteIds = array_slice($duplicate['ids'], 1); + + $this->components->twoColumnDetail( + sprintf('%s (country: %d)', $duplicate['name_slug'], $duplicate['country_id']), + sprintf('Keep ID %d, delete ', $keepId) . implode(', ', $deleteIds) + ); + + $result = [ + 'deleted' => count($deleteIds), + 'pivots_moved' => 0, + 'duplicate_pivots_removed' => 0, + ]; + + if ($dryRun) { + // Count what would be moved + $result['pivots_moved'] = DB::table('ingredient_recipe') + ->whereIn('ingredient_id', $deleteIds) + ->count(); + + return $result; + } + + DB::transaction(function () use ($keepId, $deleteIds, &$result): void { + // 1. Merge hellofresh_ids from all duplicates into the keeper + $this->mergeHelloFreshIds($keepId, $deleteIds); + + // 2. Move pivot entries, handling duplicates + $result['pivots_moved'] = $this->movePivotEntries($keepId, $deleteIds); + + // 3. Remove duplicate pivot entries (same ingredient_id + recipe_id) + $result['duplicate_pivots_removed'] = $this->removeDuplicatePivots($keepId); + + // 4. Delete the duplicate ingredients + DB::table('ingredients')->whereIn('id', $deleteIds)->delete(); + }); + + return $result; + } + + /** + * Merge hellofresh_ids from duplicates into the keeper. + * + * @param list $deleteIds + */ + protected function mergeHelloFreshIds(int $keepId, array $deleteIds): void + { + $allIds = array_merge([$keepId], $deleteIds); + + // Get all hellofresh_ids from all duplicates + $allHelloFreshIds = DB::table('ingredients') + ->whereIn('id', $allIds) + ->pluck('hellofresh_ids') + ->flatMap(function (?string $ids): array { + if ($ids === null) { + return []; + } + + $decoded = json_decode($ids, true); + + return is_array($decoded) ? $decoded : []; + }) + ->unique() + ->values() + ->all(); + + // Update the keeper with merged IDs + DB::table('ingredients') + ->where('id', $keepId) + ->update(['hellofresh_ids' => json_encode($allHelloFreshIds)]); + } + + /** + * Move pivot entries from duplicates to the keeper. + * + * @param list $deleteIds + */ + protected function movePivotEntries(int $keepId, array $deleteIds): int + { + // Get existing recipe_ids for the keeper to avoid duplicates + $existingRecipeIds = DB::table('ingredient_recipe') + ->where('ingredient_id', $keepId) + ->pluck('recipe_id') + ->all(); + + // Get unique recipe_ids from duplicates that don't exist in keeper + $newRecipeIds = DB::table('ingredient_recipe') + ->whereIn('ingredient_id', $deleteIds) + ->whereNotIn('recipe_id', $existingRecipeIds) + ->distinct() + ->pluck('recipe_id') + ->all(); + + // Delete ALL pivot entries from duplicates first + DB::table('ingredient_recipe') + ->whereIn('ingredient_id', $deleteIds) + ->delete(); + + // Insert new unique entries for the keeper + $insertData = array_map(fn (int $recipeId): array => [ + 'ingredient_id' => $keepId, + 'recipe_id' => $recipeId, + ], $newRecipeIds); + + if ($insertData !== []) { + DB::table('ingredient_recipe')->insert($insertData); + } + + return count($newRecipeIds); + } + + /** + * Remove duplicate pivot entries for the keeper. + */ + protected function removeDuplicatePivots(int $keepId): int + { + // Find duplicate recipe_ids for this ingredient + $duplicatePivots = DB::table('ingredient_recipe') + ->select('recipe_id', DB::raw('COUNT(*) as count'), DB::raw('MIN(id) as keep_pivot_id')) + ->where('ingredient_id', $keepId) + ->groupBy('recipe_id') + ->havingRaw('COUNT(*) > 1') + ->get(); + + $removed = 0; + foreach ($duplicatePivots as $duplicatePivot) { + $removed += DB::table('ingredient_recipe') + ->where('ingredient_id', $keepId) + ->where('recipe_id', $duplicatePivot->recipe_id) + ->where('id', '!=', $duplicatePivot->keep_pivot_id) + ->delete(); + } + + return $removed; + } +} diff --git a/tests/Unit/Support/Markdown/FluxRendererTest.php b/tests/Unit/Support/Markdown/FluxRendererTest.php index dbed8ca5..6d8561c1 100644 --- a/tests/Unit/Support/Markdown/FluxRendererTest.php +++ b/tests/Unit/Support/Markdown/FluxRendererTest.php @@ -45,7 +45,7 @@ public function it_renders_heading_level_1(): void $document = $this->parse('# Heading 1'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('Heading 1', (string) $output); + $this->assertStringContainsString('Heading 1', $output); } #[Test] @@ -54,7 +54,7 @@ public function it_renders_heading_level_2(): void $document = $this->parse('## Heading 2'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('Heading 2', (string) $output); + $this->assertStringContainsString('Heading 2', $output); } #[Test] @@ -63,7 +63,7 @@ public function it_renders_heading_level_3_and_above_with_base_size(): void $document = $this->parse('### Heading 3'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('Heading 3', (string) $output); + $this->assertStringContainsString('Heading 3', $output); } #[Test] @@ -72,7 +72,7 @@ public function it_renders_heading_level_4_with_base_size(): void $document = $this->parse('#### Heading 4'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('Heading 4', (string) $output); + $this->assertStringContainsString('Heading 4', $output); } #[Test] @@ -81,7 +81,7 @@ public function it_renders_paragraph(): void $document = $this->parse('This is a paragraph.'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('This is a paragraph.', (string) $output); + $this->assertStringContainsString('This is a paragraph.', $output); } #[Test] @@ -90,7 +90,7 @@ public function it_renders_strong_text(): void $document = $this->parse('This is **bold** text.'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('bold', (string) $output); + $this->assertStringContainsString('bold', $output); } #[Test] @@ -99,7 +99,7 @@ public function it_renders_emphasis_text(): void $document = $this->parse('This is *italic* text.'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('italic', (string) $output); + $this->assertStringContainsString('italic', $output); } #[Test] @@ -110,8 +110,8 @@ public function it_renders_internal_link(): void $document = $this->parse('[Link](/path/to/page)'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('Link', (string) $output); - $this->assertStringNotContainsString('external', (string) $output); + $this->assertStringContainsString('Link', $output); + $this->assertStringNotContainsString('external', $output); } #[Test] @@ -122,7 +122,7 @@ public function it_renders_external_link_with_external_attribute(): void $document = $this->parse('[External](https://other.com/page)'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('External', (string) $output); + $this->assertStringContainsString('External', $output); } #[Test] @@ -131,8 +131,8 @@ public function it_renders_anchor_link_as_internal(): void $document = $this->parse('[Anchor](#section)'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('Anchor', (string) $output); - $this->assertStringNotContainsString('external', (string) $output); + $this->assertStringContainsString('Anchor', $output); + $this->assertStringNotContainsString('external', $output); } #[Test] @@ -141,8 +141,8 @@ public function it_renders_mailto_link_as_internal(): void $document = $this->parse('[Email](mailto:test@example.com)'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('Email', (string) $output); - $this->assertStringNotContainsString('external', (string) $output); + $this->assertStringContainsString('Email', $output); + $this->assertStringNotContainsString('external', $output); } #[Test] @@ -151,9 +151,9 @@ public function it_renders_unordered_list(): void $document = $this->parse("- Item 1\n- Item 2"); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('
    ', (string) $output); - $this->assertStringContainsString('
  • Item 1
  • ', (string) $output); - $this->assertStringContainsString('
  • Item 2
  • ', (string) $output); + $this->assertStringContainsString('
      ', $output); + $this->assertStringContainsString('
    • Item 1
    • ', $output); + $this->assertStringContainsString('
    • Item 2
    • ', $output); } #[Test] @@ -162,8 +162,8 @@ public function it_renders_ordered_list(): void $document = $this->parse("1. First\n2. Second"); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('
        ', (string) $output); - $this->assertStringContainsString('
      1. First
      2. ', (string) $output); + $this->assertStringContainsString('
          ', $output); + $this->assertStringContainsString('
        1. First
        2. ', $output); } #[Test] @@ -172,7 +172,7 @@ public function it_renders_thematic_break(): void $document = $this->parse("Above\n\n---\n\nBelow"); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('', (string) $output); + $this->assertStringContainsString('', $output); } #[Test] @@ -181,7 +181,7 @@ public function it_renders_inline_code(): void $document = $this->parse('Use `inline code` here.'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('inline code', (string) $output); + $this->assertStringContainsString('inline code', $output); } #[Test] @@ -190,8 +190,8 @@ public function it_renders_fenced_code_block(): void $document = $this->parse("```php\necho 'Hello';\n```"); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('
          ', (string) $output);
          -        $this->assertStringContainsString('echo 'Hello';', (string) $output);
          +        $this->assertStringContainsString('
          ', $output);
          +        $this->assertStringContainsString('echo 'Hello';', $output);
               }
           
               #[Test]
          @@ -200,8 +200,8 @@ public function it_renders_indented_code_block(): void
                   $document = $this->parse('    indented code');
                   $output = $this->fluxRenderer->render($document);
           
          -        $this->assertStringContainsString('
          ', (string) $output);
          -        $this->assertStringContainsString('indented code', (string) $output);
          +        $this->assertStringContainsString('
          ', $output);
          +        $this->assertStringContainsString('indented code', $output);
               }
           
               #[Test]
          @@ -210,9 +210,9 @@ public function it_renders_blockquote(): void
                   $document = $this->parse('> This is a quote');
                   $output = $this->fluxRenderer->render($document);
           
          -        $this->assertStringContainsString('', (string) $output);
          -        $this->assertStringContainsString('This is a quote', (string) $output);
          -        $this->assertStringContainsString('', (string) $output);
          +        $this->assertStringContainsString('', $output);
          +        $this->assertStringContainsString('This is a quote', $output);
          +        $this->assertStringContainsString('', $output);
               }
           
               #[Test]
          @@ -222,11 +222,11 @@ public function it_renders_table(): void
                   $document = $this->parse($markdown);
                   $output = $this->fluxRenderer->render($document);
           
          -        $this->assertStringContainsString('', (string) $output);
          -        $this->assertStringContainsString('', (string) $output);
          -        $this->assertStringContainsString('Header 1', (string) $output);
          -        $this->assertStringContainsString('', (string) $output);
          -        $this->assertStringContainsString('Cell 1', (string) $output);
          +        $this->assertStringContainsString('', $output);
          +        $this->assertStringContainsString('', $output);
          +        $this->assertStringContainsString('Header 1', $output);
          +        $this->assertStringContainsString('', $output);
          +        $this->assertStringContainsString('Cell 1', $output);
               }
           
               #[Test]
          @@ -236,7 +236,7 @@ public function it_renders_table_header_cells(): void
                   $document = $this->parse($markdown);
                   $output = $this->fluxRenderer->render($document);
           
          -        $this->assertStringContainsString('Header', (string) $output);
          +        $this->assertStringContainsString('Header', $output);
               }
           
               #[Test]
          @@ -246,7 +246,7 @@ public function it_renders_table_body_cells(): void
                   $document = $this->parse($markdown);
                   $output = $this->fluxRenderer->render($document);
           
          -        $this->assertStringContainsString('Cell', (string) $output);
          +        $this->assertStringContainsString('Cell', $output);
               }
           
               #[Test]
          @@ -255,7 +255,7 @@ public function it_renders_hard_break(): void
                   $document = $this->parse("Line 1  \nLine 2");
                   $output = $this->fluxRenderer->render($document);
           
          -        $this->assertStringContainsString('
          ', (string) $output); + $this->assertStringContainsString('
          ', $output); } #[Test] @@ -264,7 +264,7 @@ public function it_renders_soft_break_as_space(): void $document = $this->parse("Line 1\nLine 2"); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('Line 1 Line 2', (string) $output); + $this->assertStringContainsString('Line 1 Line 2', $output); } #[Test] @@ -273,7 +273,7 @@ public function it_renders_html_block(): void $document = $this->parse("
          Custom HTML
          \n\n"); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('
          Custom HTML
          ', (string) $output); + $this->assertStringContainsString('
          Custom HTML
          ', $output); } #[Test] @@ -282,7 +282,7 @@ public function it_renders_inline_html(): void $document = $this->parse('Text with inline HTML here.'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('inline HTML', (string) $output); + $this->assertStringContainsString('inline HTML', $output); } #[Test] @@ -291,9 +291,9 @@ public function it_escapes_text_content(): void $document = $this->parse('Text with special chars: & < >'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('&', (string) $output); - $this->assertStringContainsString('<', (string) $output); - $this->assertStringContainsString('>', (string) $output); + $this->assertStringContainsString('&', $output); + $this->assertStringContainsString('<', $output); + $this->assertStringContainsString('>', $output); } #[Test] @@ -302,7 +302,7 @@ public function it_escapes_link_url(): void $document = $this->parse('[Link](https://example.com/path?foo=bar&baz=qux)'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('href="https://example.com/path?foo=bar&baz=qux"', (string) $output); + $this->assertStringContainsString('href="https://example.com/path?foo=bar&baz=qux"', $output); } #[Test] @@ -311,7 +311,7 @@ public function it_escapes_code_block_content(): void $document = $this->parse("```\n\n```"); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('<script>', (string) $output); + $this->assertStringContainsString('<script>', $output); } #[Test] @@ -329,8 +329,8 @@ public function it_handles_nested_formatting(): void $document = $this->parse('This is ***bold and italic*** text.'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('', (string) $output); - $this->assertStringContainsString('', (string) $output); + $this->assertStringContainsString('', $output); + $this->assertStringContainsString('', $output); } #[Test] @@ -342,7 +342,7 @@ public function it_treats_relative_url_as_internal(): void $output = $this->fluxRenderer->render($document); // Relative URLs without leading slash are treated as external by parse_url - $this->assertStringContainsString('href="relative/path"', (string) $output); + $this->assertStringContainsString('href="relative/path"', $output); } #[Test] @@ -353,7 +353,7 @@ public function it_treats_same_domain_link_as_internal(): void $document = $this->parse('[Link](https://example.com/page)'); $output = $this->fluxRenderer->render($document); - $this->assertStringNotContainsString('external', (string) $output); + $this->assertStringNotContainsString('external', $output); } #[Test] @@ -362,7 +362,7 @@ public function it_handles_list_item_with_nested_content(): void $document = $this->parse('- Item with **bold** and *italic*'); $output = $this->fluxRenderer->render($document); - $this->assertStringContainsString('
        3. Item with bold and italic
        4. ', (string) $output); + $this->assertStringContainsString('
        5. Item with bold and italic
        6. ', $output); } #[Test] @@ -372,6 +372,6 @@ public function it_handles_url_with_no_host(): void $output = $this->fluxRenderer->render($document); // URLs without a host should not be marked external - $this->assertStringNotContainsString(' external', (string) $output); + $this->assertStringNotContainsString(' external', $output); } }