From 6d13113cbcd17b4c3f29e35c54e30bc3bc31ae52 Mon Sep 17 00:00:00 2001 From: Jonavelle Cuerdo Date: Wed, 17 Dec 2025 14:39:52 -0500 Subject: [PATCH 1/4] Use metadata in 'bulk-update-embeddings' Why these changes are being introduced: * With TDA 3.8.0, we can now retrieve record metadata columns in embeddings read methods. Filtering embeddings by `action="index"` prevents any attempt to update documents that do not exist in OpenSearch (`action="delete"`), which would result in an API error. This is especially important given the current state of tim.opensearch.bulk_update, which will raise a BulkOperationError and cause the 'bulk_update_embeddings' CLI command to exit early. This also includes an additional change to index embeddings when performing a reindex. How this addresses that need: * Filter embeddings by action="index" * Install latest version of timdex-dataset-api (latest commit) * Update embeddings in tests/fixtures/dataset to use 'embeddings_timestamp' Side effects of this change: * None Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/USE-273 --- Pipfile.lock | 4 +-- ...ef5-5629-4bc7-b743-7804a34f9593-0.parquet} | Bin 23387 -> 23415 bytes ...0473-7274-4ccd-bb90-f3c9bcc1801d-0.parquet | Bin 0 -> 7317 bytes tim/cli.py | 28 ++++++++++++++---- 4 files changed, 25 insertions(+), 7 deletions(-) rename tests/fixtures/dataset/data/embeddings/year=2025/month=12/{day=10/629d15f4-84e4-4b32-92c1-1b1debd377fb-0.parquet => day=17/4a40cef5-5629-4bc7-b743-7804a34f9593-0.parquet} (94%) create mode 100644 tests/fixtures/dataset/data/embeddings/year=2025/month=12/day=17/8a790473-7274-4ccd-bb90-f3c9bcc1801d-0.parquet diff --git a/Pipfile.lock b/Pipfile.lock index c0e2065..b1b89c9 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -735,7 +735,7 @@ }, "timdex-dataset-api": { "git": "https://github.com/MITLibraries/timdex-dataset-api.git", - "ref": "184d87fd0647cae9e1db8488f2a3d88de96a7226" + "ref": "a1d8ad7662d8d864d1fb2b52376312c7c92e7398" }, "typing-extensions": 
{ "hashes": [ @@ -2107,4 +2107,4 @@ "version": "==2.0.1" } } -} +} \ No newline at end of file diff --git a/tests/fixtures/dataset/data/embeddings/year=2025/month=12/day=10/629d15f4-84e4-4b32-92c1-1b1debd377fb-0.parquet b/tests/fixtures/dataset/data/embeddings/year=2025/month=12/day=17/4a40cef5-5629-4bc7-b743-7804a34f9593-0.parquet similarity index 94% rename from tests/fixtures/dataset/data/embeddings/year=2025/month=12/day=10/629d15f4-84e4-4b32-92c1-1b1debd377fb-0.parquet rename to tests/fixtures/dataset/data/embeddings/year=2025/month=12/day=17/4a40cef5-5629-4bc7-b743-7804a34f9593-0.parquet index 90b2f2a101f18a4d48b75e924c6ad358171afc2e..81e5d798970611592790c28ac2ab57a0793262d8 100644 GIT binary patch delta 681 zcma)2U2D@|6iz}((}^JH8-@tpjfsOSow9a}jguwqI-NFIz7?cNyR1!9hpQI7aNeqy zFL*8U<5r3%dDrhRu zg0C+<1{2^37yxxs0{{o}TP%P(31S5S8({S4(BCNPy_qn>8x|eI2YF+iXd-fvZqq>i{+22qBll0Ki5FJ+&KpR@(Az5BwGDF!J0N?Rsq( z12h0epe5+@=P-6-I={dm)&KgYymK}1hf$@n%W+EJ(wM!S$RD`Jaa%D^LXGs^~?Q~DOHfqe$#oF3ns9Q z7cevbO&K*tVBGwoJj8U;*OptJnuh+tew?|zXk{mz2qPhv$oZ5!*pOn0ycqLBvfdD5R}Qt!T@oNuz<5rnWIjC$$=}sS1A7X6fqwE-pH| zDG2@xe}VtNS*@e*y|Ey;c*EUuch5a{+~ApiedZq@cz^sod2we5T>V25M_QkfxOO&( zE6uMs%A7nYPc6t}@`NnN!n7g|JBm1*p5&5$s!vj4$L6J{i>+jWm&ko%dhVZtZ;jc6 zbR(!}semZ1J|2J#a0&)MsY?#snM6q^*KU5)(C1Zk-XJfoKDuAcs%kbOidWzx;(7%k zqRo|n+D>O5T^hU~HqYPpSeGReJYi%zd>~)-@-9@YIW^x_>xDvJ)BS1B}U=P)Q z%m@%;~IvvMz%RX~^2J(z{{q)Ti(g`B(H)OwR+Y3*|a8=?TLrO{-xAPSE}O m0PC|4n9h>BXoX%T*YiuUFr_kyn7_VG(#^bRiX2ykTKXH=qo_mx diff --git a/tests/fixtures/dataset/data/embeddings/year=2025/month=12/day=17/8a790473-7274-4ccd-bb90-f3c9bcc1801d-0.parquet b/tests/fixtures/dataset/data/embeddings/year=2025/month=12/day=17/8a790473-7274-4ccd-bb90-f3c9bcc1801d-0.parquet new file mode 100644 index 0000000000000000000000000000000000000000..af9e013b4f6598fc2d8cd065fc59907bf8fcb125 GIT binary patch literal 7317 zcmb_h3v?XSd7cp?Gi%L`XBX|Vve#zUvbGXv?R#hLorjTwN4EUJCbq${tPq^h?nqjD zAM!qAo0NxoYFZLpLPA40Asp0$PiZL-V#tA%v^d2r=0KsG6H0U11SL2va3F0%=qqXZ 
z&C2ovo0QXZZST(9$N&Dn|G(E;-R(3c#mv~s&9Ro45EX<)H<>AF=b9lNr8~Uhb2fz z!RYLFT%{!l>JU5d#{RGOVpU;e5fWvKK?8Z43Ylg0Z zXq5@ST=TtZZah0tt?co0^%aXRxwY&!s$MZ$oA9bNKkJusm3*N*md#gk*-0ZiR`)-w zC%k^u1xTGW^uLl!{JI1#pzo5{(m8b(*lkdEv{5YXuKKx3HGcsW+RmCzoc(eE(n73d zGe`va5%_3fY)S8#90(#gOq%g5%zg(k{rq6;d;N3rJb1+y9*7CTXP4%^y1#T~iY&Jc zn-In*(M{8~OuaX?bi~WIE?R3*+c0fP4Z?_{GuoSKwS-#h;?I~CA)HaevWRJM)5MNh z-9k&YQf?T?3~o_k5^7_rUMPhYfr4Qh24f}%A6s;ULWtW~Wn-1YbYkj;C2}Rm@z6sy z=gh*m&J3LqgNs&ZB6O$C9Mhqc5tCDk=yt3xvSb5u49m13rLJ?^0lUu5@Gim}X6d@k z3@oIr&nuH&u@P$Yfju(}2Sl*TqJ}srcXl3VeI65r<=8fZA-JR4j+jgjhF)9^9Zj2n zvPp>ry^Pjc$IC+?U||^y!yU`Ez;l1KQXXp^-fEZ@^oNcXv!JccG3WKHf*EyYJDkIm z97YpQNX0;XEV3L}#&QTT93tMC%y~t>ocF4c^ajIrumGl~mdQY8VtgXJD8&t%a^0pz zdO%#1fDgS9uhw%hv@#si00YB<&AN^%m?}gbvvuYW9fH^<3~h4pB_SIcVmft*rE?uh z>9*C?DcANDLrI%5({eb0mx#?xrzf3uBbN~dZk7c%!9`rWJ2B~13*JaEoQAbbKp@P| ziS1DLYr^i(*fd z9bk%(5tj(2xGGWh#~}V>XwudZ1B6*{FI)!`rIbq5%Z2Yw!AS;XjM?rRf;sSD0@z>m zss}^A2}47A3N8pW%35!bMhme%0yh#kKqm|#EqvNBWZBidf;cS8aVw+h3{H z>WCj5al=3ZTy>iMFS%B)(g&Ds!(uQbXYer__@Vm?p%%Up4$?W6o2KJ%UEHs9b}|#z zA&(#VI;JuT{j#XyPi@k-7>{%h0aKa3_9-3noc=4 zMW&5bswFR*WKaR2iIk%VYT|EcFO=*4n4kTr80)sVZV=mo;kY;?)x1IiDw~kPCde9O zrJf!V>N}=v>X;_72#IPk-Jt$+sqsRsUa1yx-o`iks0~wq5a5Iu7I&t;+@7nHC;e*Z zz7)kCotlmgU)$N6g#6)tad9`Xkdm;j#mJEbQm%6N$v%pVv`o-3OoL&UIP4wmir5%7 zDF*?l!A*XsO{?Xqg^BRwGNhbka|leluLx}Vr?;IuD4AcISiPxbi+0sL;U?C%jIIByv5WghxAjw-43*1wmz^UQT4&XpDcW>j{-a? 
zF?0)>Aw+d?tr9dfJ+W?B#xRjrP&nP!jjS)A237&C*-I5ph?D+_K*d#pGw`0&*}3YU z#ndt#G?0S`$$=-e&d!NKHGC68Ks%fvWdk78TaQV(0!?U)f&6rvMofn=@g}#2zO4kn z;f`rKacsS7GSS(Y^C#+s!%u1ui#Xyh)B9!am|gEBE4fC=FV{UiBea`<4@SI{M0t9o zJ@1ctjbeBP5N5$(;A()w#k+p-*_Z2AW$-U$YEm<}llxWMxC zu0*9#FIFmhd#)!yT4I41kc_kAUj{}OB7HK|t*P}g@*(}E)SD`0Zhu{-26Z1!AwNJF zt`|R*tooyV6^%o7mkn891cV=Q2ly>6mGUE@19$=2=!QX2aIE8NB){xNZXc7VKK;8% zhTt+BY9H#5Q85;lt`jHz63o3z$g@AGry?DNytNKg2=BE&i3}|g}*OnoU_%mU0;ZQ(E zN4T3d#B^-aawp+a{m)tupcYVt0Z%PJaO-OdQoFY8`$<^bwh`FTTV%<$O0UZZ-pA*= zLN@_O0{KzWN%YS{%D7h>@A`s<%EyShMowTK@|mXNnm6I)#y5VV9qj_)j=FQcW`d;U z$EJn^Z`L8)V&RX2Da5=9|2k-ls40F)u8i!t>xG$nl294(W7x;868ElU3jjmai$mg5 zQlWhC`d+L?c|om1@8A~2DXmub>W#>)%K(4Kd1@maQfWiWdF65?8=qPsIPM=vI#ZvP z#ZPFtO0iMO?o&*MIclS=7r8=A(+YZ-JsaB;G(CLg380;hnx>0GL6@}ghXPR_S7e-p z)Rz^>X18a7LD{W>nSQ#R={kXUskU?(au@x2=~XcW=+kw^XX+9mVhuF%ZGz$^K6 zzZ!NCA?PYm3s56%^2jd~KUef#I@pJC#6Hpj32GXqBYs<$h;LNnAfkRtNda9@Kv0ss zq&!~P-;Q7vRFhi#dF?FN`QbTN< zydv_1dW#I>bc8kZ9mL3fE=vhV`w3)*^SMo?xX`QK85Z`1@98E;K8ipxxg+);*?mIW zaleM1i{DeYSqg=49tgH(JR0r2;KO1WF(Bg@5Pz-(=&($?}LGxq{hV3 zSCsVMcLTjp&=Imo;JZT_@OYxJ<$WMMnpVI%z|ORzdlzUE0NKKpXA*EPsux!?GJseg z$OSdN?iKdE+KX}s)Uz3yUAFLFLo!H-H-~>N7&G72(5yR5U84#t`uTA5-<9m+hZKQN zj|p^ozI(Ye;Vs;)BmqYp0nVm>BvXeUIvl83C~`+K40Y#%6rj;U_h*o3BJ8D?E>lr4 z4T?q!=7FMWQBx4fztQuAVgOoa#sxa{bs&7`#!^dUX$sH}6hXAPHLSiT@&eJp~J@k8#VTJ0ZrQQ?PaK5m(e=}J$anAUMa@Q!ic%8XYk$Dsp}{_bZ5KX#dnz*Jw9lW0)@ji|i0sj&rH646{}8JvM; z2VrZS6i`2V4oc{e-cF$^ny69Kh(0Ex_u2J!ieN7ePK-{;60^Hj%loT^$c@wQq)^~F z>M*n1pQ!_3xsfkqJ)CPS8~|_-#L>w}yFoC0Dtu5w%5pvZO*D+h)akn9-_hP7 z#cia5ZT`_+Qlos=YKd@fu}lf|_DX0Reh|{UTCFhl?zM=h@2ktuL2>{z&YIH=%|B50 z%b5oOZ(Lm!hFvVgS3WE-Hx8pz@(Uxf`c+NFN$1#`f+fDLMr9#iS|@JnMnR=$BvCf_ zkpqCIOc5wFb5IK!d`rDGEEuKRaL|mu*(P2lP3Ucmtu$hv4h{lApI{#ZAie^-JFj&m;TI-cP|VqOqdB`>B4F{P^|q2X+o z_Ya)9Z`obQcL|%5!F5S6Ij`C%pEcBip-3<^dx5dLGCErG>$o++_$7X6#LoveP`g3V zM}aL(bhU53Ba-X|wNvyKoLN+=p!A*1D|%)Q2T0tvj2%2R%^~HP$w^#)RjRngnYXzQ zbh(s(+iLX~ZX{*MaWM*N(0iZK?#w=P8Q3YrTRM^*cVvWQOzGglJgwt=3&dLDE%6V; 
z_~cfC?_2H2n0%mv_wT$TY8-&Srng#!aTrwZva+$$Zd~xveE& z`>}33KlZ)o`2uY?N1&Ed_Bh|eP1)`iLAZZ+|!hn>_#NqWBvZWnPdo-?%ui`>%V+`TucC zdY(4%o6qsqoIV#wo2E*Sit%rXnOE8}Uu+9hI{TvZ!?&jAvx=X?N}hF@eO?ka6K^iz znp{5tHP3EDknMcD=;e2xO zX9rKq8GkSzZ^_8JGT>`fZqYcLeVY5AM}XB6jc?Ot=Gk zu2``mcpVtPY7lL*ZaxODvy1wRBRe-0hu3eP7%AmPNAykmhId{=n)7Re{f@n2a>a^Z z_1gaC{>tgCfef=k%|5LktnVrf?j2rVYz*TzIG{6PG{tvwTdfrAk$d7l)0#u6s>$|E|Cl z!NUISO}b5^HSN`Wb+-mT!#inVc<1)L-cDW~(&^;z`oWs(UWc8|#<~4##+w^3e#(^1 zSNFQ$=bZfpY`b&t#{Bwq^<2}RO}|6fCTpzTIO~e0>-z@>2e!Dyt>6-vXWPc6(KiP~ zPlpbS7L0D#L`RCwLD+dRSK1EW4VQBIVAFSqAH)&tw|_Q%n%l1pzSn+O|6Rf8S^78a zohuJEhV&wgw#E+hg}ry2^2dfCZt>Z6X4Akae>C+u9_-3!3F;cyJSJwivN)O EAIV$YrT_o{ literal 0 HcmV?d00001 diff --git a/tim/cli.py b/tim/cli.py index a97f4c6..998805e 100644 --- a/tim/cli.py +++ b/tim/cli.py @@ -385,6 +385,7 @@ def bulk_update_embeddings( "embedding_object", ], run_id=run_id, + action="index", ) embeddings_to_index = helpers.format_embeddings(embeddings) @@ -454,12 +455,11 @@ def reindex_source( tim_os.get_index_aliases(client, index), ) - # perform bulk indexing of current records from source - index_results = {"created": 0, "updated": 0, "errors": 0, "total": 0} - + # reindex current records from source td = TIMDEXDataset(location=dataset_path) # bulk index records + index_results = {"created": 0, "updated": 0, "errors": 0, "total": 0} records_to_index = td.read_transformed_records_iter( table="current_records", source=source, @@ -468,7 +468,25 @@ def reindex_source( try: index_results.update(tim_os.bulk_index(client, index, records_to_index)) except BulkIndexingError as exception: - logger.info(f"Bulk indexing failed: {exception}") + logger.error(f"Bulk indexing failed: {exception}") # noqa: TRY400 + + # bulk index embeddings + update_results = {"updated": 0, "errors": 0, "total": 0} + embeddings = td.embeddings.read_dicts_iter( + table="current_embeddings", + columns=[ + "timdex_record_id", + "embedding_strategy", + "embedding_object", + ], + source=source, + action="index", 
+ ) + embeddings_to_index = helpers.format_embeddings(embeddings) + try: + update_results.update(tim_os.bulk_update(client, index, embeddings_to_index)) + except BulkOperationError as exception: + logger.error(f"Bulk update with embeddings failed: {exception}") # noqa: TRY400 - summary_results = {"index": index_results} + summary_results = {"index": index_results, "update": update_results} logger.info(f"Reindex source complete: {json.dumps(summary_results)}") From 10a887fac4ebb5af233f2390fd1f109855de861e Mon Sep 17 00:00:00 2001 From: Jonavelle Cuerdo Date: Wed, 17 Dec 2025 16:20:01 -0500 Subject: [PATCH 2/4] Fix unit test --- tests/test_cli.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index 61dbc4f..07a6546 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -344,9 +344,11 @@ def test_bulk_update_embeddings_exit_bulk_operation_error( @patch("tim.opensearch.create_index") @patch("tim.opensearch.promote_index") @patch("tim.opensearch.get_index_aliases") +@patch("tim.opensearch.bulk_update") @patch("tim.opensearch.bulk_index") def test_reindex_source_success( mock_bulk_index, + mock_bulk_update, mock_get_index_aliases, mock_promote_index, mock_create_index, @@ -362,6 +364,11 @@ def test_reindex_source_success( "errors": 0, "total": 1000, } + mock_bulk_update.return_value = { + "updated": 10, + "errors": 0, + "total": 10, + } result = runner.invoke( main, From fa936ed6e4032507fe424b0cf433e8f7daffaf8d Mon Sep 17 00:00:00 2001 From: Jonavelle Cuerdo Date: Wed, 17 Dec 2025 16:22:05 -0500 Subject: [PATCH 3/4] Update dependencies --- Pipfile.lock | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/Pipfile.lock b/Pipfile.lock index b1b89c9..20c0553 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -26,20 +26,20 @@ }, "boto3": { "hashes": [ - "sha256:cdd4cc3e5bb08ed8a0c5cc77eca78f98f0239521de0991f14e44b788b0c639b2", - 
"sha256:d21d22af9aeb1bad8e9b670a221d6534c0120f7e7baf523dafaca83f1f5c3f90" + "sha256:649b134d25b278c24fcc8b3f94519de3884283b7848dc32f42b0ffdd9d19ce99", + "sha256:8112e1beb5978bb455ea4b41a9ef26fc408f6340d8ff69ef93dded4f80fd53e9" ], "index": "pypi", "markers": "python_version >= '3.9'", - "version": "==1.42.9" + "version": "==1.42.12" }, "botocore": { "hashes": [ - "sha256:74f69bfd116cc7c8215481284957eecdb48580e071dd50cb8c64356a866abd8c", - "sha256:f99ba2ca34e24c4ebec150376c815646970753c032eb84f230874b2975a185a8" + "sha256:1f9f63c3d6bb1f768519da30d6018706443c5d8af5472274d183a4945f3d81f8", + "sha256:4f163880350f6d831857ce5d023875b7c6534be862e5affd9fcf82b8d1ab3537" ], "markers": "python_version >= '3.9'", - "version": "==1.42.9" + "version": "==1.42.12" }, "certifi": { "hashes": [ @@ -644,12 +644,12 @@ }, "sentry-sdk": { "hashes": [ - "sha256:8218891d5e41b4ea8d61d2aed62ed10c80e39d9f2959d6f939efbf056857e050", - "sha256:d72f8c61025b7d1d9e52510d03a6247b280094a327dd900d987717a4fce93412" + "sha256:5213190977ff7fdff8a58b722fb807f8d5524a80488626ebeda1b5676c0c1473", + "sha256:6b12ac256769d41825d9b7518444e57fa35b5642df4c7c5e322af4d2c8721172" ], "index": "pypi", "markers": "python_version >= '3.6'", - "version": "==2.47.0" + "version": "==2.48.0" }, "six": { "hashes": [ @@ -927,20 +927,20 @@ }, "boto3-stubs": { "hashes": [ - "sha256:b132be8260eb56010b47499658cf6c485f99ea0190969e7d1adb74c505c83e68", - "sha256:d9b108fb3e0af33fedb0a9f0e214bc1d474a6e092a4b8755e88ccc847dc9c624" + "sha256:18a9a970fd9d8867558f091608f2d9944d6f55143d23cb11785f1c143d91aa54", + "sha256:f69f12c884519a62a6e6d7f296932d4cdc14f47ea987b1eb7b226337f1127d13" ], "index": "pypi", "markers": "python_version >= '3.9'", - "version": "==1.42.9" + "version": "==1.42.12" }, "botocore-stubs": { "hashes": [ - "sha256:92fdd2a1d911355166da3e30e9bb9b1803f7e2caec0d913f5fad3a920352ce6d", - "sha256:9f8b652549d4f727aa69e09d462d18e54a1bd10f3dbb593da56d5d0aafe9756e" + 
"sha256:788ecde81894f149bf210286f82ff3e49b97ce736a2ecb89f210be3f0374d35e", + "sha256:c2bac920d8c302e15e6bec9593daeaf04777714f8309fe052fd24cf28eb85a76" ], "markers": "python_version >= '3.9'", - "version": "==1.42.9" + "version": "==1.42.11" }, "cachecontrol": { "extras": [ @@ -1253,11 +1253,11 @@ }, "filelock": { "hashes": [ - "sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2", - "sha256:711e943b4ec6be42e1d4e6690b48dc175c822967466bb31c0c293f34334c13f4" + "sha256:15d9e9a67306188a44baa72f569d2bfd803076269365fdea0934385da4dc361a", + "sha256:b8360948b351b80f420878d8516519a2204b07aefcdcfd24912a5d33127f188c" ], "markers": "python_version >= '3.10'", - "version": "==3.20.0" + "version": "==3.20.1" }, "freezegun": { "hashes": [ @@ -1651,12 +1651,12 @@ }, "pre-commit": { "hashes": [ - "sha256:25e2ce09595174d9c97860a95609f9f852c0614ba602de3561e267547f2335e1", - "sha256:dc5a065e932b19fc1d4c653c6939068fe54325af8e741e74e88db4d28a4dd66b" + "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77", + "sha256:eb545fcff725875197837263e977ea257a402056661f09dae08e4b149b030a61" ], "index": "pypi", "markers": "python_version >= '3.10'", - "version": "==4.5.0" + "version": "==4.5.1" }, "prompt-toolkit": { "hashes": [ @@ -2107,4 +2107,4 @@ "version": "==2.0.1" } } -} \ No newline at end of file +} From e0107bfc568f040eecc919b305128137fea1bd30 Mon Sep 17 00:00:00 2001 From: Jonavelle Cuerdo Date: Thu, 18 Dec 2025 09:42:18 -0500 Subject: [PATCH 4/4] Add docstring --- tim/cli.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tim/cli.py b/tim/cli.py index 998805e..58219bd 100644 --- a/tim/cli.py +++ b/tim/cli.py @@ -430,11 +430,10 @@ def reindex_source( This CLI command performs the following: 1. creates a new index for the source - 2. promotes this index as the primary for the source alias, and added to any other + 2. promotes this index as the primary for the source alias and add to any other aliases passed (e.g. 
'timdex') - 3. uses the TDA library to yield only current records from the parquet dataset - for the source - 4. bulk index these records to the new Opensearch index + 3. bulk index current records from the parquet dataset to the index + 4. bulk update current embeddings (if any) from the parquet dataset to the index The net effect is a full refresh for a source in Opensearch, ensuring only current, non-deleted versions of records are used from the parquet dataset.