From a03630fa399833f02da1c1aa97190d7657562cbd Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Mon, 15 Dec 2025 12:24:37 +0000 Subject: [PATCH 01/11] DOC-6100 added DEL commands to CLI examples to match code changes --- content/develop/data-types/sets.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/content/develop/data-types/sets.md b/content/develop/data-types/sets.md index 4f35860a85..e8c81b9c55 100644 --- a/content/develop/data-types/sets.md +++ b/content/develop/data-types/sets.md @@ -68,6 +68,7 @@ if you add a member that already exists, it will be ignored. > SCARD bikes:racing:france (integer) 3 {{< /clients-example >}} + ## Tutorial The [`SADD`]({{< relref "/commands/sadd" >}}) command adds new elements to a set. It's also possible @@ -76,6 +77,8 @@ already exists, performing the intersection, union or difference between multiple sets, and so forth. {{< clients-example sets_tutorial sadd_smembers >}} +> DEL bikes:racing:france +(integer) 1 > SADD bikes:racing:france bike:1 bike:2 bike:3 (integer) 3 > SMEMBERS bikes:racing:france @@ -119,6 +122,8 @@ unions, difference, and more. For example if we add a third race we can see some of these commands in action: {{< clients-example sets_tutorial multisets >}} +> DEL bikes:racing:france bikes:racing:usa +(integer) 2 > SADD bikes:racing:france bike:1 bike:2 bike:3 (integer) 3 > SADD bikes:racing:usa bike:1 bike:4 @@ -151,6 +156,8 @@ remove a random item from a set. 
You can also _return_ a random item from a set without removing it using the [`SRANDMEMBER`]({{< relref "/commands/srandmember" >}}) command: {{< clients-example sets_tutorial srem >}} +> DEL bikes:racing:france +(integer) 1 > SADD bikes:racing:france bike:1 bike:2 bike:3 bike:4 bike:5 (integer) 5 > SREM bikes:racing:france bike:1 From 9242be2a6b592f01952bc1e28ef18323bed9d426 Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Mon, 15 Dec 2025 14:25:06 +0000 Subject: [PATCH 02/11] DOC-6101 enabled redis-py set notebook --- content/develop/data-types/sets.md | 2 +- local_examples/tmp/datatypes/sets/dt_set.py | 175 ++++++++++++++++++++ 2 files changed, 176 insertions(+), 1 deletion(-) create mode 100644 local_examples/tmp/datatypes/sets/dt_set.py diff --git a/content/develop/data-types/sets.md b/content/develop/data-types/sets.md index e8c81b9c55..bc110e71df 100644 --- a/content/develop/data-types/sets.md +++ b/content/develop/data-types/sets.md @@ -122,7 +122,7 @@ unions, difference, and more. 
For example if we add a third race we can see some of these commands in action: {{< clients-example sets_tutorial multisets >}} -> DEL bikes:racing:france bikes:racing:usa +> DEL bikes:racing:france bikes:racing:usa bikes:racing:italy (integer) 2 > SADD bikes:racing:france bike:1 bike:2 bike:3 (integer) 3 diff --git a/local_examples/tmp/datatypes/sets/dt_set.py b/local_examples/tmp/datatypes/sets/dt_set.py new file mode 100644 index 0000000000..974261c3e3 --- /dev/null +++ b/local_examples/tmp/datatypes/sets/dt_set.py @@ -0,0 +1,175 @@ +# EXAMPLE: sets_tutorial +# BINDER_ID python-dt-set +# HIDE_START +""" +Code samples for Set doc pages: + https://redis.io/docs/latest/develop/data-types/sets/ +""" + +import redis + +r = redis.Redis(decode_responses=True) +# HIDE_END +# REMOVE_START +r.delete("bikes:racing:france") +r.delete("bikes:racing:usa") +# REMOVE_END + +# STEP_START sadd +res1 = r.sadd("bikes:racing:france", "bike:1") +print(res1) # >>> 1 + +res2 = r.sadd("bikes:racing:france", "bike:1") +print(res2) # >>> 0 + +res3 = r.sadd("bikes:racing:france", "bike:2", "bike:3") +print(res3) # >>> 2 + +res4 = r.sadd("bikes:racing:usa", "bike:1", "bike:4") +print(res4) # >>> 2 +# STEP_END + +# REMOVE_START +assert res1 == 1 +assert res2 == 0 +assert res3 == 2 +assert res4 == 2 +# REMOVE_END + +# STEP_START sismember +# HIDE_START +r.sadd("bikes:racing:france", "bike:1", "bike:2", "bike:3") +r.sadd("bikes:racing:usa", "bike:1", "bike:4") +# HIDE_END +res5 = r.sismember("bikes:racing:usa", "bike:1") +print(res5) # >>> 1 + +res6 = r.sismember("bikes:racing:usa", "bike:2") +print(res6) # >>> 0 +# STEP_END + +# REMOVE_START +assert res5 == 1 +assert res6 == 0 +# REMOVE_END + +# STEP_START sinter +# HIDE_START +r.sadd("bikes:racing:france", "bike:1", "bike:2", "bike:3") +r.sadd("bikes:racing:usa", "bike:1", "bike:4") +# HIDE_END +res7 = r.sinter("bikes:racing:france", "bikes:racing:usa") +print(res7) # >>> {'bike:1'} +# STEP_END + +# REMOVE_START +assert res7 == {"bike:1"} +# 
REMOVE_END + +# STEP_START scard +# HIDE_START +r.sadd("bikes:racing:france", "bike:1", "bike:2", "bike:3") +# HIDE_END +res8 = r.scard("bikes:racing:france") +print(res8) # >>> 3 +# STEP_END + +# REMOVE_START +assert res8 == 3 +# REMOVE_END + +# STEP_START sadd_smembers +r.delete("bikes:racing:france") + +res9 = r.sadd("bikes:racing:france", "bike:1", "bike:2", "bike:3") +print(res9) # >>> 3 + +res10 = r.smembers("bikes:racing:france") +print(res10) # >>> {'bike:1', 'bike:2', 'bike:3'} +# STEP_END + +# REMOVE_START +assert res9 == 3 +assert res10 == {'bike:1', 'bike:2', 'bike:3'} +# REMOVE_END + +# STEP_START smismember +res11 = r.sismember("bikes:racing:france", "bike:1") +print(res11) # >>> 1 + +res12 = r.smismember("bikes:racing:france", "bike:2", "bike:3", "bike:4") +print(res12) # >>> [1, 1, 0] +# STEP_END + +# REMOVE_START +assert res11 == 1 +assert res12 == [1, 1, 0] +# REMOVE_END + +# STEP_START sdiff +r.sadd("bikes:racing:france", "bike:1", "bike:2", "bike:3") +r.sadd("bikes:racing:usa", "bike:1", "bike:4") + +res13 = r.sdiff("bikes:racing:france", "bikes:racing:usa") +print(res13) # >>> {'bike:2', 'bike:3'} +# STEP_END + +# REMOVE_START +assert res13 == {'bike:2', 'bike:3'} +# REMOVE_END + +# STEP_START multisets +r.delete("bikes:racing:france") +r.delete("bikes:racing:usa") +r.delete("bikes:racing:italy") + +r.sadd("bikes:racing:france", "bike:1", "bike:2", "bike:3") +r.sadd("bikes:racing:usa", "bike:1", "bike:4") +r.sadd("bikes:racing:italy", "bike:1", "bike:2", "bike:3", "bike:4") + +res13 = r.sinter("bikes:racing:france", "bikes:racing:usa", "bikes:racing:italy") +print(res13) # >>> {'bike:1'} + +res14 = r.sunion("bikes:racing:france", "bikes:racing:usa", "bikes:racing:italy") +print(res14) # >>> {'bike:1', 'bike:2', 'bike:3', 'bike:4'} + +res15 = r.sdiff("bikes:racing:france", "bikes:racing:usa", "bikes:racing:italy") +print(res15) # >>> set() + +res16 = r.sdiff("bikes:racing:usa", "bikes:racing:france") +print(res16) # >>> {'bike:4'} + +res17 = 
r.sdiff("bikes:racing:france", "bikes:racing:usa") +print(res17) # >>> {'bike:2', 'bike:3'} +# STEP_END + +# REMOVE_START +assert res13 == {'bike:1'} +assert res14 == {'bike:1', 'bike:2', 'bike:3', 'bike:4'} +assert res15 == set() +assert res16 == {'bike:4'} +assert res17 == {'bike:2', 'bike:3'} +# REMOVE_END + +# STEP_START srem +r.delete("bikes:racing:france") + +r.sadd("bikes:racing:france", "bike:1", "bike:2", "bike:3", "bike:4", "bike:5") + +res18 = r.srem("bikes:racing:france", "bike:1") +print(res18) # >>> 1 + +res19 = r.spop("bikes:racing:france") +print(res19) # >>> bike:3 + +res20 = r.smembers("bikes:racing:france") +print(res20) # >>> {'bike:2', 'bike:4', 'bike:5'} + +res21 = r.srandmember("bikes:racing:france") +print(res21) # >>> bike:4 +# STEP_END + +# REMOVE_START +assert res18 == 1 +# none of the other results are deterministic +# REMOVE_END From 94c24cf0494708ffaf518cb23fb9c7754009fc63 Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Mon, 15 Dec 2025 15:28:21 +0000 Subject: [PATCH 03/11] DOC-6102 enabled node-redis set notebook --- local_examples/tmp/datatypes/sets/dt-set.js | 185 ++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 local_examples/tmp/datatypes/sets/dt-set.js diff --git a/local_examples/tmp/datatypes/sets/dt-set.js b/local_examples/tmp/datatypes/sets/dt-set.js new file mode 100644 index 0000000000..959e2081fa --- /dev/null +++ b/local_examples/tmp/datatypes/sets/dt-set.js @@ -0,0 +1,185 @@ +// EXAMPLE: sets_tutorial +// BINDER_ID nodejs-dt-set +// REMOVE_START +import assert from 'assert'; +// REMOVE_END +// HIDE_START +import { createClient } from 'redis'; + +const client = createClient(); +await client.connect(); +// HIDE_END +// REMOVE_START +await client.del('bikes:racing:france') +await client.del('bikes:racing:usa') +await client.del('bikes:racing:italy') +// REMOVE_END + +// STEP_START sAdd +const res1 = await client.sAdd('bikes:racing:france', 'bike:1') +console.log(res1) // >>> 1 + +const res2 = await 
client.sAdd('bikes:racing:france', 'bike:1') +console.log(res2) // >>> 0 +const res3 = await client.sAdd('bikes:racing:france', ['bike:2', 'bike:3']) +console.log(res3) // >>> 2 +const res4 = await client.sAdd('bikes:racing:usa', ['bike:1', 'bike:4']) +console.log(res4) // >>> 2 +// STEP_END + +// REMOVE_START +assert.equal(res1, 1) +assert.equal(res2, 0) +assert.equal(res3, 2) +assert.equal(res4, 2) +// REMOVE_END + +// STEP_START sIsMember +// HIDE_START +await client.del('bikes:racing:france') +await client.del('bikes:racing:usa') +await client.sAdd('bikes:racing:france', ['bike:1', 'bike:2', 'bike:3']) +await client.sAdd('bikes:racing:usa', ['bike:1', 'bike:4']) +// HIDE_END +const res5 = await client.sIsMember('bikes:racing:usa', 'bike:1') +console.log(res5) // >>> 1 + +const res6 = await client.sIsMember('bikes:racing:usa', 'bike:2') +console.log(res6) // >>> 0 +// STEP_END + +// REMOVE_START +assert.equal(res5, 1) +assert.equal(res6, 0) +// REMOVE_END + +// STEP_START sinster +// HIDE_START +await client.del('bikes:racing:france') +await client.del('bikes:racing:usa') +await client.sAdd('bikes:racing:france', ['bike:1', 'bike:2', 'bike:3']) +await client.sAdd('bikes:racing:usa', ['bike:1', 'bike:4']) +// HIDE_END +const res7 = await client.sInter(['bikes:racing:france', 'bikes:racing:usa']) +console.log(res7) // >>> {'bike:1'} +// STEP_END + +// REMOVE_START +assert.deepEqual(res7, [ 'bike:1' ]) +// REMOVE_END + +// STEP_START sCard +// HIDE_START +await client.del('bikes:racing:france') +await client.sAdd('bikes:racing:france', ['bike:1', 'bike:2', 'bike:3']) +// HIDE_END +const res8 = await client.sCard('bikes:racing:france') +console.log(res8) // >>> 3 +// STEP_END + +// REMOVE_START +assert.equal(res8, 3) +// REMOVE_END + +// STEP_START sAdd_sMembers +await client.del('bikes:racing:france') + +const res9 = await client.sAdd('bikes:racing:france', ['bike:1', 'bike:2', 'bike:3']) +console.log(res9) // >>> 3 + +const res10 = await 
client.sMembers('bikes:racing:france') +console.log(res10) // >>> ['bike:1', 'bike:2', 'bike:3'] +// STEP_END + +// REMOVE_START +assert.equal(res9, 3) +assert.deepEqual(res10.sort(), ['bike:1', 'bike:2', 'bike:3']) +// REMOVE_END + +// STEP_START smIsMember +const res11 = await client.sIsMember('bikes:racing:france', 'bike:1') +console.log(res11) // >>> 1 + +const res12 = await client.smIsMember('bikes:racing:france', ['bike:2', 'bike:3', 'bike:4']) +console.log(res12) // >>> [1, 1, 0] +// STEP_END + +// REMOVE_START +assert.equal(res11, 1) +assert.deepEqual(res12, [1, 1, 0]) +// REMOVE_END + +// STEP_START sDiff +await client.del('bikes:racing:france') +await client.del('bikes:racing:usa') + +await client.sAdd('bikes:racing:france', ['bike:1', 'bike:2', 'bike:3']) +await client.sAdd('bikes:racing:usa', ['bike:1', 'bike:4']) +const res13 = await client.sDiff(['bikes:racing:france', 'bikes:racing:usa']) +console.log(res13) // >>> [ 'bike:2', 'bike:3' ] +// STEP_END + +// REMOVE_START +assert.deepEqual(res13.sort(), ['bike:2', 'bike:3'].sort()) +// REMOVE_END + +// STEP_START multisets +await client.del('bikes:racing:france') +await client.del('bikes:racing:usa') +await client.del('bikes:racing:italy') + +await client.sAdd('bikes:racing:france', ['bike:1', 'bike:2', 'bike:3']) +await client.sAdd('bikes:racing:usa', ['bike:1', 'bike:4']) +await client.sAdd('bikes:racing:italy', ['bike:1', 'bike:2', 'bike:3', 'bike:4']) + +const res14 = await client.sInter( + ['bikes:racing:france', 'bikes:racing:usa', 'bikes:racing:italy'] +) +console.log(res14) // >>> ['bike:1'] + +const res15 = await client.sUnion( + ['bikes:racing:france', 'bikes:racing:usa', 'bikes:racing:italy'] +) +console.log(res15) // >>> ['bike:1', 'bike:2', 'bike:3', 'bike:4'] + +const res16 = await client.sDiff(['bikes:racing:france', 'bikes:racing:usa', 'bikes:racing:italy']) +console.log(res16) // >>> [] + +const res17 = await client.sDiff(['bikes:racing:usa', 'bikes:racing:france']) +console.log(res17) 
// >>> ['bike:4'] + +const res18 = await client.sDiff(['bikes:racing:france', 'bikes:racing:usa']) +console.log(res18) // >>> ['bike:2', 'bike:3'] +// STEP_END + +// REMOVE_START +assert.deepEqual(res14, ['bike:1']) +assert.deepEqual(res15.sort(), ['bike:1', 'bike:2', 'bike:3', 'bike:4']) +assert.deepEqual(res16, []) +assert.deepEqual(res17, ['bike:4']) +assert.deepEqual(res18.sort(), ['bike:2', 'bike:3'].sort()) +// REMOVE_END + +// STEP_START sRem +await client.del('bikes:racing:france') + +await client.sAdd('bikes:racing:france', ['bike:1', 'bike:2', 'bike:3', 'bike:4', 'bike:5']) + +const res19 = await client.sRem('bikes:racing:france', 'bike:1') +console.log(res19) // >>> 1 + +const res20 = await client.sPop('bikes:racing:france') +console.log(res20) // >>> bike:3 or other random value + +const res21 = await client.sMembers('bikes:racing:france') +console.log(res21) // >>> ['bike:2', 'bike:4', 'bike:5']; depends on previous result + +const res22 = await client.sRandMember('bikes:racing:france') +console.log(res22) // >>> bike:4 or other random value +// STEP_END + +// REMOVE_START +assert.equal(res19, 1) +await client.close() +// none of the other results are deterministic +// REMOVE_END From f572c63bd700aff8c15267d89038b1526f6990ad Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Mon, 15 Dec 2025 16:07:44 +0000 Subject: [PATCH 04/11] DOC-6103 enabled Jedis set notebook --- .../tmp/datatypes/sets/SetsExample.java | 193 ++++++++++++++++++ 1 file changed, 193 insertions(+) create mode 100644 local_examples/tmp/datatypes/sets/SetsExample.java diff --git a/local_examples/tmp/datatypes/sets/SetsExample.java b/local_examples/tmp/datatypes/sets/SetsExample.java new file mode 100644 index 0000000000..a4802ee5ef --- /dev/null +++ b/local_examples/tmp/datatypes/sets/SetsExample.java @@ -0,0 +1,193 @@ +// EXAMPLE: sets_tutorial +// BINDER_ID jedis-dt-set +// REMOVE_START +package io.redis.examples; +import org.junit.jupiter.api.Test; + +import static 
org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertEquals; +// REMOVE_END +// HIDE_START +import redis.clients.jedis.UnifiedJedis; + +import java.util.List; +import java.util.Set; + +public class SetsExample { + + @Test + public void run() { + UnifiedJedis jedis = new UnifiedJedis("redis://localhost:6379"); + // HIDE_END + + // REMOVE_START + jedis.del("bikes:racing:france"); + jedis.del("bikes:racing:usa"); + jedis.del("bikes:racing:italy"); + // REMOVE_END + // STEP_START sadd + long res1 = jedis.sadd("bikes:racing:france", "bike:1"); + System.out.println(res1); // >>> 1 + + long res2 = jedis.sadd("bikes:racing:france", "bike:1"); + System.out.println(res2); // >>> 0 + + long res3 = jedis.sadd("bikes:racing:france", "bike:2", "bike:3"); + System.out.println(res3); // >>> 2 + + long res4 = jedis.sadd("bikes:racing:usa", "bike:1", "bike:4"); + System.out.println(res4); // >>> 2 + // STEP_END + + // REMOVE_START + assertEquals(1,res1); + assertEquals(0,res2); + assertEquals(2,res3); + assertEquals(2,res4); + // REMOVE_END + + // STEP_START sismember + // HIDE_START + jedis.sadd("bikes:racing:france", "bike:1", "bike:2", "bike:3"); + jedis.sadd("bikes:racing:usa", "bike:1", "bike:4"); + // HIDE_END + + boolean res5 = jedis.sismember("bikes:racing:usa", "bike:1"); + System.out.println(res5); // >>> true + + boolean res6 = jedis.sismember("bikes:racing:usa", "bike:2"); + System.out.println(res6); // >>> false + // STEP_END + + // REMOVE_START + assertTrue(res5); + assertFalse(res6); + // REMOVE_END + + // STEP_START sinter + // HIDE_START + jedis.sadd("bikes:racing:france", "bike:1", "bike:2", "bike:3"); + jedis.sadd("bikes:racing:usa", "bike:1", "bike:4"); + // HIDE_END + + Set res7 = jedis.sinter("bikes:racing:france", "bikes:racing:usa"); + System.out.println(res7); // >>> [bike:1] + // STEP_END + + // REMOVE_START + assertEquals("[bike:1]",res7.toString()); 
+ // REMOVE_END + + // STEP_START scard + jedis.sadd("bikes:racing:france", "bike:1", "bike:2", "bike:3"); + + long res8 = jedis.scard("bikes:racing:france"); + System.out.println(res8); // >>> 3 + // STEP_END + + // REMOVE_START + assertEquals(3,res8); + // REMOVE_END + + // STEP_START sadd_smembers + jedis.del("bikes:racing:france"); + + long res9 = jedis.sadd("bikes:racing:france", "bike:1", "bike:2", "bike:3"); + System.out.println(res9); // >>> 3 + + Set res10 = jedis.smembers("bikes:racing:france"); + System.out.println(res10); // >>> [bike:1, bike:2, bike:3] + // STEP_END + + // REMOVE_START + assertEquals(3,res9); + assertEquals("[bike:1, bike:2, bike:3]",res10.toString()); + // REMOVE_END + + // STEP_START smismember + boolean res11 = jedis.sismember("bikes:racing:france", "bike:1"); + System.out.println(res11); // >>> true + + List res12 = jedis.smismember("bikes:racing:france", "bike:2", "bike:3", "bike:4"); + System.out.println(res12); // >>> [true,true,false] + // STEP_END + + // REMOVE_START + assertTrue(res11); + assertEquals("[true, true, false]",res12.toString()); + // REMOVE_END + + // STEP_START sdiff + jedis.sadd("bikes:racing:france", "bike:1", "bike:2", "bike:3"); + jedis.sadd("bikes:racing:usa", "bike:1", "bike:4"); + + Set res13 = jedis.sdiff("bikes:racing:france", "bikes:racing:usa"); + System.out.println(res13); // >>> [bike:2, bike:3] + + // REMOVE_START + assertEquals("[bike:2, bike:3]",res13.toString()); + // REMOVE_END + // STEP_END + + // STEP_START multisets + jedis.del("bikes:racing:france"); + jedis.del("bikes:racing:usa"); + jedis.del("bikes:racing:italy"); + + jedis.sadd("bikes:racing:france", "bike:1", "bike:2", "bike:3"); + jedis.sadd("bikes:racing:usa", "bike:1", "bike:4"); + jedis.sadd("bikes:racing:italy", "bike:1", "bike:2", "bike:3", "bike:4"); + + Set res14 = jedis.sinter("bikes:racing:france", "bikes:racing:usa", "bikes:racing:italy"); + System.out.println(res14); // >>> [bike:1] + + Set res15 = 
jedis.sunion("bikes:racing:france", "bikes:racing:usa", "bikes:racing:italy"); + System.out.println(res15); // >>> [bike:1, bike:2, bike:3, bike:4] + + Set res16 = jedis.sdiff("bikes:racing:france", "bikes:racing:usa", "bikes:racing:italy"); + System.out.println(res16); // >>> [] + + Set res17 = jedis.sdiff("bikes:racing:usa", "bikes:racing:france"); + System.out.println(res17); // >>> [bike:4] + + Set res18 = jedis.sdiff("bikes:racing:france", "bikes:racing:usa"); + System.out.println(res18); // >>> [bike:2, bike:3] + + // REMOVE_START + assertEquals("[bike:1]",res14.toString()); + assertEquals("[bike:1, bike:2, bike:3, bike:4]",res15.toString()); + assertEquals("[]",res16.toString()); + assertEquals("[bike:4]",res17.toString()); + assertEquals("[bike:2, bike:3]",res18.toString()); + jedis.del("bikes:racing:usa"); + jedis.del("bikes:racing:italy"); + // REMOVE_END + // STEP_END + + // STEP_START srem + jedis.del("bikes:racing:france"); + jedis.sadd("bikes:racing:france", "bike:1", "bike:2", "bike:3", "bike:4", "bike:5"); + + long res19 = jedis.srem("bikes:racing:france", "bike:1"); + System.out.println(res18); // >>> 1 + + String res20 = jedis.spop("bikes:racing:france"); + System.out.println(res20); // >>> bike:3 + + Set res21 = jedis.smembers("bikes:racing:france"); + System.out.println(res21); // >>> [bike:2, bike:4, bike:5] + + String res22 = jedis.srandmember("bikes:racing:france"); + System.out.println(res22); // >>> bike:4 + // STEP_END + + // REMOVE_START + assertEquals(1,res19); + // REMOVE_END + + // HIDE_START + jedis.close(); + // HIDE_END + } +} From e10900623de2975f3776a1143e0ddae331270d9f Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Mon, 15 Dec 2025 16:53:04 +0000 Subject: [PATCH 05/11] DOC-6104 enabled NRedisStack set notebook --- .../tmp/datatypes/sets/SetsTutorial.cs | 224 ++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 local_examples/tmp/datatypes/sets/SetsTutorial.cs diff --git 
a/local_examples/tmp/datatypes/sets/SetsTutorial.cs b/local_examples/tmp/datatypes/sets/SetsTutorial.cs new file mode 100644 index 0000000000..e5a57a5ead --- /dev/null +++ b/local_examples/tmp/datatypes/sets/SetsTutorial.cs @@ -0,0 +1,224 @@ +// EXAMPLE: sets_tutorial +// BINDER_ID netsync-dt-set +// REMOVE_START +using NRedisStack.Tests; +// REMOVE_END +// HIDE_START +using StackExchange.Redis; +// HIDE_END + +//REMOVE_START +namespace Doc; + +[Collection("DocsTests")] +//REMOVE_END + +// HIDE_START +public class SetsExample +// REMOVE_START +: AbstractNRedisStackTest, IDisposable +// REMOVE_END +{ + // REMOVE_START + public SetsExample(EndpointsFixture fixture) : base(fixture) { } + + [SkippableFact] + // REMOVE_END + public void Run() + { + //REMOVE_START + // This is needed because we're constructing ConfigurationOptions in the test before calling GetConnection + SkipIfTargetConnectionDoesNotExist(EndpointsFixture.Env.Standalone); + var _ = GetCleanDatabase(EndpointsFixture.Env.Standalone); + //REMOVE_END + var muxer = ConnectionMultiplexer.Connect("localhost:6379"); + var db = muxer.GetDatabase(); + //REMOVE_START + // Clear any keys here before using them in tests. + bool delRes = db.KeyDelete("bikes:racing:france"); + delRes = db.KeyDelete("bikes:racing:usa"); + delRes = db.KeyDelete("{bikes:racing}:france"); + delRes = db.KeyDelete("{bikes:racing}:usa"); + delRes = db.KeyDelete("{bikes:racing}:italy"); + //REMOVE_END + // HIDE_END + + + // STEP_START sadd + long res1 = db.SetAdd("bikes:racing:france", ["bike:1"]); + Console.WriteLine(res1); // >>> 1 + + long res2 = db.SetAdd("bikes:racing:france", ["bike:1"]); + Console.WriteLine(res2); // >>> 0 + + long res3 = db.SetAdd("bikes:racing:france", ["bike:2", "bike:3"]); + Console.WriteLine(res3); // >>> 2 + + long res4 = db.SetAdd("bikes:racing:usa", ["bike:1", "bike:4"]); + Console.WriteLine(res4); // >>> 2 + // STEP_END + + // Tests for 'sadd' step. 
+ // REMOVE_START + Assert.Equal(1, res1); + Assert.Equal(0, res2); + Assert.Equal(2, res3); + Assert.Equal(2, res4); + // REMOVE_END + + + // STEP_START sismember + bool res5 = db.SetContains("bikes:racing:france", "bike:1"); + Console.WriteLine(res5); // >>> True + + bool res6 = db.SetContains("bikes:racing:usa", "bike:2"); + Console.WriteLine(res6); // >>> False + // STEP_END + + // Tests for 'sismember' step. + // REMOVE_START + Assert.True(res5); + Assert.False(res6); + // REMOVE_END + + + // STEP_START sinter + long res7 = db.SetAdd("{bikes:racing}:france", ["bike:1", "bike:2", "bike:3"]); + long res8 = db.SetAdd("{bikes:racing}:usa", ["bike:1", "bike:4"]); + + RedisValue[] res9 = db.SetCombine(SetOperation.Intersect, ["{bikes:racing}:france", "{bikes:racing}:usa"]); + Console.WriteLine(string.Join(", ", res9)); // >>> bike:1 + // STEP_END + + // Tests for 'sinter' step. + // REMOVE_START + Assert.Equal(3, res7); + Assert.Equal(2, res8); + Assert.Equal("bike:1", string.Join(", ", res9)); + // REMOVE_END + + + // STEP_START scard + long res10 = db.SetAdd("bikes:racing:france", ["bike:1", "bike:2", "bike:3"]); + long res11 = db.SetLength("bikes:racing:france"); + Console.WriteLine(res11); // >>> 3 + // STEP_END + + // Tests for 'scard' step. + // REMOVE_START + Assert.Equal(3, res11); + // REMOVE_END + + + // STEP_START sadd_smembers + db.KeyDelete("bikes:racing:france"); + + long res12 = db.SetAdd("bikes:racing:france", ["bike:1", "bike:2", "bike:3"]); + RedisValue[] res13 = db.SetMembers("bikes:racing:france"); + Console.WriteLine(string.Join(", ", res13)); // >>> bike:3, bike:2, bike:1 + // STEP_END + + // Tests for 'sadd_smembers' step. 
+ // REMOVE_START + Assert.Equal(3, res12); + // REMOVE_END + + + // STEP_START smismember + bool res14 = db.SetContains("bikes:racing:france", "bike:1"); + Console.WriteLine(res14); // >>> true + + bool[] res15 = db.SetContains("bikes:racing:france", ["bike:2", "bike:3", "bike:4"]); + Console.WriteLine(string.Join(", ", res15)); // >>> True, True, False + // STEP_END + + // Tests for 'smismember' step. + // REMOVE_START + Assert.True(res14); + Assert.Equal("True, True, False", string.Join(", ", res15)); + // REMOVE_END + + + // STEP_START sdiff + long res16 = db.SetAdd("{bikes:racing}:france", ["bike:1", "bike:2", "bike:3"]); + long res17 = db.SetAdd("{bikes:racing}:usa", ["bike:1", "bike:4"]); + RedisValue[] res18 = db.SetCombine(SetOperation.Difference, ["{bikes:racing}:france", "{bikes:racing}:usa"]); + Console.WriteLine(string.Join(", ", res18)); // >>> bike:2, bike:3 + // STEP_END + + // Tests for 'sdiff' step. + // REMOVE_START + Assert.Equal(0, res16); + Assert.Equal(0, res17); + // REMOVE_END + + + // STEP_START multisets + db.KeyDelete("{bikes:racing}:france"); + db.KeyDelete("{bikes:racing}:usa"); + db.KeyDelete("{bikes:racing}:italy"); + + long res19 = db.SetAdd("{bikes:racing}:france", ["bike:1", "bike:2", "bike:3"]); + long res20 = db.SetAdd("{bikes:racing}:usa", ["bike:1", "bike:4"]); + long res21 = db.SetAdd("{bikes:racing}:italy", ["bike:1", "bike:2", "bike:3", "bike:4"]); + + RedisValue[] res22 = db.SetCombine(SetOperation.Intersect, ["{bikes:racing}:france", "{bikes:racing}:usa", "{bikes:racing}:italy" + ]); + Console.WriteLine(string.Join(", ", res22)); // >>> bike:1 + + RedisValue[] res23 = db.SetCombine(SetOperation.Union, ["{bikes:racing}:france", "{bikes:racing}:usa", "{bikes:racing}:italy" + ]); + Console.WriteLine(string.Join(", ", res23)); // >>> bike:1, bike:2, bike:3, bike:4 + + RedisValue[] res24 = db.SetCombine(SetOperation.Difference, ["{bikes:racing}:france", "{bikes:racing}:usa", "{bikes:racing}:italy" + ]); + 
Console.WriteLine(string.Join(", ", res24)); // >>> + + RedisValue[] res25 = db.SetCombine(SetOperation.Difference, ["{bikes:racing}:usa", "{bikes:racing}:france"]); + Console.WriteLine(string.Join(", ", res25)); // >>> bike:4 + + RedisValue[] res26 = db.SetCombine(SetOperation.Difference, ["{bikes:racing}:france", "{bikes:racing}:usa"]); + Console.WriteLine(string.Join(", ", res26)); // >>> bike:2, bike:3 + // STEP_END + + // Tests for 'multisets' step. + // REMOVE_START + Assert.Equal(3, res19); + Assert.Equal(2, res20); + Assert.Equal(4, res21); + Assert.Equal("bike:1", string.Join(", ", res22)); + Assert.Equal("", string.Join(", ", res24)); + Assert.Equal("bike:4", string.Join(", ", res25)); + // REMOVE_END + + + // STEP_START srem + db.KeyDelete("bikes:racing:france"); + + long res27 = db.SetAdd("bikes:racing:france", ["bike:1", "bike:2", "bike:3", "bike:4", "bike:5"]); + + bool res28 = db.SetRemove("bikes:racing:france", "bike:1"); + Console.WriteLine(res28); // >>> True + + RedisValue res29 = db.SetPop("bikes:racing:france"); + Console.WriteLine(res29); // >>> bike:3 + + RedisValue[] res30 = db.SetMembers("bikes:racing:france"); + Console.WriteLine(string.Join(", ", res30)); // >>> bike:2, bike:4, bike:5 + + RedisValue res31 = db.SetRandomMember("bikes:racing:france"); + Console.WriteLine(res31); // >>> bike:4 + // STEP_END + + // Tests for 'srem' step. 
+ // REMOVE_START + Assert.Equal(5, res27); + Assert.True(res28); + // REMOVE_END + + + // HIDE_START + } +} +// HIDE_END + From 514f41f08b01bab18735f858418f8cee3ee95dce Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Tue, 16 Dec 2025 09:53:45 +0000 Subject: [PATCH 06/11] DOC-6105 enabled go-redis set notebook --- .../tmp/datatypes/sets/sets_example_test.go | 462 ++++++++++++++++++ 1 file changed, 462 insertions(+) create mode 100644 local_examples/tmp/datatypes/sets/sets_example_test.go diff --git a/local_examples/tmp/datatypes/sets/sets_example_test.go b/local_examples/tmp/datatypes/sets/sets_example_test.go new file mode 100644 index 0000000000..877459b5cf --- /dev/null +++ b/local_examples/tmp/datatypes/sets/sets_example_test.go @@ -0,0 +1,462 @@ +// EXAMPLE: sets_tutorial +// BINDER_ID go-dt-set +// HIDE_START +package example_commands_test + +import ( + "context" + "fmt" + "sort" + + "github.com/redis/go-redis/v9" +) + +// HIDE_END +func ExampleClient_sadd() { + ctx := context.Background() + + rdb := redis.NewClient(&redis.Options{ + Addr: "localhost:6379", + Password: "", // no password docs + DB: 0, // use default DB + }) + + // REMOVE_START + // start with fresh database + rdb.FlushDB(ctx) + rdb.Del(ctx, "bikes:racing:france") + rdb.Del(ctx, "bikes:racing:usa") + rdb.Del(ctx, "bikes:racing:italy") + // REMOVE_END + + // STEP_START sadd + res1, err := rdb.SAdd(ctx, "bikes:racing:france", "bike:1").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res1) // >>> 1 + + res2, err := rdb.SAdd(ctx, "bikes:racing:france", "bike:1").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res2) // >>> 0 + + res3, err := rdb.SAdd(ctx, "bikes:racing:france", "bike:2", "bike:3").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res3) // >>> 2 + + res4, err := rdb.SAdd(ctx, "bikes:racing:usa", "bike:1", "bike:4").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res4) // >>> 2 + // STEP_END + + // Output: + // 1 + // 0 + // 2 + 
// 2 +} + +func ExampleClient_sismember() { + ctx := context.Background() + + rdb := redis.NewClient(&redis.Options{ + Addr: "localhost:6379", + Password: "", // no password docs + DB: 0, // use default DB + }) + + // REMOVE_START + // start with fresh database + rdb.FlushDB(ctx) + rdb.Del(ctx, "bikes:racing:france") + rdb.Del(ctx, "bikes:racing:usa") + // REMOVE_END + + _, err := rdb.SAdd(ctx, "bikes:racing:france", "bike:1", "bike:2", "bike:3").Result() + + if err != nil { + panic(err) + } + + _, err = rdb.SAdd(ctx, "bikes:racing:usa", "bike:1", "bike:4").Result() + + if err != nil { + panic(err) + } + + // STEP_START sismember + res5, err := rdb.SIsMember(ctx, "bikes:racing:usa", "bike:1").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res5) // >>> true + + res6, err := rdb.SIsMember(ctx, "bikes:racing:usa", "bike:2").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res6) // >>> false + // STEP_END + + // Output: + // true + // false +} + +func ExampleClient_sinter() { + ctx := context.Background() + + rdb := redis.NewClient(&redis.Options{ + Addr: "localhost:6379", + Password: "", // no password docs + DB: 0, // use default DB + }) + + // REMOVE_START + // start with fresh database + rdb.FlushDB(ctx) + rdb.Del(ctx, "bikes:racing:france") + rdb.Del(ctx, "bikes:racing:usa") + // REMOVE_END + + _, err := rdb.SAdd(ctx, "bikes:racing:france", "bike:1", "bike:2", "bike:3").Result() + + if err != nil { + panic(err) + } + + _, err = rdb.SAdd(ctx, "bikes:racing:usa", "bike:1", "bike:4").Result() + + if err != nil { + panic(err) + } + + // STEP_START sinter + res7, err := rdb.SInter(ctx, "bikes:racing:france", "bikes:racing:usa").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res7) // >>> [bike:1] + // STEP_END + + // Output: + // [bike:1] +} + +func ExampleClient_scard() { + ctx := context.Background() + + rdb := redis.NewClient(&redis.Options{ + Addr: "localhost:6379", + Password: "", // no password docs + DB: 0, // use 
default DB + }) + + // REMOVE_START + // start with fresh database + rdb.FlushDB(ctx) + rdb.Del(ctx, "bikes:racing:france") + // REMOVE_END + + _, err := rdb.SAdd(ctx, "bikes:racing:france", "bike:1", "bike:2", "bike:3").Result() + + if err != nil { + panic(err) + } + + // STEP_START scard + res8, err := rdb.SCard(ctx, "bikes:racing:france").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res8) // >>> 3 + // STEP_END + + // Output: + // 3 +} + +func ExampleClient_saddsmembers() { + ctx := context.Background() + + rdb := redis.NewClient(&redis.Options{ + Addr: "localhost:6379", + Password: "", // no password docs + DB: 0, // use default DB + }) + + // STEP_START sadd_smembers + rdb.Del(ctx, "bikes:racing:france") + + res9, err := rdb.SAdd(ctx, "bikes:racing:france", "bike:1", "bike:2", "bike:3").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res9) // >>> 3 + + res10, err := rdb.SMembers(ctx, "bikes:racing:france").Result() + + if err != nil { + panic(err) + } + + // Sort the strings in the slice to make sure the output is lexicographical + sort.Strings(res10) + + fmt.Println(res10) // >>> [bike:1 bike:2 bike:3] + // STEP_END + + // Output: + // 3 + // [bike:1 bike:2 bike:3] +} + +func ExampleClient_smismember() { + ctx := context.Background() + + rdb := redis.NewClient(&redis.Options{ + Addr: "localhost:6379", + Password: "", // no password docs + DB: 0, // use default DB + }) + + // REMOVE_START + // start with fresh database + rdb.Del(ctx, "bikes:racing:france") + // REMOVE_END + + _, err := rdb.SAdd(ctx, "bikes:racing:france", "bike:1", "bike:2", "bike:3").Result() + + if err != nil { + panic(err) + } + + // STEP_START smismember + res11, err := rdb.SIsMember(ctx, "bikes:racing:france", "bike:1").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res11) // >>> true + + res12, err := rdb.SMIsMember(ctx, "bikes:racing:france", "bike:2", "bike:3", "bike:4").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res12) 
// >>> [true true false] + // STEP_END + + // Output: + // true + // [true true false] +} + +func ExampleClient_sdiff() { + ctx := context.Background() + + rdb := redis.NewClient(&redis.Options{ + Addr: "localhost:6379", + Password: "", // no password docs + DB: 0, // use default DB + }) + + // REMOVE_START + // start with fresh database + rdb.FlushDB(ctx) + rdb.Del(ctx, "bikes:racing:france") + rdb.Del(ctx, "bikes:racing:usa") + // REMOVE_END + + // STEP_START sdiff + _, err := rdb.SAdd(ctx, "bikes:racing:france", "bike:1", "bike:2", "bike:3").Result() + + if err != nil { + panic(err) + } + + _, err = rdb.SAdd(ctx, "bikes:racing:usa", "bike:1", "bike:4").Result() + + res13, err := rdb.SDiff(ctx, "bikes:racing:france", "bikes:racing:usa").Result() + + if err != nil { + panic(err) + } + + // Sort the strings in the slice to make sure the output is lexicographical + sort.Strings(res13) + + fmt.Println(res13) // >>> [bike:2 bike:3] + // STEP_END + + // Output: + // [bike:2 bike:3] +} + +func ExampleClient_multisets() { + ctx := context.Background() + + rdb := redis.NewClient(&redis.Options{ + Addr: "localhost:6379", + Password: "", // no password docs + DB: 0, // use default DB + }) + + // STEP_START multisets + rdb.Del(ctx, "bikes:racing:france") + rdb.Del(ctx, "bikes:racing:usa") + rdb.Del(ctx, "bikes:racing:italy") + + _, err := rdb.SAdd(ctx, "bikes:racing:france", "bike:1", "bike:2", "bike:3").Result() + + if err != nil { + panic(err) + } + + _, err = rdb.SAdd(ctx, "bikes:racing:usa", "bike:1", "bike:4").Result() + + if err != nil { + panic(err) + } + + _, err = rdb.SAdd(ctx, "bikes:racing:italy", "bike:1", "bike:2", "bike:3", "bike:4").Result() + + if err != nil { + panic(err) + } + + res14, err := rdb.SInter(ctx, "bikes:racing:france", "bikes:racing:usa", "bikes:racing:italy").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res14) // >>> [bike:1] + + res15, err := rdb.SUnion(ctx, "bikes:racing:france", "bikes:racing:usa", 
"bikes:racing:italy").Result() + + if err != nil { + panic(err) + } + + // Sort the strings in the slice to make sure the output is lexicographical + sort.Strings(res15) + + fmt.Println(res15) // >>> [bike:1 bike:2 bike:3 bike:4] + + res16, err := rdb.SDiff(ctx, "bikes:racing:france", "bikes:racing:usa", "bikes:racing:italy").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res16) // >>> [] + + res17, err := rdb.SDiff(ctx, "bikes:racing:usa", "bikes:racing:france").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res17) // >>> [bike:4] + + res18, err := rdb.SDiff(ctx, "bikes:racing:france", "bikes:racing:usa").Result() + + if err != nil { + panic(err) + } + + // Sort the strings in the slice to make sure the output is lexicographical + sort.Strings(res18) + + fmt.Println(res18) // >>> [bike:2 bike:3] + // STEP_END + + // Output: + // [bike:1] + // [bike:1 bike:2 bike:3 bike:4] + // [] + // [bike:4] + // [bike:2 bike:3] +} + +func ExampleClient_srem() { + ctx := context.Background() + + rdb := redis.NewClient(&redis.Options{ + Addr: "localhost:6379", + Password: "", // no password docs + DB: 0, // use default DB + }) + + // STEP_START srem + rdb.Del(ctx, "bikes:racing:france") + + _, err := rdb.SAdd(ctx, "bikes:racing:france", "bike:1", "bike:2", "bike:3", "bike:4", "bike:5").Result() + + if err != nil { + panic(err) + } + + res19, err := rdb.SRem(ctx, "bikes:racing:france", "bike:1").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res19) // >>> 1 + + res20, err := rdb.SPop(ctx, "bikes:racing:france").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res20) // >>> + + res21, err := rdb.SMembers(ctx, "bikes:racing:france").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res21) // >>> + + res22, err := rdb.SRandMember(ctx, "bikes:racing:france").Result() + + if err != nil { + panic(err) + } + + fmt.Println(res22) // >>> + // STEP_END + + // Testable examples not available because the test output + // is 
not deterministic. +} From 1161d9783375a5ead6922cd4d30b52008add5bbd Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Tue, 16 Dec 2025 10:40:32 +0000 Subject: [PATCH 07/11] DOC-6106 enabled Predis set notebook --- local_examples/php/DtSetsTest.php | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/local_examples/php/DtSetsTest.php b/local_examples/php/DtSetsTest.php index 5b2d09d5a3..5444c84eb3 100644 --- a/local_examples/php/DtSetsTest.php +++ b/local_examples/php/DtSetsTest.php @@ -1,4 +1,5 @@ // EXAMPLE: sets_tutorial +// BINDER_ID php-dt-set assertEquals(3, $res8); - $r->del('bikes:racing:france'); $r->del('bikes:racing:usa'); // REMOVE_END // STEP_START sadd_smembers + $r->del('bikes:racing:france'); + $res9 = $r->sadd('bikes:racing:france', ['bike:1', 'bike:2', 'bike:3']); echo $res9 . PHP_EOL; // >>> 3 @@ -123,6 +125,10 @@ public function testDtSet() { // REMOVE_END // STEP_START multisets + $r->del('bikes:racing:france'); + $r->del('bikes:racing:usa'); + $r->del('bikes:racing:italy'); + $r->sadd('bikes:racing:france', ['bike:1', 'bike:2', 'bike:3']); $r->sadd('bikes:racing:usa', ['bike:1', 'bike:4']); $r->sadd('bikes:racing:italy', ['bike:1', 'bike:2', 'bike:3', 'bike:4']); @@ -158,6 +164,8 @@ public function testDtSet() { // REMOVE_END // STEP_START srem + $r->del('bikes:racing:france'); + $r->sadd('bikes:racing:france', ['bike:1', 'bike:2', 'bike:3', 'bike:4', 'bike:5']); $res19 = $r->srem('bikes:racing:france', ['bike:1']); From 806149abbefd4eb10a0c1a306d112be788de4625 Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Tue, 16 Dec 2025 10:41:23 +0000 Subject: [PATCH 08/11] DOC-6100 fix PHP notebook conversion glitches --- build/jupyterize/jupyterize_config.json | 31 +++++++++- build/jupyterize/test_jupyterize.py | 76 +++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) diff --git a/build/jupyterize/jupyterize_config.json b/build/jupyterize/jupyterize_config.json index e87f486d11..ad88233dd5 100644 --- 
a/build/jupyterize/jupyterize_config.json +++ b/build/jupyterize/jupyterize_config.json @@ -135,7 +135,36 @@ }, "php": { "boilerplate": [], - "unwrap_patterns": [], + "unwrap_patterns": [ + { + "type": "class_single_line", + "pattern": "^\\s*class\\s+\\w+.*\\{\\s*$", + "end_pattern": "^\\s*class\\s+\\w+.*\\{\\s*$", + "keep_content": true, + "description": "Remove class declaration with opening brace on same line" + }, + { + "type": "class_opening", + "pattern": "^\\s*class\\s+\\w+", + "end_pattern": "^\\s*\\{\\s*$", + "keep_content": true, + "description": "Remove class declaration and opening brace on separate lines" + }, + { + "type": "method_single_line", + "pattern": "^\\s*public\\s+function\\s+\\w+\\(.*\\).*\\{\\s*$", + "end_pattern": "^\\s*public\\s+function\\s+\\w+\\(.*\\).*\\{\\s*$", + "keep_content": true, + "description": "Remove public function declaration with opening brace on same line" + }, + { + "type": "method_opening", + "pattern": "^\\s*public\\s+function\\s+\\w+\\(.*\\)", + "end_pattern": "^\\s*\\{\\s*$", + "keep_content": true, + "description": "Remove public function declaration and opening brace on separate lines" + } + ], "add_step_metadata": false }, "rust": { diff --git a/build/jupyterize/test_jupyterize.py b/build/jupyterize/test_jupyterize.py index 48c1fdc335..f458129ddf 100644 --- a/build/jupyterize/test_jupyterize.py +++ b/build/jupyterize/test_jupyterize.py @@ -776,6 +776,81 @@ def test_php_no_step_metadata(): os.unlink(output_file) +def test_php_unwrapping(): + """Test that PHP class/method wrappers are removed.""" + print("\nTesting PHP unwrapping...") + + test_content = """// EXAMPLE: test_php_unwrap + 'tcp', + 'host' => '127.0.0.1', + 'port' => 6379, + 'password' => '', + 'database' => 0, + ]); + + // STEP_START test_step + $res = $r->sadd('test_key', ['value1']); + echo $res . 
PHP_EOL; + // STEP_END + } +} +""" + + with tempfile.NamedTemporaryFile(mode='w', suffix='.php', delete=False) as f: + f.write(test_content) + test_file = f.name + + try: + # Convert + output_file = test_file.replace('.php', '.ipynb') + result = jupyterize(test_file, output_file, verbose=False) + + # Load and validate notebook + with open(output_file) as f: + nb = json.load(f) + + # Verify kernel is php + assert nb['metadata']['kernelspec']['name'] == 'php', \ + f"Kernel should be php, got {nb['metadata']['kernelspec']['name']}" + + # Verify class and method wrappers are removed + all_code = ''.join(''.join(cell['source']) for cell in nb['cells']) + assert 'class DtSetsTest' not in all_code, \ + "Should not contain class declaration" + assert 'public function testDtSet' not in all_code, \ + "Should not contain method declaration" + + # Verify actual code is present + assert '$r = new PredisClient' in all_code, \ + "Should contain connection code" + assert '$r->sadd' in all_code, \ + "Should contain actual Redis command" + assert 'require' in all_code, \ + "Should contain require statement" + + # Verify we have 2 cells (preamble + step) + assert len(nb['cells']) == 2, \ + f"Should have 2 cells, got {len(nb['cells'])}" + + print("✓ PHP unwrapping test passed") + + finally: + if os.path.exists(test_file): + os.unlink(test_file) + if output_file and os.path.exists(output_file): + os.unlink(output_file) + + def main(): """Run all tests.""" print("=" * 60) @@ -815,6 +890,7 @@ def main(): # Language-specific feature tests (PHP) test_php_no_step_metadata() + test_php_unwrapping() # Regression tests test_csharp_for_loop_braces() From 7e8b9a88600a8d7d6d4fa55c6a5ab4260895115f Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Tue, 16 Dec 2025 10:56:22 +0000 Subject: [PATCH 09/11] DOC-6100 refactored jupyterize script into smaller source files --- build/jupyterize/SPECIFICATION.md | 330 ++++++++++++ build/jupyterize/config.py | 141 ++++++ build/jupyterize/jupyterize.py | 733 
+-------------------------- build/jupyterize/notebook_builder.py | 180 +++++++ build/jupyterize/parser.py | 180 +++++++ build/jupyterize/test_jupyterize.py | 22 +- build/jupyterize/unwrapper.py | 227 +++++++++ build/jupyterize/validator.py | 99 ++++ 8 files changed, 1186 insertions(+), 726 deletions(-) create mode 100644 build/jupyterize/config.py create mode 100644 build/jupyterize/notebook_builder.py create mode 100644 build/jupyterize/parser.py create mode 100644 build/jupyterize/unwrapper.py create mode 100644 build/jupyterize/validator.py diff --git a/build/jupyterize/SPECIFICATION.md b/build/jupyterize/SPECIFICATION.md index 5209feb8dd..5d340e92e9 100644 --- a/build/jupyterize/SPECIFICATION.md +++ b/build/jupyterize/SPECIFICATION.md @@ -2148,3 +2148,333 @@ This specification has been iteratively improved based on real implementation ex **Total improvement**: ~85% time reduction from no spec to v3 spec +--- + +## Architecture Refactoring Plan + +> **Status**: Planned refactoring to improve maintainability and testability +> **Current state**: Single monolithic `jupyterize.py` (843 lines) +> **Target state**: Modular architecture with 5 focused modules (~150-200 lines each) + +### Motivation + +The current `jupyterize.py` script has grown to 843 lines with multiple concerns mixed together: +- Configuration management (kernel specs, language config loading) +- File parsing (marker detection, state tracking) +- Code processing (unwrapping, dedenting) +- Notebook creation (cell generation, metadata) +- Input validation (language detection, file validation) + +**Benefits of refactoring**: +- ✅ Easier to test individual components +- ✅ Clearer separation of concerns +- ✅ Reusable modules for other tools +- ✅ Simpler to add new languages or features +- ✅ Reduced cognitive load per file + +### Proposed Module Structure + +#### 1. 
`config.py` - Configuration Management (~100 lines) + +**Responsibility**: Load and manage language-specific configuration + +**Classes**: +```python +class KernelSpecManager: + """Manages Jupyter kernel specifications for different languages.""" + + KERNEL_SPECS = { ... } # Moved from jupyterize.py + + @staticmethod + def get_kernel_spec(language): + """Get kernel spec for a language.""" + + @staticmethod + def get_language_config(language): + """Load language-specific config from jupyterize_config.json.""" +``` + +**Exports**: +- `KernelSpecManager` class +- `load_language_config()` function + +**Dependencies**: `json`, `os`, `logging` + +--- + +#### 2. `parser.py` - File Parsing (~150 lines) + +**Responsibility**: Parse source files and extract code blocks + +**Classes**: +```python +class FileParser: + """Parses source files with special comment markers.""" + + def __init__(self, language): + """Initialize parser for a specific language.""" + self.language = language + self.prefix = PREFIXES[language.lower()] + + def parse(self, file_path): + """Parse file and return list of code blocks.""" + # Returns: [{'code': str, 'step_name': str or None}, ...] + + def _check_marker(self, line, marker): + """Check if line contains a marker (with/without space).""" + + def _extract_step_name(self, line): + """Extract step name from STEP_START line.""" +``` + +**Exports**: +- `FileParser` class +- Marker constants (imported from `components.example`) + +**Dependencies**: `logging`, `re`, `components.example` + +--- + +#### 3. 
`unwrapper.py` - Code Unwrapping (~150 lines) + +**Responsibility**: Remove language-specific structural wrappers + +**Classes**: +```python +class CodeUnwrapper: + """Removes language-specific structural wrappers from code.""" + + def __init__(self, language): + """Initialize unwrapper for a specific language.""" + self.language = language + self.config = load_language_config(language) + + def unwrap(self, code): + """Remove wrappers and return cleaned code.""" + + def _remove_wrapper_keep_content(self, code, start_pattern, end_pattern): + """Remove wrapper lines but keep content between them.""" + + def _remove_matching_lines(self, code, start_pattern, end_pattern): + """Remove lines matching patterns (including matched lines).""" + + def _remove_trailing_braces(self, code, count): + """Remove closing braces from end of code.""" +``` + +**Exports**: +- `CodeUnwrapper` class + +**Dependencies**: `logging`, `re`, `textwrap`, `config.KernelSpecManager` + +--- + +#### 4. `notebook_builder.py` - Notebook Creation (~150 lines) + +**Responsibility**: Create Jupyter notebook cells and assemble notebook + +**Classes**: +```python +class NotebookBuilder: + """Builds Jupyter notebooks from parsed code blocks.""" + + def __init__(self, language): + """Initialize builder for a specific language.""" + self.language = language + self.config = load_language_config(language) + + def build(self, parsed_blocks): + """Build notebook from parsed blocks.""" + # Returns: nbformat.NotebookNode + + def _create_cells(self, parsed_blocks): + """Convert parsed blocks to notebook cells.""" + + def _create_notebook(self, cells): + """Create complete notebook with metadata.""" + + def write(self, notebook, output_path): + """Write notebook to file.""" +``` + +**Exports**: +- `NotebookBuilder` class + +**Dependencies**: `logging`, `os`, `re`, `textwrap`, `nbformat`, `config.KernelSpecManager`, `unwrapper.CodeUnwrapper` + +--- + +#### 5. 
`validator.py` - Input Validation (~80 lines) + +**Responsibility**: Validate input files and detect language + +**Classes**: +```python +class InputValidator: + """Validates input files and detects programming language.""" + + def detect_language(self, file_path): + """Detect language from file extension.""" + + def validate_file(self, file_path, language): + """Validate that file is a valid example file.""" + + def _check_example_marker(self, file_path, language): + """Check that file starts with EXAMPLE marker.""" +``` + +**Exports**: +- `InputValidator` class + +**Dependencies**: `logging`, `os`, `local_examples.EXTENSION_TO_LANGUAGE`, `components.example.PREFIXES` + +--- + +#### 6. `jupyterize.py` - Main Entry Point (~150-200 lines) + +**Responsibility**: Orchestrate the conversion pipeline + +**Functions**: +```python +def jupyterize(input_file, output_file=None, verbose=False): + """Convert code example file to Jupyter notebook.""" + # Orchestrates: validate → parse → build → write + +def main(): + """Command-line entry point.""" +``` + +**Simplified flow**: +```python +def jupyterize(input_file, output_file=None, verbose=False): + # Set up logging + + # Validate input + validator = InputValidator() + language = validator.detect_language(input_file) + validator.validate_file(input_file, language) + + # Parse file + parser = FileParser(language) + parsed_blocks = parser.parse(input_file) + + # Build notebook + builder = NotebookBuilder(language) + notebook = builder.build(parsed_blocks) + + # Write output + output_path = output_file or f"{input_file}.ipynb" + builder.write(notebook, output_path) + + return output_path +``` + +**Dependencies**: All other modules + +--- + +### File Structure After Refactoring + +``` +build/jupyterize/ +├── __init__.py (empty or exports main API) +├── jupyterize.py (main entry point, ~150-200 lines) +├── config.py (configuration management, ~100 lines) +├── parser.py (file parsing, ~150 lines) +├── unwrapper.py (code unwrapping, 
~150 lines) +├── notebook_builder.py (notebook creation, ~150 lines) +├── validator.py (input validation, ~80 lines) +├── jupyterize_config.json (unchanged) +├── test_jupyterize.py (tests, updated imports) +├── README.md (unchanged) +├── SPECIFICATION.md (this file) +└── QUICKSTART.md (unchanged) +``` + +**Total lines of code**: ~930 lines (vs 843 currently) +- Slight increase due to class structure and docstrings +- But much better organized and testable + +--- + +### Migration Strategy + +**Phase 1: Create new modules** (no changes to existing code) +1. Create `config.py` with `KernelSpecManager` class +2. Create `validator.py` with `InputValidator` class +3. Create `parser.py` with `FileParser` class +4. Create `unwrapper.py` with `CodeUnwrapper` class +5. Create `notebook_builder.py` with `NotebookBuilder` class + +**Phase 2: Update main script** +1. Update `jupyterize.py` to import from new modules +2. Simplify `jupyterize()` function to orchestrate modules +3. Keep backward compatibility (same function signature) + +**Phase 3: Update tests** +1. Update `test_jupyterize.py` to import from new modules +2. Add unit tests for individual classes +3. Keep existing integration tests + +**Phase 4: Verify** +1. Run all tests +2. Test with real example files +3. 
Verify no regressions + +--- + +### Benefits Per Module + +| Module | Benefits | +|--------|----------| +| **config.py** | Centralized kernel specs, easier to add languages, reusable by other tools | +| **parser.py** | Testable parsing logic, reusable for other marker-based tools, clear state management | +| **unwrapper.py** | Isolated unwrapping logic, easier to debug regex patterns, testable independently | +| **notebook_builder.py** | Clear cell creation logic, easier to add new cell types, testable notebook generation | +| **validator.py** | Reusable validation, easier to add new validation rules, clear error messages | +| **jupyterize.py** | Simple orchestration, easy to understand flow, minimal business logic | + +--- + +### Testing Strategy + +**Unit tests** (new): +- `test_config.py` - Test kernel spec loading +- `test_parser.py` - Test file parsing with various marker combinations +- `test_unwrapper.py` - Test code unwrapping patterns +- `test_notebook_builder.py` - Test cell creation and notebook assembly +- `test_validator.py` - Test language detection and file validation + +**Integration tests** (existing, updated): +- Keep existing tests in `test_jupyterize.py` +- Update imports to use new modules +- Add tests for module interactions + +**Backward compatibility**: +- Main `jupyterize()` function signature unchanged +- All existing tests should pass without modification (except imports) + +--- + +### Implementation Notes + +**Avoid circular imports**: +- `config.py` has no dependencies on other modules +- `validator.py` imports from `config.py` +- `parser.py` imports from `config.py` +- `unwrapper.py` imports from `config.py` +- `notebook_builder.py` imports from `config.py` and `unwrapper.py` +- `jupyterize.py` imports from all modules + +**Maintain existing behavior**: +- All language-specific logic remains the same +- Configuration format unchanged +- Marker processing unchanged +- Notebook output format unchanged + +**Future extensibility**: +- Easy 
to add new languages (update `jupyterize_config.json`) +- Easy to add new validation rules (extend `InputValidator`) +- Easy to add new unwrapping patterns (extend `CodeUnwrapper`) +- Easy to add new cell types (extend `NotebookBuilder`) + diff --git a/build/jupyterize/config.py b/build/jupyterize/config.py new file mode 100644 index 0000000000..4f35ae8333 --- /dev/null +++ b/build/jupyterize/config.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +""" +Configuration management for jupyterize. + +Handles loading language-specific configuration and managing Jupyter kernel +specifications. +""" + +import json +import logging +import os + + +# Jupyter kernel specifications for different languages +KERNEL_SPECS = { + 'python': { + 'name': 'python3', + 'display_name': 'Python 3', + 'language': 'python', + 'language_info': { + 'name': 'python', + 'version': '3.x.x', + 'mimetype': 'text/x-python', + 'file_extension': '.py' + } + }, + 'node.js': { + 'name': 'javascript', + 'display_name': 'JavaScript (Node.js)', + 'language': 'javascript', + 'language_info': { + 'name': 'javascript', + 'version': '20.0.0', + 'mimetype': 'application/javascript', + 'file_extension': '.js' + } + }, + 'go': { + 'name': 'gophernotes', + 'display_name': 'Go', + 'language': 'go', + 'language_info': { + 'name': 'go', + 'version': '1.x.x', + 'mimetype': 'text/x-go', + 'file_extension': '.go' + } + }, + 'c#': { + 'name': '.net-csharp', + 'display_name': '.NET (C#)', + 'language': 'C#', + 'language_info': { + 'name': 'C#', + 'version': '12.0', + 'mimetype': 'text/x-csharp', + 'file_extension': '.cs', + 'pygments_lexer': 'csharp' + } + }, + 'java': { + 'name': 'java', + 'display_name': 'Java', + 'language': 'java', + 'language_info': { + 'name': 'java', + 'version': '11.0.0', + 'mimetype': 'text/x-java-source', + 'file_extension': '.java' + } + }, + 'php': { + 'name': 'php', + 'display_name': 'PHP', + 'language': 'php', + 'language_info': { + 'name': 'php', + 'version': '8.0.0', + 'mimetype': 
'application/x-php', + 'file_extension': '.php' + } + }, + 'rust': { + 'name': 'rust', + 'display_name': 'Rust', + 'language': 'rust', + 'language_info': { + 'name': 'rust', + 'version': '1.x.x', + 'mimetype': 'text/x-rust', + 'file_extension': '.rs' + } + } +} + + +def load_language_config(language): + """ + Load language-specific configuration from jupyterize_config.json. + + Args: + language: Language name (e.g., 'python', 'c#') + + Returns: + dict: Configuration for the language, or empty dict if not found + """ + config_file = os.path.join(os.path.dirname(__file__), 'jupyterize_config.json') + if not os.path.exists(config_file): + logging.debug(f"Configuration file not found: {config_file}") + return {} + + try: + with open(config_file, 'r', encoding='utf-8') as f: + config = json.load(f) + return config.get(language.lower(), {}) + except json.JSONDecodeError as e: + logging.warning(f"Failed to parse configuration file: {e}") + return {} + except Exception as e: + logging.warning(f"Error loading configuration: {e}") + return {} + + +def get_kernel_spec(language): + """ + Get kernel specification for a language. 
+ + Args: + language: Language name (e.g., 'python', 'c#') + + Returns: + dict: Kernel specification, or None if not found + + Raises: + ValueError: If language is not supported + """ + kernel_spec = KERNEL_SPECS.get(language.lower()) + if not kernel_spec: + raise ValueError(f"No kernel specification for language: {language}") + return kernel_spec + diff --git a/build/jupyterize/jupyterize.py b/build/jupyterize/jupyterize.py index 54ba22e063..6b783f1413 100755 --- a/build/jupyterize/jupyterize.py +++ b/build/jupyterize/jupyterize.py @@ -16,715 +16,14 @@ """ import argparse -import json import logging -import os -import re import sys -import textwrap -import nbformat -from nbformat.v4 import new_notebook, new_code_cell - -# Add parent directory to path to import from build/ -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -# Import existing mappings -try: - from local_examples import EXTENSION_TO_LANGUAGE - from components.example import PREFIXES -except ImportError as e: - print(f"Error importing required modules: {e}", file=sys.stderr) - print("Make sure you're running this from the docs repository root.", file=sys.stderr) - sys.exit(1) - -# Marker constants (from build/components/example.py) -HIDE_START = 'HIDE_START' -HIDE_END = 'HIDE_END' -REMOVE_START = 'REMOVE_START' -REMOVE_END = 'REMOVE_END' -STEP_START = 'STEP_START' -STEP_END = 'STEP_END' -EXAMPLE = 'EXAMPLE:' -BINDER_ID = 'BINDER_ID' - -# Jupyter kernel specifications -KERNEL_SPECS = { - 'python': { - 'name': 'python3', - 'display_name': 'Python 3', - 'language': 'python', - 'language_info': { - 'name': 'python', - 'version': '3.x.x', - 'mimetype': 'text/x-python', - 'file_extension': '.py' - } - }, - 'node.js': { - 'name': 'javascript', - 'display_name': 'JavaScript (Node.js)', - 'language': 'javascript', - 'language_info': { - 'name': 'javascript', - 'version': '20.0.0', - 'mimetype': 'application/javascript', - 'file_extension': '.js' - } - }, - 'go': { - 'name': 'gophernotes', - 
'display_name': 'Go', - 'language': 'go', - 'language_info': { - 'name': 'go', - 'version': '1.x.x', - 'mimetype': 'text/x-go', - 'file_extension': '.go' - } - }, - 'c#': { - 'name': '.net-csharp', - 'display_name': '.NET (C#)', - 'language': 'C#', - 'language_info': { - 'name': 'C#', - 'version': '12.0', - 'mimetype': 'text/x-csharp', - 'file_extension': '.cs', - 'pygments_lexer': 'csharp' - } - }, - 'java': { - 'name': 'java', - 'display_name': 'Java', - 'language': 'java', - 'language_info': { - 'name': 'java', - 'version': '11.0.0', - 'mimetype': 'text/x-java-source', - 'file_extension': '.java' - } - }, - 'php': { - 'name': 'php', - 'display_name': 'PHP', - 'language': 'php', - 'language_info': { - 'name': 'php', - 'version': '8.0.0', - 'mimetype': 'application/x-php', - 'file_extension': '.php' - } - }, - 'rust': { - 'name': 'rust', - 'display_name': 'Rust', - 'language': 'rust', - 'language_info': { - 'name': 'rust', - 'version': '1.x.x', - 'mimetype': 'text/x-rust', - 'file_extension': '.rs' - } - } -} - - -def _check_marker(line, prefix, marker): - """ - Check if a line contains a marker (with or without space after prefix). - - Args: - line: Line to check - prefix: Comment prefix (e.g., '#', '//') - marker: Marker to look for (e.g., 'EXAMPLE:', 'STEP_START') - - Returns: - bool: True if marker is found - """ - return f'{prefix} {marker}' in line or f'{prefix}{marker}' in line - -def load_language_config(language): - """ - Load language-specific configuration from jupyterize_config.json. 
+from validator import InputValidator +from parser import FileParser +from notebook_builder import NotebookBuilder - Args: - language: Language name (e.g., 'python', 'c#') - Returns: - dict: Configuration for the language, or empty dict if not found - """ - config_file = os.path.join(os.path.dirname(__file__), 'jupyterize_config.json') - if not os.path.exists(config_file): - logging.debug(f"Configuration file not found: {config_file}") - return {} - - try: - with open(config_file, 'r', encoding='utf-8') as f: - config = json.load(f) - return config.get(language.lower(), {}) - except json.JSONDecodeError as e: - logging.warning(f"Failed to parse configuration file: {e}") - return {} - except Exception as e: - logging.warning(f"Error loading configuration: {e}") - return {} - - -def remove_wrapper_keep_content(code, start_pattern, end_pattern): - """ - Remove wrapper lines but keep content between them. - - Args: - code: Source code as string - start_pattern: Regex pattern for wrapper start - end_pattern: Regex pattern for wrapper end - - Returns: - str: Code with wrappers removed and content dedented - """ - lines = code.split('\n') - result = [] - in_wrapper = False - wrapper_indent = 0 - skip_next_empty = False - - for i, line in enumerate(lines): - # Check for wrapper start - if re.match(start_pattern, line): - in_wrapper = True - wrapper_indent = len(line) - len(line.lstrip()) - skip_next_empty = True - continue # Skip wrapper start line - - # Check for wrapper end - if in_wrapper and re.match(end_pattern, line): - in_wrapper = False - skip_next_empty = True - continue # Skip wrapper end line - - # Skip empty line immediately after wrapper start/end - if skip_next_empty and not line.strip(): - skip_next_empty = False - continue - - skip_next_empty = False - - # Process content inside wrapper - if in_wrapper: - # Remove wrapper indentation (typically 4 spaces) - if line.startswith(' ' * (wrapper_indent + 4)): - result.append(line[wrapper_indent + 4:]) - elif 
line.strip(): # Non-empty line with different indentation - result.append(line.lstrip()) - else: # Empty line - result.append(line) - else: - result.append(line) - - return '\n'.join(result) - - -def remove_matching_lines(code, start_pattern, end_pattern): - """ - Remove lines matching patterns (including the matched lines). - - Args: - code: Source code as string - start_pattern: Regex pattern for start line - end_pattern: Regex pattern for end line - - Returns: - tuple: (modified_code, match_count) where match_count is the number - of times the pattern was matched - """ - lines = code.split('\n') - result = [] - in_match = False - single_line_pattern = (start_pattern == end_pattern) - match_count = 0 - - for line in lines: - # Check for start pattern - if re.match(start_pattern, line): - match_count += 1 - if single_line_pattern: - # For single-line patterns, just skip this line - continue - else: - # For multi-line patterns, enter match mode - in_match = True - continue # Skip this line - - # Check for end pattern (only for multi-line patterns) - if in_match and re.match(end_pattern, line): - in_match = False - continue # Skip this line - - # Keep line if not in match - if not in_match: - result.append(line) - - return '\n'.join(result), match_count - - - -def remove_trailing_braces(code, count): - """ - Remove a specific number of closing braces from the end of the code. - - This is used after unwrapping class/method wrappers to remove only - the closing braces that correspond to the removed opening braces, - while preserving closing braces from control structures (for, foreach, if, etc.). 
- - Args: - code: Source code as string - count: Number of closing braces to remove from the end - - Returns: - str: Code with trailing closing braces removed - """ - if count <= 0: - return code - - lines = code.split('\n') - removed = 0 - - # Scan from the end, removing lines that are only closing braces - for i in range(len(lines) - 1, -1, -1): - if removed >= count: - break - - # Check if this line is only whitespace and a closing brace - if re.match(r'^\s*\}\s*$', lines[i]): - lines[i] = None # Mark for removal - removed += 1 - - # Filter out marked lines - result = [line for line in lines if line is not None] - - return '\n'.join(result) - - -def unwrap_code(code, language): - """ - Remove language-specific structural wrappers from code. - - Args: - code: Source code as string - language: Language name (e.g., 'c#') - - Returns: - str: Code with structural wrappers removed - """ - lang_config = load_language_config(language) - unwrap_patterns = lang_config.get('unwrap_patterns', []) - - if not unwrap_patterns: - return code - - # Track how many opening braces we removed (for closing brace removal) - braces_removed = 0 - - # Apply each unwrap pattern - for pattern_config in unwrap_patterns: - try: - pattern_type = pattern_config.get('type', 'unknown') - - # Skip the closing_braces pattern - we'll handle it specially - if pattern_type == 'closing_braces': - continue - - keep_content = pattern_config.get('keep_content', True) - - if keep_content: - # Remove wrapper but keep content - code = remove_wrapper_keep_content( - code, - pattern_config['pattern'], - pattern_config['end_pattern'] - ) - # For keep_content patterns, we don't track braces - match_count = 0 - else: - # Remove entire matched section - code, match_count = remove_matching_lines( - code, - pattern_config['pattern'], - pattern_config['end_pattern'] - ) - - # Count opening braces removed (only if pattern actually matched) - # For class/method patterns, we remove one opening brace per match - # 
Single-line patterns: opening brace is in the pattern itself - # Multi-line patterns: opening brace is in the end_pattern - if match_count > 0: - if '{' in pattern_config['pattern'] or '{' in pattern_config.get('end_pattern', ''): - braces_removed += match_count - - if match_count > 0: - logging.debug( - f"Applied unwrap pattern: {pattern_type} ({match_count} matches)" - ) - except KeyError as e: - logging.warning( - f"Malformed unwrap pattern (missing {e}), skipping" - ) - except re.error as e: - logging.warning( - f"Invalid regex pattern: {e}, skipping" - ) - - # Remove the corresponding number of closing braces from the end - if braces_removed > 0: - logging.debug(f"Removing {braces_removed} trailing closing braces") - code = remove_trailing_braces(code, braces_removed) - - return code - - -def detect_language(file_path): - """ - Detect programming language from file extension. - - Args: - file_path: Path to the input file - - Returns: - str: Language name (e.g., 'python', 'node.js') - - Raises: - ValueError: If file extension is not supported - """ - _, ext = os.path.splitext(file_path) - language = EXTENSION_TO_LANGUAGE.get(ext.lower()) - - if not language: - supported = ', '.join(sorted(EXTENSION_TO_LANGUAGE.keys())) - raise ValueError( - f"Unsupported file extension: {ext}\n" - f"Supported extensions: {supported}" - ) - - logging.info(f"Detected language: {language} (from extension {ext})") - return language - - -def validate_input(file_path, language): - """ - Validate input file. 
- - Args: - file_path: Path to the input file - language: Detected language - - Raises: - FileNotFoundError: If file doesn't exist - ValueError: If file is invalid - """ - # Check file exists - if not os.path.exists(file_path): - raise FileNotFoundError(f"Input file not found: {file_path}") - - if not os.path.isfile(file_path): - raise ValueError(f"Path is not a file: {file_path}") - - # Check EXAMPLE marker - prefix = PREFIXES.get(language.lower()) - if not prefix: - raise ValueError(f"Unknown comment prefix for language: {language}") - - with open(file_path, 'r', encoding='utf-8') as f: - first_line = f.readline() - - if not _check_marker(first_line, prefix, EXAMPLE): - raise ValueError( - f"File must start with '{prefix} {EXAMPLE} ' marker\n" - f"First line: {first_line.strip()}" - ) - - logging.info(f"Input file validated: {file_path}") - - -def parse_file(file_path, language): - """ - Parse file and extract cells. - - Args: - file_path: Path to the input file - language: Programming language - - Returns: - list: List of dicts with 'code' and 'step_name' keys - """ - with open(file_path, 'r', encoding='utf-8') as f: - lines = f.readlines() - - prefix = PREFIXES[language.lower()] - - # State tracking - in_remove = False - in_step = False - step_name = None - step_lines = [] - preamble_lines = [] - cells = [] - seen_step_names = set() # Track duplicate step names - - logging.debug(f"Parsing {len(lines)} lines with comment prefix '{prefix}'") - - for line_num, line in enumerate(lines, 1): - # Skip metadata markers - if _check_marker(line, prefix, EXAMPLE): - logging.debug(f"Line {line_num}: Skipping EXAMPLE marker") - continue - - if _check_marker(line, prefix, BINDER_ID): - logging.debug(f"Line {line_num}: Skipping BINDER_ID marker") - continue - - # Handle REMOVE blocks - if _check_marker(line, prefix, REMOVE_START): - if in_remove: - logging.warning(f"Line {line_num}: Nested REMOVE_START detected") - in_remove = True - logging.debug(f"Line {line_num}: Entering 
REMOVE block") - continue - - if _check_marker(line, prefix, REMOVE_END): - if not in_remove: - logging.warning(f"Line {line_num}: REMOVE_END without REMOVE_START") - in_remove = False - logging.debug(f"Line {line_num}: Exiting REMOVE block") - continue - - if in_remove: - continue - - # Skip HIDE markers (but include content) - if _check_marker(line, prefix, HIDE_START): - logging.debug(f"Line {line_num}: Skipping HIDE_START marker (content will be included)") - continue - - if _check_marker(line, prefix, HIDE_END): - logging.debug(f"Line {line_num}: Skipping HIDE_END marker") - continue - - # Handle STEP blocks - if _check_marker(line, prefix, STEP_START): - if in_step: - logging.warning(f"Line {line_num}: Nested STEP_START detected") - - # Save preamble if exists - if preamble_lines: - preamble_code = ''.join(preamble_lines) - cells.append({'code': preamble_code, 'step_name': None}) - logging.debug(f"Saved preamble cell ({len(preamble_lines)} lines)") - preamble_lines = [] - - in_step = True - # Extract step name - if STEP_START in line: - step_name = line.split(STEP_START)[1].strip() - - # Check for duplicate step names - if step_name and step_name in seen_step_names: - logging.warning( - f"Line {line_num}: Duplicate step name '{step_name}' " - f"(previously defined)" - ) - elif step_name: - seen_step_names.add(step_name) - - logging.debug(f"Line {line_num}: Starting step '{step_name}'") - else: - step_name = None - logging.debug(f"Line {line_num}: Starting unnamed step") - step_lines = [] - continue - - if _check_marker(line, prefix, STEP_END): - if not in_step: - logging.warning(f"Line {line_num}: STEP_END without STEP_START") - - if step_lines: - step_code = ''.join(step_lines) - cells.append({'code': step_code, 'step_name': step_name}) - logging.debug(f"Saved step cell '{step_name}' ({len(step_lines)} lines)") - - in_step = False - step_name = None - step_lines = [] - continue - - # Collect code - if in_step: - step_lines.append(line) - else: - 
preamble_lines.append(line) - - # Save remaining preamble - if preamble_lines: - preamble_code = ''.join(preamble_lines) - cells.append({'code': preamble_code, 'step_name': None}) - logging.debug(f"Saved final preamble cell ({len(preamble_lines)} lines)") - - # Check for unclosed blocks - if in_remove: - logging.warning("File ended with unclosed REMOVE block") - if in_step: - logging.warning("File ended with unclosed STEP block") - - logging.info(f"Parsed {len(cells)} cells from file") - return cells - - -def create_cells(parsed_blocks, language): - """ - Convert parsed blocks to notebook cells. - - Args: - parsed_blocks: List of dicts with 'code' and 'step_name' - language: Programming language (for boilerplate injection and unwrapping) - - Returns: - list: List of nbformat cell objects - """ - cells = [] - - # Get language configuration - lang_config = load_language_config(language) - - # Get boilerplate if defined - boilerplate = lang_config.get('boilerplate', []) - boilerplate_code = '\n'.join(boilerplate) if boilerplate else None - - # For Go, append boilerplate to first cell instead of creating separate cell - # This ensures imports and func main() {} are in the same cell - append_boilerplate_to_first_cell = language.lower() == 'go' - - # Add boilerplate cell if defined (except for Go, which appends to first cell) - if boilerplate and not append_boilerplate_to_first_cell: - boilerplate_cell = new_code_cell(source=boilerplate_code) - boilerplate_cell.metadata['cell_type'] = 'boilerplate' - boilerplate_cell.metadata['language'] = language - cells.append(boilerplate_cell) - logging.info(f"Added boilerplate cell for {language} ({len(boilerplate)} lines)") - - # Process regular cells - first_cell_processed = False - for i, block in enumerate(parsed_blocks): - code = block['code'] - - # Apply unwrapping if configured - if lang_config.get('unwrap_patterns'): - original_code = code - code = unwrap_code(code, language) - if code != original_code: - 
logging.debug(f"Applied unwrapping to cell {i}") - - # Dedent code if unwrap patterns are configured - # (code may have been indented inside wrappers) - if lang_config.get('unwrap_patterns'): - code = textwrap.dedent(code) - - # Strip trailing whitespace - code = code.rstrip() - - # Skip empty cells - if not code.strip(): - logging.debug(f"Skipping empty cell {i}") - continue - - # Skip cells that contain only closing braces and whitespace - # (orphaned closing braces from removed class/method wrappers) - if lang_config.get('unwrap_patterns'): - # Remove all whitespace and check if only closing braces remain - code_no_whitespace = re.sub(r'\s', '', code) - if code_no_whitespace and re.match(r'^}+$', code_no_whitespace): - logging.debug(f"Skipping cell {i} (contains only closing braces)") - continue - - # For Go: append boilerplate to first cell (imports) - if append_boilerplate_to_first_cell and not first_cell_processed: - if boilerplate_code: - code = code + '\n\n' + boilerplate_code - logging.info(f"Appended boilerplate to first cell for {language}") - first_cell_processed = True - - # Create code cell - cell = new_code_cell(source=code) - - # Add step metadata if present and enabled for this language - add_step_metadata = lang_config.get('add_step_metadata', True) # Default to True for backward compatibility - if block['step_name'] and add_step_metadata: - cell.metadata['step'] = block['step_name'] - logging.debug(f"Created cell {i} with step '{block['step_name']}'") - else: - logging.debug(f"Created cell {i} (preamble)") - - cells.append(cell) - - logging.info(f"Created {len(cells)} notebook cells") - return cells - - -def create_notebook(cells, language): - """ - Create complete Jupyter notebook. 
- - Args: - cells: List of nbformat cell objects - language: Programming language - - Returns: - nbformat.NotebookNode: Complete notebook - """ - nb = new_notebook() - nb.cells = cells - - # Set kernel metadata - kernel_spec = KERNEL_SPECS.get(language.lower()) - if not kernel_spec: - raise ValueError(f"No kernel specification for language: {language}") - - nb.metadata.kernelspec = { - 'display_name': kernel_spec['display_name'], - 'language': kernel_spec.get('language', language.lower()), - 'name': kernel_spec['name'] - } - - # Use language_info from kernel spec - nb.metadata.language_info = kernel_spec.get('language_info', { - 'name': language.lower() - }) - - logging.info(f"Created notebook with kernel: {kernel_spec['name']}") - return nb - - -def write_notebook(notebook, output_path): - """ - Write notebook to file. - - Args: - notebook: nbformat.NotebookNode object - output_path: Output file path - """ - # Create output directory if needed - output_dir = os.path.dirname(output_path) - if output_dir and not os.path.exists(output_dir): - os.makedirs(output_dir, exist_ok=True) - logging.debug(f"Created output directory: {output_dir}") - - # Write notebook - try: - with open(output_path, 'w', encoding='utf-8') as f: - nbformat.write(notebook, f) - logging.info(f"Wrote notebook to: {output_path}") - except IOError as e: - raise IOError(f"Failed to write notebook: {e}") def jupyterize(input_file, output_file=None, verbose=False): @@ -739,6 +38,8 @@ def jupyterize(input_file, output_file=None, verbose=False): Returns: str: Path to output file """ + import os + # Set up logging log_level = logging.DEBUG if verbose else logging.INFO logging.basicConfig( @@ -754,29 +55,27 @@ def jupyterize(input_file, output_file=None, verbose=False): logging.info(f"Converting {input_file} to {output_file}") try: - # Detect language - language = detect_language(input_file) - - # Validate input - validate_input(input_file, language) + # Validate input and detect language + validator = 
InputValidator() + language = validator.detect_language(input_file) + validator.validate_file(input_file, language) # Parse file - parsed_blocks = parse_file(input_file, language) + parser = FileParser(language) + parsed_blocks = parser.parse(input_file) if not parsed_blocks: logging.warning("No code blocks found in file") - # Create cells (with language-specific boilerplate and unwrapping) - cells = create_cells(parsed_blocks, language) + # Build notebook + builder = NotebookBuilder(language) + notebook = builder.build(parsed_blocks) - if not cells: + if not notebook.cells: logging.warning("No cells created (all code may be in REMOVE blocks)") - # Create notebook - notebook = create_notebook(cells, language) - # Write to file - write_notebook(notebook, output_file) + builder.write(notebook, output_file) logging.info("Conversion completed successfully") return output_file diff --git a/build/jupyterize/notebook_builder.py b/build/jupyterize/notebook_builder.py new file mode 100644 index 0000000000..8c0c77b832 --- /dev/null +++ b/build/jupyterize/notebook_builder.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python3 +""" +Notebook building for jupyterize. + +Creates Jupyter notebook cells and assembles complete notebooks. +""" + +import logging +import os +import re +import textwrap + +import nbformat +from nbformat.v4 import new_notebook, new_code_cell + +from config import load_language_config, get_kernel_spec +from unwrapper import CodeUnwrapper + + +class NotebookBuilder: + """Builds Jupyter notebooks from parsed code blocks.""" + + def __init__(self, language): + """ + Initialize builder for a specific language. + + Args: + language: Programming language (e.g., 'python', 'c#') + """ + self.language = language + self.config = load_language_config(language) + + def build(self, parsed_blocks): + """ + Build notebook from parsed blocks. 
+ + Args: + parsed_blocks: List of dicts with 'code' and 'step_name' + + Returns: + nbformat.NotebookNode: Complete notebook + """ + cells = self._create_cells(parsed_blocks) + notebook = self._create_notebook(cells) + return notebook + + def _create_cells(self, parsed_blocks): + """ + Convert parsed blocks to notebook cells. + + Args: + parsed_blocks: List of dicts with 'code' and 'step_name' + + Returns: + list: List of nbformat cell objects + """ + cells = [] + + # Get boilerplate if defined + boilerplate = self.config.get('boilerplate', []) + boilerplate_code = '\n'.join(boilerplate) if boilerplate else None + + # For Go, append boilerplate to first cell instead of creating separate cell + append_boilerplate_to_first_cell = self.language.lower() == 'go' + + # Add boilerplate cell if defined (except for Go, which appends to first cell) + if boilerplate and not append_boilerplate_to_first_cell: + boilerplate_cell = new_code_cell(source=boilerplate_code) + boilerplate_cell.metadata['cell_type'] = 'boilerplate' + boilerplate_cell.metadata['language'] = self.language + cells.append(boilerplate_cell) + logging.info(f"Added boilerplate cell for {self.language} ({len(boilerplate)} lines)") + + # Process regular cells + first_cell_processed = False + for i, block in enumerate(parsed_blocks): + code = block['code'] + + # Apply unwrapping if configured + if self.config.get('unwrap_patterns'): + unwrapper = CodeUnwrapper(self.language) + original_code = code + code = unwrapper.unwrap(code) + if code != original_code: + logging.debug(f"Applied unwrapping to cell {i}") + + # Dedent code if unwrap patterns are configured + if self.config.get('unwrap_patterns'): + code = textwrap.dedent(code) + + # Strip trailing whitespace + code = code.rstrip() + + # Skip empty cells + if not code.strip(): + logging.debug(f"Skipping empty cell {i}") + continue + + # Skip cells that contain only closing braces and whitespace + if self.config.get('unwrap_patterns'): + code_no_whitespace = 
re.sub(r'\s', '', code) + if code_no_whitespace and re.match(r'^}+$', code_no_whitespace): + logging.debug(f"Skipping cell {i} (contains only closing braces)") + continue + + # For Go: append boilerplate to first cell (imports) + if append_boilerplate_to_first_cell and not first_cell_processed: + if boilerplate_code: + code = code + '\n\n' + boilerplate_code + logging.info(f"Appended boilerplate to first cell for {self.language}") + first_cell_processed = True + + # Create code cell + cell = new_code_cell(source=code) + + # Add step metadata if present and enabled for this language + add_step_metadata = self.config.get('add_step_metadata', True) + if block['step_name'] and add_step_metadata: + cell.metadata['step'] = block['step_name'] + logging.debug(f"Created cell {i} with step '{block['step_name']}'") + else: + logging.debug(f"Created cell {i} (preamble)") + + cells.append(cell) + + logging.info(f"Created {len(cells)} notebook cells") + return cells + + def _create_notebook(self, cells): + """ + Create complete Jupyter notebook. + + Args: + cells: List of nbformat cell objects + + Returns: + nbformat.NotebookNode: Complete notebook + """ + nb = new_notebook() + nb.cells = cells + + # Set kernel metadata + kernel_spec = get_kernel_spec(self.language) + + nb.metadata.kernelspec = { + 'display_name': kernel_spec['display_name'], + 'language': kernel_spec.get('language', self.language.lower()), + 'name': kernel_spec['name'] + } + + # Use language_info from kernel spec + nb.metadata.language_info = kernel_spec.get('language_info', { + 'name': self.language.lower() + }) + + logging.info(f"Created notebook with kernel: {kernel_spec['name']}") + return nb + + def write(self, notebook, output_path): + """ + Write notebook to file. 
+ + Args: + notebook: nbformat.NotebookNode object + output_path: Output file path + """ + # Create output directory if needed + output_dir = os.path.dirname(output_path) + if output_dir and not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + logging.debug(f"Created output directory: {output_dir}") + + # Write notebook + try: + with open(output_path, 'w', encoding='utf-8') as f: + nbformat.write(notebook, f) + logging.info(f"Wrote notebook to: {output_path}") + except IOError as e: + raise IOError(f"Failed to write notebook: {e}") + diff --git a/build/jupyterize/parser.py b/build/jupyterize/parser.py new file mode 100644 index 0000000000..8cc8500ae5 --- /dev/null +++ b/build/jupyterize/parser.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python3 +""" +File parsing for jupyterize. + +Parses source files with special comment markers and extracts code blocks. +""" + +import logging +import os +import sys + +# Add parent directory to path to import from build/ +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +from components.example import ( + HIDE_START, HIDE_END, + REMOVE_START, REMOVE_END, + STEP_START, STEP_END, + EXAMPLE, BINDER_ID, + PREFIXES +) + + +def _check_marker(line, prefix, marker): + """ + Check if a line contains a marker (with or without space after prefix). + + Args: + line: Line to check + prefix: Comment prefix (e.g., '#', '//') + marker: Marker to look for (e.g., 'EXAMPLE:', 'STEP_START') + + Returns: + bool: True if marker is found + """ + return f'{prefix} {marker}' in line or f'{prefix}{marker}' in line + + +class FileParser: + """Parses source files with special comment markers.""" + + def __init__(self, language): + """ + Initialize parser for a specific language. + + Args: + language: Programming language (e.g., 'python', 'c#') + """ + self.language = language + self.prefix = PREFIXES[language.lower()] + + def parse(self, file_path): + """ + Parse file and extract cells. 
+ + Args: + file_path: Path to the input file + + Returns: + list: List of dicts with 'code' and 'step_name' keys + """ + with open(file_path, 'r', encoding='utf-8') as f: + lines = f.readlines() + + # State tracking + in_remove = False + in_step = False + step_name = None + step_lines = [] + preamble_lines = [] + cells = [] + seen_step_names = set() + + logging.debug(f"Parsing {len(lines)} lines with comment prefix '{self.prefix}'") + + for line_num, line in enumerate(lines, 1): + # Skip metadata markers + if _check_marker(line, self.prefix, EXAMPLE): + logging.debug(f"Line {line_num}: Skipping EXAMPLE marker") + continue + + if _check_marker(line, self.prefix, BINDER_ID): + logging.debug(f"Line {line_num}: Skipping BINDER_ID marker") + continue + + # Handle REMOVE blocks + if _check_marker(line, self.prefix, REMOVE_START): + if in_remove: + logging.warning(f"Line {line_num}: Nested REMOVE_START detected") + in_remove = True + logging.debug(f"Line {line_num}: Entering REMOVE block") + continue + + if _check_marker(line, self.prefix, REMOVE_END): + if not in_remove: + logging.warning(f"Line {line_num}: REMOVE_END without REMOVE_START") + in_remove = False + logging.debug(f"Line {line_num}: Exiting REMOVE block") + continue + + if in_remove: + continue + + # Skip HIDE markers (but include content) + if _check_marker(line, self.prefix, HIDE_START): + logging.debug(f"Line {line_num}: Skipping HIDE_START marker (content will be included)") + continue + + if _check_marker(line, self.prefix, HIDE_END): + logging.debug(f"Line {line_num}: Skipping HIDE_END marker") + continue + + # Handle STEP blocks + if _check_marker(line, self.prefix, STEP_START): + if in_step: + logging.warning(f"Line {line_num}: Nested STEP_START detected") + + # Save preamble if exists + if preamble_lines: + preamble_code = ''.join(preamble_lines) + cells.append({'code': preamble_code, 'step_name': None}) + logging.debug(f"Saved preamble cell ({len(preamble_lines)} lines)") + preamble_lines = [] + + 
in_step = True + # Extract step name + if STEP_START in line: + step_name = line.split(STEP_START)[1].strip() + + # Check for duplicate step names + if step_name and step_name in seen_step_names: + logging.warning( + f"Line {line_num}: Duplicate step name '{step_name}' " + f"(previously defined)" + ) + elif step_name: + seen_step_names.add(step_name) + + logging.debug(f"Line {line_num}: Starting step '{step_name}'") + else: + step_name = None + logging.debug(f"Line {line_num}: Starting unnamed step") + step_lines = [] + continue + + if _check_marker(line, self.prefix, STEP_END): + if not in_step: + logging.warning(f"Line {line_num}: STEP_END without STEP_START") + + if step_lines: + step_code = ''.join(step_lines) + cells.append({'code': step_code, 'step_name': step_name}) + logging.debug(f"Saved step cell '{step_name}' ({len(step_lines)} lines)") + + in_step = False + step_name = None + step_lines = [] + continue + + # Collect code + if in_step: + step_lines.append(line) + else: + preamble_lines.append(line) + + # Save remaining preamble + if preamble_lines: + preamble_code = ''.join(preamble_lines) + cells.append({'code': preamble_code, 'step_name': None}) + logging.debug(f"Saved final preamble cell ({len(preamble_lines)} lines)") + + # Check for unclosed blocks + if in_remove: + logging.warning("File ended with unclosed REMOVE block") + if in_step: + logging.warning("File ended with unclosed STEP block") + + logging.info(f"Parsed {len(cells)} cells from file") + return cells + diff --git a/build/jupyterize/test_jupyterize.py b/build/jupyterize/test_jupyterize.py index f458129ddf..5563fb532a 100644 --- a/build/jupyterize/test_jupyterize.py +++ b/build/jupyterize/test_jupyterize.py @@ -13,24 +13,28 @@ # Add parent directory to path sys.path.insert(0, os.path.dirname(__file__)) -from jupyterize import jupyterize, detect_language, validate_input, parse_file +from jupyterize import jupyterize +from validator import InputValidator +from parser import FileParser def 
test_language_detection(): """Test language detection from file extensions.""" print("Testing language detection...") - assert detect_language('example.py') == 'python' - assert detect_language('example.js') == 'node.js' - assert detect_language('example.go') == 'go' - assert detect_language('example.cs') == 'c#' - assert detect_language('example.java') == 'java' - assert detect_language('example.php') == 'php' - assert detect_language('example.rs') == 'rust' + validator = InputValidator() + + assert validator.detect_language('example.py') == 'python' + assert validator.detect_language('example.js') == 'node.js' + assert validator.detect_language('example.go') == 'go' + assert validator.detect_language('example.cs') == 'c#' + assert validator.detect_language('example.java') == 'java' + assert validator.detect_language('example.php') == 'php' + assert validator.detect_language('example.rs') == 'rust' # Test unsupported extension try: - detect_language('example.txt') + validator.detect_language('example.txt') assert False, "Should have raised ValueError" except ValueError as e: assert "Unsupported file extension" in str(e) diff --git a/build/jupyterize/unwrapper.py b/build/jupyterize/unwrapper.py new file mode 100644 index 0000000000..e58a3bb4ad --- /dev/null +++ b/build/jupyterize/unwrapper.py @@ -0,0 +1,227 @@ +#!/usr/bin/env python3 +""" +Code unwrapping for jupyterize. + +Removes language-specific structural wrappers from code. +""" + +import logging +import re + +from config import load_language_config + + +def _remove_wrapper_keep_content(code, start_pattern, end_pattern): + """ + Remove wrapper lines but keep content between them. 
+ + Args: + code: Source code as string + start_pattern: Regex pattern for wrapper start + end_pattern: Regex pattern for wrapper end + + Returns: + str: Code with wrappers removed and content dedented + """ + lines = code.split('\n') + result = [] + in_wrapper = False + wrapper_indent = 0 + skip_next_empty = False + + for i, line in enumerate(lines): + # Check for wrapper start + if re.match(start_pattern, line): + in_wrapper = True + wrapper_indent = len(line) - len(line.lstrip()) + skip_next_empty = True + continue # Skip wrapper start line + + # Check for wrapper end + if in_wrapper and re.match(end_pattern, line): + in_wrapper = False + skip_next_empty = True + continue # Skip wrapper end line + + # Skip empty line immediately after wrapper start/end + if skip_next_empty and not line.strip(): + skip_next_empty = False + continue + + skip_next_empty = False + + # Process content inside wrapper + if in_wrapper: + # Remove wrapper indentation (typically 4 spaces) + if line.startswith(' ' * (wrapper_indent + 4)): + result.append(line[wrapper_indent + 4:]) + elif line.strip(): # Non-empty line with different indentation + result.append(line.lstrip()) + else: # Empty line + result.append(line) + else: + result.append(line) + + return '\n'.join(result) + + +def _remove_matching_lines(code, start_pattern, end_pattern): + """ + Remove lines matching patterns (including the matched lines). 
+ + Args: + code: Source code as string + start_pattern: Regex pattern for start line + end_pattern: Regex pattern for end line + + Returns: + tuple: (modified_code, match_count) where match_count is the number + of times the pattern was matched + """ + lines = code.split('\n') + result = [] + in_match = False + single_line_pattern = (start_pattern == end_pattern) + match_count = 0 + + for line in lines: + # Check for start pattern + if re.match(start_pattern, line): + match_count += 1 + if single_line_pattern: + # For single-line patterns, just skip this line + continue + else: + # For multi-line patterns, enter match mode + in_match = True + continue # Skip this line + + # Check for end pattern (only for multi-line patterns) + if in_match and re.match(end_pattern, line): + in_match = False + continue # Skip this line + + # Keep line if not in match + if not in_match: + result.append(line) + + return '\n'.join(result), match_count + + +def _remove_trailing_braces(code, count): + """ + Remove a specific number of closing braces from the end of the code. + + Args: + code: Source code as string + count: Number of closing braces to remove from the end + + Returns: + str: Code with trailing closing braces removed + """ + if count <= 0: + return code + + lines = code.split('\n') + removed = 0 + + # Scan from the end, removing lines that are only closing braces + for i in range(len(lines) - 1, -1, -1): + if removed >= count: + break + + # Check if this line is only whitespace and a closing brace + if re.match(r'^\s*\}\s*$', lines[i]): + lines[i] = None # Mark for removal + removed += 1 + + # Filter out marked lines + result = [line for line in lines if line is not None] + + return '\n'.join(result) + + +class CodeUnwrapper: + """Removes language-specific structural wrappers from code.""" + + def __init__(self, language): + """ + Initialize unwrapper for a specific language. 
+ + Args: + language: Language name (e.g., 'c#') + """ + self.language = language + self.config = load_language_config(language) + + def unwrap(self, code): + """ + Remove language-specific structural wrappers from code. + + Args: + code: Source code as string + + Returns: + str: Code with structural wrappers removed + """ + unwrap_patterns = self.config.get('unwrap_patterns', []) + + if not unwrap_patterns: + return code + + # Track how many opening braces we removed (for closing brace removal) + braces_removed = 0 + + # Apply each unwrap pattern + for pattern_config in unwrap_patterns: + try: + pattern_type = pattern_config.get('type', 'unknown') + + # Skip the closing_braces pattern - we'll handle it specially + if pattern_type == 'closing_braces': + continue + + keep_content = pattern_config.get('keep_content', True) + + if keep_content: + # Remove wrapper but keep content + code = _remove_wrapper_keep_content( + code, + pattern_config['pattern'], + pattern_config['end_pattern'] + ) + # For keep_content patterns, we don't track braces + match_count = 0 + else: + # Remove entire matched section + code, match_count = _remove_matching_lines( + code, + pattern_config['pattern'], + pattern_config['end_pattern'] + ) + + # Count opening braces removed (only if pattern actually matched) + if match_count > 0: + if '{' in pattern_config['pattern'] or '{' in pattern_config.get('end_pattern', ''): + braces_removed += match_count + + if match_count > 0: + logging.debug( + f"Applied unwrap pattern: {pattern_type} ({match_count} matches)" + ) + except KeyError as e: + logging.warning( + f"Malformed unwrap pattern (missing {e}), skipping" + ) + except re.error as e: + logging.warning( + f"Invalid regex pattern: {e}, skipping" + ) + + # Remove the corresponding number of closing braces from the end + if braces_removed > 0: + logging.debug(f"Removing {braces_removed} trailing closing braces") + code = _remove_trailing_braces(code, braces_removed) + + return code + diff --git 
a/build/jupyterize/validator.py b/build/jupyterize/validator.py new file mode 100644 index 0000000000..e977973b58 --- /dev/null +++ b/build/jupyterize/validator.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +""" +Input validation for jupyterize. + +Handles language detection and input file validation. +""" + +import logging +import os +import sys + +# Add parent directory to path to import from build/ +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +from local_examples import EXTENSION_TO_LANGUAGE +from components.example import PREFIXES, EXAMPLE + + +def _check_marker(line, prefix, marker): + """ + Check if a line contains a marker (with or without space after prefix). + + Args: + line: Line to check + prefix: Comment prefix (e.g., '#', '//') + marker: Marker to look for (e.g., 'EXAMPLE:') + + Returns: + bool: True if marker is found + """ + return f'{prefix} {marker}' in line or f'{prefix}{marker}' in line + + +class InputValidator: + """Validates input files and detects programming language.""" + + @staticmethod + def detect_language(file_path): + """ + Detect programming language from file extension. + + Args: + file_path: Path to the input file + + Returns: + str: Language name (e.g., 'python', 'node.js') + + Raises: + ValueError: If file extension is not supported + """ + _, ext = os.path.splitext(file_path) + language = EXTENSION_TO_LANGUAGE.get(ext.lower()) + + if not language: + supported = ', '.join(sorted(EXTENSION_TO_LANGUAGE.keys())) + raise ValueError( + f"Unsupported file extension: {ext}\n" + f"Supported extensions: {supported}" + ) + + logging.info(f"Detected language: {language} (from extension {ext})") + return language + + @staticmethod + def validate_file(file_path, language): + """ + Validate input file. 
+ + Args: + file_path: Path to the input file + language: Detected language + + Raises: + FileNotFoundError: If file doesn't exist + ValueError: If file is invalid + """ + # Check file exists + if not os.path.exists(file_path): + raise FileNotFoundError(f"Input file not found: {file_path}") + + if not os.path.isfile(file_path): + raise ValueError(f"Path is not a file: {file_path}") + + # Check EXAMPLE marker + prefix = PREFIXES.get(language.lower()) + if not prefix: + raise ValueError(f"Unknown comment prefix for language: {language}") + + with open(file_path, 'r', encoding='utf-8') as f: + first_line = f.readline() + + if not _check_marker(first_line, prefix, EXAMPLE): + raise ValueError( + f"File must start with '{prefix} {EXAMPLE} ' marker\n" + f"First line: {first_line.strip()}" + ) + + logging.info(f"Input file validated: {file_path}") + From 6c48e72472a2d30cfbfe56a06fcf44d7b9d2d3b1 Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Tue, 16 Dec 2025 10:58:22 +0000 Subject: [PATCH 10/11] Refactor jupyterize.py into modular architecture with 5 focused modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Created config.py: Configuration and kernel spec management - Created validator.py: Input validation and language detection - Created parser.py: File parsing with marker detection - Created unwrapper.py: Code unwrapping logic - Created notebook_builder.py: Notebook creation and cell generation - Simplified jupyterize.py: Main orchestrator (696 → 142 lines) - Updated test_jupyterize.py: New module imports - Updated SPECIFICATION.md: Added implementation lessons learned Benefits: - 80% reduction in main script size - Clear separation of concerns - Each module independently testable - Easier to maintain and extend - All 25+ tests pass - Backward compatible Module sizes: - config.py: ~120 lines - validator.py: ~95 lines - parser.py: ~180 lines - unwrapper.py: ~180 lines - notebook_builder.py: ~160 lines - jupyterize.py: 
~142 lines (was 696) Total: ~877 lines (vs 843 original) with much better organization --- ai-agent-resources-feedback.md | 247 +++++++++++++++ ai-friendliness-report.md | 311 +++++++++++++++++++ build/jupyterize/SPECIFICATION.md | 105 +++++++ commands-page-mockup copy.html | 389 +++++++++++++++++++++++ commands-page-mockup.html | 461 ++++++++++++++++++++++++++++ docs-improvement-recommendations.md | 407 ++++++++++++++++++++++++ 6 files changed, 1920 insertions(+) create mode 100644 ai-agent-resources-feedback.md create mode 100644 ai-friendliness-report.md create mode 100644 commands-page-mockup copy.html create mode 100644 commands-page-mockup.html create mode 100644 docs-improvement-recommendations.md diff --git a/ai-agent-resources-feedback.md b/ai-agent-resources-feedback.md new file mode 100644 index 0000000000..b2d9f126f3 --- /dev/null +++ b/ai-agent-resources-feedback.md @@ -0,0 +1,247 @@ +# Feedback: AI Agent Resources Page Draft + +## Status: Good Foundation, Needs Expansion + +This is a solid start that addresses the core discovery problem. However, it's quite minimal and could be significantly enhanced to be more useful for AI agents. + +--- + +## What Works Well ✅ + +1. **Clear Purpose** - Immediately explains what the page is for +2. **llms.txt Discovery** - Highlights the main index +3. **Markdown Format** - Explains the `.html.md` URL pattern +4. **API References** - Lists client libraries with links +5. **Frontmatter** - Good metadata for discoverability + +--- + +## What's Missing or Could Be Improved + +### 1. MCP Server Not Mentioned ⚠️ + +**Current:** Only mentions llms.txt and Markdown URLs + +**Issue:** The MCP server is a critical resource for AI agents but isn't mentioned + +**Suggestion:** Add section: +```markdown +## Redis MCP Server + +The Redis Model Context Protocol (MCP) server provides a standardized interface for AI agents to access Redis documentation, examples, and data operations. 
+ +- **GitHub:** [redis/mcp-redis](https://github.com/redis/mcp-redis) +- **Installation:** `uvx --from redis-mcp-server@latest redis-mcp-server --url redis://localhost:6379/0` +- **Supported Clients:** Claude Desktop, VS Code, Augment, OpenAI Agents, and any MCP-compatible client +- **Capabilities:** Documentation queries, code examples, API references, vector search, data operations + +See [Redis MCP Server documentation](https://redis.io/docs/latest/integrate/redis-mcp/) for setup and usage. +``` + +### 2. No Guidance on When to Use Each Resource + +**Current:** Lists resources but doesn't explain when to use each + +**Suggestion:** Add comparison table: +```markdown +## Choosing the Right Resource + +| Resource | Best For | Pros | Cons | +|----------|----------|------|------| +| **llms.txt** | Discovering all available docs | Comprehensive, curated | Requires parsing | +| **Markdown URLs** | Reading specific documentation | Human-readable, AI-friendly | Requires web access | +| **MCP Server** | Querying docs programmatically | Structured, standardized, reliable | Requires MCP client | +| **API References** | Understanding method signatures | Authoritative, detailed | Language-specific | +| **Code Examples** | Learning patterns and best practices | Practical, tested | May be scattered | +``` + +### 3. No Explanation of Accessibility Issues + +**Current:** Doesn't mention that not all AI clients can access llms.txt/Markdown equally + +**Suggestion:** Add note: +```markdown +## Important: Accessibility Considerations + +While llms.txt and Markdown documentation are valuable resources, **not all AI clients can access them equally** due to HTTP client compatibility issues, User-Agent filtering, or network restrictions. + +If you're unable to access these resources directly, the **Redis MCP Server** provides a guaranteed, standardized access method that works across all MCP-compatible clients. +``` + +### 4. 
No Link to Use-Case Documentation + +**Current:** Doesn't mention the new caching use-case doc (or other use-case docs) + +**Suggestion:** Add section: +```markdown +## Use-Case Documentation + +Redis provides comprehensive guides for common use cases, structured for both human developers and AI agents: + +- [Caching](https://redis.io/docs/latest/develop/use-cases/caching/) - Patterns, examples, and best practices +- [Session Storage](https://redis.io/docs/latest/develop/use-cases/sessions/) - Session management patterns +- [Rate Limiting](https://redis.io/docs/latest/develop/use-cases/rate-limiting/) - Rate limiting strategies +- [Queues](https://redis.io/docs/latest/develop/use-cases/queues/) - Queue patterns and implementations + +Each guide includes: +- Pattern comparison tables +- Error handling examples +- Async/sync variants +- API references +- Common mistakes and solutions +``` + +### 5. No Guidance on Code Examples + +**Current:** Doesn't mention where to find code examples + +**Suggestion:** Add section: +```markdown +## Code Examples + +Redis provides tested code examples for common patterns: + +- **Local Examples:** Available in the [Redis documentation repository](https://github.com/redis/docs/tree/main/local_examples) +- **Pattern-Based:** Examples organized by use case (cache-aside, sessions, rate-limiting, etc.) +- **Multi-Language:** Examples in Python, JavaScript, Java, Go, and other languages +- **Tested:** All examples are tested and verified to work + +Use the MCP Server's code example finder to discover examples for your use case. +``` + +### 6. 
No Explanation of YAML Metadata + +**Current:** Doesn't explain the structured metadata in docs + +**Suggestion:** Add note: +```markdown +## Machine-Readable Metadata + +Redis documentation includes YAML frontmatter with structured metadata to help AI agents understand: + +- **Patterns:** Which caching/queuing patterns are covered +- **Languages:** Supported programming languages +- **Complexity:** Difficulty level (basic, moderate, advanced) +- **Version Requirements:** Minimum Redis and client library versions +- **Related Patterns:** Links to related use cases + +This metadata enables AI agents to: +- Filter documentation by language or complexity +- Understand version compatibility +- Discover related patterns +- Generate appropriate code examples +``` + +### 7. No Best Practices for AI Agents + +**Current:** Doesn't provide guidance on how to use these resources effectively + +**Suggestion:** Add section: +```markdown +## Best Practices for AI Agents + +When using Redis documentation: + +1. **Start with llms.txt** - Get an overview of available documentation +2. **Use the MCP Server for queries** - More reliable than direct web access +3. **Check version requirements** - Verify compatibility with your Redis and client versions +4. **Review error handling examples** - Learn how to handle common failure modes +5. **Understand trade-offs** - Each pattern has different consistency/performance characteristics +6. **Test with code examples** - Use provided examples as starting points +7. 
**Monitor metrics** - Use the metrics examples to track cache/queue effectiveness +``` + +--- + +## Suggested Structure for Expanded Version + +```markdown +# AI Agent Resources + +## Quick Start +- What this page is for +- How to get started + +## Core Resources +- llms.txt index +- Markdown documentation format +- MCP Server + +## Choosing the Right Resource +- Comparison table +- When to use each + +## Accessibility & Compatibility +- Note about unequal access +- MCP as fallback +- Version requirements + +## Use-Case Documentation +- Links to caching, sessions, rate-limiting, queues +- What each includes + +## Code Examples +- Where to find them +- How to use them +- Multi-language support + +## API References +- Links to client libraries +- How to use them + +## Machine-Readable Metadata +- YAML frontmatter explanation +- How AI agents use it + +## Best Practices +- Tips for effective use +- Common patterns +- Error handling + +## Troubleshooting +- Can't access llms.txt? Use MCP +- Can't find what you need? Try searching +- Version compatibility issues? +``` + +--- + +## Why These Additions Matter + +1. **MCP Server** - Critical resource that's completely missing +2. **Accessibility Issues** - Explains why some clients can't access resources +3. **Use-Case Docs** - Directs agents to the new structured documentation +4. **Guidance** - Helps agents use resources effectively +5. **Metadata Explanation** - Enables agents to understand structured data +6. 
**Best Practices** - Improves outcomes for AI-assisted development + +--- + +## Priority Recommendations + +### Must Add +- [ ] MCP Server section +- [ ] Accessibility note +- [ ] Link to use-case documentation + +### Should Add +- [ ] Comparison table (when to use each resource) +- [ ] Best practices section +- [ ] Metadata explanation + +### Nice to Have +- [ ] Code examples section +- [ ] Troubleshooting section +- [ ] Expanded API references with descriptions + +--- + +## Conclusion + +This page is a good foundation but needs expansion to be truly useful for AI agents. The additions above would make it a comprehensive guide that helps AI agents discover and use Redis documentation effectively. + +**Current Quality: 6/10** (good start, but incomplete) +**Potential Quality: 9/10** (with suggested additions) + +The page should be the entry point for AI agents discovering Redis resources—it needs to be comprehensive and helpful. + diff --git a/ai-friendliness-report.md b/ai-friendliness-report.md new file mode 100644 index 0000000000..49415bfc7a --- /dev/null +++ b/ai-friendliness-report.md @@ -0,0 +1,311 @@ +# AI-Friendliness Report: Redis Documentation + +## Executive Summary + +Redis has made excellent progress in supporting AI agents through the `llms.txt` index and Markdown documentation format. However, these resources are not easily discoverable by AI agents without explicit guidance. This report outlines why discovery is difficult and provides concrete recommendations to improve AI-friendliness across the Redis documentation ecosystem. + +--- + +## The Discovery Problem + +### What Happened + +During implementation of a cache-aside tutorial, I (an AI agent) needed to validate documentation recommendations against official Redis sources. I used standard web-fetching approaches to access redis.io and redis-py documentation, but I did not discover: + +1. **llms.txt** - A curated index of all documentation in Markdown format +2. 
**Markdown URL pattern** - All pages available via `.html.md` URLs (e.g., `https://redis.io/docs/latest/develop/data-types/json/index.html.md`) + +These resources had to be pointed out explicitly by a human user. + +### Root Cause Analysis + +**Why I didn't find these resources:** + +1. **No standard convention** - There's no industry-wide standard for how AI agents should discover AI-friendly documentation +2. **Default behavior** - I defaulted to fetching standard documentation URLs without looking for AI-specific alternatives +3. **No discoverable metadata** - The resources exist but aren't linked from: + - robots.txt + - HTML meta tags + - HTTP headers + - Well-known locations (like `.well-known/`) + - Main documentation homepage +4. **Implicit knowledge** - The resources are documented somewhere, but not in a way that's obvious to an AI agent encountering the site for the first time + +### The Broader Context + +This is not a Redis-specific problem. The AI/LLM community hasn't yet established standard conventions for: +- How to advertise AI-friendly documentation formats +- Where to place resource indexes +- What metadata to include +- How to make discovery automatic vs. manual + +--- + +## What Redis Is Doing Right + +### Existing AI-Friendly Infrastructure + +1. **llms.txt Index** (https://redis.io/llms.txt) + - Comprehensive, curated list of all documentation pages + - Organized by category (Core Docs, Commands, Development, Integrations, Operations) + - Includes descriptions of each page + - Specifically designed for LLMs and AI assistants + +2. **Markdown Documentation Format** + - All pages available as `.html.md` URLs + - Much more suitable for AI agents than HTML + - Consistent URL pattern makes it predictable + +3. **Multi-language Examples** + - JSON documentation includes Python, Node.js, Java, Go, C#, PHP examples + - Helps AI agents understand implementation across languages + +4. 
**Structured Organization** + - Clear hierarchy (Develop → Data Types → JSON) + - Consistent naming conventions + - Logical grouping of related content + +--- + +## Recommendations for Improved AI-Friendliness + +### Priority 1: Quick Wins (Low Effort, High Impact) + +#### 1.1 Add Comment to llms.txt + +**Current state:** llms.txt exists but has no explanation + +**Recommendation:** Add a header comment explaining its purpose: + +``` +# Redis Documentation for AI Agents and LLMs +# +# This file provides a curated index of Redis documentation in Markdown format. +# +# Usage: +# - Start with this file to discover available documentation +# - All pages are available in Markdown format via .html.md URLs +# - Example: https://redis.io/docs/latest/develop/data-types/json/index.html.md +# +# For more information, see: https://redis.io/docs/latest/develop/ +``` + +**Why:** When an AI agent fetches llms.txt, it immediately understands the purpose and how to use it. + +**Effort:** Minimal (add 10 lines of comments) + +--- + +#### 1.2 Create .well-known/ai-documentation.json + +**Recommendation:** Add a standardized metadata file at `https://redis.io/.well-known/ai-documentation.json`: + +```json +{ + "documentation": { + "index": "https://redis.io/llms.txt", + "format": "markdown", + "markdown_url_pattern": "{base_url}.html.md", + "description": "Curated Markdown documentation for AI agents and LLMs" + }, + "api_references": { + "redis_py": { + "url": "https://redis.readthedocs.io/en/stable/commands.html", + "format": "html", + "language": "python" + } + }, + "version": "1.0" +} +``` + +**Why:** Follows the `.well-known` convention (like `.well-known/robots.txt`, `.well-known/security.txt`). AI agents can check this standardized location for metadata. 
+ +**Effort:** Low (create one JSON file) + +--- + +#### 1.3 Update robots.txt + +**Recommendation:** Add a comment to robots.txt: + +``` +# For AI agents and LLMs: see https://redis.io/llms.txt for curated Markdown documentation +``` + +**Why:** Many AI agents check robots.txt first. A comment there is discoverable. + +**Effort:** Minimal (add 1 line) + +--- + +### Priority 2: Medium Effort, High Impact + +#### 2.1 Add Meta Tags to Main Docs Homepage + +**Recommendation:** Add to `https://redis.io/docs/latest/`: + +```html + + + +``` + +**Why:** AI agents that parse HTML headers might discover these. Similar to how search engines use meta tags. + +**Effort:** Low (add 3 lines to HTML template) + +--- + +#### 2.2 Add HTTP Link Header + +**Recommendation:** Add to HTTP response headers from redis.io: + +``` +Link: ; rel="ai-documentation"; type="text/plain" +``` + +**Why:** Some AI agents check HTTP headers for metadata. This is a standard HTTP convention. + +**Effort:** Medium (requires web server configuration) + +--- + +#### 2.3 Create "For AI Agents" Documentation Page + +**Recommendation:** Create `https://redis.io/docs/latest/ai-agent-resources/` with: + +- Explanation of llms.txt and how to use it +- How to access Markdown versions of docs +- Best practices for AI agents using Redis documentation +- Links to API references and examples +- Guidance on error handling and common patterns +- Links to redis-py API reference + +**Why:** Makes AI-friendly resources a first-class feature, not hidden. 
+ +**Effort:** Medium (write one documentation page) + +--- + +### Priority 3: Long-Term, Strategic + +#### 3.1 Add "For AI Agents" Section to Main Docs Homepage + +**Recommendation:** Add a prominent section to the main documentation homepage: + +```markdown +## For AI Agents and LLMs + +If you're an AI agent or LLM looking to access Redis documentation: + +- **Markdown Index**: [llms.txt](https://redis.io/llms.txt) - Curated list of all docs in Markdown format +- **Markdown Format**: All pages available as `.html.md` URLs + - Example: `https://redis.io/docs/latest/develop/data-types/json/index.html.md` +- **API Reference**: [redis-py commands](https://redis.readthedocs.io/en/stable/commands.html) +- **Learn More**: [AI Agent Resources](https://redis.io/docs/latest/ai-agent-resources/) +``` + +**Why:** Explicit, discoverable, and sets expectations for AI agents. + +**Effort:** Low (add one section to homepage) + +--- + +#### 3.2 Advocate for Industry Standards + +**Recommendation:** Document and share Redis's approach: + +- Write a blog post about why Redis chose llms.txt and Markdown +- Share this approach with other projects +- Contribute to discussions about AI documentation standards +- Consider proposing a standard (e.g., through a GitHub discussion or RFC) + +**Why:** Helps establish conventions that benefit the entire AI/LLM community. 
+ +**Effort:** Medium (requires community engagement) + +--- + +## Implementation Roadmap + +### Phase 1 (Week 1): Quick Wins +- [ ] Add comment header to llms.txt +- [ ] Create .well-known/ai-documentation.json +- [ ] Update robots.txt with comment +- [ ] Add meta tags to main docs homepage + +**Expected Impact:** AI agents that check these locations will discover llms.txt + +### Phase 2 (Week 2-3): Medium Effort +- [ ] Add HTTP Link header to responses +- [ ] Create "For AI Agents" documentation page +- [ ] Add section to main docs homepage + +**Expected Impact:** AI agents will have clear, discoverable guidance on accessing documentation + +### Phase 3 (Ongoing): Strategic +- [ ] Document the approach publicly +- [ ] Share with other projects +- [ ] Contribute to industry standards discussions + +**Expected Impact:** Establish Redis as a leader in AI-friendly documentation + +--- + +## Metrics for Success + +After implementing these recommendations, success can be measured by: + +1. **Discoverability**: AI agents can find llms.txt without human guidance +2. **Usage**: Increased adoption of Markdown documentation by AI tools +3. **Feedback**: Positive feedback from AI/LLM community +4. **Industry Impact**: Other projects adopt similar approaches + +--- + +## Important Discovery: Accessibility Varies Across AI Clients + +### The Problem + +While Redis provides llms.txt and Markdown documentation files at `.html.md` URLs, **not all AI clients can access them equally**: + +- ✅ Some AI agents (like Augment Agent) can fetch these files successfully +- ❌ Other AI clients (like ChatGPT's client app) cannot access them, even when provided direct links + +### Root Causes + +This inconsistency likely stems from: +1. **Different HTTP client implementations** - Various AI platforms use different HTTP libraries with different default behaviors +2. **User-Agent filtering** - Some servers may block requests based on User-Agent headers +3. 
**Rate limiting** - Different clients may hit rate limits at different thresholds +4. **CORS and access restrictions** - Implicit restrictions on who can access these resources +5. **Client-specific limitations** - Some clients may have restricted network access or proxy requirements + +### Why This Matters + +Even though Redis has built excellent AI-friendly infrastructure (llms.txt, Markdown URLs), **the infrastructure alone is not sufficient** if not all AI clients can reliably access it. + +### Recommendation: Use MCP as the Universal Access Layer + +The Redis MCP Server solves this problem by providing a **guaranteed, standardized access method** that works across all MCP-compatible clients: + +- ✅ Works with Claude Desktop, VS Code, Augment, OpenAI Agents, and any MCP client +- ✅ No HTTP client compatibility issues +- ✅ No User-Agent filtering problems +- ✅ No rate limiting concerns for individual clients +- ✅ Structured, predictable responses + +This is why the MCP server enhancements proposed in the companion document are so important: they provide a reliable, universal way for AI agents to access Redis documentation and examples. + +--- + +## Conclusion + +Redis has already invested in AI-friendly documentation infrastructure. These recommendations focus on making that infrastructure more discoverable and establishing conventions that benefit the broader AI community. + +The key insight is that **having AI-friendly resources is only half the battle**—making them discoverable is equally important. By implementing these recommendations, Redis can become a model for how projects should support AI agents and LLMs. + +**Addendum**: While llms.txt and Markdown files are valuable, they're not universally accessible to all AI clients due to HTTP client compatibility issues. The MCP server approach provides a guaranteed, standardized access method that works across all platforms, making it the most reliable way to serve AI agents. 
+ + diff --git a/build/jupyterize/SPECIFICATION.md b/build/jupyterize/SPECIFICATION.md index 5d340e92e9..7ae7a76390 100644 --- a/build/jupyterize/SPECIFICATION.md +++ b/build/jupyterize/SPECIFICATION.md @@ -2478,3 +2478,108 @@ build/jupyterize/ - Easy to add new unwrapping patterns (extend `CodeUnwrapper`) - Easy to add new cell types (extend `NotebookBuilder`) +--- + +### Implementation Lessons Learned + +**1. Module Initialization Pattern** +Each module class should accept `language` in `__init__()` and load configuration once: +```python +class FileParser: + def __init__(self, language): + self.language = language + self.prefix = PREFIXES[language.lower()] + self.config = load_language_config(language) +``` +This avoids repeated config loading and makes the class stateful and testable. + +**2. Orchestration in Main Script** +The simplified `jupyterize()` function should instantiate classes in order and pass results forward: +```python +validator = InputValidator() +language = validator.detect_language(input_file) +validator.validate_file(input_file, language) + +parser = FileParser(language) +parsed_blocks = parser.parse(input_file) + +builder = NotebookBuilder(language) +notebook = builder.build(parsed_blocks) +builder.write(notebook, output_file) +``` +This creates a clear, linear pipeline that's easy to understand and debug. + +**3. Backward Compatibility** +Keep the main `jupyterize()` function signature unchanged: +```python +def jupyterize(input_file, output_file=None, verbose=False): +``` +This ensures existing code that imports and calls `jupyterize()` continues to work without modification. + +**4. Test Import Updates** +When updating tests, import classes from new modules instead of functions: +```python +# Old: from jupyterize import detect_language, validate_input, parse_file +# New: from validator import InputValidator +# from parser import FileParser +``` +Tests should instantiate classes and call methods, not import standalone functions. + +**5. 
Logging Configuration** +Set up logging in the main `jupyterize()` function, not in individual modules: +```python +log_level = logging.DEBUG if verbose else logging.INFO +logging.basicConfig(level=log_level, format='%(levelname)s: %(message)s') +``` +This ensures consistent logging across all modules and respects the verbose flag. + +**6. Error Handling Strategy** +Let exceptions propagate from modules to the main function, which catches and logs them: +```python +try: + # Module operations +except Exception as e: + logging.error(f"Conversion failed: {e}") + raise +``` +This keeps modules focused on their logic while main function handles user-facing errors. + +**7. Module Size Reality** +Actual module sizes may differ from estimates: +- `config.py`: ~120 lines (vs ~100 estimated) - includes full KERNEL_SPECS dict +- `validator.py`: ~95 lines (vs ~80 estimated) - simpler than expected +- `parser.py`: ~180 lines (vs ~150 estimated) - state tracking adds complexity +- `unwrapper.py`: ~180 lines (vs ~150 estimated) - regex patterns and edge cases +- `notebook_builder.py`: ~160 lines (vs ~150 estimated) - cell creation logic +- `jupyterize.py`: ~142 lines (vs ~150-200 estimated) - much simpler than expected! + +**Key insight**: The main script becomes much simpler (142 lines vs 696 original), while supporting modules are slightly larger due to class structure and docstrings. + +**8. Static Methods vs Instance Methods** +Use instance methods for classes that maintain state (language, config): +```python +class FileParser: + def __init__(self, language): + self.language = language + self.config = load_language_config(language) + + def parse(self, file_path): # Instance method + # Uses self.language and self.config +``` +Use static methods only for utility functions that don't need state. + +**9. 
Configuration Loading Pattern** +Load configuration once in `__init__()` and cache it: +```python +def __init__(self, language): + self.language = language + self.config = load_language_config(language) # Load once +``` +This is more efficient than loading on every method call and makes the class behavior predictable. + +**10. Testing Strategy Adjustment** +The spec mentioned creating separate unit test files (`test_config.py`, `test_parser.py`, etc.), but the existing `test_jupyterize.py` already covers all functionality through integration tests. Consider: +- Keep existing integration tests (they work well) +- Add unit tests for edge cases if needed +- Don't create separate test files unless testing individual modules in isolation becomes necessary + diff --git a/commands-page-mockup copy.html b/commands-page-mockup copy.html new file mode 100644 index 0000000000..1d865ec27a --- /dev/null +++ b/commands-page-mockup copy.html @@ -0,0 +1,389 @@ + + + + + + Commands Page Mockup - Desktop & Mobile + + + + + + + +
+
+

Commands

+ + +
+ +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+
+ + +
+ +
+
+
APPEND
+
Appends a string to the value of a key. Creates the key if it doesn't exist.
+
+
String
+
+ @write + @string +
+
O(N)
+
2.0.0
+
+ + + +
+ + +
+
+
BRPOPLPUSH
+
Pops an element from a list, pushes it to another list and returns it. Blocks until an element is available otherwise.
+
+
List
+
+ @write + @list + @blocking +
+
O(N)
+
2.2.0
+
+ Deprecated +
+ + + +
+ + +
+
+
CLUSTER COUNT-FAILURE-REPORTS
+
Returns the number of failure reports.
+
+
Cluster
+
+ @admin +
+
O(N)
+
3.0.0
+
+ + + +
+ + +
+
+
TDIGEST.REVRANK
+
Returns, for each input reverse rank, an estimation of the floating-point value with the given reverse rank in the sketch. The reverse rank is the inverse of the rank. If the reverse rank has a decimal part, the value is interpolated linearly considering the two nearest items in the sketch. If the reverse rank is outside the range [0, n_of_items], returns NaN.
+
+
T-Digest
+
+ @read + @tdigest +
+
O(N)
+
2.4.0
+
+ + + +
+
+
+
+ + + + + diff --git a/commands-page-mockup.html b/commands-page-mockup.html new file mode 100644 index 0000000000..966bf67ada --- /dev/null +++ b/commands-page-mockup.html @@ -0,0 +1,461 @@ + + + + + + Commands Page Mockup - Desktop & Mobile + + + + + + + +
+
+

Commands

+ + +
+ +
+ + +
+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+
+ + +
+ +
+
+
APPEND
+
Appends a string to the value of a key. Creates the key if it doesn't exist.
+
+
String
+
+ @write + @string +
+
O(N)
+
2.0.0
+
+ + + +
+ + +
+
+
BRPOPLPUSH
+
Pops an element from a list, pushes it to another list and returns it. Blocks until an element is available otherwise.
+
+
List
+
+ @write + @list + @blocking +
+
O(N)
+
2.2.0
+
+ Deprecated +
+ + + +
+ + +
+
+
CLUSTER COUNT-FAILURE-REPORTS
+
Returns the number of failure reports.
+
+
Cluster
+
+ @admin +
+
O(N)
+
3.0.0
+
+ + + +
+ + +
+
+
TDIGEST.REVRANK
+
Returns, for each input reverse rank, an estimation of the floating-point value with the given reverse rank in the sketch. The reverse rank is the inverse of the rank. If the reverse rank has a decimal part, the value is interpolated linearly considering the two nearest items in the sketch. If the reverse rank is outside the range [0, n_of_items], returns NaN.
+
+
T-Digest
+
+ @read + @tdigest +
+
O(N)
+
2.4.0
+
+ + + +
+
+
+
+ + + + + diff --git a/docs-improvement-recommendations.md b/docs-improvement-recommendations.md new file mode 100644 index 0000000000..16d1644cbc --- /dev/null +++ b/docs-improvement-recommendations.md @@ -0,0 +1,407 @@ +# Redis Documentation Improvements for AI Agent Usability + +## Validation Status + +✅ **Validated Against Official Documentation** + +**Sources Used:** +- Redis llms.txt index: https://redis.io/llms.txt (comprehensive list of Markdown docs) +- JSON documentation (Markdown): https://redis.io/docs/latest/develop/data-types/json/index.html.md +- redis-py guide (Markdown): https://redis.io/docs/latest/develop/clients/redis-py/index.html.md +- redis-py integration page (Markdown): https://redis.io/docs/latest/integrate/redis-py/index.html.md +- redis-py 7.0.1 API reference: https://redis.readthedocs.io/en/stable/ + +**Note:** Redis provides Markdown versions of all documentation pages via `.html.md` URLs, which is ideal for AI agents and LLMs. The `llms.txt` file provides a curated index of these resources. + +All recommendations below are based on gaps identified in the official documentation combined with real challenges encountered during cache-aside tutorial implementation. + +--- + +## Executive Summary + +Based on implementing a cache-aside pattern tutorial with Redis JSON, the following improvements would significantly enhance documentation usability for AI agents: + +1. **Explicit API Method Signatures & Behavior Documentation** - Current docs lack comprehensive method signatures, parameter types, return values, and error conditions for redis-py methods +2. **Consolidated Data Type Usage Guides** - Redis JSON documentation is scattered across multiple files; a unified guide showing when/how to use JSON vs. strings would prevent confusion +3. **Clear Pattern-to-Implementation Mapping** - Architectural patterns (cache-aside, write-through, etc.) need explicit code examples showing the exact redis-py API calls required +4. 
**Searchable Decision Trees** - Documentation lacks structured guidance on choosing between approaches (e.g., JSON vs. string serialization, sync vs. async) +5. **Explicit Deprecation & Migration Paths** - No clear guidance on moving from manual JSON serialization to native Redis JSON operations + +--- + +## Prioritized Recommendations + +### CRITICAL (High Impact) + +#### 1. Create Comprehensive redis-py API Reference with Behavior Documentation + +**What:** Expand redis-py client documentation to include: +- Complete method signatures with type hints +- Parameter descriptions with valid value ranges +- Return value types and structures +- Error conditions and exceptions +- Version availability (when methods were added/changed) +- Performance characteristics (O(n) complexity, etc.) + +**Why:** AI agents struggle to understand: +- Whether `redis.json().set()` vs `redis.set()` is appropriate +- What parameters are required vs. optional +- What exceptions might be raised and how to handle them +- Whether a method exists in the current redis-py version + +**Example - Current State:** +```markdown +## JSON Operations +Use `r.json().get()` and `r.json().set()` for JSON data. 
+``` + +**Example - Improved State:** +```markdown +### json().set(name, path, obj, nx=False, xx=False, get=False) + +**Parameters:** +- `name` (str): Key name +- `path` (str): JSONPath expression (default: '$' for root) +- `obj` (Any): Python object to serialize as JSON +- `nx` (bool): Only set if key doesn't exist +- `xx` (bool): Only set if key exists +- `get` (bool): Return old value before update + +**Returns:** +- str: The path to the value if successful +- None: If nx=True and key exists, or xx=True and key doesn't exist + +**Raises:** +- `redis.ResponseError`: If path is invalid or obj is not JSON-serializable +- `redis.ConnectionError`: If Redis connection fails + +**Complexity:** O(N) where N is the size of the JSON document + +**Available Since:** redis-py 4.5.0 + +**Example:** +```python +r.json().set('user:1', '$', {'name': 'Alice', 'age': 30}) +r.json().set('user:1', '$.age', 31, xx=True) # Update only if exists +``` + +**Effort Estimate:** High (requires systematic documentation of all methods) + +--- + +#### 2. Create "Data Type Selection Guide" with Decision Matrix + +**What:** Create a single, authoritative guide showing: +- When to use JSON vs. strings vs. hashes +- Comparison table: serialization overhead, query capabilities, TTL support, etc. +- Migration paths between approaches +- Performance implications + +**Why:** AI agents frequently misunderstand when to use each approach: +- Should user data be stored as JSON or serialized strings? +- When is Redis JSON worth the overhead? +- How to migrate from string serialization to native JSON? 
+ +**Example - Current State:** +- JSON docs at `/develop/data-types/json/` +- String docs at `/develop/data-types/strings/` +- Hash docs at `/develop/data-types/hashes/` +- No comparison or decision guidance + +**Example - Improved State:** +```markdown +## Choosing a Data Type for Structured Data + +| Aspect | JSON | Hash | String (JSON) | +|--------|------|------|---------------| +| Query Support | Full JSONPath queries | Field-level only | None (manual parsing) | +| Serialization | Native (automatic) | Manual per field | Manual (json.dumps) | +| TTL Support | Yes (per key) | Yes (per key) | Yes (per key) | +| Memory Overhead | ~15% | ~10% | ~20% (with json.dumps) | +| Partial Updates | Yes (JSONPath) | Yes (HSET) | No (full replace) | +| Indexing | Full-text, numeric | Field-level | None | +| Use Case | Complex nested data | Flat key-value pairs | Legacy systems | + +### Decision Tree +1. Do you need to query nested fields? → Use JSON +2. Do you need full-text search? → Use JSON with search index +3. Is data flat (no nesting)? → Use Hash +4. Must support legacy code? → Use String with json.dumps() +``` + +**Effort Estimate:** Medium (requires consolidation of existing docs) + +--- + +#### 3. Create Pattern-Specific Implementation Guides + +**What:** For each architectural pattern (cache-aside, write-through, write-behind, etc.): +- Show exact redis-py API calls required +- Include error handling patterns +- Show both sync and async versions +- Include performance considerations + +**Why:** AI agents need explicit mapping from pattern concept to code: +- "Implement cache-aside" → What exact redis-py methods? +- How to handle Redis failures? +- What's the difference between sync and async? 
+
+**Example - Current State:**
+Cache-aside tutorial exists but lacks:
+- Explicit error handling patterns
+- Comparison with other patterns
+- Performance metrics
+
+**Example - Improved State:**
+```markdown
+## Cache-Aside Pattern
+
+### When to Use
+- Read-heavy workloads (80%+ reads)
+- Tolerable staleness (data can be minutes old)
+- Resilient to cache failures
+
+### Implementation Steps
+
+#### Step 1: Check Cache
+```python
+try:
+    cached = r.json().get(f'cache:{key}')
+    if cached is not None:
+        return cached
+except redis.ConnectionError:
+    # Fall through to database
+    pass
+```
+
+#### Step 2: Fetch from Source
+```python
+data = fetch_from_database(key)
+```
+
+#### Step 3: Store in Cache
+```python
+try:
+    r.json().set(f'cache:{key}', '$', data)
+    r.expire(f'cache:{key}', 3600)
+except redis.ConnectionError:
+    # Log but don't fail
+    logger.warning(f"Failed to cache {key}")
+```
+
+### Error Handling Patterns
+- Connection failures: Fall back to database
+- Serialization errors: Log and skip caching
+- TTL expiration: Automatic (no action needed)
+
+### Performance Characteristics
+- Cache hit: ~1ms (Redis latency)
+- Cache miss: ~100ms (database latency) + ~1ms (cache write)
+- Hit ratio target: 80%+
+```
+
+**Effort Estimate:** High (requires creating multiple pattern guides)
+
+---
+
+### IMPORTANT (Medium Impact)
+
+#### 4. Add Machine-Readable Metadata to Examples
+
+**What:** Add structured metadata to code examples:
+```yaml
+---
+pattern: cache-aside
+data_type: json
+redis_version: ">=4.5.0"
+redis_py_version: ">=4.5.0"
+complexity: O(1) for cache hit, O(n) for miss
+error_handling: required
+async_available: true
+---
+```
+
+**Why:** AI agents can use metadata to:
+- Filter examples by version requirements
+- Identify which examples need error handling
+- Find async alternatives
+- Understand performance implications
+
+**Effort Estimate:** Low (add to existing examples)
+
+---
+
+#### 5. 
Create "Common Mistakes" Documentation + +**What:** Document frequent errors with explanations: +- Using `r.set()` instead of `r.json().set()` for JSON data +- Not handling `redis.ResponseError` for invalid JSONPath +- Forgetting to set TTL on cache entries +- Mixing sync and async clients + +**Why:** AI agents often make these mistakes; explicit documentation prevents them + +**Example:** +```markdown +## Common Mistakes + +### Mistake 1: Using String Serialization Instead of JSON +❌ Wrong: +```python +r.set('user:1', json.dumps({'name': 'Alice'})) +data = json.loads(r.get('user:1')) +``` + +✅ Correct: +```python +r.json().set('user:1', '$', {'name': 'Alice'}) +data = r.json().get('user:1') +``` + +**Why:** Native JSON is faster, supports queries, and handles serialization automatically. + +**Effort Estimate:** Medium + +--- + +#### 6. Create Explicit "Async vs. Sync" Comparison Guide + +**What:** Document: +- When to use async (high concurrency, I/O-bound) +- When to use sync (simple scripts, low concurrency) +- How to migrate between them +- Common async pitfalls + +**Why:** AI agents frequently confuse sync/async patterns + +**Effort Estimate:** Medium + +--- + +### NICE TO HAVE (Low Impact) + +#### 7. Add "Tested Code Examples" Badges + +**What:** Mark examples that are: +- Automatically tested +- Verified to work with current redis-py version +- Include error handling + +**Why:** Helps AI agents identify reliable examples + +**Effort Estimate:** Low + +--- + +#### 8. Create "Troubleshooting by Error Message" Guide + +**What:** Document common Redis errors and solutions: +- `WRONGTYPE Operation against a key holding the wrong kind of value` +- `ERR unknown command 'JSON.SET'` +- `NOSCRIPT No matching script` + +**Why:** AI agents can reference this when errors occur + +**Effort Estimate:** Medium + +--- + +## Implementation Priority + +1. **Phase 1 (Critical):** API Reference + Data Type Selection Guide +2. 
**Phase 2 (Important):** Pattern Implementation Guides + Common Mistakes +3. **Phase 3 (Nice to Have):** Metadata + Async Guide + Error Reference + +--- + +## Important Discovery: Redis llms.txt and Markdown Documentation + +During validation, I discovered that Redis provides: + +1. **llms.txt Index** (https://redis.io/llms.txt) + - A curated list of all documentation pages in Markdown format + - Specifically designed for LLMs and AI assistants to ingest + - Includes descriptions of each documentation page + - Organized by category (Core Docs, Commands, Development, Integrations, Operations) + +2. **Markdown Versions of All Pages** + - Every documentation page is available as Markdown via `.html.md` URLs + - Example: `https://redis.io/docs/latest/develop/data-types/json/index.html.md` + - This is much more suitable for AI agents than HTML + +**Implication for AI Agent Usability:** +Redis has already recognized the need for AI-friendly documentation formats. The existence of `llms.txt` and Markdown versions suggests that the Redis team understands AI agents need structured, machine-readable documentation. This makes the recommendations in this document even more relevant - the infrastructure is in place, but the *content* of the documentation still needs the improvements outlined below. + +--- + +## Validation Against Official Documentation + +### What I Found on redis.io and redis-py docs: + +**✅ Strengths:** +- Redis JSON documentation at https://redis.io/docs/latest/develop/data-types/json/ has good examples in multiple languages +- redis-py 7.0.1 documentation lists all commands with basic descriptions +- Examples page shows various use cases (JSON, streams, timeseries, etc.) + +**❌ Gaps Confirmed:** +1. 
**No comprehensive method signatures** - redis-py docs show method names and brief descriptions, but lack: + - Complete parameter types and defaults + - Return value structures + - Exception types that can be raised + - Version availability information + - Performance characteristics (O(n) complexity) + +2. **No data type comparison guide** - JSON, Hash, and String docs are separate with no unified decision matrix + +3. **No pattern-to-implementation mapping** - No guide showing "here's the cache-aside pattern, here's exactly how to implement it with redis-py" + +4. **No error handling documentation** - Examples show happy paths but not how to handle: + - Connection failures + - Serialization errors + - Invalid JSONPath expressions + - TTL edge cases + +5. **No async/sync comparison** - redis-py supports both but no guide on when to use each + +6. **Scattered examples** - JSON examples exist but are embedded in the JSON data type page, not linked from pattern/use-case pages + +### Specific Example - json().set() Method + +**Current redis-py docs:** +``` +json().set(name, path, obj, nx=False, xx=False, get=False) +``` + +**What's missing:** +- Parameter type hints (name: str, path: str, obj: Any, etc.) +- What happens when obj is not JSON-serializable? +- What does the method return? (True? str? None?) +- When does it raise ResponseError vs. other exceptions? +- Available since which redis-py version? +- Performance: O(N) where N is the size of the JSON document +- Example of error handling + +--- + +## Key Insights from Cache-Aside Implementation + +### What Worked Well +- Existing tcedocs system for tested examples +- Clear separation of concerns (cache_manager.py, cache_aside.py) +- Comprehensive test coverage +- JSON examples on redis.io are well-structured with multi-language support + +### What Was Difficult +- Finding exact redis-py method signatures (had to check source code) +- Understanding when to use JSON vs. 
strings (scattered across docs) +- Determining error handling patterns (not documented) +- Choosing between sync and async (no comparison guide) +- No clear guidance on which exceptions to catch + +### What Would Have Helped Most +1. Single reference showing all redis-py JSON methods with signatures, return types, and exceptions +2. Decision matrix for data type selection (JSON vs. Hash vs. String) +3. Explicit error handling patterns for each operation +4. Clear async/sync comparison with migration guide +5. Pattern-specific implementation guides (cache-aside, write-through, etc.) with exact redis-py API calls + From 7d177f95336ceadb688a5d4c6e44760ce091907c Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Tue, 16 Dec 2025 13:20:55 +0000 Subject: [PATCH 11/11] DOC-6100 removed extra files added mistakenly --- ai-agent-resources-feedback.md | 247 --------------- ai-friendliness-report.md | 311 ------------------- commands-page-mockup copy.html | 389 ----------------------- commands-page-mockup.html | 461 ---------------------------- docs-improvement-recommendations.md | 407 ------------------------ 5 files changed, 1815 deletions(-) delete mode 100644 ai-agent-resources-feedback.md delete mode 100644 ai-friendliness-report.md delete mode 100644 commands-page-mockup copy.html delete mode 100644 commands-page-mockup.html delete mode 100644 docs-improvement-recommendations.md diff --git a/ai-agent-resources-feedback.md b/ai-agent-resources-feedback.md deleted file mode 100644 index b2d9f126f3..0000000000 --- a/ai-agent-resources-feedback.md +++ /dev/null @@ -1,247 +0,0 @@ -# Feedback: AI Agent Resources Page Draft - -## Status: Good Foundation, Needs Expansion - -This is a solid start that addresses the core discovery problem. However, it's quite minimal and could be significantly enhanced to be more useful for AI agents. - ---- - -## What Works Well ✅ - -1. **Clear Purpose** - Immediately explains what the page is for -2. 
**llms.txt Discovery** - Highlights the main index -3. **Markdown Format** - Explains the `.html.md` URL pattern -4. **API References** - Lists client libraries with links -5. **Frontmatter** - Good metadata for discoverability - ---- - -## What's Missing or Could Be Improved - -### 1. MCP Server Not Mentioned ⚠️ - -**Current:** Only mentions llms.txt and Markdown URLs - -**Issue:** The MCP server is a critical resource for AI agents but isn't mentioned - -**Suggestion:** Add section: -```markdown -## Redis MCP Server - -The Redis Model Context Protocol (MCP) server provides a standardized interface for AI agents to access Redis documentation, examples, and data operations. - -- **GitHub:** [redis/mcp-redis](https://github.com/redis/mcp-redis) -- **Installation:** `uvx --from redis-mcp-server@latest redis-mcp-server --url redis://localhost:6379/0` -- **Supported Clients:** Claude Desktop, VS Code, Augment, OpenAI Agents, and any MCP-compatible client -- **Capabilities:** Documentation queries, code examples, API references, vector search, data operations - -See [Redis MCP Server documentation](https://redis.io/docs/latest/integrate/redis-mcp/) for setup and usage. -``` - -### 2. 
No Guidance on When to Use Each Resource - -**Current:** Lists resources but doesn't explain when to use each - -**Suggestion:** Add comparison table: -```markdown -## Choosing the Right Resource - -| Resource | Best For | Pros | Cons | -|----------|----------|------|------| -| **llms.txt** | Discovering all available docs | Comprehensive, curated | Requires parsing | -| **Markdown URLs** | Reading specific documentation | Human-readable, AI-friendly | Requires web access | -| **MCP Server** | Querying docs programmatically | Structured, standardized, reliable | Requires MCP client | -| **API References** | Understanding method signatures | Authoritative, detailed | Language-specific | -| **Code Examples** | Learning patterns and best practices | Practical, tested | May be scattered | -``` - -### 3. No Explanation of Accessibility Issues - -**Current:** Doesn't mention that not all AI clients can access llms.txt/Markdown equally - -**Suggestion:** Add note: -```markdown -## Important: Accessibility Considerations - -While llms.txt and Markdown documentation are valuable resources, **not all AI clients can access them equally** due to HTTP client compatibility issues, User-Agent filtering, or network restrictions. - -If you're unable to access these resources directly, the **Redis MCP Server** provides a guaranteed, standardized access method that works across all MCP-compatible clients. -``` - -### 4. 
No Link to Use-Case Documentation - -**Current:** Doesn't mention the new caching use-case doc (or other use-case docs) - -**Suggestion:** Add section: -```markdown -## Use-Case Documentation - -Redis provides comprehensive guides for common use cases, structured for both human developers and AI agents: - -- [Caching](https://redis.io/docs/latest/develop/use-cases/caching/) - Patterns, examples, and best practices -- [Session Storage](https://redis.io/docs/latest/develop/use-cases/sessions/) - Session management patterns -- [Rate Limiting](https://redis.io/docs/latest/develop/use-cases/rate-limiting/) - Rate limiting strategies -- [Queues](https://redis.io/docs/latest/develop/use-cases/queues/) - Queue patterns and implementations - -Each guide includes: -- Pattern comparison tables -- Error handling examples -- Async/sync variants -- API references -- Common mistakes and solutions -``` - -### 5. No Guidance on Code Examples - -**Current:** Doesn't mention where to find code examples - -**Suggestion:** Add section: -```markdown -## Code Examples - -Redis provides tested code examples for common patterns: - -- **Local Examples:** Available in the [Redis documentation repository](https://github.com/redis/docs/tree/main/local_examples) -- **Pattern-Based:** Examples organized by use case (cache-aside, sessions, rate-limiting, etc.) -- **Multi-Language:** Examples in Python, JavaScript, Java, Go, and other languages -- **Tested:** All examples are tested and verified to work - -Use the MCP Server's code example finder to discover examples for your use case. -``` - -### 6. 
No Explanation of YAML Metadata - -**Current:** Doesn't explain the structured metadata in docs - -**Suggestion:** Add note: -```markdown -## Machine-Readable Metadata - -Redis documentation includes YAML frontmatter with structured metadata to help AI agents understand: - -- **Patterns:** Which caching/queuing patterns are covered -- **Languages:** Supported programming languages -- **Complexity:** Difficulty level (basic, moderate, advanced) -- **Version Requirements:** Minimum Redis and client library versions -- **Related Patterns:** Links to related use cases - -This metadata enables AI agents to: -- Filter documentation by language or complexity -- Understand version compatibility -- Discover related patterns -- Generate appropriate code examples -``` - -### 7. No Best Practices for AI Agents - -**Current:** Doesn't provide guidance on how to use these resources effectively - -**Suggestion:** Add section: -```markdown -## Best Practices for AI Agents - -When using Redis documentation: - -1. **Start with llms.txt** - Get an overview of available documentation -2. **Use the MCP Server for queries** - More reliable than direct web access -3. **Check version requirements** - Verify compatibility with your Redis and client versions -4. **Review error handling examples** - Learn how to handle common failure modes -5. **Understand trade-offs** - Each pattern has different consistency/performance characteristics -6. **Test with code examples** - Use provided examples as starting points -7. 
**Monitor metrics** - Use the metrics examples to track cache/queue effectiveness -``` - ---- - -## Suggested Structure for Expanded Version - -```markdown -# AI Agent Resources - -## Quick Start -- What this page is for -- How to get started - -## Core Resources -- llms.txt index -- Markdown documentation format -- MCP Server - -## Choosing the Right Resource -- Comparison table -- When to use each - -## Accessibility & Compatibility -- Note about unequal access -- MCP as fallback -- Version requirements - -## Use-Case Documentation -- Links to caching, sessions, rate-limiting, queues -- What each includes - -## Code Examples -- Where to find them -- How to use them -- Multi-language support - -## API References -- Links to client libraries -- How to use them - -## Machine-Readable Metadata -- YAML frontmatter explanation -- How AI agents use it - -## Best Practices -- Tips for effective use -- Common patterns -- Error handling - -## Troubleshooting -- Can't access llms.txt? Use MCP -- Can't find what you need? Try searching -- Version compatibility issues? -``` - ---- - -## Why These Additions Matter - -1. **MCP Server** - Critical resource that's completely missing -2. **Accessibility Issues** - Explains why some clients can't access resources -3. **Use-Case Docs** - Directs agents to the new structured documentation -4. **Guidance** - Helps agents use resources effectively -5. **Metadata Explanation** - Enables agents to understand structured data -6. 
**Best Practices** - Improves outcomes for AI-assisted development - ---- - -## Priority Recommendations - -### Must Add -- [ ] MCP Server section -- [ ] Accessibility note -- [ ] Link to use-case documentation - -### Should Add -- [ ] Comparison table (when to use each resource) -- [ ] Best practices section -- [ ] Metadata explanation - -### Nice to Have -- [ ] Code examples section -- [ ] Troubleshooting section -- [ ] Expanded API references with descriptions - ---- - -## Conclusion - -This page is a good foundation but needs expansion to be truly useful for AI agents. The additions above would make it a comprehensive guide that helps AI agents discover and use Redis documentation effectively. - -**Current Quality: 6/10** (good start, but incomplete) -**Potential Quality: 9/10** (with suggested additions) - -The page should be the entry point for AI agents discovering Redis resources—it needs to be comprehensive and helpful. - diff --git a/ai-friendliness-report.md b/ai-friendliness-report.md deleted file mode 100644 index 49415bfc7a..0000000000 --- a/ai-friendliness-report.md +++ /dev/null @@ -1,311 +0,0 @@ -# AI-Friendliness Report: Redis Documentation - -## Executive Summary - -Redis has made excellent progress in supporting AI agents through the `llms.txt` index and Markdown documentation format. However, these resources are not easily discoverable by AI agents without explicit guidance. This report outlines why discovery is difficult and provides concrete recommendations to improve AI-friendliness across the Redis documentation ecosystem. - ---- - -## The Discovery Problem - -### What Happened - -During implementation of a cache-aside tutorial, I (an AI agent) needed to validate documentation recommendations against official Redis sources. I used standard web-fetching approaches to access redis.io and redis-py documentation, but I did not discover: - -1. **llms.txt** - A curated index of all documentation in Markdown format -2. 
**Markdown URL pattern** - All pages available via `.html.md` URLs (e.g., `https://redis.io/docs/latest/develop/data-types/json/index.html.md`) - -These resources had to be pointed out explicitly by a human user. - -### Root Cause Analysis - -**Why I didn't find these resources:** - -1. **No standard convention** - There's no industry-wide standard for how AI agents should discover AI-friendly documentation -2. **Default behavior** - I defaulted to fetching standard documentation URLs without looking for AI-specific alternatives -3. **No discoverable metadata** - The resources exist but aren't linked from: - - robots.txt - - HTML meta tags - - HTTP headers - - Well-known locations (like `.well-known/`) - - Main documentation homepage -4. **Implicit knowledge** - The resources are documented somewhere, but not in a way that's obvious to an AI agent encountering the site for the first time - -### The Broader Context - -This is not a Redis-specific problem. The AI/LLM community hasn't yet established standard conventions for: -- How to advertise AI-friendly documentation formats -- Where to place resource indexes -- What metadata to include -- How to make discovery automatic vs. manual - ---- - -## What Redis Is Doing Right - -### Existing AI-Friendly Infrastructure - -1. **llms.txt Index** (https://redis.io/llms.txt) - - Comprehensive, curated list of all documentation pages - - Organized by category (Core Docs, Commands, Development, Integrations, Operations) - - Includes descriptions of each page - - Specifically designed for LLMs and AI assistants - -2. **Markdown Documentation Format** - - All pages available as `.html.md` URLs - - Much more suitable for AI agents than HTML - - Consistent URL pattern makes it predictable - -3. **Multi-language Examples** - - JSON documentation includes Python, Node.js, Java, Go, C#, PHP examples - - Helps AI agents understand implementation across languages - -4. 
**Structured Organization** - - Clear hierarchy (Develop → Data Types → JSON) - - Consistent naming conventions - - Logical grouping of related content - ---- - -## Recommendations for Improved AI-Friendliness - -### Priority 1: Quick Wins (Low Effort, High Impact) - -#### 1.1 Add Comment to llms.txt - -**Current state:** llms.txt exists but has no explanation - -**Recommendation:** Add a header comment explaining its purpose: - -``` -# Redis Documentation for AI Agents and LLMs -# -# This file provides a curated index of Redis documentation in Markdown format. -# -# Usage: -# - Start with this file to discover available documentation -# - All pages are available in Markdown format via .html.md URLs -# - Example: https://redis.io/docs/latest/develop/data-types/json/index.html.md -# -# For more information, see: https://redis.io/docs/latest/develop/ -``` - -**Why:** When an AI agent fetches llms.txt, it immediately understands the purpose and how to use it. - -**Effort:** Minimal (add 10 lines of comments) - ---- - -#### 1.2 Create .well-known/ai-documentation.json - -**Recommendation:** Add a standardized metadata file at `https://redis.io/.well-known/ai-documentation.json`: - -```json -{ - "documentation": { - "index": "https://redis.io/llms.txt", - "format": "markdown", - "markdown_url_pattern": "{base_url}.html.md", - "description": "Curated Markdown documentation for AI agents and LLMs" - }, - "api_references": { - "redis_py": { - "url": "https://redis.readthedocs.io/en/stable/commands.html", - "format": "html", - "language": "python" - } - }, - "version": "1.0" -} -``` - -**Why:** Follows the `.well-known` convention (like `.well-known/robots.txt`, `.well-known/security.txt`). AI agents can check this standardized location for metadata. 
-
-**Effort:** Low (create one JSON file)
-
----
-
-#### 1.3 Update robots.txt
-
-**Recommendation:** Add a comment to robots.txt:
-
-```
-# For AI agents and LLMs: see https://redis.io/llms.txt for curated Markdown documentation
-```
-
-**Why:** Many AI agents check robots.txt first. A comment there is discoverable.
-
-**Effort:** Minimal (add 1 line)
-
----
-
-### Priority 2: Medium Effort, High Impact
-
-#### 2.1 Add Meta Tags to Main Docs Homepage
-
-**Recommendation:** Add to `https://redis.io/docs/latest/`:
-
-```html
-<meta name="ai-documentation" content="https://redis.io/llms.txt">
-<meta name="ai-documentation-format" content="markdown">
-<link rel="ai-documentation" href="https://redis.io/llms.txt">
-```
-
-**Why:** AI agents that parse HTML headers might discover these. Similar to how search engines use meta tags.
-
-**Effort:** Low (add 3 lines to HTML template)
-
----
-
-#### 2.2 Add HTTP Link Header
-
-**Recommendation:** Add to HTTP response headers from redis.io:
-
-```
-Link: <https://redis.io/llms.txt>; rel="ai-documentation"; type="text/plain"
-```
-
-**Why:** Some AI agents check HTTP headers for metadata. This is a standard HTTP convention.
-
-**Effort:** Medium (requires web server configuration)
-
----
-
-#### 2.3 Create "For AI Agents" Documentation Page
-
-**Recommendation:** Create `https://redis.io/docs/latest/ai-agent-resources/` with:
-
-- Explanation of llms.txt and how to use it
-- How to access Markdown versions of docs
-- Best practices for AI agents using Redis documentation
-- Links to API references and examples
-- Guidance on error handling and common patterns
-- Links to redis-py API reference
-
-**Why:** Makes AI-friendly resources a first-class feature, not hidden.
- -**Effort:** Medium (write one documentation page) - ---- - -### Priority 3: Long-Term, Strategic - -#### 3.1 Add "For AI Agents" Section to Main Docs Homepage - -**Recommendation:** Add a prominent section to the main documentation homepage: - -```markdown -## For AI Agents and LLMs - -If you're an AI agent or LLM looking to access Redis documentation: - -- **Markdown Index**: [llms.txt](https://redis.io/llms.txt) - Curated list of all docs in Markdown format -- **Markdown Format**: All pages available as `.html.md` URLs - - Example: `https://redis.io/docs/latest/develop/data-types/json/index.html.md` -- **API Reference**: [redis-py commands](https://redis.readthedocs.io/en/stable/commands.html) -- **Learn More**: [AI Agent Resources](https://redis.io/docs/latest/ai-agent-resources/) -``` - -**Why:** Explicit, discoverable, and sets expectations for AI agents. - -**Effort:** Low (add one section to homepage) - ---- - -#### 3.2 Advocate for Industry Standards - -**Recommendation:** Document and share Redis's approach: - -- Write a blog post about why Redis chose llms.txt and Markdown -- Share this approach with other projects -- Contribute to discussions about AI documentation standards -- Consider proposing a standard (e.g., through a GitHub discussion or RFC) - -**Why:** Helps establish conventions that benefit the entire AI/LLM community. 
- -**Effort:** Medium (requires community engagement) - ---- - -## Implementation Roadmap - -### Phase 1 (Week 1): Quick Wins -- [ ] Add comment header to llms.txt -- [ ] Create .well-known/ai-documentation.json -- [ ] Update robots.txt with comment -- [ ] Add meta tags to main docs homepage - -**Expected Impact:** AI agents that check these locations will discover llms.txt - -### Phase 2 (Week 2-3): Medium Effort -- [ ] Add HTTP Link header to responses -- [ ] Create "For AI Agents" documentation page -- [ ] Add section to main docs homepage - -**Expected Impact:** AI agents will have clear, discoverable guidance on accessing documentation - -### Phase 3 (Ongoing): Strategic -- [ ] Document the approach publicly -- [ ] Share with other projects -- [ ] Contribute to industry standards discussions - -**Expected Impact:** Establish Redis as a leader in AI-friendly documentation - ---- - -## Metrics for Success - -After implementing these recommendations, success can be measured by: - -1. **Discoverability**: AI agents can find llms.txt without human guidance -2. **Usage**: Increased adoption of Markdown documentation by AI tools -3. **Feedback**: Positive feedback from AI/LLM community -4. **Industry Impact**: Other projects adopt similar approaches - ---- - -## Important Discovery: Accessibility Varies Across AI Clients - -### The Problem - -While Redis provides llms.txt and Markdown documentation files at `.html.md` URLs, **not all AI clients can access them equally**: - -- ✅ Some AI agents (like Augment Agent) can fetch these files successfully -- ❌ Other AI clients (like ChatGPT's client app) cannot access them, even when provided direct links - -### Root Causes - -This inconsistency likely stems from: -1. **Different HTTP client implementations** - Various AI platforms use different HTTP libraries with different default behaviors -2. **User-Agent filtering** - Some servers may block requests based on User-Agent headers -3. 
**Rate limiting** - Different clients may hit rate limits at different thresholds -4. **CORS and access restrictions** - Implicit restrictions on who can access these resources -5. **Client-specific limitations** - Some clients may have restricted network access or proxy requirements - -### Why This Matters - -Even though Redis has built excellent AI-friendly infrastructure (llms.txt, Markdown URLs), **the infrastructure alone is not sufficient** if not all AI clients can reliably access it. - -### Recommendation: Use MCP as the Universal Access Layer - -The Redis MCP Server solves this problem by providing a **guaranteed, standardized access method** that works across all MCP-compatible clients: - -- ✅ Works with Claude Desktop, VS Code, Augment, OpenAI Agents, and any MCP client -- ✅ No HTTP client compatibility issues -- ✅ No User-Agent filtering problems -- ✅ No rate limiting concerns for individual clients -- ✅ Structured, predictable responses - -This is why the MCP server enhancements proposed in the companion document are so important: they provide a reliable, universal way for AI agents to access Redis documentation and examples. - ---- - -## Conclusion - -Redis has already invested in AI-friendly documentation infrastructure. These recommendations focus on making that infrastructure more discoverable and establishing conventions that benefit the broader AI community. - -The key insight is that **having AI-friendly resources is only half the battle**—making them discoverable is equally important. By implementing these recommendations, Redis can become a model for how projects should support AI agents and LLMs. - -**Addendum**: While llms.txt and Markdown files are valuable, they're not universally accessible to all AI clients due to HTTP client compatibility issues. The MCP server approach provides a guaranteed, standardized access method that works across all platforms, making it the most reliable way to serve AI agents. 
- - diff --git a/commands-page-mockup copy.html b/commands-page-mockup copy.html deleted file mode 100644 index 1d865ec27a..0000000000 --- a/commands-page-mockup copy.html +++ /dev/null @@ -1,389 +0,0 @@ - - - - - - Commands Page Mockup - Desktop & Mobile - - - - - - - -
-
-

Commands

- - -
- -
-
- - -
-
- - -
-
- - -
-
- - -
-
- - -
-
- - -
-
-
- - -
- -
-
-
APPEND
-
Appends a string to the value of a key. Creates the key if it doesn't exist.
-
-
String
-
- @write - @string -
-
O(N)
-
1.0.0
-
- - - -
- - -
-
-
BRPOPLPUSH
-
Pops an element from a list, pushes it to another list and returns it. Blocks until an element is available otherwise.
-
-
List
-
- @write - @list - @blocking -
-
O(N)
-
2.2.0
-
- Deprecated -
- - - -
- - -
-
-
CLUSTER COUNT FAILURE REPORTS
-
Returns the number of failure reports.
-
-
Cluster
-
- @admin -
-
O(N)
-
7.0.0
-
- - - -
- - -
-
-
TDIGEST.REVRANK
-
Returns, for each input reverse rank, an estimation of the floating-point value with the given reverse rank in the sketch. The reverse rank is the inverse of the rank. If the reverse rank has a decimal part, the value is interpolated linearly considering the two nearest items in the sketch. If the reverse rank is outside the range [0, n_of_items], returns NaN.
-
-
T-Digest
-
- @read - @tdigest -
-
O(N)
-
2.4.0
-
- - - -
-
-
-
- - - - - diff --git a/commands-page-mockup.html b/commands-page-mockup.html deleted file mode 100644 index 966bf67ada..0000000000 --- a/commands-page-mockup.html +++ /dev/null @@ -1,461 +0,0 @@ - - - - - - Commands Page Mockup - Desktop & Mobile - - - - - - - -
-
-

Commands

- - -
- -
- - -
-
-
- - -
-
- - -
-
- - -
-
- - -
-
- - -
-
-
- - -
- -
-
-
APPEND
-
Appends a string to the value of a key. Creates the key if it doesn't exist.
-
-
String
-
- @write - @string -
-
O(N)
-
1.0.0
-
- - - -
- - -
-
-
BRPOPLPUSH
-
Pops an element from a list, pushes it to another list and returns it. Blocks until an element is available otherwise.
-
-
List
-
- @write - @list - @blocking -
-
O(N)
-
2.2.0
-
- Deprecated -
- - - -
- - -
-
-
CLUSTER COUNT FAILURE REPORTS
-
Returns the number of failure reports.
-
-
Cluster
-
- @admin -
-
O(N)
-
7.0.0
-
- - - -
- - -
-
-
TDIGEST.REVRANK
-
Returns, for each input reverse rank, an estimation of the floating-point value with the given reverse rank in the sketch. The reverse rank is the inverse of the rank. If the reverse rank has a decimal part, the value is interpolated linearly considering the two nearest items in the sketch. If the reverse rank is outside the range [0, n_of_items], returns NaN.
-
-
T-Digest
-
- @read - @tdigest -
-
O(N)
-
2.4.0
-
- - - -
-
-
-
- - - - - diff --git a/docs-improvement-recommendations.md b/docs-improvement-recommendations.md deleted file mode 100644 index 16d1644cbc..0000000000 --- a/docs-improvement-recommendations.md +++ /dev/null @@ -1,407 +0,0 @@ -# Redis Documentation Improvements for AI Agent Usability - -## Validation Status - -✅ **Validated Against Official Documentation** - -**Sources Used:** -- Redis llms.txt index: https://redis.io/llms.txt (comprehensive list of Markdown docs) -- JSON documentation (Markdown): https://redis.io/docs/latest/develop/data-types/json/index.html.md -- redis-py guide (Markdown): https://redis.io/docs/latest/develop/clients/redis-py/index.html.md -- redis-py integration page (Markdown): https://redis.io/docs/latest/integrate/redis-py/index.html.md -- redis-py 7.0.1 API reference: https://redis.readthedocs.io/en/stable/ - -**Note:** Redis provides Markdown versions of all documentation pages via `.html.md` URLs, which is ideal for AI agents and LLMs. The `llms.txt` file provides a curated index of these resources. - -All recommendations below are based on gaps identified in the official documentation combined with real challenges encountered during cache-aside tutorial implementation. - ---- - -## Executive Summary - -Based on implementing a cache-aside pattern tutorial with Redis JSON, the following improvements would significantly enhance documentation usability for AI agents: - -1. **Explicit API Method Signatures & Behavior Documentation** - Current docs lack comprehensive method signatures, parameter types, return values, and error conditions for redis-py methods -2. **Consolidated Data Type Usage Guides** - Redis JSON documentation is scattered across multiple files; a unified guide showing when/how to use JSON vs. strings would prevent confusion -3. **Clear Pattern-to-Implementation Mapping** - Architectural patterns (cache-aside, write-through, etc.) need explicit code examples showing the exact redis-py API calls required -4. 
**Searchable Decision Trees** - Documentation lacks structured guidance on choosing between approaches (e.g., JSON vs. string serialization, sync vs. async) -5. **Explicit Deprecation & Migration Paths** - No clear guidance on moving from manual JSON serialization to native Redis JSON operations - ---- - -## Prioritized Recommendations - -### CRITICAL (High Impact) - -#### 1. Create Comprehensive redis-py API Reference with Behavior Documentation - -**What:** Expand redis-py client documentation to include: -- Complete method signatures with type hints -- Parameter descriptions with valid value ranges -- Return value types and structures -- Error conditions and exceptions -- Version availability (when methods were added/changed) -- Performance characteristics (O(n) complexity, etc.) - -**Why:** AI agents struggle to understand: -- Whether `redis.json().set()` vs `redis.set()` is appropriate -- What parameters are required vs. optional -- What exceptions might be raised and how to handle them -- Whether a method exists in the current redis-py version - -**Example - Current State:** -```markdown -## JSON Operations -Use `r.json().get()` and `r.json().set()` for JSON data. 
-``` - -**Example - Improved State:** -```markdown -### json().set(name, path, obj, nx=False, xx=False, get=False) - -**Parameters:** -- `name` (str): Key name -- `path` (str): JSONPath expression (default: '$' for root) -- `obj` (Any): Python object to serialize as JSON -- `nx` (bool): Only set if key doesn't exist -- `xx` (bool): Only set if key exists -- `get` (bool): Return old value before update - -**Returns:** -- str: The path to the value if successful -- None: If nx=True and key exists, or xx=True and key doesn't exist - -**Raises:** -- `redis.ResponseError`: If path is invalid or obj is not JSON-serializable -- `redis.ConnectionError`: If Redis connection fails - -**Complexity:** O(N) where N is the size of the JSON document - -**Available Since:** redis-py 4.5.0 - -**Example:** -```python -r.json().set('user:1', '$', {'name': 'Alice', 'age': 30}) -r.json().set('user:1', '$.age', 31, xx=True) # Update only if exists -``` - -**Effort Estimate:** High (requires systematic documentation of all methods) - ---- - -#### 2. Create "Data Type Selection Guide" with Decision Matrix - -**What:** Create a single, authoritative guide showing: -- When to use JSON vs. strings vs. hashes -- Comparison table: serialization overhead, query capabilities, TTL support, etc. -- Migration paths between approaches -- Performance implications - -**Why:** AI agents frequently misunderstand when to use each approach: -- Should user data be stored as JSON or serialized strings? -- When is Redis JSON worth the overhead? -- How to migrate from string serialization to native JSON? 
- -**Example - Current State:** -- JSON docs at `/develop/data-types/json/` -- String docs at `/develop/data-types/strings/` -- Hash docs at `/develop/data-types/hashes/` -- No comparison or decision guidance - -**Example - Improved State:** -```markdown -## Choosing a Data Type for Structured Data - -| Aspect | JSON | Hash | String (JSON) | -|--------|------|------|---------------| -| Query Support | Full JSONPath queries | Field-level only | None (manual parsing) | -| Serialization | Native (automatic) | Manual per field | Manual (json.dumps) | -| TTL Support | Yes (per key) | Yes (per key) | Yes (per key) | -| Memory Overhead | ~15% | ~10% | ~20% (with json.dumps) | -| Partial Updates | Yes (JSONPath) | Yes (HSET) | No (full replace) | -| Indexing | Full-text, numeric | Field-level | None | -| Use Case | Complex nested data | Flat key-value pairs | Legacy systems | - -### Decision Tree -1. Do you need to query nested fields? → Use JSON -2. Do you need full-text search? → Use JSON with search index -3. Is data flat (no nesting)? → Use Hash -4. Must support legacy code? → Use String with json.dumps() -``` - -**Effort Estimate:** Medium (requires consolidation of existing docs) - ---- - -#### 3. Create Pattern-Specific Implementation Guides - -**What:** For each architectural pattern (cache-aside, write-through, write-behind, etc.): -- Show exact redis-py API calls required -- Include error handling patterns -- Show both sync and async versions -- Include performance considerations - -**Why:** AI agents need explicit mapping from pattern concept to code: -- "Implement cache-aside" → What exact redis-py methods? -- How to handle Redis failures? -- What's the difference between sync and async? 
- -**Example - Current State:** -Cache-aside tutorial exists but lacks: -- Explicit error handling patterns -- Comparison with other patterns -- Performance metrics - -**Example - Improved State:** -```markdown -## Cache-Aside Pattern - -### When to Use -- Read-heavy workloads (80%+ reads) -- Tolerable staleness (data can be minutes old) -- Resilient to cache failures - -### Implementation Steps - -#### Step 1: Check Cache -```python -try: - cached = r.json().get(f'cache:{key}') - if cached is not None: - return cached -except redis.ConnectionError: - # Fall through to database - pass -``` - -#### Step 2: Fetch from Source -```python -data = fetch_from_database(key) -``` - -#### Step 3: Store in Cache -```python -try: - r.json().set(f'cache:{key}', '$', data) - r.expire(f'cache:{key}', ttl=3600) -except redis.ConnectionError: - # Log but don't fail - logger.warning(f"Failed to cache {key}") -``` - -### Error Handling Patterns -- Connection failures: Fall back to database -- Serialization errors: Log and skip caching -- TTL expiration: Automatic (no action needed) - -### Performance Characteristics -- Cache hit: ~1ms (Redis latency) -- Cache miss: ~100ms (database latency) + ~1ms (cache write) -- Hit ratio target: 80%+ -``` - -**Effort Estimate:** High (requires creating multiple pattern guides) - ---- - -### IMPORTANT (Medium Impact) - -#### 4. Add Machine-Readable Metadata to Examples - -**What:** Add structured metadata to code examples: -```yaml ---- -pattern: cache-aside -data_type: json -redis_version: ">=4.5.0" -redis_py_version: ">=4.5.0" -complexity: O(1) for cache hit, O(n) for miss -error_handling: required -async_available: true ---- -``` - -**Why:** AI agents can use metadata to: -- Filter examples by version requirements -- Identify which examples need error handling -- Find async alternatives -- Understand performance implications - -**Effort Estimate:** Low (add to existing examples) - ---- - -#### 5. 
Create "Common Mistakes" Documentation - -**What:** Document frequent errors with explanations: -- Using `r.set()` instead of `r.json().set()` for JSON data -- Not handling `redis.ResponseError` for invalid JSONPath -- Forgetting to set TTL on cache entries -- Mixing sync and async clients - -**Why:** AI agents often make these mistakes; explicit documentation prevents them - -**Example:** -```markdown -## Common Mistakes - -### Mistake 1: Using String Serialization Instead of JSON -❌ Wrong: -```python -r.set('user:1', json.dumps({'name': 'Alice'})) -data = json.loads(r.get('user:1')) -``` - -✅ Correct: -```python -r.json().set('user:1', '$', {'name': 'Alice'}) -data = r.json().get('user:1') -``` - -**Why:** Native JSON is faster, supports queries, and handles serialization automatically. - -**Effort Estimate:** Medium - ---- - -#### 6. Create Explicit "Async vs. Sync" Comparison Guide - -**What:** Document: -- When to use async (high concurrency, I/O-bound) -- When to use sync (simple scripts, low concurrency) -- How to migrate between them -- Common async pitfalls - -**Why:** AI agents frequently confuse sync/async patterns - -**Effort Estimate:** Medium - ---- - -### NICE TO HAVE (Low Impact) - -#### 7. Add "Tested Code Examples" Badges - -**What:** Mark examples that are: -- Automatically tested -- Verified to work with current redis-py version -- Include error handling - -**Why:** Helps AI agents identify reliable examples - -**Effort Estimate:** Low - ---- - -#### 8. Create "Troubleshooting by Error Message" Guide - -**What:** Document common Redis errors and solutions: -- `WRONGTYPE Operation against a key holding the wrong kind of value` -- `ERR unknown command 'JSON.SET'` -- `NOSCRIPT No matching script` - -**Why:** AI agents can reference this when errors occur - -**Effort Estimate:** Medium - ---- - -## Implementation Priority - -1. **Phase 1 (Critical):** API Reference + Data Type Selection Guide -2. 
**Phase 2 (Important):** Pattern Implementation Guides + Common Mistakes -3. **Phase 3 (Nice to Have):** Metadata + Async Guide + Error Reference - ---- - -## Important Discovery: Redis llms.txt and Markdown Documentation - -During validation, I discovered that Redis provides: - -1. **llms.txt Index** (https://redis.io/llms.txt) - - A curated list of all documentation pages in Markdown format - - Specifically designed for LLMs and AI assistants to ingest - - Includes descriptions of each documentation page - - Organized by category (Core Docs, Commands, Development, Integrations, Operations) - -2. **Markdown Versions of All Pages** - - Every documentation page is available as Markdown via `.html.md` URLs - - Example: `https://redis.io/docs/latest/develop/data-types/json/index.html.md` - - This is much more suitable for AI agents than HTML - -**Implication for AI Agent Usability:** -Redis has already recognized the need for AI-friendly documentation formats. The existence of `llms.txt` and Markdown versions suggests that the Redis team understands AI agents need structured, machine-readable documentation. This makes the recommendations in this document even more relevant - the infrastructure is in place, but the *content* of the documentation still needs the improvements outlined below. - ---- - -## Validation Against Official Documentation - -### What I Found on redis.io and redis-py docs: - -**✅ Strengths:** -- Redis JSON documentation at https://redis.io/docs/latest/develop/data-types/json/ has good examples in multiple languages -- redis-py 7.0.1 documentation lists all commands with basic descriptions -- Examples page shows various use cases (JSON, streams, timeseries, etc.) - -**❌ Gaps Confirmed:** -1. 
**No comprehensive method signatures** - redis-py docs show method names and brief descriptions, but lack: - - Complete parameter types and defaults - - Return value structures - - Exception types that can be raised - - Version availability information - - Performance characteristics (O(n) complexity) - -2. **No data type comparison guide** - JSON, Hash, and String docs are separate with no unified decision matrix - -3. **No pattern-to-implementation mapping** - No guide showing "here's the cache-aside pattern, here's exactly how to implement it with redis-py" - -4. **No error handling documentation** - Examples show happy paths but not how to handle: - - Connection failures - - Serialization errors - - Invalid JSONPath expressions - - TTL edge cases - -5. **No async/sync comparison** - redis-py supports both but no guide on when to use each - -6. **Scattered examples** - JSON examples exist but are embedded in the JSON data type page, not linked from pattern/use-case pages - -### Specific Example - json().set() Method - -**Current redis-py docs:** -``` -json().set(name, path, obj, nx=False, xx=False, get=False) -``` - -**What's missing:** -- Parameter type hints (name: str, path: str, obj: Any, etc.) -- What happens when obj is not JSON-serializable? -- What does the method return? (True? str? None?) -- When does it raise ResponseError vs. other exceptions? -- Available since which redis-py version? -- Performance: O(N) where N is the size of the JSON document -- Example of error handling - ---- - -## Key Insights from Cache-Aside Implementation - -### What Worked Well -- Existing tcedocs system for tested examples -- Clear separation of concerns (cache_manager.py, cache_aside.py) -- Comprehensive test coverage -- JSON examples on redis.io are well-structured with multi-language support - -### What Was Difficult -- Finding exact redis-py method signatures (had to check source code) -- Understanding when to use JSON vs. 
strings (scattered across docs) -- Determining error handling patterns (not documented) -- Choosing between sync and async (no comparison guide) -- No clear guidance on which exceptions to catch - -### What Would Have Helped Most -1. Single reference showing all redis-py JSON methods with signatures, return types, and exceptions -2. Decision matrix for data type selection (JSON vs. Hash vs. String) -3. Explicit error handling patterns for each operation -4. Clear async/sync comparison with migration guide -5. Pattern-specific implementation guides (cache-aside, write-through, etc.) with exact redis-py API calls -