diff --git a/crates/tiptap/src/from_md.rs b/crates/tiptap/src/from_md.rs index 58e2e68838..b0bda5dd6c 100644 --- a/crates/tiptap/src/from_md.rs +++ b/crates/tiptap/src/from_md.rs @@ -125,12 +125,36 @@ fn convert_list(l: &mdast::List) -> Value { } } +fn ensure_starts_with_paragraph(content: Vec) -> Vec { + if content.is_empty() { + return vec![json!({ "type": "paragraph" })]; + } + + let first_is_paragraph = content + .first() + .and_then(|v| v.get("type")) + .and_then(|t| t.as_str()) + .map(|t| t == "paragraph") + .unwrap_or(false); + + if first_is_paragraph { + content + } else { + let mut result = vec![json!({ "type": "paragraph" })]; + result.extend(content); + result + } +} + fn convert_list_item(item: &mdast::ListItem) -> Value { let content: Vec = item .children .iter() .filter_map(convert_block_node) .collect(); + + let content = ensure_starts_with_paragraph(content); + json!({ "type": "listItem", "content": content @@ -143,6 +167,9 @@ fn convert_task_item(item: &mdast::ListItem) -> Value { .iter() .filter_map(convert_block_node) .collect(); + + let content = ensure_starts_with_paragraph(content); + json!({ "type": "taskItem", "attrs": { "checked": item.checked.unwrap_or(false) }, diff --git a/crates/tiptap/src/lib.rs b/crates/tiptap/src/lib.rs index e422b264fe..460e32047c 100644 --- a/crates/tiptap/src/lib.rs +++ b/crates/tiptap/src/lib.rs @@ -3,10 +3,12 @@ mod from_ast; mod from_md; mod to_ast; +mod validate; pub use from_ast::mdast_to_markdown; pub use from_md::md_to_tiptap_json; pub use to_ast::tiptap_json_to_mdast; +pub use validate::validate_tiptap_json; pub fn tiptap_json_to_md(json: &serde_json::Value) -> Result { let mdast = tiptap_json_to_mdast(json); @@ -814,7 +816,164 @@ mod tests { } #[test] - fn test_multibyte_chars_no_panic() { + fn test_listitem_always_starts_with_paragraph() { + let json = md_to_tiptap_json("1. 
Item").unwrap(); + let list_item = &json["content"][0]["content"][0]; + assert_eq!(list_item["type"], "listItem"); + assert_eq!(list_item["content"][0]["type"], "paragraph"); + + let json = md_to_tiptap_json("- Item").unwrap(); + let list_item = &json["content"][0]["content"][0]; + assert_eq!(list_item["type"], "listItem"); + assert_eq!(list_item["content"][0]["type"], "paragraph"); + + let json = md_to_tiptap_json("- [ ] Task").unwrap(); + let task_item = &json["content"][0]["content"][0]; + assert_eq!(task_item["type"], "taskItem"); + assert_eq!(task_item["content"][0]["type"], "paragraph"); + } + + #[test] + fn test_empty_listitem_has_paragraph() { + let json = md_to_tiptap_json("1. ").unwrap(); + let list_item = &json["content"][0]["content"][0]; + assert_eq!(list_item["type"], "listItem"); + assert!( + list_item["content"] + .as_array() + .map(|a| !a.is_empty()) + .unwrap_or(false) + ); + assert_eq!(list_item["content"][0]["type"], "paragraph"); + } + + fn assert_schema_valid(md: &str) { + let json = md_to_tiptap_json(md).unwrap(); + let errors = validate_tiptap_json(&json); + assert!( + errors.is_empty(), + "schema validation failed for markdown:\n{md}\n\nproduced JSON:\n{json:#}\n\nerrors:\n{}", + errors + .iter() + .map(|e| e.to_string()) + .collect::>() + .join("\n") + ); + } + + #[test] + fn test_schema_valid_simple_paragraph() { + assert_schema_valid("Hello, world!"); + } + + #[test] + fn test_schema_valid_heading() { + assert_schema_valid("# Heading 1\n\n## Heading 2\n\n### Heading 3"); + } + + #[test] + fn test_schema_valid_bullet_list() { + assert_schema_valid("- Item 1\n- Item 2\n- Item 3"); + } + + #[test] + fn test_schema_valid_ordered_list() { + assert_schema_valid("1. First\n2. Second\n3. 
Third"); + } + + #[test] + fn test_schema_valid_task_list() { + assert_schema_valid("- [ ] Todo\n- [x] Done"); + } + + #[test] + fn test_schema_valid_nested_bullet_list() { + assert_schema_valid("- Parent\n - Child 1\n - Child 2\n- Another parent"); + } + + #[test] + fn test_schema_valid_deeply_nested_list() { + assert_schema_valid("- Level 1\n - Level 2\n - Level 3\n - Level 4"); + } + + #[test] + fn test_schema_valid_list_without_text_only_sublist() { + assert_schema_valid("-\n - Sub item"); + } + + #[test] + fn test_schema_valid_nested_ordered_in_bullet() { + assert_schema_valid("- Bullet\n 1. Ordered child\n 2. Another"); + } + + #[test] + fn test_schema_valid_blockquote() { + assert_schema_valid("> A quote\n>\n> Another paragraph"); + } + + #[test] + fn test_schema_valid_code_block() { + assert_schema_valid("```rust\nfn main() {}\n```"); + } + + #[test] + fn test_schema_valid_horizontal_rule() { + assert_schema_valid("Before\n\n---\n\nAfter"); + } + + #[test] + fn test_schema_valid_image() { + assert_schema_valid("![alt](https://example.com/img.png)"); + } + + #[test] + fn test_schema_valid_inline_formatting() { + assert_schema_valid("**bold** and *italic* and `code` and ~~strike~~"); + } + + #[test] + fn test_schema_valid_links() { + assert_schema_valid("[link text](https://example.com)"); + } + + #[test] + fn test_schema_valid_complex_document() { + assert_schema_valid( + "# Title\n\n\ + Some text with **bold** and *italic*.\n\n\ + - Item 1\n - Nested\n- Item 2\n\n\ + > A blockquote\n\n\ + ```js\nconsole.log('hi');\n```\n\n\ + ---\n\n\ + 1. First\n2. 
/// A single schema violation found while validating a TipTap document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError {
    /// Location of the offending node, built from the root label and
    /// `.content[i]` segments, e.g. `doc.content[0].content[1]`.
    pub path: String,
    /// Human-readable description of the rule that was violated.
    pub message: String,
}

impl std::fmt::Display for ValidationError {
    /// Formats the error as `at <path>: <message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "at {}: {}", self.path, self.message)
    }
}

// Lets callers box the error or propagate it with `?` like any other error.
impl std::error::Error for ValidationError {}

/// Returns `true` when `t` names a node type accepted where a block node is
/// required.
///
/// `image` is listed here and in `is_inline_type`, so it is accepted in both
/// positions. `listItem` and `taskItem` are not block types; they are only
/// valid as direct children of their list containers.
fn is_block_type(t: &str) -> bool {
    matches!(
        t,
        "paragraph"
            | "heading"
            | "bulletList"
            | "orderedList"
            | "taskList"
            | "blockquote"
            | "codeBlock"
            | "image"
            | "horizontalRule"
    )
}

/// Returns `true` when `t` names a node type accepted inside a paragraph or
/// heading.
fn is_inline_type(t: &str) -> bool {
    matches!(t, "text" | "hardBreak" | "image")
}
node_type(node) else { + errors.push(ValidationError { + path: path.to_string(), + message: "missing 'type' field".to_string(), + }); + return; + }; + + let content = node_content(node); + + match typ { + "doc" => { + if content.is_empty() { + errors.push(ValidationError { + path: path.to_string(), + message: "doc must contain at least one block node (content: 'block+')" + .to_string(), + }); + } + for (i, child) in content.iter().enumerate() { + let child_path = format!("{path}.content[{i}]"); + if let Some(ct) = node_type(child) { + if !is_block_type(ct) { + errors.push(ValidationError { + path: child_path.clone(), + message: format!("doc child must be a block node, got '{ct}'"), + }); + } + } + validate_node(child, &child_path, errors); + } + } + + "paragraph" => { + for (i, child) in content.iter().enumerate() { + let child_path = format!("{path}.content[{i}]"); + if let Some(ct) = node_type(child) { + if !is_inline_type(ct) { + errors.push(ValidationError { + path: child_path.clone(), + message: format!("paragraph child must be an inline node, got '{ct}'"), + }); + } + } + } + } + + "heading" => { + for (i, child) in content.iter().enumerate() { + let child_path = format!("{path}.content[{i}]"); + if let Some(ct) = node_type(child) { + if !is_inline_type(ct) { + errors.push(ValidationError { + path: child_path.clone(), + message: format!("heading child must be an inline node, got '{ct}'"), + }); + } + } + } + } + + "bulletList" => { + if content.is_empty() { + errors.push(ValidationError { + path: path.to_string(), + message: "bulletList must contain at least one listItem (content: 'listItem+')" + .to_string(), + }); + } + for (i, child) in content.iter().enumerate() { + let child_path = format!("{path}.content[{i}]"); + if let Some(ct) = node_type(child) { + if ct != "listItem" { + errors.push(ValidationError { + path: child_path.clone(), + message: format!("bulletList child must be 'listItem', got '{ct}'"), + }); + } + } + validate_node(child, &child_path, 
errors); + } + } + + "orderedList" => { + if content.is_empty() { + errors.push(ValidationError { + path: path.to_string(), + message: + "orderedList must contain at least one listItem (content: 'listItem+')" + .to_string(), + }); + } + for (i, child) in content.iter().enumerate() { + let child_path = format!("{path}.content[{i}]"); + if let Some(ct) = node_type(child) { + if ct != "listItem" { + errors.push(ValidationError { + path: child_path.clone(), + message: format!("orderedList child must be 'listItem', got '{ct}'"), + }); + } + } + validate_node(child, &child_path, errors); + } + } + + "taskList" => { + if content.is_empty() { + errors.push(ValidationError { + path: path.to_string(), + message: "taskList must contain at least one taskItem (content: 'taskItem+')" + .to_string(), + }); + } + for (i, child) in content.iter().enumerate() { + let child_path = format!("{path}.content[{i}]"); + if let Some(ct) = node_type(child) { + if ct != "taskItem" { + errors.push(ValidationError { + path: child_path.clone(), + message: format!("taskList child must be 'taskItem', got '{ct}'"), + }); + } + } + validate_node(child, &child_path, errors); + } + } + + "listItem" | "taskItem" => { + if content.is_empty() { + errors.push(ValidationError { + path: path.to_string(), + message: format!( + "{typ} must contain at least a paragraph (content: 'paragraph block*')" + ), + }); + } else { + let first_type = node_type(&content[0]); + if first_type != Some("paragraph") { + errors.push(ValidationError { + path: format!("{path}.content[0]"), + message: format!( + "{typ} must start with a paragraph (content: 'paragraph block*'), got '{}'", + first_type.unwrap_or("unknown") + ), + }); + } + for (i, child) in content.iter().enumerate() { + let child_path = format!("{path}.content[{i}]"); + if let Some(ct) = node_type(child) { + if !is_block_type(ct) { + errors.push(ValidationError { + path: child_path.clone(), + message: format!("{typ} child must be a block node, got '{ct}'"), + }); + 
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Joins validation errors into one newline-separated string for use in
    /// assertion messages.
    fn render(errors: &[ValidationError]) -> String {
        errors
            .iter()
            .map(|e| e.to_string())
            .collect::<Vec<_>>()
            .join("\n")
    }

    /// Panics unless `json` validates without errors.
    fn assert_valid(json: &Value) {
        let errors = validate_tiptap_json(json);
        assert!(
            errors.is_empty(),
            "expected valid, got errors:\n{}",
            render(&errors)
        );
    }

    /// Panics unless validation of `json` reports at least one error and the
    /// rendered errors contain `expected_fragment`.
    fn assert_invalid(json: &Value, expected_fragment: &str) {
        let errors = validate_tiptap_json(json);
        assert!(
            !errors.is_empty(),
            "expected validation errors but got none"
        );
        let all = render(&errors);
        assert!(
            all.contains(expected_fragment),
            "expected error containing '{expected_fragment}', got:\n{all}"
        );
    }

    #[test]
    fn valid_simple_doc() {
        assert_valid(&json!({
            "type": "doc",
            "content": [{ "type": "paragraph" }]
        }));
    }

    #[test]
    fn valid_list_item_with_paragraph() {
        assert_valid(&json!({
            "type": "doc",
            "content": [{
                "type": "bulletList",
                "content": [{
                    "type": "listItem",
                    "content": [{
                        "type": "paragraph",
                        "content": [{ "type": "text", "text": "hello" }]
                    }]
                }]
            }]
        }));
    }

    #[test]
    fn valid_list_item_with_paragraph_then_nested_list() {
        assert_valid(&json!({
            "type": "doc",
            "content": [{
                "type": "bulletList",
                "content": [{
                    "type": "listItem",
                    "content": [
                        { "type": "paragraph", "content": [{ "type": "text", "text": "item" }] },
                        {
                            "type": "bulletList",
                            "content": [{
                                "type": "listItem",
                                "content": [{ "type": "paragraph" }]
                            }]
                        }
                    ]
                }]
            }]
        }));
    }

    #[test]
    fn invalid_list_item_starting_with_list() {
        assert_invalid(
            &json!({
                "type": "doc",
                "content": [{
                    "type": "bulletList",
                    "content": [{
                        "type": "listItem",
                        "content": [{
                            "type": "bulletList",
                            "content": [{
                                "type": "listItem",
                                "content": [{ "type": "paragraph" }]
                            }]
                        }]
                    }]
                }]
            }),
            "must start with a paragraph",
        );
    }

    #[test]
    fn invalid_empty_list_item() {
        assert_invalid(
            &json!({
                "type": "doc",
                "content": [{
                    "type": "bulletList",
                    "content": [{
                        "type": "listItem",
                        "content": []
                    }]
                }]
            }),
            "must contain at least a paragraph",
        );
    }

    #[test]
    fn invalid_task_item_without_paragraph() {
        assert_invalid(
            &json!({
                "type": "doc",
                "content": [{
                    "type": "taskList",
                    "content": [{
                        "type": "taskItem",
                        "attrs": { "checked": false },
                        "content": [{
                            "type": "bulletList",
                            "content": [{
                                "type": "listItem",
                                "content": [{ "type": "paragraph" }]
                            }]
                        }]
                    }]
                }]
            }),
            "must start with a paragraph",
        );
    }

    #[test]
    fn invalid_empty_doc() {
        assert_invalid(
            &json!({
                "type": "doc",
                "content": []
            }),
            "must contain at least one block",
        );
    }

    #[test]
    fn invalid_inline_in_doc() {
        assert_invalid(
            &json!({
                "type": "doc",
                "content": [{ "type": "text", "text": "hello" }]
            }),
            "doc child must be a block node",
        );
    }
}