diff --git a/API.md b/API.md index 9d86f4d..732524a 100644 --- a/API.md +++ b/API.md @@ -57,6 +57,7 @@ function parse(source: string, options?: ParserOptions): CSSNode **Flags:** - `is_important` - Whether declaration has `!important` (DECLARATION only) +- `is_browserhack` - Whether declaration property has a browser hack prefix like `*`, `_`, `!`, etc. (DECLARATION only) - `is_vendor_prefixed` - Whether node has vendor prefix (checks name/text based on type) - `has_error` - Whether node has syntax error - `has_prelude` - Whether at-rule has a prelude @@ -575,7 +576,7 @@ Parse a CSS declaration string into a detailed AST. function parse_declaration(source: string): CSSNode ``` -**Example:** +**Example 1: Basic Declaration:** ```typescript import { parse_declaration } from '@projectwallace/css-parser' @@ -594,6 +595,30 @@ for (const valueNode of decl.children) { // IDENTIFIER "red" ``` +**Example 2: Browser Hacks:** + +```typescript +import { parse_declaration } from '@projectwallace/css-parser' + +// Browser hack with * prefix (IE 6/7 hack) +const hack = parse_declaration('*width: 100px') +console.log(hack.property) // "*width" +console.log(hack.is_browserhack) // true + +// Browser hack with _ prefix (IE 6 hack) +const underscore = parse_declaration('_height: 50px') +console.log(underscore.is_browserhack) // true + +// Normal property (not a browser hack) +const normal = parse_declaration('width: 100px') +console.log(normal.is_browserhack) // false + +// Vendor prefix (not a browser hack) +const vendor = parse_declaration('-webkit-transform: scale(1)') +console.log(vendor.is_browserhack) // false +console.log(vendor.is_vendor_prefixed) // true +``` + --- ## `parse_atrule_prelude(at_rule_name, prelude)` diff --git a/src/arena.ts b/src/arena.ts index 5948214..3321037 100644 --- a/src/arena.ts +++ b/src/arena.ts @@ -95,6 +95,7 @@ export const FLAG_HAS_BLOCK = 1 << 3 // Has { } block (for style rules and at-ru export const FLAG_VENDOR_PREFIXED = 1 << 4 // Has vendor prefix (-webkit-, -moz-, -ms-, -o-) export const FLAG_HAS_DECLARATIONS = 1 << 5 // Has declarations (for style rules) export const FLAG_HAS_PARENS = 1 << 6 // Has parentheses syntax (for pseudo-class/pseudo-element functions) +export const FLAG_BROWSERHACK = 1 << 7 // Has browser hack prefix (*property, _property, etc.) // Attribute selector operator constants (stored in 1 byte at offset 2) export const ATTR_OPERATOR_NONE = 0 // [attr] diff --git a/src/css-node.ts b/src/css-node.ts index 4931ce2..9c998c8 100644 --- a/src/css-node.ts +++ b/src/css-node.ts @@ -42,6 +42,7 @@ import { FLAG_HAS_BLOCK, FLAG_HAS_DECLARATIONS, FLAG_HAS_PARENS, + FLAG_BROWSERHACK, } from './arena' import { CHAR_MINUS_HYPHEN, CHAR_PLUS, is_whitespace, is_vendor_prefixed, str_starts_with } from './string-utils' @@ -160,6 +161,7 @@ export type PlainCSSNode = { // Flags (only when true) is_important?: boolean is_vendor_prefixed?: boolean + is_browserhack?: boolean has_error?: boolean // Selector-specific @@ -323,6 +325,12 @@ export class CSSNode { return this.arena.has_flag(this.index, FLAG_IMPORTANT) } + /** Check if this declaration has a browser hack prefix */ + get is_browserhack(): boolean | null { + if (this.type !== DECLARATION) return null + return this.arena.has_flag(this.index, FLAG_BROWSERHACK) + } + /** Check if this has a vendor prefix (computed on-demand) */ get is_vendor_prefixed(): boolean { switch (this.type) { @@ -730,7 +738,10 @@ export class CSSNode { } // 5. Extract flags - if (this.type === DECLARATION) plain.is_important = this.is_important + if (this.type === DECLARATION) { + plain.is_important = this.is_important + plain.is_browserhack = this.is_browserhack + } plain.is_vendor_prefixed = this.is_vendor_prefixed plain.has_error = this.has_error diff --git a/src/parse-declaration.test.ts b/src/parse-declaration.test.ts index 4093e36..2c90949 100644 --- a/src/parse-declaration.test.ts +++ b/src/parse-declaration.test.ts @@ -182,6 +182,32 @@ describe('parse_declaration', () => { }) }) + describe('browser hacks', () => { + const HACK_PREFIXES = '-_!$&*()=%+@,./`[]#~?:<>|'.split('') + + test.each(HACK_PREFIXES)('%s property hack', (char) => { + const node = parse_declaration(`${char}property: value;`) + expect(node.property).toBe(`${char}property`) + expect(node.is_browserhack).toBe(true) + }) + + test('value\\9', () => { + const node = parse_declaration('property: value\\9') + expect(node.value).toBe('value\\9') + expect(node.is_browserhack).toBe(false) + }) + + test('normal property is not a browserhack', () => { + const node = parse_declaration('color: red') + expect(node.is_browserhack).toBe(false) + }) + + test('vendor prefixed property is not a browserhack', () => { + const node = parse_declaration('-o-color: red') + expect(node.is_browserhack).toBe(false) + }) + }) + describe('Value Parsing', () => { test('identifier value', () => { const node = parse_declaration('display: flex') diff --git a/src/parse-declaration.ts b/src/parse-declaration.ts index a53d76a..a5ea07f 100644 --- a/src/parse-declaration.ts +++ b/src/parse-declaration.ts @@ -1,7 +1,8 @@ // Declaration Parser - Parses CSS declarations into structured AST nodes import { Lexer } from './tokenize' -import { CSSDataArena, DECLARATION, FLAG_IMPORTANT } from './arena' +import { CSSDataArena, DECLARATION, FLAG_IMPORTANT, FLAG_BROWSERHACK } from './arena' import { ValueParser } from './parse-value' +import { is_vendor_prefixed } from './string-utils' import { TOKEN_IDENT, TOKEN_COLON, @@ -10,6 +11,13 @@ import { TOKEN_EOF, TOKEN_LEFT_BRACE, TOKEN_RIGHT_BRACE, + TOKEN_LEFT_PAREN, + TOKEN_RIGHT_PAREN, + TOKEN_LEFT_BRACKET, + TOKEN_RIGHT_BRACKET, + TOKEN_COMMA, + TOKEN_HASH, + TOKEN_AT_KEYWORD, type TokenType, } from './token-types' import { trim_boundaries } from './parse-utils' @@ -41,16 +49,86 @@ export class DeclarationParser { // Parse a declaration using a provided lexer (used by Parser to avoid re-tokenization) parse_declaration_with_lexer(lexer: Lexer, end: number): number | null { - // Expect identifier (property name) - whitespace already skipped by caller - if (lexer.token_type !== TOKEN_IDENT) { + // Check for browser hack prefix (single delimiter/special character before identifier) + let has_browser_hack = false + let browser_hack_start = 0 + let browser_hack_line = 1 + let browser_hack_column = 1 + + // Handle @property and #property (tokenized as single tokens) + if (lexer.token_type === TOKEN_AT_KEYWORD || lexer.token_type === TOKEN_HASH) { + // These tokens already include the @ or # prefix in their text + // Mark as browser hack since @ and # prefixes are not standard CSS + has_browser_hack = true + browser_hack_start = lexer.token_start + browser_hack_line = lexer.token_line + browser_hack_column = lexer.token_column + } else if (lexer.token_type === TOKEN_IDENT) { + // Check if identifier starts with browser hack character + // Some hacks like -property, _property are tokenized as single identifiers + const first_char = this.source.charCodeAt(lexer.token_start) + if (first_char === 95) { + // '_' - underscore prefix is always a browser hack + has_browser_hack = true + browser_hack_start = lexer.token_start + browser_hack_line = lexer.token_line + browser_hack_column = lexer.token_column + } else if (first_char === 45) { + // '-' - hyphen prefix could be vendor prefix or browser hack + // Use fast vendor prefix check (no allocations) + if (!is_vendor_prefixed(this.source, lexer.token_start, lexer.token_end)) { + // This is a browser hack like -property + has_browser_hack = true + browser_hack_start = lexer.token_start + browser_hack_line = lexer.token_line + browser_hack_column = lexer.token_column + } + } + } else { + // Browser hacks can use various token types as prefixes + const is_browser_hack_token = + lexer.token_type === TOKEN_DELIM || + lexer.token_type === TOKEN_LEFT_PAREN || + lexer.token_type === TOKEN_RIGHT_PAREN || + lexer.token_type === TOKEN_LEFT_BRACKET || + lexer.token_type === TOKEN_RIGHT_BRACKET || + lexer.token_type === TOKEN_COMMA || + lexer.token_type === TOKEN_COLON + + if (is_browser_hack_token) { + // Save position in case this isn't a browser hack + const delim_saved = lexer.save_position() + browser_hack_start = lexer.token_start + browser_hack_line = lexer.token_line + browser_hack_column = lexer.token_column + + // Consume delimiter and check if next token is identifier + lexer.next_token_fast(true) // skip whitespace + + if ((lexer.token_type as TokenType) === TOKEN_IDENT) { + // This is a browser hack! + has_browser_hack = true + } else { + // Not a browser hack, restore position + lexer.restore_position(delim_saved) + } + } + } + + // Expect identifier, at-keyword, or hash token (property name) - whitespace already skipped by caller + if ( + lexer.token_type !== TOKEN_IDENT && + lexer.token_type !== TOKEN_AT_KEYWORD && + lexer.token_type !== TOKEN_HASH + ) { return null } - let prop_start = lexer.token_start + let prop_start = has_browser_hack ? browser_hack_start : lexer.token_start let prop_end = lexer.token_end // CRITICAL: Capture line/column BEFORE consuming property token - let decl_line = lexer.token_line - let decl_column = lexer.token_column + let decl_line = has_browser_hack ? browser_hack_line : lexer.token_line + let decl_column = has_browser_hack ? browser_hack_column : lexer.token_column // Lookahead: save lexer state before consuming const saved = lexer.save_position() @@ -147,6 +225,11 @@ export class DeclarationParser { this.arena.set_flag(declaration, FLAG_IMPORTANT) } + // Set browser hack flag if found + if (has_browser_hack) { + this.arena.set_flag(declaration, FLAG_BROWSERHACK) + } + // Consume ';' if present if ((lexer.token_type as TokenType) === TOKEN_SEMICOLON) { last_end = lexer.token_end diff --git a/src/parse.ts b/src/parse.ts index 5c7ba09..a9b8bc7 100644 --- a/src/parse.ts +++ b/src/parse.ts @@ -5,8 +5,9 @@ import { CSSNode } from './css-node' import { SelectorParser } from './parse-selector' import { AtRulePreludeParser } from './parse-atrule-prelude' import { DeclarationParser } from './parse-declaration' -import { TOKEN_EOF, TOKEN_LEFT_BRACE, TOKEN_RIGHT_BRACE, TOKEN_SEMICOLON, TOKEN_IDENT, TOKEN_AT_KEYWORD } from './token-types' +import { TOKEN_EOF, TOKEN_LEFT_BRACE, TOKEN_RIGHT_BRACE, TOKEN_SEMICOLON, TOKEN_IDENT, TOKEN_AT_KEYWORD, TOKEN_HASH, TOKEN_DELIM, TOKEN_LEFT_PAREN, TOKEN_RIGHT_PAREN, TOKEN_LEFT_BRACKET, TOKEN_RIGHT_BRACKET, TOKEN_COMMA, TOKEN_COLON } from './token-types' import { trim_boundaries } from './parse-utils' +import { CHAR_PERIOD, CHAR_GREATER_THAN, CHAR_PLUS, CHAR_TILDE, CHAR_AMPERSAND } from './string-utils' export interface ParserOptions { skip_comments?: boolean @@ -252,14 +253,36 @@ export class Parser { // Parse a declaration: property: value; private parse_declaration(): number | null { - // Expect identifier (property name) - if (this.peek_type() !== TOKEN_IDENT) { - return null + // Check if this could be a declaration (identifier or browser hack prefix) + const token_type = this.peek_type() + + // Accept identifiers, at-keywords, and hash tokens + if (token_type === TOKEN_IDENT || token_type === TOKEN_AT_KEYWORD || token_type === TOKEN_HASH) { + return this.declaration_parser.parse_declaration_with_lexer(this.lexer, this.source.length) + } + + // For delimiters and special tokens, check if they could be browser hack prefixes + // Only accept single-character prefixes that are not CSS selector syntax + if ( + token_type === TOKEN_DELIM || + token_type === TOKEN_LEFT_PAREN || + token_type === TOKEN_RIGHT_PAREN || + token_type === TOKEN_LEFT_BRACKET || + token_type === TOKEN_RIGHT_BRACKET || + token_type === TOKEN_COMMA || + token_type === TOKEN_COLON + ) { + // Check if this delimiter could be a browser hack (not a selector combinator) + const char_code = this.source.charCodeAt(this.lexer.token_start) + // Exclude selector-specific delimiters: . (class), > (child), + (adjacent), ~ (general), & (nesting) + if (char_code === CHAR_PERIOD || char_code === CHAR_GREATER_THAN || char_code === CHAR_PLUS || char_code === CHAR_TILDE || char_code === CHAR_AMPERSAND) { + return null + } + // Let DeclarationParser try to parse it and return null if it's not a valid declaration + return this.declaration_parser.parse_declaration_with_lexer(this.lexer, this.source.length) } - // Use DeclarationParser with shared lexer (no re-tokenization) - // DeclarationParser will handle all parsing and advance the lexer to the right position - return this.declaration_parser.parse_declaration_with_lexer(this.lexer, this.source.length) + return null } // Parse an at-rule: @media, @import, @font-face, etc.