class Platform:
    """Platform capability queries (only the structured-output one is shown)."""

    @classmethod
    def supports_structured_output(cls) -> bool:
        """Return whether the current platform can support structured output.

        This implementation always returns False.
        """
        return False
class TPUPlatform:
    """TPU capability queries (only the structured-output one is shown)."""

    @classmethod
    def supports_structured_output(cls) -> bool:
        """Report that structured output is unavailable on TPU.

        Emits a warning through the module-level ``logger`` on every call
        and then returns False.
        """
        logger.warning("Structured output is not supported on TPU.")
        return False
v1/engine/processor.py:
1 2 3 4 5 6 7
class Processor: def _validate_structured_output(): import vllm.platforms - if vllm.platforms.current_platform.is_tpu(): - raise ValueError("Structured output is not supported on TPU.") + if not vllm.platforms.current_platform.supports_structured_output(): + return
PR2: validate_so 重构
v1/engine/core.py:
1 2 3 4 5 6 7 8 9 10 11
class EngineCore: def add_request(self, request: EngineCoreRequest): ...
class Processor:
    """Request processor (only the structured-output validation is shown)."""

    def _validate_structured_output(self, params: SamplingParams) -> None:
        """Raise if the current platform is a TPU.

        Args:
            params: Sampling parameters of the request. They are not
                inspected by this check.

        Raises:
            ValueError: If ``current_platform.is_tpu()`` is true.
        """
        # Imported inside the method, exactly as in the original snippet.
        import vllm.platforms

        if vllm.platforms.current_platform.is_tpu():
            raise ValueError("Structured output is not supported on TPU.")
class StructuredOutputManager: - def grammar_init(self, request: Request) -> None: + def init_grammar(self, request: Request) -> None: if request.structured_output_request is None: return
# Initialize the backend the first time it is needed. - # # NOTE: We only support a single backend. We do NOT support different # backends on a per-request basis in V1 (for now, anyway...). if self.backend is None: backend_name = request.sampling_params.guided_decoding.backend_name if backend_name == "xgrammar": - from vllm.v1.structured_output.backend_xgrammar import ( - XgrammarBackend) - self.backend = XgrammarBackend(self.vllm_config) elif backend_name == "guidance": self.backend = GuidanceBackend(self.vllm_config) else: raise ValueError( f"Unsupported structured output backend: {backend_name}")
# NOTE(review): the draft left "@static or @classmethod?" open. The body uses
# neither instance nor class state, so @staticmethod is used and the stray
# `self` parameter is dropped; switch to @classmethod(cls, ...) if preferred.
@staticmethod
def validate_structured_output_request(engine_level_backend: str,
                                       params: SamplingParams) -> None:
    """Validate request content according to ``engine_level_backend``.

    The resolved backend name is written back to
    ``params.guided_decoding.backend``.

    Args:
        engine_level_backend: Backend configured at engine level. Names
            starting with "xgrammar" or "guidance" select that backend's
            validator; anything else is treated as "auto".
        params: Sampling parameters of the request; mutated in place.

    Raises:
        ValueError: Presumably propagated from the selected backend's
            ``validate_grammar`` when an explicit backend rejects the
            request (TODO confirm against the backend implementations).
    """
    if engine_level_backend.startswith("xgrammar"):
        XgrammarBackend.validate_grammar(params)
    elif engine_level_backend.startswith("guidance"):
        GuidanceBackend.validate_grammar(params)
    else:
        # engine_level_backend == "auto"
        # "auto" is an opt-in to opinionated behavior where we try to
        # choose a backend based on request contents. This is not the
        # default as it is less predictable and subject to change
        # between releases as feature support changes.
        try:
            XgrammarBackend.validate_grammar(params)
            engine_level_backend = "xgrammar"
        except ValueError:
            # The request includes some jsonschema feature(s) that
            # are not supported in xgrammar. Fall back to guidance.
            engine_level_backend = "guidance"

    # Record the backend that will actually serve this request.
    params.guided_decoding.backend = engine_level_backend
将 validate_grammar() 移到各个 StructuredOutputBackend 子类中,作为静态方法还是类方法?
classXgrammarBackend(StructuredOutputBackend): @classmethod defvalidate_grammar(params: SamplingParams): # Xgrammar with no fallback.
v1/structured_output/backend_guidance.py:
1 2 3 4 5 6 7
classGuidanceBackend(StructuredOutputBackend): @classmethod defvalidate_grammar(params: SamplingParams): # TODO: ideally we would have the LLTokenizer here as Lark syntax # allows <|special_token|> and similar, see # https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#special-tokens # Without tokenizer these are disallowed in grammars.