# HG changeset patch
# User Nick Wellnhofer <wellnhofer@aevum.de>
# Date 1661168015 -7200
#      Mon Aug 22 13:33:35 2022 +0200
# Node ID b41dad88a06f34635558c6947470b35f8abba1c7
# Parent  ff771a2f4c9788fd43cf9ccec179d78df6149523
Fix HTML parser with threads and --without-legacy

If the legacy functions are disabled, the default "V1" HTML SAX handler
isn't initialized in threads other than the main thread.
htmlInitParserCtxt would later use the empty V1 SAX handler, resulting
in NULL documents.

Change htmlInitParserCtxt to initialize the HTML SAX handler by calling
xmlSAX2InitHtmlDefaultSAXHandler. This removes the ability to change the
default handler but is more in line with the XML parser which
initializes the SAX handler by calling xmlSAXVersion, ignoring the V1
default handler.

Fixes #399.

diff --git a/HTMLparser.c b/HTMLparser.c
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -5056,8 +5056,7 @@
         htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
         return(-1);
     }
-    else
-        memset(sax, 0, sizeof(htmlSAXHandler));
+    memset(sax, 0, sizeof(htmlSAXHandler));
 
     /* Allocate the Input stack */
     ctxt->inputTab = (htmlParserInputPtr *)
@@ -5116,11 +5115,9 @@
     ctxt->nodeInfoNr = 0;
     ctxt->nodeInfoMax = 0;
 
-    if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
-    else {
-        ctxt->sax = sax;
-        memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
-    }
+    ctxt->sax = sax;
+    xmlSAX2InitHtmlDefaultSAXHandler(sax);
+
     ctxt->userData = ctxt;
     ctxt->myDoc = NULL;
     ctxt->wellFormed = 1;