diff --git a/base/poco/Net/include/Poco/Net/SocketImpl.h b/base/poco/Net/include/Poco/Net/SocketImpl.h index 03aba600c349..3fe1813f2bd1 100644 --- a/base/poco/Net/include/Poco/Net/SocketImpl.h +++ b/base/poco/Net/include/Poco/Net/SocketImpl.h @@ -487,6 +487,7 @@ namespace Net bool _isBrokenTimeout; static constexpr size_t THROTTLER_QUANTUM = 32 * 1024; + static constexpr size_t THROTTLER_MAX_BLOCK_NS = 20'000'000'000ull; size_t _recvThrottlerBudget; size_t _sndThrottlerBudget; ThrottlerPtr _recvThrottler; diff --git a/base/poco/Net/src/SocketImpl.cpp b/base/poco/Net/src/SocketImpl.cpp index a92556f1b235..046bb3a667cd 100644 --- a/base/poco/Net/src/SocketImpl.cpp +++ b/base/poco/Net/src/SocketImpl.cpp @@ -1073,12 +1073,7 @@ void SocketImpl::throttleSend(size_t length, bool blocking) { size_t amount = length < THROTTLER_QUANTUM ? THROTTLER_QUANTUM : length; if (blocking) - { - if (_sndTimeout.totalMicroseconds() != 0) // Avoid throttling over socket send timeout - _sndThrottler->throttle(amount, _sndTimeout.totalMicroseconds() * 1000 / 2); - else - _sndThrottler->throttle(amount); - } + _sndThrottler->throttle(amount, THROTTLER_MAX_BLOCK_NS); else _sndThrottler->throttle(amount, 0); _sndThrottlerBudget += amount; @@ -1092,12 +1087,7 @@ void SocketImpl::throttleRecv(size_t length, bool blocking) { size_t amount = length < THROTTLER_QUANTUM ? THROTTLER_QUANTUM : length; if (blocking) - { - if (_recvTimeout.totalMicroseconds() != 0) // Avoid throttling over socket receive timeout - _recvThrottler->throttle(amount, _recvTimeout.totalMicroseconds() * 1000 / 2); - else - _recvThrottler->throttle(amount); - } + _recvThrottler->throttle(amount, THROTTLER_MAX_BLOCK_NS); else _recvThrottler->throttle(amount, 0); _recvThrottlerBudget += amount; diff --git a/base/poco/XML/include/Poco/XML/expat.h b/base/poco/XML/include/Poco/XML/expat.h index 30c7de08cbb0..f1f91a8376ed 100644 --- a/base/poco/XML/include/Poco/XML/expat.h +++ b/base/poco/XML/include/Poco/XML/expat.h @@ -19,10 +19,11 @@ Copyright (c) 2023 Hanno Böck Copyright (c) 2023 Sony Corporation / Snild Dolkow Copyright (c) 2024 Taichi Haradaguchi <20001722@ymail.ne.jp> + Copyright (c) 2025 Matthew Fernandez Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the + a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit @@ -42,21 +43,21 @@ */ #ifndef Expat_INCLUDED -#define Expat_INCLUDED 1 +# define Expat_INCLUDED 1 -#include -#include "expat_external.h" +# include +# include "expat_external.h" -#ifdef __cplusplus +# ifdef __cplusplus extern "C" { -#endif +# endif struct XML_ParserStruct; typedef struct XML_ParserStruct *XML_Parser; typedef unsigned char XML_Bool; -#define XML_TRUE ((XML_Bool)1) -#define XML_FALSE ((XML_Bool)0) +# define XML_TRUE ((XML_Bool)1) +# define XML_FALSE ((XML_Bool)0) /* The XML_Status enum gives the possible return values for several API functions. 
The preprocessor #defines are included so this @@ -73,11 +74,11 @@ typedef unsigned char XML_Bool; */ enum XML_Status { XML_STATUS_ERROR = 0, -#define XML_STATUS_ERROR XML_STATUS_ERROR +# define XML_STATUS_ERROR XML_STATUS_ERROR XML_STATUS_OK = 1, -#define XML_STATUS_OK XML_STATUS_OK +# define XML_STATUS_OK XML_STATUS_OK XML_STATUS_SUSPENDED = 2 -#define XML_STATUS_SUSPENDED XML_STATUS_SUSPENDED +# define XML_STATUS_SUSPENDED XML_STATUS_SUSPENDED }; enum XML_Error { @@ -276,7 +277,7 @@ XML_ParserCreate_MM(const XML_Char *encoding, /* Prepare a parser object to be reused. This is particularly valuable when memory allocation overhead is disproportionately high, - such as when a large number of small documnents need to be parsed. + such as when a large number of small documents need to be parsed. All handlers are cleared from the parser, except for the unknownEncodingHandler. The parser's external state is re-initialized except for the values of ns and ns_triplets. @@ -680,7 +681,7 @@ XMLPARSEAPI(void) XML_SetUserData(XML_Parser parser, void *userData); /* Returns the last value set by XML_SetUserData or NULL. */ -#define XML_GetUserData(parser) (*(void **)(parser)) +# define XML_GetUserData(parser) (*(void **)(parser)) /* This is equivalent to supplying an encoding argument to XML_ParserCreate. On success XML_SetEncoding returns non-zero, @@ -752,7 +753,7 @@ XML_GetSpecifiedAttributeCount(XML_Parser parser); XMLPARSEAPI(int) XML_GetIdAttributeIndex(XML_Parser parser); -#ifdef XML_ATTR_INFO +# ifdef XML_ATTR_INFO /* Source file byte offsets for the start and end of attribute names and values. The value indices are exclusive of surrounding quotes; thus in a UTF-8 source file an attribute value of "blah" will yield: @@ -773,7 +774,7 @@ typedef struct { */ XMLPARSEAPI(const XML_AttrInfo *) XML_GetAttributeInfo(XML_Parser parser); -#endif +# endif /* Parses some input. Returns XML_STATUS_ERROR if a fatal error is detected. The last call to XML_Parse must have isFinal true; len @@ -970,9 +971,9 @@ XMLPARSEAPI(const char *) XML_GetInputContext(XML_Parser parser, int *offset, int *size); /* For backwards compatibility with previous versions. */ -#define XML_GetErrorLineNumber XML_GetCurrentLineNumber -#define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber -#define XML_GetErrorByteIndex XML_GetCurrentByteIndex +# define XML_GetErrorLineNumber XML_GetCurrentLineNumber +# define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber +# define XML_GetErrorByteIndex XML_GetCurrentByteIndex /* Frees the content model passed to the element declaration handler */ XMLPARSEAPI(void) @@ -1032,7 +1033,10 @@ enum XML_FeatureEnum { XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, /* Added in Expat 2.6.0. */ - XML_FEATURE_GE + XML_FEATURE_GE, + /* Added in Expat 2.7.2. */ + XML_FEATURE_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_FEATURE_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT, /* Additional features must be added to the end of this enum. */ }; @@ -1045,7 +1049,7 @@ typedef struct { XMLPARSEAPI(const XML_Feature *) XML_GetFeatureList(void); -#if defined(XML_DTD) || (defined(XML_GE) && XML_GE == 1) +# if defined(XML_DTD) || (defined(XML_GE) && XML_GE == 1) /* Added in Expat 2.4.0 for XML_DTD defined and * added in Expat 2.6.0 for XML_GE == 1. 
*/ XMLPARSEAPI(XML_Bool) @@ -1057,7 +1061,17 @@ XML_SetBillionLaughsAttackProtectionMaximumAmplification( XMLPARSEAPI(XML_Bool) XML_SetBillionLaughsAttackProtectionActivationThreshold( XML_Parser parser, unsigned long long activationThresholdBytes); -#endif + +/* Added in Expat 2.7.2. */ +XMLPARSEAPI(XML_Bool) +XML_SetAllocTrackerMaximumAmplification(XML_Parser parser, + float maximumAmplificationFactor); + +/* Added in Expat 2.7.2. */ +XMLPARSEAPI(XML_Bool) +XML_SetAllocTrackerActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes); +# endif /* Added in Expat 2.6.0. */ XMLPARSEAPI(XML_Bool) @@ -1066,12 +1080,12 @@ XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); /* Expat follows the semantic versioning convention. See https://semver.org */ -#define XML_MAJOR_VERSION 2 -#define XML_MINOR_VERSION 7 -#define XML_MICRO_VERSION 1 +# define XML_MAJOR_VERSION 2 +# define XML_MINOR_VERSION 7 +# define XML_MICRO_VERSION 3 -#ifdef __cplusplus +# ifdef __cplusplus } -#endif +# endif #endif /* not Expat_INCLUDED */ diff --git a/base/poco/XML/include/Poco/XML/expat_external.h b/base/poco/XML/include/Poco/XML/expat_external.h index 0dabf7113414..b365fec2ede3 100644 --- a/base/poco/XML/include/Poco/XML/expat_external.h +++ b/base/poco/XML/include/Poco/XML/expat_external.h @@ -38,7 +38,7 @@ */ #ifndef Expat_External_INCLUDED -#define Expat_External_INCLUDED 1 +# define Expat_External_INCLUDED 1 /* External API definitions */ @@ -64,12 +64,12 @@ compiled with the cdecl calling convention as the default since system headers may assume the cdecl convention. */ -#ifndef XMLCALL -# if defined(_MSC_VER) -# define XMLCALL __cdecl -# elif defined(__GNUC__) && defined(__i386) && ! defined(__INTEL_COMPILER) -# define XMLCALL __attribute__((cdecl)) -# else +# ifndef XMLCALL +# if defined(_MSC_VER) +# define XMLCALL __cdecl +# elif defined(__GNUC__) && defined(__i386) && ! defined(__INTEL_COMPILER) +# define XMLCALL __attribute__((cdecl)) +# else /* For any platform which uses this definition and supports more than one calling convention, we need to extend this definition to declare the convention used on that platform, if it's possible to @@ -80,86 +80,86 @@ pre-processor and how to specify the same calling convention as the platform's malloc() implementation. */ -# define XMLCALL -# endif -#endif /* not defined XMLCALL */ +# define XMLCALL +# endif +# endif /* not defined XMLCALL */ -#if ! defined(XML_STATIC) && ! defined(XMLIMPORT) -# ifndef XML_BUILDING_EXPAT +# if ! defined(XML_STATIC) && ! defined(XMLIMPORT) +# ifndef XML_BUILDING_EXPAT /* using Expat from an application */ -# if defined(_MSC_EXTENSIONS) && ! defined(__BEOS__) && ! defined(__CYGWIN__) -# define XMLIMPORT __declspec(dllimport) +# if defined(_MSC_VER) && ! defined(__BEOS__) && ! defined(__CYGWIN__) +# define XMLIMPORT __declspec(dllimport) +# endif + # endif +# endif /* not defined XML_STATIC */ +# ifndef XML_ENABLE_VISIBILITY +# define XML_ENABLE_VISIBILITY 0 # endif -#endif /* not defined XML_STATIC */ - -#ifndef XML_ENABLE_VISIBILITY -# define XML_ENABLE_VISIBILITY 0 -#endif -#if ! defined(XMLIMPORT) && XML_ENABLE_VISIBILITY -# define XMLIMPORT __attribute__((visibility("default"))) -#endif +# if ! 
defined(XMLIMPORT) && XML_ENABLE_VISIBILITY +# define XMLIMPORT __attribute__((visibility("default"))) +# endif /* If we didn't define it above, define it away: */ -#ifndef XMLIMPORT -# define XMLIMPORT -#endif - -#if defined(__GNUC__) \ - && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)) -# define XML_ATTR_MALLOC __attribute__((__malloc__)) -#else -# define XML_ATTR_MALLOC -#endif - -#if defined(__GNUC__) \ - && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) -# define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x))) -#else -# define XML_ATTR_ALLOC_SIZE(x) -#endif - -#define XMLPARSEAPI(type) XMLIMPORT type XMLCALL - -#ifdef __cplusplus -extern "C" { -#endif +# ifndef XMLIMPORT +# define XMLIMPORT +# endif + +# if defined(__GNUC__) \ + && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)) +# define XML_ATTR_MALLOC __attribute__((__malloc__)) +# else +# define XML_ATTR_MALLOC +# endif -#ifdef XML_UNICODE_WCHAR_T -# ifndef XML_UNICODE -# define XML_UNICODE +# if defined(__GNUC__) \ + && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +# define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x))) +# else +# define XML_ATTR_ALLOC_SIZE(x) # endif -# if defined(__SIZEOF_WCHAR_T__) && (__SIZEOF_WCHAR_T__ != 2) -# error "sizeof(wchar_t) != 2; Need -fshort-wchar for both Expat and libc" + +# define XMLPARSEAPI(type) XMLIMPORT type XMLCALL + +# ifdef __cplusplus +extern "C" { # endif -#endif -#ifdef XML_UNICODE /* Information is UTF-16 encoded. */ # ifdef XML_UNICODE_WCHAR_T +# ifndef XML_UNICODE +# define XML_UNICODE +# endif +# if defined(__SIZEOF_WCHAR_T__) && (__SIZEOF_WCHAR_T__ != 2) +# error "sizeof(wchar_t) != 2; Need -fshort-wchar for both Expat and libc" +# endif +# endif + +# ifdef XML_UNICODE /* Information is UTF-16 encoded. */ +# ifdef XML_UNICODE_WCHAR_T typedef wchar_t XML_Char; typedef wchar_t XML_LChar; -# else +# else typedef unsigned short XML_Char; typedef char XML_LChar; -# endif /* XML_UNICODE_WCHAR_T */ -#else /* Information is UTF-8 encoded. */ +# endif /* XML_UNICODE_WCHAR_T */ +# else /* Information is UTF-8 encoded. */ typedef char XML_Char; typedef char XML_LChar; -#endif /* XML_UNICODE */ +# endif /* XML_UNICODE */ -#ifdef XML_LARGE_SIZE /* Use large integers for file/stream positions. */ +# ifdef XML_LARGE_SIZE /* Use large integers for file/stream positions. */ typedef long long XML_Index; typedef unsigned long long XML_Size; -#else +# else typedef long XML_Index; typedef unsigned long XML_Size; -#endif /* XML_LARGE_SIZE */ +# endif /* XML_LARGE_SIZE */ -#ifdef __cplusplus +# ifdef __cplusplus } -#endif +# endif #endif /* not Expat_External_INCLUDED */ diff --git a/base/poco/XML/src/internal.h b/base/poco/XML/src/internal.h index 73b622224d5d..697c4a473b8f 100644 --- a/base/poco/XML/src/internal.h +++ b/base/poco/XML/src/internal.h @@ -108,6 +108,7 @@ #endif #include // ULONG_MAX +#include // size_t #if defined(_WIN32) \ && (! 
defined(__USE_MINGW_ANSI_STDIO) \ @@ -127,7 +128,7 @@ # elif ULONG_MAX == 18446744073709551615u // 2^64-1 # define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld" # define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu" -# elif defined(EMSCRIPTEN) // 32bit mode Emscripten +# elif defined(__wasm32__) // 32bit mode Emscripten or WASI SDK # define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld" # define EXPAT_FMT_SIZE_T(midpart) "%" midpart "zu" # else @@ -148,6 +149,16 @@ 100.0f #define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT \ 8388608 // 8 MiB, 2^23 + +#define EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT 100.0f +#define EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT \ + 67108864 // 64 MiB, 2^26 + +// NOTE: If function expat_alloc was user facing, EXPAT_MALLOC_ALIGNMENT would +// have to take sizeof(long double) into account +#define EXPAT_MALLOC_ALIGNMENT sizeof(long long) // largest parser (sub)member +#define EXPAT_MALLOC_PADDING ((EXPAT_MALLOC_ALIGNMENT) - sizeof(size_t)) + /* NOTE END */ #include "Poco/XML/expat.h" // so we can use type XML_Parser below @@ -171,6 +182,9 @@ extern #endif XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c #if defined(XML_TESTING) +void *expat_malloc(XML_Parser parser, size_t size, int sourceLine); +void expat_free(XML_Parser parser, void *ptr, int sourceLine); +void *expat_realloc(XML_Parser parser, void *ptr, size_t size, int sourceLine); extern unsigned int g_bytesScanned; // used for testing only #endif diff --git a/base/poco/XML/src/xmlparse.cpp b/base/poco/XML/src/xmlparse.cpp index 7368e1702494..98d1d55a7d3d 100644 --- a/base/poco/XML/src/xmlparse.cpp +++ b/base/poco/XML/src/xmlparse.cpp @@ -1,4 +1,4 @@ -/* d19ae032c224863c1527ba44d228cc34b99192c3a4c5a27af1f4e054d45ee031 (2.7.1+) +/* 28bcd8b1ba7eb595d82822908257fd9c3589b4243e3c922d0369f35bfcd7b506 (2.7.3+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -41,6 +41,7 @@ Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow Copyright (c) 2024-2025 Berkay Eren Ürün Copyright (c) 2024 Hanno Böck + Copyright (c) 2025 Matthew Fernandez Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -97,7 +98,7 @@ #include #include /* memset(), memcpy() */ #include -#include /* UINT_MAX */ +#include /* INT_MAX, UINT_MAX */ #include /* fprintf */ #include /* getenv, rand_s */ #include /* uintptr_t */ @@ -122,8 +123,6 @@ # include "winconfig.h" #endif -#include "expat_config.h" - #include "ascii.h" #include "Poco/XML/expat.h" #include "siphash.h" @@ -241,7 +240,7 @@ typedef struct { unsigned char power; size_t size; size_t used; - const XML_Memory_Handling_Suite *mem; + XML_Parser parser; } HASH_TABLE; static size_t keylen(KEY s); @@ -364,7 +363,7 @@ typedef struct { const XML_Char *end; XML_Char *ptr; XML_Char *start; - const XML_Memory_Handling_Suite *mem; + XML_Parser parser; } STRING_POOL; /* The XML_Char before the name is used to determine whether @@ -459,6 +458,14 @@ typedef struct accounting { unsigned long long activationThresholdBytes; } ACCOUNTING; +typedef struct MALLOC_TRACKER { + XmlBigCount bytesAllocated; + XmlBigCount peakBytesAllocated; // updated live only for debug level >=2 + unsigned long debugLevel; + float maximumAmplificationFactor; // >=1.0 + XmlBigCount activationThresholdBytes; +} MALLOC_TRACKER; + typedef struct entity_stats { unsigned int countEverOpened; unsigned int currentDepth; @@ -562,27 +569,24 @@ static XML_Bool setContext(XML_Parser parser, const XML_Char *context); 
static void FASTCALL normalizePublicId(XML_Char *s); -static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms); +static DTD *dtdCreate(XML_Parser parser); /* do not call if m_parentParser != NULL */ -static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms); -static void dtdDestroy(DTD *p, XML_Bool isDocEntity, - const XML_Memory_Handling_Suite *ms); +static void dtdReset(DTD *p, XML_Parser parser); +static void dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser); static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, - const XML_Memory_Handling_Suite *ms); + XML_Parser parser); static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, STRING_POOL *newPool, const HASH_TABLE *oldTable); static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize); -static void FASTCALL hashTableInit(HASH_TABLE *table, - const XML_Memory_Handling_Suite *ms); +static void FASTCALL hashTableInit(HASH_TABLE *table, XML_Parser parser); static void FASTCALL hashTableClear(HASH_TABLE *table); static void FASTCALL hashTableDestroy(HASH_TABLE *table); static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table); static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter); -static void FASTCALL poolInit(STRING_POOL *pool, - const XML_Memory_Handling_Suite *ms); +static void FASTCALL poolInit(STRING_POOL *pool, XML_Parser parser); static void FASTCALL poolClear(STRING_POOL *pool); static void FASTCALL poolDestroy(STRING_POOL *pool); static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, @@ -602,15 +606,15 @@ static XML_Content *build_model(XML_Parser parser); static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, const char *end); -static XML_Char *copyString(const XML_Char *s, - const XML_Memory_Handling_Suite *memsuite); +static XML_Char *copyString(const XML_Char *s, XML_Parser parser); static unsigned long generate_hash_secret_salt(XML_Parser parser); static XML_Bool startParsing(XML_Parser parser); static XML_Parser parserCreate(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, - const XML_Char *nameSep, DTD *dtd); + const XML_Char *nameSep, DTD *dtd, + XML_Parser parentParser); static void parserInit(XML_Parser parser, const XML_Char *encodingName); @@ -634,10 +638,10 @@ static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity, int sourceLine); static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity, int sourceLine); +#endif /* XML_GE == 1 */ static XML_Parser getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff); -#endif /* XML_GE == 1 */ static unsigned long getDebugLevel(const char *variableName, unsigned long defaultDebugLevel); @@ -780,14 +784,238 @@ struct XML_ParserStruct { unsigned long m_hash_secret_salt; #if XML_GE == 1 ACCOUNTING m_accounting; + MALLOC_TRACKER m_alloc_tracker; ENTITY_STATS m_entity_stats; #endif XML_Bool m_reenter; }; -#define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) -#define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) -#define FREE(parser, p) (parser->m_mem.free_fcn((p))) +#if XML_GE == 1 +# define MALLOC(parser, s) (expat_malloc((parser), (s), __LINE__)) +# define REALLOC(parser, p, s) (expat_realloc((parser), (p), (s), __LINE__)) +# define FREE(parser, p) (expat_free((parser), (p), __LINE__)) +#else +# define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) +# define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) +# define FREE(parser, 
p) (parser->m_mem.free_fcn((p))) +#endif + +#if XML_GE == 1 +static void +expat_heap_stat(XML_Parser rootParser, char op, XmlBigCount absDiff, + XmlBigCount newTotal, XmlBigCount peakTotal, int sourceLine) { + // NOTE: This can be +infinity or -nan + const float amplification + = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect; + fprintf( + stderr, + "expat: Allocations(%p): Direct " EXPAT_FMT_ULL("10") ", allocated %c" EXPAT_FMT_ULL( + "10") " to " EXPAT_FMT_ULL("10") " (" EXPAT_FMT_ULL("10") " peak), amplification %8.2f (xmlparse.c:%d)\n", + (void *)rootParser, rootParser->m_accounting.countBytesDirect, op, + absDiff, newTotal, peakTotal, (double)amplification, sourceLine); +} + +static bool +expat_heap_increase_tolerable(XML_Parser rootParser, XmlBigCount increase, + int sourceLine) { + assert(rootParser != NULL); + assert(increase > 0); + + XmlBigCount newTotal = 0; + bool tolerable = true; + + // Detect integer overflow + if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated < increase) { + tolerable = false; + } else { + newTotal = rootParser->m_alloc_tracker.bytesAllocated + increase; + + if (newTotal >= rootParser->m_alloc_tracker.activationThresholdBytes) { + assert(newTotal > 0); + // NOTE: This can be +infinity when dividing by zero but not -nan + const float amplification + = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect; + if (amplification + > rootParser->m_alloc_tracker.maximumAmplificationFactor) { + tolerable = false; + } + } + } + + if (! tolerable && (rootParser->m_alloc_tracker.debugLevel >= 1)) { + expat_heap_stat(rootParser, '+', increase, newTotal, newTotal, sourceLine); + } + + return tolerable; +} + +# if defined(XML_TESTING) +void * +# else +static void * +# endif +expat_malloc(XML_Parser parser, size_t size, int sourceLine) { + // Detect integer overflow + if (SIZE_MAX - size < sizeof(size_t) + EXPAT_MALLOC_PADDING) { + return NULL; + } + + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(rootParser->m_parentParser == NULL); + + const size_t bytesToAllocate = sizeof(size_t) + EXPAT_MALLOC_PADDING + size; + + if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated + < bytesToAllocate) { + return NULL; // i.e. signal integer overflow as out-of-memory + } + + if (! expat_heap_increase_tolerable(rootParser, bytesToAllocate, + sourceLine)) { + return NULL; // i.e. 
signal violation as out-of-memory + } + + // Actually allocate + void *const mallocedPtr = parser->m_mem.malloc_fcn(bytesToAllocate); + + if (mallocedPtr == NULL) { + return NULL; + } + + // Update in-block recorded size + *(size_t *)mallocedPtr = size; + + // Update accounting + rootParser->m_alloc_tracker.bytesAllocated += bytesToAllocate; + + // Report as needed + if (rootParser->m_alloc_tracker.debugLevel >= 2) { + if (rootParser->m_alloc_tracker.bytesAllocated + > rootParser->m_alloc_tracker.peakBytesAllocated) { + rootParser->m_alloc_tracker.peakBytesAllocated + = rootParser->m_alloc_tracker.bytesAllocated; + } + expat_heap_stat(rootParser, '+', bytesToAllocate, + rootParser->m_alloc_tracker.bytesAllocated, + rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); + } + + return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING; +} + +# if defined(XML_TESTING) +void +# else +static void +# endif +expat_free(XML_Parser parser, void *ptr, int sourceLine) { + assert(parser != NULL); + + if (ptr == NULL) { + return; + } + + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(rootParser->m_parentParser == NULL); + + // Extract size (to the eyes of malloc_fcn/realloc_fcn) and + // the original pointer returned by malloc/realloc + void *const mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t); + const size_t bytesAllocated + = sizeof(size_t) + EXPAT_MALLOC_PADDING + *(size_t *)mallocedPtr; + + // Update accounting + assert(rootParser->m_alloc_tracker.bytesAllocated >= bytesAllocated); + rootParser->m_alloc_tracker.bytesAllocated -= bytesAllocated; + + // Report as needed + if (rootParser->m_alloc_tracker.debugLevel >= 2) { + expat_heap_stat(rootParser, '-', bytesAllocated, + rootParser->m_alloc_tracker.bytesAllocated, + rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); + } + + // NOTE: This may be freeing rootParser, so freeing has to come last + parser->m_mem.free_fcn(mallocedPtr); +} + +# if defined(XML_TESTING) +void * +# else +static void * +# endif +expat_realloc(XML_Parser parser, void *ptr, size_t size, int sourceLine) { + assert(parser != NULL); + + if (ptr == NULL) { + return expat_malloc(parser, size, sourceLine); + } + + if (size == 0) { + expat_free(parser, ptr, sourceLine); + return NULL; + } + + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(rootParser->m_parentParser == NULL); + + // Extract original size (to the eyes of the caller) and the original + // pointer returned by malloc/realloc + void *mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t); + const size_t prevSize = *(size_t *)mallocedPtr; + + // Classify upcoming change + const bool isIncrease = (size > prevSize); + const size_t absDiff + = (size > prevSize) ? (size - prevSize) : (prevSize - size); + + // Ask for permission from accounting + if (isIncrease) { + if (! expat_heap_increase_tolerable(rootParser, absDiff, sourceLine)) { + return NULL; // i.e. signal violation as out-of-memory + } + } + + // NOTE: Integer overflow detection has already been done for us + // by expat_heap_increase_tolerable(..) 
above + assert(SIZE_MAX - sizeof(size_t) - EXPAT_MALLOC_PADDING >= size); + + // Actually allocate + mallocedPtr = parser->m_mem.realloc_fcn( + mallocedPtr, sizeof(size_t) + EXPAT_MALLOC_PADDING + size); + + if (mallocedPtr == NULL) { + return NULL; + } + + // Update accounting + if (isIncrease) { + assert((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated + >= absDiff); + rootParser->m_alloc_tracker.bytesAllocated += absDiff; + } else { // i.e. decrease + assert(rootParser->m_alloc_tracker.bytesAllocated >= absDiff); + rootParser->m_alloc_tracker.bytesAllocated -= absDiff; + } + + // Report as needed + if (rootParser->m_alloc_tracker.debugLevel >= 2) { + if (rootParser->m_alloc_tracker.bytesAllocated + > rootParser->m_alloc_tracker.peakBytesAllocated) { + rootParser->m_alloc_tracker.peakBytesAllocated + = rootParser->m_alloc_tracker.bytesAllocated; + } + expat_heap_stat(rootParser, isIncrease ? '+' : '-', absDiff, + rootParser->m_alloc_tracker.bytesAllocated, + rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); + } + + // Update in-block recorded size + *(size_t *)mallocedPtr = size; + + return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING; +} +#endif // XML_GE == 1 XML_Parser XMLCALL XML_ParserCreate(const XML_Char *encodingName) { @@ -831,11 +1059,14 @@ writeRandomBytes_getrandom_nonblock(void *target, size_t count) { void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); const size_t bytesToWrite = count - bytesWrittenTotal; + assert(bytesToWrite <= INT_MAX); + const int bytesWrittenMore = # if defined(HAVE_GETRANDOM) - getrandom(currentTarget, bytesToWrite, getrandomFlags); + (int)getrandom(currentTarget, bytesToWrite, getrandomFlags); # else - syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags); + (int)syscall(SYS_getrandom, currentTarget, bytesToWrite, + getrandomFlags); # endif if (bytesWrittenMore > 0) { @@ -1030,9 +1261,10 @@ generate_hash_secret_salt(XML_Parser parser) { static unsigned long get_hash_secret_salt(XML_Parser parser) { - if (parser->m_parentParser != NULL) - return get_hash_secret_salt(parser->m_parentParser); - return parser->m_hash_secret_salt; + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(! rootParser->m_parentParser); + + return rootParser->m_hash_secret_salt; } static enum XML_Error @@ -1118,19 +1350,43 @@ XML_Parser XMLCALL XML_ParserCreate_MM(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep) { - return parserCreate(encodingName, memsuite, nameSep, NULL); + return parserCreate(encodingName, memsuite, nameSep, NULL, NULL); } static XML_Parser parserCreate(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, - DTD *dtd) { - XML_Parser parser; + DTD *dtd, XML_Parser parentParser) { + XML_Parser parser = NULL; + +#if XML_GE == 1 + const size_t increase + = sizeof(size_t) + EXPAT_MALLOC_PADDING + sizeof(struct XML_ParserStruct); + + if (parentParser != NULL) { + const XML_Parser rootParser = getRootParserOf(parentParser, NULL); + if (! 
expat_heap_increase_tolerable(rootParser, increase, __LINE__)) { + return NULL; + } + } +#else + UNUSED_P(parentParser); +#endif if (memsuite) { XML_Memory_Handling_Suite *mtemp; +#if XML_GE == 1 + void *const sizeAndParser + = memsuite->malloc_fcn(sizeof(size_t) + EXPAT_MALLOC_PADDING + + sizeof(struct XML_ParserStruct)); + if (sizeAndParser != NULL) { + *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct); + parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t) + + EXPAT_MALLOC_PADDING); +#else parser = (XML_Parser)memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); if (parser != NULL) { +#endif mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); mtemp->malloc_fcn = memsuite->malloc_fcn; mtemp->realloc_fcn = memsuite->realloc_fcn; @@ -1138,39 +1394,86 @@ parserCreate(const XML_Char *encodingName, } } else { XML_Memory_Handling_Suite *mtemp; +#if XML_GE == 1 + void *const sizeAndParser = malloc(sizeof(size_t) + EXPAT_MALLOC_PADDING + + sizeof(struct XML_ParserStruct)); + if (sizeAndParser != NULL) { + *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct); + parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t) + + EXPAT_MALLOC_PADDING); +#else parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct)); if (parser != NULL) { +#endif mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); mtemp->malloc_fcn = malloc; mtemp->realloc_fcn = realloc; mtemp->free_fcn = free; } - } + } // cppcheck-suppress[memleak symbolName=sizeAndParser] // Cppcheck >=2.18.0 if (! parser) return parser; +#if XML_GE == 1 + // Initialize .m_alloc_tracker + memset(&parser->m_alloc_tracker, 0, sizeof(MALLOC_TRACKER)); + if (parentParser == NULL) { + parser->m_alloc_tracker.debugLevel + = getDebugLevel("EXPAT_MALLOC_DEBUG", 0u); + parser->m_alloc_tracker.maximumAmplificationFactor + = EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT; + parser->m_alloc_tracker.activationThresholdBytes + = EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT; + + // NOTE: This initialization needs to come this early because these fields + // are read by allocation tracking code + parser->m_parentParser = NULL; + parser->m_accounting.countBytesDirect = 0; + } else { + parser->m_parentParser = parentParser; + } + + // Record XML_ParserStruct allocation we did a few lines up before + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(rootParser->m_parentParser == NULL); + assert(SIZE_MAX - rootParser->m_alloc_tracker.bytesAllocated >= increase); + rootParser->m_alloc_tracker.bytesAllocated += increase; + + // Report on allocation + if (rootParser->m_alloc_tracker.debugLevel >= 2) { + if (rootParser->m_alloc_tracker.bytesAllocated + > rootParser->m_alloc_tracker.peakBytesAllocated) { + rootParser->m_alloc_tracker.peakBytesAllocated + = rootParser->m_alloc_tracker.bytesAllocated; + } + + expat_heap_stat(rootParser, '+', increase, + rootParser->m_alloc_tracker.bytesAllocated, + rootParser->m_alloc_tracker.peakBytesAllocated, __LINE__); + } +#else + parser->m_parentParser = NULL; +#endif // XML_GE == 1 + parser->m_buffer = NULL; parser->m_bufferLim = NULL; parser->m_attsSize = INIT_ATTS_SIZE; - parser->m_atts - = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE)); + parser->m_atts = (ATTRIBUTE*)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE)); if (parser->m_atts == NULL) { FREE(parser, parser); return NULL; } #ifdef XML_ATTR_INFO - parser->m_attInfo = (XML_AttrInfo *)MALLOC( - parser, parser->m_attsSize * sizeof(XML_AttrInfo)); + parser->m_attInfo = 
(XML_AttrInfo*)MALLOC(parser, parser->m_attsSize * sizeof(XML_AttrInfo)); if (parser->m_attInfo == NULL) { FREE(parser, parser->m_atts); FREE(parser, parser); return NULL; } #endif - parser->m_dataBuf - = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char)); + parser->m_dataBuf = (XML_Char*)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char)); if (parser->m_dataBuf == NULL) { FREE(parser, parser->m_atts); #ifdef XML_ATTR_INFO @@ -1184,7 +1487,7 @@ parserCreate(const XML_Char *encodingName, if (dtd) parser->m_dtd = dtd; else { - parser->m_dtd = dtdCreate(&parser->m_mem); + parser->m_dtd = dtdCreate(parser); if (parser->m_dtd == NULL) { FREE(parser, parser->m_dataBuf); FREE(parser, parser->m_atts); @@ -1218,8 +1521,8 @@ parserCreate(const XML_Char *encodingName, parser->m_protocolEncodingName = NULL; - poolInit(&parser->m_tempPool, &(parser->m_mem)); - poolInit(&parser->m_temp2Pool, &(parser->m_mem)); + poolInit(&parser->m_tempPool, parser); + poolInit(&parser->m_temp2Pool, parser); parserInit(parser, encodingName); if (encodingName && ! parser->m_protocolEncodingName) { @@ -1251,7 +1554,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { parser->m_processor = prologInitProcessor; XmlPrologStateInit(&parser->m_prologState); if (encodingName != NULL) { - parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); + parser->m_protocolEncodingName = copyString(encodingName, parser); } parser->m_curBase = NULL; XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0); @@ -1313,7 +1616,6 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { parser->m_unknownEncodingMem = NULL; parser->m_unknownEncodingRelease = NULL; parser->m_unknownEncodingData = NULL; - parser->m_parentParser = NULL; parser->m_parsingStatus.parsing = XML_INITIALIZED; // Reentry can only be triggered inside m_processor calls parser->m_reenter = XML_FALSE; @@ -1403,7 +1705,7 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { FREE(parser, (void *)parser->m_protocolEncodingName); parser->m_protocolEncodingName = NULL; parserInit(parser, encodingName); - dtdReset(parser->m_dtd, &parser->m_mem); + dtdReset(parser->m_dtd, parser); return XML_TRUE; } @@ -1439,7 +1741,7 @@ XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { parser->m_protocolEncodingName = NULL; else { /* Copy the new encoding name into allocated memory */ - parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); + parser->m_protocolEncodingName = copyString(encodingName, parser); if (! parser->m_protocolEncodingName) return XML_STATUS_ERROR; } @@ -1548,9 +1850,10 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, */ if (parser->m_ns) { XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; - parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); + parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd, oldParser); } else { - parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); + parser + = parserCreate(encodingName, &parser->m_mem, NULL, newDtd, oldParser); } if (! parser) @@ -1594,7 +1897,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, parser->m_prologState.inEntityValue = oldInEntityValue; if (context) { #endif /* XML_DTD */ - if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem) + if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, parser) || ! 
setContext(parser, context)) { XML_ParserFree(parser); return NULL; @@ -1706,14 +2009,16 @@ XML_ParserFree(XML_Parser parser) { #else if (parser->m_dtd) #endif /* XML_DTD */ - dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, - &parser->m_mem); - FREE(parser, (void *)parser->m_atts); + dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, parser); + FREE(parser, parser->m_atts); #ifdef XML_ATTR_INFO - FREE(parser, (void *)parser->m_attInfo); + FREE(parser, parser->m_attInfo); #endif FREE(parser, parser->m_groupConnector); - FREE(parser, parser->m_buffer); + // NOTE: We are avoiding FREE(..) here because parser->m_buffer + // is not being allocated with MALLOC(..) but with plain + // .malloc_fcn(..). + parser->m_mem.free_fcn(parser->m_buffer); FREE(parser, parser->m_dataBuf); FREE(parser, parser->m_nsAtts); FREE(parser, parser->m_unknownEncodingMem); @@ -2032,12 +2337,14 @@ int XMLCALL XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { if (parser == NULL) return 0; - if (parser->m_parentParser) - return XML_SetHashSalt(parser->m_parentParser, hash_salt); + + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(! rootParser->m_parentParser); + /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (parserBusy(parser)) + if (parserBusy(rootParser)) return 0; - parser->m_hash_secret_salt = hash_salt; + rootParser->m_hash_secret_salt = hash_salt; return 1; } @@ -2305,7 +2612,9 @@ XML_GetBuffer(XML_Parser parser, int len) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; } - newBuf = (char *)MALLOC(parser, bufferSize); + // NOTE: We are avoiding MALLOC(..) here to leave limiting + // the input size to the application using Expat. + newBuf = (char*)parser->m_mem.malloc_fcn(bufferSize); if (newBuf == 0) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; @@ -2316,7 +2625,10 @@ XML_GetBuffer(XML_Parser parser, int len) { memcpy(newBuf, &parser->m_bufferPtr[-keep], EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + keep); - FREE(parser, parser->m_buffer); + // NOTE: We are avoiding FREE(..) here because parser->m_buffer + // is not being allocated with MALLOC(..) but with plain + // .malloc_fcn(..). + parser->m_mem.free_fcn(parser->m_buffer); parser->m_buffer = newBuf; parser->m_bufferEnd = parser->m_buffer @@ -2332,7 +2644,10 @@ XML_GetBuffer(XML_Parser parser, int len) { if (parser->m_bufferPtr) { memcpy(newBuf, parser->m_bufferPtr, EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); - FREE(parser, parser->m_buffer); + // NOTE: We are avoiding FREE(..) here because parser->m_buffer + // is not being allocated with MALLOC(..) but with plain + // .malloc_fcn(..). + parser->m_mem.free_fcn(parser->m_buffer); parser->m_bufferEnd = newBuf + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); @@ -2510,28 +2825,43 @@ XML_GetCurrentColumnNumber(XML_Parser parser) { void XMLCALL XML_FreeContentModel(XML_Parser parser, XML_Content *model) { - if (parser != NULL) - FREE(parser, model); + if (parser == NULL) + return; + + // NOTE: We are avoiding FREE(..) here because the content model + // has been created using plain .malloc_fcn(..) rather than MALLOC(..). + parser->m_mem.free_fcn(model); } void *XMLCALL XML_MemMalloc(XML_Parser parser, size_t size) { if (parser == NULL) return NULL; - return MALLOC(parser, size); + + // NOTE: We are avoiding MALLOC(..) here to not include + // user allocations with allocation tracking and limiting. 
+ return parser->m_mem.malloc_fcn(size); } void *XMLCALL XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) { if (parser == NULL) return NULL; - return REALLOC(parser, ptr, size); + + // NOTE: We are avoiding REALLOC(..) here to not include + // user allocations with allocation tracking and limiting. + return parser->m_mem.realloc_fcn(ptr, size); } void XMLCALL XML_MemFree(XML_Parser parser, void *ptr) { - if (parser != NULL) - FREE(parser, ptr); + if (parser == NULL) + return; + + // NOTE: We are avoiding FREE(..) here because XML_MemMalloc and + // XML_MemRealloc are not using MALLOC(..) and REALLOC(..) + // but plain .malloc_fcn(..) and .realloc_fcn(..), internally. + parser->m_mem.free_fcn(ptr); } void XMLCALL @@ -2731,6 +3061,13 @@ XML_GetFeatureList(void) { EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, /* Added in Expat 2.6.0. */ {XML_FEATURE_GE, XML_L("XML_GE"), 0}, + /* Added in Expat 2.7.2. */ + {XML_FEATURE_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_L("XML_AT_MAX_AMP"), + (long int)EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT}, + {XML_FEATURE_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT, + XML_L("XML_AT_ACT_THRES"), + (long int)EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT}, #endif {XML_FEATURE_END, NULL, 0}}; @@ -2759,6 +3096,29 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold( parser->m_accounting.activationThresholdBytes = activationThresholdBytes; return XML_TRUE; } + +XML_Bool XMLCALL +XML_SetAllocTrackerMaximumAmplification(XML_Parser parser, + float maximumAmplificationFactor) { + if ((parser == NULL) || (parser->m_parentParser != NULL) + || isnan(maximumAmplificationFactor) + || (maximumAmplificationFactor < 1.0f)) { + return XML_FALSE; + } + parser->m_alloc_tracker.maximumAmplificationFactor + = maximumAmplificationFactor; + return XML_TRUE; +} + +XML_Bool XMLCALL +XML_SetAllocTrackerActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes) { + if ((parser == NULL) || (parser->m_parentParser != NULL)) { + return XML_FALSE; + } + parser->m_alloc_tracker.activationThresholdBytes = activationThresholdBytes; + return XML_TRUE; +} #endif /* XML_GE == 1 */ XML_Bool XMLCALL @@ -2779,8 +3139,8 @@ static XML_Bool storeRawNames(XML_Parser parser) { TAG *tag = parser->m_tagStack; while (tag) { - int bufSize; - int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); + size_t bufSize; + size_t nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); size_t rawNameLen; char *rawNameBuf = tag->buf + nameLen; /* Stop if already stored. Since m_tagStack is a stack, we can stop @@ -2797,9 +3157,9 @@ storeRawNames(XML_Parser parser) { /* Detect and prevent integer overflow. */ if (rawNameLen > (size_t)INT_MAX - nameLen) return XML_FALSE; - bufSize = nameLen + (int)rawNameLen; - if (bufSize > tag->bufEnd - tag->buf) { - char *temp = (char *)REALLOC(parser, tag->buf, bufSize); + bufSize = nameLen + rawNameLen; + if (bufSize > (size_t)(tag->bufEnd - tag->buf)) { + char *temp = (char*)REALLOC(parser, tag->buf, bufSize); if (temp == NULL) return XML_FALSE; /* if tag->name.str points to tag->buf (only when namespace @@ -3125,10 +3485,10 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, tag = parser->m_freeTagList; parser->m_freeTagList = parser->m_freeTagList->parent; } else { - tag = (TAG *)MALLOC(parser, sizeof(TAG)); + tag = (TAG*)MALLOC(parser, sizeof(TAG)); if (! 
tag) return XML_ERROR_NO_MEMORY; - tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE); + tag->buf = (char*)MALLOC(parser, INIT_TAG_BUF_SIZE); if (! tag->buf) { FREE(parser, tag); return XML_ERROR_NO_MEMORY; @@ -3161,7 +3521,7 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, } bufSize = (int)(tag->bufEnd - tag->buf) << 1; { - char *temp = (char *)REALLOC(parser, tag->buf, bufSize); + char *temp = (char*)REALLOC(parser, tag->buf, bufSize); if (temp == NULL) return XML_ERROR_NO_MEMORY; tag->buf = temp; @@ -3540,8 +3900,8 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, } #endif - temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts, - parser->m_attsSize * sizeof(ATTRIBUTE)); + temp = (ATTRIBUTE*)REALLOC(parser, parser->m_atts, + parser->m_attsSize * sizeof(ATTRIBUTE)); if (temp == NULL) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; @@ -3559,8 +3919,8 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, } # endif - temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo, - parser->m_attsSize * sizeof(XML_AttrInfo)); + temp2 = (XML_AttrInfo*)REALLOC(parser, parser->m_attInfo, + parser->m_attsSize * sizeof(XML_AttrInfo)); if (temp2 == NULL) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; @@ -3695,7 +4055,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, and clear flags that say whether attributes were specified */ i = 0; if (nPrefixes) { - int j; /* hash table index */ + unsigned int j; /* hash table index */ unsigned long version = parser->m_nsAttsVersion; /* Detect and prevent invalid shift */ @@ -3736,8 +4096,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, } #endif - temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts, - nsAttsSize * sizeof(NS_ATT)); + temp = (NS_ATT*)REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT)); if (! temp) { /* Restore actual size of memory in m_nsAtts */ parser->m_nsAttsPower = oldNsAttsPower; @@ -3790,7 +4149,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, if (! b) return XML_ERROR_UNBOUND_PREFIX; - for (j = 0; j < b->uriLen; j++) { + for (j = 0; j < (unsigned int)b->uriLen; j++) { const XML_Char c = b->uri[j]; if (! poolAppendChar(&parser->m_tempPool, c)) return XML_ERROR_NO_MEMORY; @@ -3884,7 +4243,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, return XML_ERROR_NONE; prefixLen = 0; if (parser->m_ns_triplets && binding->prefix->name) { - for (; binding->prefix->name[prefixLen++];) + while (binding->prefix->name[prefixLen++]) ; /* prefixLen includes null terminator */ } tagNamePtr->localPart = localPart; @@ -3918,7 +4277,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, } #endif - uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char)); + uri = (XML_Char*)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char)); if (! 
uri) return XML_ERROR_NO_MEMORY; binding->uriAlloc = n + EXPAND_SPARE; @@ -4164,8 +4523,8 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, } #endif - XML_Char *temp = (XML_Char *)REALLOC( - parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); + XML_Char *temp + = (XML_Char*)REALLOC(parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); if (temp == NULL) return XML_ERROR_NO_MEMORY; b->uri = temp; @@ -4173,7 +4532,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, } parser->m_freeBindingList = b->nextTagBinding; } else { - b = (BINDING *)MALLOC(parser, sizeof(BINDING)); + b = (BINDING*)MALLOC(parser, sizeof(BINDING)); if (! b) return XML_ERROR_NO_MEMORY; @@ -4191,8 +4550,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, } #endif - b->uri - = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE)); + b->uri = (XML_Char*)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE)); if (! b->uri) { FREE(parser, b); return XML_ERROR_NO_MEMORY; @@ -4612,7 +4970,7 @@ handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) { if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, encodingName, &info)) { ENCODING *enc; - parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding()); + parser->m_unknownEncodingMem = (void*)MALLOC(parser, XmlSizeOfUnknownEncoding()); if (! parser->m_unknownEncodingMem) { if (info.release) info.release(info.data); @@ -5563,7 +5921,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, return XML_ERROR_NO_MEMORY; } - char *const new_connector = (char *)REALLOC( + char *const new_connector = (char*)REALLOC( parser, parser->m_groupConnector, parser->m_groupSize *= 2); if (new_connector == NULL) { parser->m_groupSize /= 2; @@ -5583,15 +5941,14 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, } #endif - int *const new_scaff_index = (int *)REALLOC( + int *const new_scaff_index = (int*)REALLOC( parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int)); if (new_scaff_index == NULL) return XML_ERROR_NO_MEMORY; dtd->scaffIndex = new_scaff_index; } } else { - parser->m_groupConnector - = (char *)MALLOC(parser, parser->m_groupSize = 32); + parser->m_groupConnector = (char*)MALLOC(parser, parser->m_groupSize = 32); if (! parser->m_groupConnector) { parser->m_groupSize = 0; return XML_ERROR_NO_MEMORY; @@ -5748,8 +6105,11 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, case XML_ROLE_CONTENT_EMPTY: if (dtd->in_eldecl) { if (parser->m_elementDeclHandler) { - XML_Content *content - = (XML_Content *)MALLOC(parser, sizeof(XML_Content)); + // NOTE: We are avoiding MALLOC(..) here to so that + // applications that are not using XML_FreeContentModel but + // plain free(..) or .free_fcn() to free the content model's + // memory are safe. + XML_Content *content = (XML_Content*)parser->m_mem.malloc_fcn(sizeof(XML_Content)); if (! 
content) return XML_ERROR_NO_MEMORY; content->quant = XML_CQUANT_NONE; @@ -5805,7 +6165,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, name = el->name; dtd->scaffold[myindex].name = name; nameLen = 0; - for (; name[nameLen++];) + while (name[nameLen++]) ; /* Detect and prevent integer overflow */ @@ -6026,8 +6386,7 @@ processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl, openEntity = *freeEntityList; *freeEntityList = openEntity->next; } else { - openEntity - = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); + openEntity = (OPEN_INTERNAL_ENTITY*)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); if (! openEntity) return XML_ERROR_NO_MEMORY; } @@ -6105,6 +6464,10 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end, // process its possible inner entities (which are added to the // m_openInternalEntities during doProlog or doContent calls above) entity->hasMore = XML_FALSE; + if (! entity->is_param + && (openEntity->startTagLevel != parser->m_tagLevel)) { + return XML_ERROR_ASYNC_ENTITY; + } triggerReenter(parser); return result; } // End of entity processing, "if" block will return here @@ -6295,7 +6658,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, case XML_TOK_ENTITY_REF: { const XML_Char *name; ENTITY *entity; - char checkEntityDecl; + bool checkEntityDecl; XML_Char ch = (XML_Char)XmlPredefinedEntityName( enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); if (ch) { @@ -6822,8 +7185,8 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, if (type->nDefaultAtts == type->allocDefaultAtts) { if (type->allocDefaultAtts == 0) { type->allocDefaultAtts = 8; - type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC( - parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); + type->defaultAtts + = (DEFAULT_ATTRIBUTE*)MALLOC(parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); if (! 
type->defaultAtts) { type->allocDefaultAtts = 0; return 0; @@ -6848,8 +7211,8 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, } #endif - temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts, - (count * sizeof(DEFAULT_ATTRIBUTE))); + temp = (DEFAULT_ATTRIBUTE*)REALLOC(parser, type->defaultAtts, + (count * sizeof(DEFAULT_ATTRIBUTE))); if (temp == NULL) return 0; type->allocDefaultAtts = count; @@ -7140,19 +7503,19 @@ normalizePublicId(XML_Char *publicId) { } static DTD * -dtdCreate(const XML_Memory_Handling_Suite *ms) { - DTD *p = (DTD*) ms->malloc_fcn(sizeof(DTD)); +dtdCreate(XML_Parser parser) { + DTD *p = (DTD*)MALLOC(parser, sizeof(DTD)); if (p == NULL) return p; - poolInit(&(p->pool), ms); - poolInit(&(p->entityValuePool), ms); - hashTableInit(&(p->generalEntities), ms); - hashTableInit(&(p->elementTypes), ms); - hashTableInit(&(p->attributeIds), ms); - hashTableInit(&(p->prefixes), ms); + poolInit(&(p->pool), parser); + poolInit(&(p->entityValuePool), parser); + hashTableInit(&(p->generalEntities), parser); + hashTableInit(&(p->elementTypes), parser); + hashTableInit(&(p->attributeIds), parser); + hashTableInit(&(p->prefixes), parser); #ifdef XML_DTD p->paramEntityRead = XML_FALSE; - hashTableInit(&(p->paramEntities), ms); + hashTableInit(&(p->paramEntities), parser); #endif /* XML_DTD */ p->defaultPrefix.name = NULL; p->defaultPrefix.binding = NULL; @@ -7172,7 +7535,7 @@ dtdCreate(const XML_Memory_Handling_Suite *ms) { } static void -dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { +dtdReset(DTD *p, XML_Parser parser) { HASH_TABLE_ITER iter; hashTableIterInit(&iter, &(p->elementTypes)); for (;;) { @@ -7180,7 +7543,7 @@ dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { if (! e) break; if (e->allocDefaultAtts != 0) - ms->free_fcn(e->defaultAtts); + FREE(parser, e->defaultAtts); } hashTableClear(&(p->generalEntities)); #ifdef XML_DTD @@ -7197,9 +7560,9 @@ dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { p->in_eldecl = XML_FALSE; - ms->free_fcn(p->scaffIndex); + FREE(parser, p->scaffIndex); p->scaffIndex = NULL; - ms->free_fcn(p->scaffold); + FREE(parser, p->scaffold); p->scaffold = NULL; p->scaffLevel = 0; @@ -7213,7 +7576,7 @@ dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { } static void -dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { +dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser) { HASH_TABLE_ITER iter; hashTableIterInit(&iter, &(p->elementTypes)); for (;;) { @@ -7221,7 +7584,7 @@ dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { if (! e) break; if (e->allocDefaultAtts != 0) - ms->free_fcn(e->defaultAtts); + FREE(parser, e->defaultAtts); } hashTableDestroy(&(p->generalEntities)); #ifdef XML_DTD @@ -7233,10 +7596,10 @@ dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { poolDestroy(&(p->pool)); poolDestroy(&(p->entityValuePool)); if (isDocEntity) { - ms->free_fcn(p->scaffIndex); - ms->free_fcn(p->scaffold); + FREE(parser, p->scaffIndex); + FREE(parser, p->scaffold); } - ms->free_fcn(p); + FREE(parser, p); } /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise. @@ -7244,7 +7607,7 @@ dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { */ static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, - const XML_Memory_Handling_Suite *ms) { + XML_Parser parser) { HASH_TABLE_ITER iter; /* Copy the prefix table. 
*/ @@ -7325,7 +7688,7 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, } #endif newE->defaultAtts - = (DEFAULT_ATTRIBUTE*) ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); + = (DEFAULT_ATTRIBUTE*)MALLOC(parser, oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); if (! newE->defaultAtts) { return 0; } @@ -7487,7 +7850,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { /* table->size is a power of 2 */ table->size = (size_t)1 << INIT_POWER; tsize = table->size * sizeof(NAMED *); - table->v = (NAMED**) table->mem->malloc_fcn(tsize); + table->v = (NAMED**)MALLOC(table->parser, tsize); if (! table->v) { table->size = 0; return NULL; @@ -7527,7 +7890,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { } size_t tsize = newSize * sizeof(NAMED *); - NAMED **newV = (NAMED**) table->mem->malloc_fcn(tsize); + NAMED **newV = (NAMED**)MALLOC(table->parser, tsize); if (! newV) return NULL; memset(newV, 0, tsize); @@ -7543,7 +7906,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { } newV[j] = table->v[i]; } - table->mem->free_fcn(table->v); + FREE(table->parser, table->v); table->v = newV; table->power = newPower; table->size = newSize; @@ -7556,7 +7919,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { } } } - table->v[i] = (NAMED*) table->mem->malloc_fcn(createSize); + table->v[i] = (NAMED*)MALLOC(table->parser, createSize); if (! table->v[i]) return NULL; memset(table->v[i], 0, createSize); @@ -7569,7 +7932,7 @@ static void FASTCALL hashTableClear(HASH_TABLE *table) { size_t i; for (i = 0; i < table->size; i++) { - table->mem->free_fcn(table->v[i]); + FREE(table->parser, table->v[i]); table->v[i] = NULL; } table->used = 0; @@ -7579,17 +7942,17 @@ static void FASTCALL hashTableDestroy(HASH_TABLE *table) { size_t i; for (i = 0; i < table->size; i++) - table->mem->free_fcn(table->v[i]); - table->mem->free_fcn(table->v); + FREE(table->parser, table->v[i]); + FREE(table->parser, table->v); } static void FASTCALL -hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) { +hashTableInit(HASH_TABLE *p, XML_Parser parser) { p->power = 0; p->size = 0; p->used = 0; p->v = NULL; - p->mem = ms; + p->parser = parser; } static void FASTCALL @@ -7609,13 +7972,13 @@ hashTableIterNext(HASH_TABLE_ITER *iter) { } static void FASTCALL -poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) { +poolInit(STRING_POOL *pool, XML_Parser parser) { pool->blocks = NULL; pool->freeBlocks = NULL; pool->start = NULL; pool->ptr = NULL; pool->end = NULL; - pool->mem = ms; + pool->parser = parser; } static void FASTCALL @@ -7642,13 +8005,13 @@ poolDestroy(STRING_POOL *pool) { BLOCK *p = pool->blocks; while (p) { BLOCK *tem = p->next; - pool->mem->free_fcn(p); + FREE(pool->parser, p); p = tem; } p = pool->freeBlocks; while (p) { BLOCK *tem = p->next; - pool->mem->free_fcn(p); + FREE(pool->parser, p); p = tem; } } @@ -7803,8 +8166,7 @@ poolGrow(STRING_POOL *pool) { if (bytesToAllocate == 0) return XML_FALSE; - temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks, - (unsigned)bytesToAllocate); + temp = (BLOCK*)REALLOC(pool->parser, pool->blocks, bytesToAllocate); if (temp == NULL) return XML_FALSE; pool->blocks = temp; @@ -7844,7 +8206,7 @@ poolGrow(STRING_POOL *pool) { if (bytesToAllocate == 0) return XML_FALSE; - tem = (BLOCK*) pool->mem->malloc_fcn(bytesToAllocate); + tem = (BLOCK*)MALLOC(pool->parser, bytesToAllocate); if (! 
tem) return XML_FALSE; tem->size = blockSize; @@ -7875,12 +8237,17 @@ nextScaffoldPart(XML_Parser parser) { return -1; } #endif - dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int)); + dtd->scaffIndex = (int*)MALLOC(parser, parser->m_groupSize * sizeof(int)); if (! dtd->scaffIndex) return -1; dtd->scaffIndex[0] = 0; } + // Will casting to int be safe further down? + if (dtd->scaffCount > INT_MAX) { + return -1; + } + if (dtd->scaffCount >= dtd->scaffSize) { CONTENT_SCAFFOLD *temp; if (dtd->scaffold) { @@ -7898,21 +8265,20 @@ nextScaffoldPart(XML_Parser parser) { } #endif - temp = (CONTENT_SCAFFOLD *)REALLOC( - parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); + temp = (CONTENT_SCAFFOLD*)REALLOC(parser, dtd->scaffold, + dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); if (temp == NULL) return -1; dtd->scaffSize *= 2; } else { - temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS - * sizeof(CONTENT_SCAFFOLD)); + temp = (CONTENT_SCAFFOLD*)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS * sizeof(CONTENT_SCAFFOLD)); if (temp == NULL) return -1; dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS; } dtd->scaffold = temp; } - next = dtd->scaffCount++; + next = (int)dtd->scaffCount++; me = &dtd->scaffold[next]; if (dtd->scaffLevel) { CONTENT_SCAFFOLD *parent @@ -7959,7 +8325,10 @@ build_model(XML_Parser parser) { const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content) + (dtd->contentStringLen * sizeof(XML_Char))); - ret = (XML_Content *)MALLOC(parser, allocsize); + // NOTE: We are avoiding MALLOC(..) here to so that + // applications that are not using XML_FreeContentModel but plain + // free(..) or .free_fcn() to free the content model's memory are safe. + ret = (XML_Content*)parser->m_mem.malloc_fcn(allocsize); if (! ret) return NULL; @@ -8080,7 +8449,7 @@ getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, } static XML_Char * -copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { +copyString(const XML_Char *s, XML_Parser parser) { size_t charsRequired = 0; XML_Char *result; @@ -8092,7 +8461,7 @@ copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { charsRequired++; /* Now allocate space for the copy */ - result = (XML_Char*) memsuite->malloc_fcn(charsRequired * sizeof(XML_Char)); + result = (XML_Char*)MALLOC(parser, charsRequired * sizeof(XML_Char)); if (result == NULL) return NULL; /* Copy the original into place */ @@ -8111,10 +8480,10 @@ accountingGetCurrentAmplification(XML_Parser rootParser) { + rootParser->m_accounting.countBytesIndirect; const float amplificationFactor = rootParser->m_accounting.countBytesDirect - ? (countBytesOutput + ? ((float)countBytesOutput / (float)(rootParser->m_accounting.countBytesDirect)) - : ((lenOfShortestInclude - + rootParser->m_accounting.countBytesIndirect) + : ((float)(lenOfShortestInclude + + rootParser->m_accounting.countBytesIndirect) / (float)lenOfShortestInclude); assert(! 
rootParser->m_parentParser); return amplificationFactor; @@ -8298,6 +8667,8 @@ entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) { rootParser->m_entity_stats.currentDepth--; } +#endif /* XML_GE == 1 */ + static XML_Parser getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) { XML_Parser rootParser = parser; @@ -8313,6 +8684,8 @@ getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) { return rootParser; } +#if XML_GE == 1 + const char * unsignedCharToPrintable(unsigned char c) { switch (c) { diff --git a/base/poco/XML/src/xmlrole.c b/base/poco/XML/src/xmlrole.c index 6300dcfad179..2c6133f236a9 100644 --- a/base/poco/XML/src/xmlrole.c +++ b/base/poco/XML/src/xmlrole.c @@ -42,11 +42,10 @@ #include -#ifdef EXPAT_WIN32 -#include "winconfig.h" +#ifdef _WIN32 +# include "winconfig.h" #endif -#include "Poco/XML/expat_external.h" #include "internal.h" #include "xmlrole.h" #include "ascii.h" diff --git a/base/poco/XML/src/xmlrole.h b/base/poco/XML/src/xmlrole.h index a5ad4f7dca35..4cb28ddf9ca4 100644 --- a/base/poco/XML/src/xmlrole.h +++ b/base/poco/XML/src/xmlrole.h @@ -10,7 +10,7 @@ Copyright (c) 2000 Clark Cooper Copyright (c) 2002 Karl Waclawek Copyright (c) 2002 Fred L. Drake, Jr. - Copyright (c) 2017-2024 Sebastian Pipping + Copyright (c) 2017-2025 Sebastian Pipping Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -34,19 +34,13 @@ */ #ifndef XmlRole_INCLUDED -#define XmlRole_INCLUDED 1 +# define XmlRole_INCLUDED 1 -#ifdef __VMS -/* 0 1 2 3 0 1 2 3 - 1234567890123456789012345678901 1234567890123456789012345678901 */ -# define XmlPrologStateInitExternalEntity XmlPrologStateInitExternalEnt -#endif +# include "xmltok.h" -#include "xmltok.h" - -#ifdef __cplusplus +# ifdef __cplusplus extern "C" { -#endif +# endif enum { XML_ROLE_ERROR = -1, @@ -107,11 +101,11 @@ enum { XML_ROLE_CONTENT_ELEMENT_PLUS, XML_ROLE_PI, XML_ROLE_COMMENT, -#ifdef XML_DTD +# ifdef XML_DTD XML_ROLE_TEXT_DECL, XML_ROLE_IGNORE_SECT, XML_ROLE_INNER_PARAM_ENTITY_REF, -#endif /* XML_DTD */ +# endif /* XML_DTD */ XML_ROLE_PARAM_ENTITY_REF }; @@ -120,23 +114,23 @@ typedef struct prolog_state { const char *end, const ENCODING *enc); unsigned level; int role_none; -#ifdef XML_DTD +# ifdef XML_DTD unsigned includeLevel; int documentEntity; int inEntityValue; -#endif /* XML_DTD */ +# endif /* XML_DTD */ } PROLOG_STATE; void XmlPrologStateInit(PROLOG_STATE *state); -#ifdef XML_DTD +# ifdef XML_DTD void XmlPrologStateInitExternalEntity(PROLOG_STATE *state); -#endif /* XML_DTD */ +# endif /* XML_DTD */ -#define XmlTokenRole(state, tok, ptr, end, enc) \ - (((state)->handler)(state, tok, ptr, end, enc)) +# define XmlTokenRole(state, tok, ptr, end, enc) \ + (((state)->handler)(state, tok, ptr, end, enc)) -#ifdef __cplusplus +# ifdef __cplusplus } -#endif +# endif #endif /* not XmlRole_INCLUDED */ diff --git a/base/poco/XML/src/xmltok.c b/base/poco/XML/src/xmltok.c index 0b5c234c2af2..19626ea7f195 100644 --- a/base/poco/XML/src/xmltok.c +++ b/base/poco/XML/src/xmltok.c @@ -52,7 +52,7 @@ #include /* memcpy */ #include -#ifdef EXPAT_WIN32 +#ifdef _WIN32 # include "winconfig.h" #endif @@ -1398,7 +1398,7 @@ unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, } ENCODING * -XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert, +XmlInitUnknownEncoding(void *mem, const int *table, CONVERTER convert, void *userData) { int i; struct unknown_encoding *e = (struct unknown_encoding *)mem; @@ -1661,7 +1661,7 @@ 
initScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc, # undef ns ENCODING * -XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert, +XmlInitUnknownEncodingNS(void *mem, const int *table, CONVERTER convert, void *userData) { ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); if (enc) diff --git a/base/poco/XML/src/xmltok.h b/base/poco/XML/src/xmltok.h index 73bef31a6d9f..f949ebf9c171 100644 --- a/base/poco/XML/src/xmltok.h +++ b/base/poco/XML/src/xmltok.h @@ -35,113 +35,113 @@ */ #ifndef XmlTok_INCLUDED -#define XmlTok_INCLUDED 1 +# define XmlTok_INCLUDED 1 -#ifdef __cplusplus +# ifdef __cplusplus extern "C" { -#endif +# endif /* The following token may be returned by XmlContentTok */ -#define XML_TOK_TRAILING_RSQB \ - -5 /* ] or ]] at the end of the scan; might be \ - start of illegal ]]> sequence */ +# define XML_TOK_TRAILING_RSQB \ + -5 /* ] or ]] at the end of the scan; might be \ + start of illegal ]]> sequence */ /* The following tokens may be returned by both XmlPrologTok and XmlContentTok. */ -#define XML_TOK_NONE -4 /* The string to be scanned is empty */ -#define XML_TOK_TRAILING_CR \ - -3 /* A CR at the end of the scan; \ - might be part of CRLF sequence */ -#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ -#define XML_TOK_PARTIAL -1 /* only part of a token */ -#define XML_TOK_INVALID 0 +# define XML_TOK_NONE -4 /* The string to be scanned is empty */ +# define XML_TOK_TRAILING_CR \ + -3 /* A CR at the end of the scan; \ + might be part of CRLF sequence */ +# define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ +# define XML_TOK_PARTIAL -1 /* only part of a token */ +# define XML_TOK_INVALID 0 /* The following tokens are returned by XmlContentTok; some are also returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok. */ -#define XML_TOK_START_TAG_WITH_ATTS 1 -#define XML_TOK_START_TAG_NO_ATTS 2 -#define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag */ -#define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4 -#define XML_TOK_END_TAG 5 -#define XML_TOK_DATA_CHARS 6 -#define XML_TOK_DATA_NEWLINE 7 -#define XML_TOK_CDATA_SECT_OPEN 8 -#define XML_TOK_ENTITY_REF 9 -#define XML_TOK_CHAR_REF 10 /* numeric character reference */ +# define XML_TOK_START_TAG_WITH_ATTS 1 +# define XML_TOK_START_TAG_NO_ATTS 2 +# define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag */ +# define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4 +# define XML_TOK_END_TAG 5 +# define XML_TOK_DATA_CHARS 6 +# define XML_TOK_DATA_NEWLINE 7 +# define XML_TOK_CDATA_SECT_OPEN 8 +# define XML_TOK_ENTITY_REF 9 +# define XML_TOK_CHAR_REF 10 /* numeric character reference */ /* The following tokens may be returned by both XmlPrologTok and XmlContentTok. 
*/ -#define XML_TOK_PI 11 /* processing instruction */ -#define XML_TOK_XML_DECL 12 /* XML decl or text decl */ -#define XML_TOK_COMMENT 13 -#define XML_TOK_BOM 14 /* Byte order mark */ +# define XML_TOK_PI 11 /* processing instruction */ +# define XML_TOK_XML_DECL 12 /* XML decl or text decl */ +# define XML_TOK_COMMENT 13 +# define XML_TOK_BOM 14 /* Byte order mark */ /* The following tokens are returned only by XmlPrologTok */ -#define XML_TOK_PROLOG_S 15 -#define XML_TOK_DECL_OPEN 16 /* */ -#define XML_TOK_NAME 18 -#define XML_TOK_NMTOKEN 19 -#define XML_TOK_POUND_NAME 20 /* #name */ -#define XML_TOK_OR 21 /* | */ -#define XML_TOK_PERCENT 22 -#define XML_TOK_OPEN_PAREN 23 -#define XML_TOK_CLOSE_PAREN 24 -#define XML_TOK_OPEN_BRACKET 25 -#define XML_TOK_CLOSE_BRACKET 26 -#define XML_TOK_LITERAL 27 -#define XML_TOK_PARAM_ENTITY_REF 28 -#define XML_TOK_INSTANCE_START 29 +# define XML_TOK_PROLOG_S 15 +# define XML_TOK_DECL_OPEN 16 /* */ +# define XML_TOK_NAME 18 +# define XML_TOK_NMTOKEN 19 +# define XML_TOK_POUND_NAME 20 /* #name */ +# define XML_TOK_OR 21 /* | */ +# define XML_TOK_PERCENT 22 +# define XML_TOK_OPEN_PAREN 23 +# define XML_TOK_CLOSE_PAREN 24 +# define XML_TOK_OPEN_BRACKET 25 +# define XML_TOK_CLOSE_BRACKET 26 +# define XML_TOK_LITERAL 27 +# define XML_TOK_PARAM_ENTITY_REF 28 +# define XML_TOK_INSTANCE_START 29 /* The following occur only in element type declarations */ -#define XML_TOK_NAME_QUESTION 30 /* name? */ -#define XML_TOK_NAME_ASTERISK 31 /* name* */ -#define XML_TOK_NAME_PLUS 32 /* name+ */ -#define XML_TOK_COND_SECT_OPEN 33 /* */ -#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ -#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ -#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ -#define XML_TOK_COMMA 38 +# define XML_TOK_NAME_QUESTION 30 /* name? */ +# define XML_TOK_NAME_ASTERISK 31 /* name* */ +# define XML_TOK_NAME_PLUS 32 /* name+ */ +# define XML_TOK_COND_SECT_OPEN 33 /* */ +# define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ +# define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ +# define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ +# define XML_TOK_COMMA 38 /* The following token is returned only by XmlAttributeValueTok */ -#define XML_TOK_ATTRIBUTE_VALUE_S 39 +# define XML_TOK_ATTRIBUTE_VALUE_S 39 /* The following token is returned only by XmlCdataSectionTok */ -#define XML_TOK_CDATA_SECT_CLOSE 40 +# define XML_TOK_CDATA_SECT_CLOSE 40 /* With namespace processing this is returned by XmlPrologTok for a name with a colon. 
*/ -#define XML_TOK_PREFIXED_NAME 41 +# define XML_TOK_PREFIXED_NAME 41 -#ifdef XML_DTD -# define XML_TOK_IGNORE_SECT 42 -#endif /* XML_DTD */ +# ifdef XML_DTD +# define XML_TOK_IGNORE_SECT 42 +# endif /* XML_DTD */ -#ifdef XML_DTD -# define XML_N_STATES 4 -#else /* not XML_DTD */ -# define XML_N_STATES 3 -#endif /* not XML_DTD */ +# ifdef XML_DTD +# define XML_N_STATES 4 +# else /* not XML_DTD */ +# define XML_N_STATES 3 +# endif /* not XML_DTD */ -#define XML_PROLOG_STATE 0 -#define XML_CONTENT_STATE 1 -#define XML_CDATA_SECTION_STATE 2 -#ifdef XML_DTD -# define XML_IGNORE_SECTION_STATE 3 -#endif /* XML_DTD */ +# define XML_PROLOG_STATE 0 +# define XML_CONTENT_STATE 1 +# define XML_CDATA_SECTION_STATE 2 +# ifdef XML_DTD +# define XML_IGNORE_SECTION_STATE 3 +# endif /* XML_DTD */ -#define XML_N_LITERAL_TYPES 2 -#define XML_ATTRIBUTE_VALUE_LITERAL 0 -#define XML_ENTITY_VALUE_LITERAL 1 +# define XML_N_LITERAL_TYPES 2 +# define XML_ATTRIBUTE_VALUE_LITERAL 0 +# define XML_ENTITY_VALUE_LITERAL 1 /* The size of the buffer passed to XmlUtf8Encode must be at least this. */ -#define XML_UTF8_ENCODE_MAX 4 +# define XML_UTF8_ENCODE_MAX 4 /* The size of the buffer passed to XmlUtf16Encode must be at least this. */ -#define XML_UTF16_ENCODE_MAX 2 +# define XML_UTF16_ENCODE_MAX 2 typedef struct position { /* first line and first column are 0 not 1 */ @@ -220,63 +220,63 @@ struct encoding { the prolog outside literals, comments and processing instructions. */ -#define XmlTok(enc, state, ptr, end, nextTokPtr) \ - (((enc)->scanners[state])(enc, ptr, end, nextTokPtr)) +# define XmlTok(enc, state, ptr, end, nextTokPtr) \ + (((enc)->scanners[state])(enc, ptr, end, nextTokPtr)) -#define XmlPrologTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr) +# define XmlPrologTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr) -#define XmlContentTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr) +# define XmlContentTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr) -#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) +# define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) -#ifdef XML_DTD +# ifdef XML_DTD -# define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) +# define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) -#endif /* XML_DTD */ +# endif /* XML_DTD */ /* This is used for performing a 2nd-level tokenization on the content of a literal that has already been returned by XmlTok. 
*/ -#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ - (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr)) +# define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ + (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr)) -#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \ - XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr) +# define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \ + XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr) -#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \ - XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr) +# define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \ + XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr) -#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \ - (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2)) +# define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \ + (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2)) -#define XmlNameLength(enc, ptr) (((enc)->nameLength)(enc, ptr)) +# define XmlNameLength(enc, ptr) (((enc)->nameLength)(enc, ptr)) -#define XmlSkipS(enc, ptr) (((enc)->skipS)(enc, ptr)) +# define XmlSkipS(enc, ptr) (((enc)->skipS)(enc, ptr)) -#define XmlGetAttributes(enc, ptr, attsMax, atts) \ - (((enc)->getAtts)(enc, ptr, attsMax, atts)) +# define XmlGetAttributes(enc, ptr, attsMax, atts) \ + (((enc)->getAtts)(enc, ptr, attsMax, atts)) -#define XmlCharRefNumber(enc, ptr) (((enc)->charRefNumber)(enc, ptr)) +# define XmlCharRefNumber(enc, ptr) (((enc)->charRefNumber)(enc, ptr)) -#define XmlPredefinedEntityName(enc, ptr, end) \ - (((enc)->predefinedEntityName)(enc, ptr, end)) +# define XmlPredefinedEntityName(enc, ptr, end) \ + (((enc)->predefinedEntityName)(enc, ptr, end)) -#define XmlUpdatePosition(enc, ptr, end, pos) \ - (((enc)->updatePosition)(enc, ptr, end, pos)) +# define XmlUpdatePosition(enc, ptr, end, pos) \ + (((enc)->updatePosition)(enc, ptr, end, pos)) -#define XmlIsPublicId(enc, ptr, end, badPtr) \ - (((enc)->isPublicId)(enc, ptr, end, badPtr)) +# define XmlIsPublicId(enc, ptr, end, badPtr) \ + (((enc)->isPublicId)(enc, ptr, end, badPtr)) -#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \ - (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim)) +# define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \ + (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim)) -#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \ - (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim)) +# define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \ + (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim)) typedef struct { ENCODING initEnc; @@ -299,7 +299,7 @@ int XmlSizeOfUnknownEncoding(void); typedef int(XMLCALL *CONVERTER)(void *userData, const char *p); -ENCODING *XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert, +ENCODING *XmlInitUnknownEncoding(void *mem, const int *table, CONVERTER convert, void *userData); int XmlParseXmlDeclNS(int isGeneralTextEntity, const ENCODING *enc, @@ -312,10 +312,10 @@ int XmlInitEncodingNS(INIT_ENCODING *p, const ENCODING **encPtr, const char *name); const ENCODING *XmlGetUtf8InternalEncodingNS(void); const ENCODING *XmlGetUtf16InternalEncodingNS(void); -ENCODING *XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert, - void *userData); -#ifdef __cplusplus +ENCODING *XmlInitUnknownEncodingNS(void *mem, const int *table, + CONVERTER convert, void *userData); +# ifdef __cplusplus } -#endif +# endif #endif /* not XmlTok_INCLUDED */ diff 
--git a/base/poco/XML/src/xmltok_ns.c b/base/poco/XML/src/xmltok_ns.c index 746d6c18b8a0..0a23d7e8c498 100644 --- a/base/poco/XML/src/xmltok_ns.c +++ b/base/poco/XML/src/xmltok_ns.c @@ -98,13 +98,13 @@ NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) { int i; XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1); if (ptr != end) - return 0; + return NULL; *p = 0; if (streqci(buf, KW_UTF_16) && enc->minBytesPerChar == 2) return enc; i = getEncodingIndex(buf); if (i == UNKNOWN_ENC) - return 0; + return NULL; return NS(encodings)[i]; } diff --git a/ci/jobs/scripts/workflow_hooks/check_backport_branch.py b/ci/jobs/scripts/workflow_hooks/check_backport_branch.py new file mode 100644 index 000000000000..a2d5f95a9250 --- /dev/null +++ b/ci/jobs/scripts/workflow_hooks/check_backport_branch.py @@ -0,0 +1,36 @@ +import sys +from ci.praktika.info import Info +from ci.praktika.utils import Shell + + +if __name__ == "__main__": + info = Info() + if info.workflow_name == "BackportPR": + assert info.base_branch + # Ensure base branch is fetched. + # We use treeless fetch (--filter=tree:0) and no-tags to minimize data transfer. + Shell.run( + f"git fetch --no-tags --prune --no-recurse-submodules --filter=tree:0 origin +{info.base_branch}:{info.base_branch}", + verbose=True, + ) + num_commits = int( + Shell.get_output_or_raise( + f"git rev-list --count {info.base_branch}..{info.sha}" + ) + ) + if num_commits == 0: + print(f"ERROR: No commits found between {info.base_branch} and {info.sha}") + sys.exit(-1) + + if num_commits > 50: + print( + f"ERROR: Number of commits between {info.sha} and {info.base_branch} is {num_commits}. " + f"Backport PR should have between 1 and 50 commits." + ) + sys.exit(-1) + + if not info.get_changed_files(): + print(f"ERROR: No Files changed in the Backport PR.") + sys.exit(-1) + else: + assert False, f"Unsupported workflow name [{info.workflow_name}]" diff --git a/ci/workflows/backport_branches.py b/ci/workflows/backport_branches.py index 948684088b9d..86172c171eb4 100644 --- a/ci/workflows/backport_branches.py +++ b/ci/workflows/backport_branches.py @@ -55,6 +55,7 @@ pre_hooks=[ "python3 ./ci/jobs/scripts/workflow_hooks/store_data.py", "python3 ./ci/jobs/scripts/workflow_hooks/version_log.py", + "python3 ./ci/jobs/scripts/workflow_hooks/check_backport_branch.py", ], workflow_filter_hooks=[should_skip_job], post_hooks=[], diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 3147a63665d9..e667e07775ac 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,13 +2,13 @@ # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54514) +SET(VERSION_REVISION 54516) SET(VERSION_MAJOR 25) SET(VERSION_MINOR 8) -SET(VERSION_PATCH 14) -SET(VERSION_GITHASH 2dbc61a9bb88108b1e83b11113f24e68f7c211f7) -SET(VERSION_DESCRIBE v25.8.14.20001.altinityantalya) -SET(VERSION_STRING 25.8.14.20001.altinityantalya) +SET(VERSION_PATCH 16) +SET(VERSION_GITHASH 7a0b36cf8934881236312e9fea094baaf5c709a4) +SET(VERSION_DESCRIBE v25.8.16.20001.altinityantalya) +SET(VERSION_STRING 25.8.16.20001.altinityantalya) # end of autochange SET(VERSION_TWEAK 20001) diff --git a/contrib/c-ares b/contrib/c-ares index d3a507e920e7..3ac47ee46edd 160000 --- a/contrib/c-ares +++ b/contrib/c-ares @@ -1 +1 @@ -Subproject commit d3a507e920e7af18a5efb7f9f1d8044ed4750013 +Subproject commit 3ac47ee46edd8ea40370222f91613fc16c434853 diff --git a/contrib/curl b/contrib/curl index 400fffa90f30..2eebc58c4b8d 160000 --- a/contrib/curl +++ b/contrib/curl @@ -1 +1 @@ -Subproject commit 400fffa90f30c7a2dc762fa33009d24851bd2016 +Subproject commit 2eebc58c4b8d68c98c8344381a9f6df4cca838fd diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt index c616b7d0471e..77dd32ea2a6c 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -40,6 +40,7 @@ set (SRCS "${LIBRARY_DIR}/lib/curl_rtmp.c" "${LIBRARY_DIR}/lib/curl_sasl.c" "${LIBRARY_DIR}/lib/curl_sha512_256.c" + "${LIBRARY_DIR}/lib/curl_share.c" "${LIBRARY_DIR}/lib/curl_sspi.c" "${LIBRARY_DIR}/lib/curl_threads.c" "${LIBRARY_DIR}/lib/curl_trc.c" @@ -86,7 +87,6 @@ set (SRCS "${LIBRARY_DIR}/lib/macos.c" "${LIBRARY_DIR}/lib/md4.c" "${LIBRARY_DIR}/lib/md5.c" - "${LIBRARY_DIR}/lib/memdebug.c" "${LIBRARY_DIR}/lib/mime.c" "${LIBRARY_DIR}/lib/mprintf.c" "${LIBRARY_DIR}/lib/mqtt.c" @@ -102,14 +102,13 @@ set (SRCS "${LIBRARY_DIR}/lib/progress.c" "${LIBRARY_DIR}/lib/psl.c" "${LIBRARY_DIR}/lib/rand.c" - "${LIBRARY_DIR}/lib/rename.c" + "${LIBRARY_DIR}/lib/ratelimit.c" "${LIBRARY_DIR}/lib/request.c" "${LIBRARY_DIR}/lib/rtsp.c" "${LIBRARY_DIR}/lib/select.c" "${LIBRARY_DIR}/lib/sendf.c" "${LIBRARY_DIR}/lib/setopt.c" "${LIBRARY_DIR}/lib/sha256.c" - "${LIBRARY_DIR}/lib/share.c" "${LIBRARY_DIR}/lib/slist.c" "${LIBRARY_DIR}/lib/smb.c" "${LIBRARY_DIR}/lib/smtp.c" @@ -117,7 +116,6 @@ set (SRCS "${LIBRARY_DIR}/lib/socks.c" "${LIBRARY_DIR}/lib/socks_gssapi.c" "${LIBRARY_DIR}/lib/socks_sspi.c" - "${LIBRARY_DIR}/lib/speedcheck.c" "${LIBRARY_DIR}/lib/splay.c" "${LIBRARY_DIR}/lib/strcase.c" "${LIBRARY_DIR}/lib/strdup.c" @@ -149,16 +147,15 @@ set (SRCS "${LIBRARY_DIR}/lib/vauth/spnego_sspi.c" "${LIBRARY_DIR}/lib/vauth/vauth.c" "${LIBRARY_DIR}/lib/vquic/vquic.c" - "${LIBRARY_DIR}/lib/vssh/curl_path.c" "${LIBRARY_DIR}/lib/vssh/libssh.c" "${LIBRARY_DIR}/lib/vssh/libssh2.c" + "${LIBRARY_DIR}/lib/vssh/vssh.c" "${LIBRARY_DIR}/lib/vtls/cipher_suite.c" "${LIBRARY_DIR}/lib/vtls/gtls.c" "${LIBRARY_DIR}/lib/vtls/hostcheck.c" "${LIBRARY_DIR}/lib/vtls/keylog.c" "${LIBRARY_DIR}/lib/vtls/mbedtls.c" - "${LIBRARY_DIR}/lib/vtls/mbedtls_threadlock.c" "${LIBRARY_DIR}/lib/vtls/openssl.c" "${LIBRARY_DIR}/lib/vtls/rustls.c" "${LIBRARY_DIR}/lib/vtls/schannel.c" @@ -175,6 +172,7 @@ set (SRCS "${LIBRARY_DIR}/lib/curlx/inet_pton.c" "${LIBRARY_DIR}/lib/curlx/multibyte.c" "${LIBRARY_DIR}/lib/curlx/nonblock.c" + "${LIBRARY_DIR}/lib/curlx/strcopy.c" "${LIBRARY_DIR}/lib/curlx/strerr.c" "${LIBRARY_DIR}/lib/curlx/strparse.c" "${LIBRARY_DIR}/lib/curlx/timediff.c" diff --git a/contrib/curl-cmake/curl_config.h b/contrib/curl-cmake/curl_config.h index 1b177c5e49b4..0e42508dff0f 100644 --- 
a/contrib/curl-cmake/curl_config.h +++ b/contrib/curl-cmake/curl_config.h @@ -48,6 +48,7 @@ #define HAVE_SELECT_H #define HAVE_SETJMP_H #define HAVE_SETJMP_H +#define HAVE_STDINT_H #define HAVE_UNISTD_H #define HAVE_POLL_H #define HAVE_PTHREAD_H diff --git a/contrib/postgres b/contrib/postgres index 52dbc8ec5aab..5ad0c31d0c3a 160000 --- a/contrib/postgres +++ b/contrib/postgres @@ -1 +1 @@ -Subproject commit 52dbc8ec5aab7456f4104d3c197af0c5789e23a0 +Subproject commit 5ad0c31d0c3a76ed64655f4d397934b5ecc9696f diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 883fb6902861..02d10e28461c 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -93,6 +93,15 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace ServerSetting +{ + extern const ServerSettingsUInt64 memory_worker_period_ms; + extern const ServerSettingsDouble memory_worker_purge_dirty_pages_threshold_ratio; + extern const ServerSettingsDouble memory_worker_purge_total_memory_threshold_ratio; + extern const ServerSettingsBool memory_worker_correct_memory_tracker; + extern const ServerSettingsBool memory_worker_use_cgroup; +} + Poco::Net::SocketAddress Keeper::socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure) const { auto address = makeSocketAddress(host, port, &logger()); @@ -303,6 +312,8 @@ String getKeeperPath(Poco::Util::LayeredConfiguration & config) int Keeper::main(const std::vector & /*args*/) try { + ServerSettings server_settings; + server_settings.loadSettingsFromConfig(config()); #if USE_JEMALLOC setJemallocBackgroundThreads(true); #endif @@ -327,7 +338,7 @@ try if (!config().has("keeper_server")) throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Keeper configuration ( section) not found in config"); - auto updateMemorySoftLimitInConfig = [&](Poco::Util::AbstractConfiguration & config) + auto update_memory_soft_limit_in_config = [&](Poco::Util::AbstractConfiguration & config) { UInt64 memory_soft_limit = 0; if (config.has("keeper_server.max_memory_usage_soft_limit")) @@ -352,7 +363,7 @@ try LOG_INFO(log, "keeper_server.max_memory_usage_soft_limit is set to {}", formatReadableSizeWithBinarySuffix(memory_soft_limit)); }; - updateMemorySoftLimitInConfig(config()); + update_memory_soft_limit_in_config(config()); std::string path = getKeeperPath(config()); std::filesystem::create_directories(path); @@ -380,8 +391,15 @@ try LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds()); }); - MemoryWorker memory_worker( - config().getUInt64("memory_worker_period_ms", 0), config().getBool("memory_worker_correct_memory_tracker", false), /*use_cgroup*/ true, /*page_cache*/ nullptr); + MemoryWorkerConfig memory_worker_config{ + .rss_update_period_ms = server_settings[ServerSetting::memory_worker_period_ms], + .purge_dirty_pages_threshold_ratio = server_settings[ServerSetting::memory_worker_purge_dirty_pages_threshold_ratio], + .purge_total_memory_threshold_ratio = server_settings[ServerSetting::memory_worker_purge_total_memory_threshold_ratio], + .correct_tracker = server_settings[ServerSetting::memory_worker_correct_memory_tracker], + .use_cgroup = server_settings[ServerSetting::memory_worker_use_cgroup], + }; + + MemoryWorker memory_worker(memory_worker_config, /*page_cache_=*/nullptr); memory_worker.start(); static ServerErrorHandler error_handler; @@ -579,7 +597,7 @@ try { updateLevels(*config, logger()); - updateMemorySoftLimitInConfig(*config); + 
update_memory_soft_limit_in_config(*config); if (config->has("keeper_server")) global_context->updateKeeperConfiguration(*config); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 161ac35134f8..93350ee596b2 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -284,6 +284,8 @@ namespace ServerSetting extern const ServerSettingsUInt64 max_view_num_to_warn; extern const ServerSettingsUInt64 max_waiting_queries; extern const ServerSettingsUInt64 memory_worker_period_ms; + extern const ServerSettingsDouble memory_worker_purge_dirty_pages_threshold_ratio; + extern const ServerSettingsDouble memory_worker_purge_total_memory_threshold_ratio; extern const ServerSettingsBool memory_worker_correct_memory_tracker; extern const ServerSettingsBool memory_worker_use_cgroup; extern const ServerSettingsUInt64 merges_mutations_memory_usage_soft_limit; @@ -1255,11 +1257,15 @@ try total_memory_tracker.setPageCache(global_context->getPageCache().get()); } - MemoryWorker memory_worker( - server_settings[ServerSetting::memory_worker_period_ms], - server_settings[ServerSetting::memory_worker_correct_memory_tracker], - global_context->getServerSettings()[ServerSetting::memory_worker_use_cgroup], - global_context->getPageCache()); + MemoryWorkerConfig memory_worker_config{ + .rss_update_period_ms = server_settings[ServerSetting::memory_worker_period_ms], + .purge_dirty_pages_threshold_ratio = server_settings[ServerSetting::memory_worker_purge_dirty_pages_threshold_ratio], + .purge_total_memory_threshold_ratio = server_settings[ServerSetting::memory_worker_purge_total_memory_threshold_ratio], + .correct_tracker = server_settings[ServerSetting::memory_worker_correct_memory_tracker], + .use_cgroup = server_settings[ServerSetting::memory_worker_use_cgroup], + }; + + MemoryWorker memory_worker(memory_worker_config, global_context->getPageCache()); /// This object will periodically calculate some metrics. ServerAsynchronousMetrics async_metrics( diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index 312a3ce77f8c..979434eaa0d0 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -919,7 +919,13 @@ struct AccessRights::Node const AccessFlags & parent_flags_go, String path) { - auto grantable_flags = ::DB::getAllGrantableFlags(static_cast(full_name.size())); + Node * target_node = node; + if (!target_node) + target_node = node_go; + + auto grantable_flags = target_node + ? ::DB::getAllGrantableFlags(target_node->level) + : ::DB::getAllGrantableFlags(static_cast(full_name.size())); auto parent_fl = parent_flags & grantable_flags; auto parent_fl_go = parent_flags_go & grantable_flags; auto flags = node ? node->flags : parent_fl; @@ -929,10 +935,6 @@ struct AccessRights::Node auto grants_go = flags_go - parent_fl_go; auto grants = flags - parent_fl - grants_go; - Node * target_node = node; - if (!target_node) - target_node = node_go; - /// Inserts into result only meaningful nodes (e.g. wildcards or leafs). 
if (target_node && (target_node->isLeaf() || target_node->wildcard_grant)) { diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp index 8b1084b7b852..b7d1c102085f 100644 --- a/src/Access/tests/gtest_access_rights_ops.cpp +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -562,6 +562,27 @@ TEST(AccessRights, RevokeWithParameters) ASSERT_EQ(root.toString(), "GRANT SELECT ON *.* WITH GRANT OPTION, GRANT CREATE USER ON * WITH GRANT OPTION, REVOKE SELECT(bar*) ON default.foo"); } +TEST(AccessRights, RevokeWithParametersWithGrantOption) +{ + AccessRights root; + root.grantWithGrantOption(AccessType::ALL); + root.revokeWildcard(AccessType::INTROSPECTION, "system"); // global grant, do nothing for database revoke + ASSERT_EQ(root.toString(), "GRANT ALL ON *.* WITH GRANT OPTION"); + + root = {}; + root.grant(AccessType::SELECT); + root.grant(AccessType::INTROSPECTION); + root.grant(AccessType::CREATE_USER); + root.revokeWildcard(AccessType::CREATE_USER, "system"); + ASSERT_EQ(root.toString(), "GRANT SELECT, INTROSPECTION ON *.*, GRANT CREATE USER ON *, REVOKE CREATE USER ON system*"); + + root.grantWithGrantOption(AccessType::SELECT); + root.grantWithGrantOption(AccessType::INTROSPECTION); + root.grantWithGrantOption(AccessType::CREATE_USER); + root.revokeWildcard(AccessType::CREATE_USER, "system"); + ASSERT_EQ(root.toString(), "GRANT SELECT, INTROSPECTION ON *.* WITH GRANT OPTION, GRANT CREATE USER ON * WITH GRANT OPTION, REVOKE CREATE USER ON system*"); +} + TEST(AccessRights, ParialRevokeWithGrantOption) { AccessRights root; diff --git a/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp index b1ff062f2705..2030e46a5873 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp @@ -172,9 +172,25 @@ void GroupConcatImpl::deserialize(AggregateDataPtr __restrict place, if constexpr (has_limit) { readVarUInt(cur_data.num_rows, buf); + + if (cur_data.num_rows > std::numeric_limits::max() / 2) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid groupConcat state: num_rows ({}) is too large and would overflow the offsets array.", cur_data.num_rows); + cur_data.offsets.resize_exact(cur_data.num_rows * 2, arena); - for (auto & offset : cur_data.offsets) - readVarUInt(offset, buf); + + for (size_t i = 0; i < cur_data.offsets.size(); ++i) + { + readVarUInt(cur_data.offsets[i], buf); + + if (cur_data.offsets[i] > cur_data.data_size) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid offset {} in groupConcat state: exceeds data size {}", cur_data.offsets[i], cur_data.data_size); + + if (i != 0 && cur_data.offsets[i] < cur_data.offsets[i - 1]) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid offsets in groupConcat state: end offset {} is less than start offset {}", cur_data.offsets[i], cur_data.offsets[i - 1]); + } + + if (cur_data.num_rows != 0 && cur_data.offsets.back() != cur_data.data_size) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid offsets in groupConcat state: last offset {} is not equal to data size {}", cur_data.offsets.back(), cur_data.data_size); } } diff --git a/src/AggregateFunctions/ThetaSketchData.h b/src/AggregateFunctions/ThetaSketchData.h index 476ecda06fdc..f243dccce90a 100644 --- a/src/AggregateFunctions/ThetaSketchData.h +++ b/src/AggregateFunctions/ThetaSketchData.h @@ -21,7 +21,9 @@ template class ThetaSketchData : private boost::noncopyable { private: + /// Used for insertions 
std::unique_ptr sk_update; + /// Used for merging std::unique_ptr sk_union; datasketches::update_theta_sketch * getSkUpdate() @@ -48,12 +50,26 @@ class ThetaSketchData : private boost::noncopyable void insertOriginal(StringRef value) { getSkUpdate()->update(value.data, value.size); + /// In case of optimization for u8 keys (see addBatchLookupTable()) it is possible to have a few calls of insert() after merge(), + /// and we should update sk_union as well; note that there should not be too many, so performance-wise it should be OK + if (sk_union) + { + sk_union->update(*sk_update); + sk_update.reset(nullptr); + } } /// Note that `datasketches::update_theta_sketch.update` will do the hash again. void insert(Key value) { getSkUpdate()->update(value); + /// In case of optimization for u8 keys (see addBatchLookupTable()) it is possible to have a few calls of insert() after merge(), + /// and we should update sk_union as well; note that there should not be too many, so performance-wise it should be OK + if (sk_union) + { + sk_union->update(*sk_update); + sk_update.reset(nullptr); + } } UInt64 size() const diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index c1d5174c4b84..824d31c59703 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -46,12 +46,39 @@ struct ColumnContext using NodeToSubcolumnTransformer = std::function; +/// Before the column-to-substream optimization, we need to make sure that no column with the same name as the substream exists; otherwise the optimization will use that column instead of the substream. +bool sourceHasColumn(QueryTreeNodePtr column_source, const String & column_name) +{ + auto * table_node = column_source->as(); + if (!table_node) + return {}; + + const auto & storage_snapshot = table_node->getStorageSnapshot(); + return storage_snapshot->tryGetColumn(GetColumnsOptions::All, column_name).has_value(); +} + +/// Sometimes we cannot optimize a function to a subcolumn because there is no such subcolumn in the table. +/// For example, for column "a Array(Tuple(b UInt32))" function length(a.b) cannot be replaced with + a.b.size0, because there is no such subcolumn, even though a.b has type Array(UInt32) +bool canOptimizeToSubcolumn(QueryTreeNodePtr column_source, const String & subcolumn_name) +{ + auto * table_node = column_source->as(); + if (!table_node) + return {}; + + const auto & storage_snapshot = table_node->getStorageSnapshot(); + return storage_snapshot->tryGetColumn(GetColumnsOptions(GetColumnsOptions::All).withRegularSubcolumns(), subcolumn_name).has_value(); +} + void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx) { /// Replace `length(argument)` with `argument.size0`. /// `argument` may be Array or Map. NameAndTypePair column{ctx.column.name + ".size0", std::make_shared()}; + if (sourceHasColumn(ctx.column_source, column.name) || !canOptimizeToSubcolumn(ctx.column_source, column.name)) + return; + node = std::make_shared(column, ctx.column_source); } @@ -63,6 +90,9 @@ void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, Col /// `argument` may be Array or Map. 
NameAndTypePair column{ctx.column.name + ".size0", std::make_shared()}; + if (sourceHasColumn(ctx.column_source, column.name) || !canOptimizeToSubcolumn(ctx.column_source, column.name)) + return; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); function_arguments_nodes.clear(); @@ -137,6 +167,8 @@ void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & funct return; NameAndTypePair column{ctx.column.name + "." + subcolumn->name, subcolumn->type}; + if (sourceHasColumn(ctx.column_source, column.name) || !canOptimizeToSubcolumn(ctx.column_source, column.name)) + return; node = std::make_shared(column, ctx.column_source); } @@ -169,6 +201,8 @@ std::map, NodeToSubcolumnTransformer> node_transfor auto key_type = std::make_shared(data_type_map.getKeyType()); NameAndTypePair column{ctx.column.name + ".keys", key_type}; + if (sourceHasColumn(ctx.column_source, column.name) || !canOptimizeToSubcolumn(ctx.column_source, column.name)) + return; node = std::make_shared(column, ctx.column_source); }, }, @@ -181,6 +215,8 @@ std::map, NodeToSubcolumnTransformer> node_transfor auto value_type = std::make_shared(data_type_map.getValueType()); NameAndTypePair column{ctx.column.name + ".values", value_type}; + if (sourceHasColumn(ctx.column_source, column.name) || !canOptimizeToSubcolumn(ctx.column_source, column.name)) + return; node = std::make_shared(column, ctx.column_source); }, }, @@ -192,6 +228,8 @@ std::map, NodeToSubcolumnTransformer> node_transfor const auto & data_type_map = assert_cast(*ctx.column.type); NameAndTypePair column{ctx.column.name + ".keys", std::make_shared(data_type_map.getKeyType())}; + if (sourceHasColumn(ctx.column_source, column.name) || !canOptimizeToSubcolumn(ctx.column_source, column.name)) + return; auto & function_arguments_nodes = function_node.getArguments().getNodes(); auto has_function_argument = std::make_shared(column, ctx.column_source); @@ -206,6 +244,8 @@ std::map, NodeToSubcolumnTransformer> node_transfor { /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))` NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; + if (sourceHasColumn(ctx.column_source, column.name) || !canOptimizeToSubcolumn(ctx.column_source, column.name)) + return; auto & function_arguments_nodes = function_node.getArguments().getNodes(); auto new_column_node = std::make_shared(column, ctx.column_source); @@ -224,6 +264,8 @@ std::map, NodeToSubcolumnTransformer> node_transfor { /// Replace `isNull(nullable_argument)` with `nullable_argument.null` NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; + if (sourceHasColumn(ctx.column_source, column.name) || !canOptimizeToSubcolumn(ctx.column_source, column.name)) + return; node = std::make_shared(column, ctx.column_source); }, }, @@ -233,6 +275,8 @@ std::map, NodeToSubcolumnTransformer> node_transfor { /// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)` NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; + if (sourceHasColumn(ctx.column_source, column.name) || !canOptimizeToSubcolumn(ctx.column_source, column.name)) + return; auto & function_arguments_nodes = function_node.getArguments().getNodes(); function_arguments_nodes = {std::make_shared(column, ctx.column_source)}; @@ -279,7 +323,7 @@ std::tuple getTypedNodesForOptimizati if (!storage->supportsOptimizationToSubcolumns() || storage->isVirtualColumn(column.name, storage_snapshot->metadata)) return {}; - auto column_in_table = 
storage_snapshot->tryGetColumn(GetColumnsOptions::All, column.name); + auto column_in_table = storage_snapshot->tryGetColumn(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns(), column.name); if (!column_in_table || !column_in_table->type->equals(*column.type)) return {}; diff --git a/src/Analyzer/Passes/RemoveUnusedProjectionColumnsPass.cpp b/src/Analyzer/Passes/RemoveUnusedProjectionColumnsPass.cpp index a190b312946d..38a7a21b7c68 100644 --- a/src/Analyzer/Passes/RemoveUnusedProjectionColumnsPass.cpp +++ b/src/Analyzer/Passes/RemoveUnusedProjectionColumnsPass.cpp @@ -5,102 +5,19 @@ #include #include #include -#include #include #include #include #include +#include + namespace DB { namespace { -class CollectUsedColumnsVisitor : public InDepthQueryTreeVisitorWithContext -{ -public: - using Base = InDepthQueryTreeVisitorWithContext; - using Base::Base; - - bool needChildVisit(QueryTreeNodePtr &, QueryTreeNodePtr & child) - { - if (isQueryOrUnionNode(child)) - { - subqueries_nodes_to_visit.insert(child); - return false; - } - - return true; - } - - void enterImpl(QueryTreeNodePtr & node) - { - auto node_type = node->getNodeType(); - - if (node_type == QueryTreeNodeType::QUERY) - { - auto & query_node = node->as(); - auto table_expressions = extractTableExpressions(query_node.getJoinTree()); - for (const auto & table_expression : table_expressions) - if (isQueryOrUnionNode(table_expression)) - query_or_union_node_to_used_columns.emplace(table_expression, std::unordered_set()); - - return; - } - - if (node_type == QueryTreeNodeType::FUNCTION) - { - auto & function_node = node->as(); - - if (function_node.getFunctionName() != "exists") - return; - - const auto & subquery_argument = function_node.getArguments().getNodes().front(); - auto * query_node = subquery_argument->as(); - auto * union_node = subquery_argument->as(); - - const auto & correlated_columns = query_node != nullptr ? query_node->getCorrelatedColumns() : union_node->getCorrelatedColumns(); - for (const auto & correlated_column : correlated_columns) - { - auto * column_node = correlated_column->as(); - auto column_source_node = column_node->getColumnSource(); - auto column_source_node_type = column_source_node->getNodeType(); - if (column_source_node_type == QueryTreeNodeType::QUERY || column_source_node_type == QueryTreeNodeType::UNION) - query_or_union_node_to_used_columns[column_source_node].insert(column_node->getColumnName()); - } - return; - } - - if (node_type != QueryTreeNodeType::COLUMN) - return; - - auto & column_node = node->as(); - if (column_node.getColumnName() == "__grouping_set") - return; - - auto column_source_node = column_node.getColumnSource(); - - auto it = query_or_union_node_to_used_columns.find(column_source_node); - /// If the source node is not found in the map then: - /// 1. Tt's either not a Query or Union node. - /// 2. It's a correlated column and it comes from the outer scope. 
- if (it != query_or_union_node_to_used_columns.end()) - { - it->second.insert(column_node.getColumnName()); - } - } - - void reset() - { - subqueries_nodes_to_visit.clear(); - query_or_union_node_to_used_columns.clear(); - } - - std::unordered_set subqueries_nodes_to_visit; - std::unordered_map> query_or_union_node_to_used_columns; -}; - std::unordered_set convertUsedColumnNamesToUsedProjectionIndexes(const QueryTreeNodePtr & query_or_union_node, const std::unordered_set & used_column_names) { std::unordered_set result; @@ -161,21 +78,80 @@ void updateUsedProjectionIndexes(const QueryTreeNodePtr & query_or_union_node, s } -void RemoveUnusedProjectionColumnsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) +void RemoveUnusedProjectionColumnsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr /*context*/) { - std::vector nodes_to_visit; - nodes_to_visit.push_back(query_tree_node); - - CollectUsedColumnsVisitor visitor(std::move(context)); + QueryTreeNodes nodes_to_visit = { query_tree_node }; while (!nodes_to_visit.empty()) { auto node_to_visit = std::move(nodes_to_visit.back()); nodes_to_visit.pop_back(); - visitor.visit(node_to_visit); + std::unordered_set subqueries_nodes_to_visit; + std::unordered_map> node_to_used_columns; - for (auto & [query_or_union_node, used_columns] : visitor.query_or_union_node_to_used_columns) + /// Initialize map with query and union nodes in the FROM clause + if (auto * query_node = node_to_visit->as()) + { + for (const auto & table_expression : extractTableExpressions(query_node->getJoinTree())) + if (isQueryOrUnionNode(table_expression)) + node_to_used_columns.emplace(table_expression, std::unordered_set()); + } + + /// Collect information about what columns are used in the query. + traverseQueryTree(node_to_visit, + [&subqueries_nodes_to_visit, &node_to_used_columns]( + const QueryTreeNodePtr & /*parent*/, + const QueryTreeNodePtr & child + ) + { + if (isQueryOrUnionNode(child)) + { + subqueries_nodes_to_visit.insert(child); + + auto * query_node = child->as(); + auto * union_node = child->as(); + + const auto & correlated_columns = query_node != nullptr ? query_node->getCorrelatedColumns() : union_node->getCorrelatedColumns(); + for (const auto & correlated_column : correlated_columns) + { + auto * column_node = correlated_column->as(); + auto column_source_node = column_node->getColumnSource(); + auto column_source_node_type = column_source_node->getNodeType(); + if (column_source_node_type == QueryTreeNodeType::QUERY || column_source_node_type == QueryTreeNodeType::UNION) + { + if (auto it = node_to_used_columns.find(column_source_node); it != node_to_used_columns.end()) + it->second.insert(column_node->getColumnName()); + } + } + return false; + } + return true; + }, + [&node_to_used_columns](const QueryTreeNodePtr & node) + { + const auto node_type = node->getNodeType(); + if (node_type != QueryTreeNodeType::COLUMN) + return; + + auto & column_node = node->as(); + if (column_node.getColumnName() == "__grouping_set") + return; + + auto column_source_node = column_node.getColumnSource(); + + auto it = node_to_used_columns.find(column_source_node); + /// If the source node is not found in the map then: + /// 1. Tt's either not a Query or Union node. + /// 2. It's a correlated column and it comes from the outer scope. 
+ if (it != node_to_used_columns.end()) + { + it->second.insert(column_node.getColumnName()); + } + }); + + /// Pass information about used columns to subqueries and remove unused projection columns + for (auto & [query_or_union_node, used_columns] : node_to_used_columns) { /// can't remove columns from distinct, see example - 03023_remove_unused_column_distinct.sql if (auto * query_node = query_or_union_node->as()) @@ -197,10 +173,8 @@ void RemoveUnusedProjectionColumnsPass::run(QueryTreeNodePtr & query_tree_node, query_node->removeUnusedProjectionColumns(used_projection_indexes); } - for (const auto & subquery_node_to_visit : visitor.subqueries_nodes_to_visit) + for (const auto & subquery_node_to_visit : subqueries_nodes_to_visit) nodes_to_visit.push_back(subquery_node_to_visit); - - visitor.reset(); } } diff --git a/src/Analyzer/Resolve/IdentifierResolveScope.h b/src/Analyzer/Resolve/IdentifierResolveScope.h index b4c0fc26a577..73aec05cb5f2 100644 --- a/src/Analyzer/Resolve/IdentifierResolveScope.h +++ b/src/Analyzer/Resolve/IdentifierResolveScope.h @@ -147,8 +147,8 @@ struct IdentifierResolveScope /// Store current scope aliases defined in WITH clause if `enable_scopes_for_with_statement` setting is disabled. ScopeAliases global_with_aliases; - /// Table column name to column node. Valid only during table ALIAS columns resolve. - ColumnNameToColumnNodeMap column_name_to_column_node; + /// Valid only during table ALIAS columns resolve. + AnalysisTableExpressionData * table_expression_data_for_alias_resolution = nullptr; std::list *> join_using_columns; diff --git a/src/Analyzer/Resolve/IdentifierResolver.cpp b/src/Analyzer/Resolve/IdentifierResolver.cpp index 5580ea0f8df7..216d5a90bf24 100644 --- a/src/Analyzer/Resolve/IdentifierResolver.cpp +++ b/src/Analyzer/Resolve/IdentifierResolver.cpp @@ -356,26 +356,25 @@ bool IdentifierResolver::tryBindIdentifierToJoinUsingColumn(const IdentifierLook */ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromTableColumns(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope) { - if (scope.column_name_to_column_node.empty() || !identifier_lookup.isExpressionLookup()) + if (!scope.table_expression_data_for_alias_resolution || !identifier_lookup.isExpressionLookup()) return {}; const auto & identifier = identifier_lookup.identifier; - auto it = scope.column_name_to_column_node.find(identifier.getFullName()); - bool full_column_name_match = it != scope.column_name_to_column_node.end(); + auto identifier_full_name = identifier.getFullName(); + auto it = scope.table_expression_data_for_alias_resolution->column_name_to_column_node.find(identifier_full_name); + if (it != scope.table_expression_data_for_alias_resolution->column_name_to_column_node.end()) + return it->second; - if (!full_column_name_match) + /// Check if it's a subcolumn + if (auto subcolumn_info = scope.table_expression_data_for_alias_resolution->tryGetSubcolumnInfo(identifier_full_name)) { - it = scope.column_name_to_column_node.find(identifier_lookup.identifier[0]); - if (it == scope.column_name_to_column_node.end()) - return {}; - } - - QueryTreeNodePtr result = it->second; + if (scope.table_expression_data_for_alias_resolution->supports_subcolumns) + return std::make_shared(NameAndTypePair{identifier_full_name, subcolumn_info->subcolumn_type}, subcolumn_info->column_node->getColumnSource()); - if (!full_column_name_match && identifier.isCompound()) - return tryResolveIdentifierFromCompoundExpression(identifier_lookup.identifier, 1 /*identifier_bind_size*/, 
it->second, {}, scope); + return wrapExpressionNodeInSubcolumn(subcolumn_info->column_node, String(subcolumn_info->subcolumn_name), scope.context); + } - return result; + return {}; } bool IdentifierResolver::tryBindIdentifierToTableExpression(const IdentifierLookup & identifier_lookup, @@ -496,65 +495,31 @@ IdentifierResolveResult IdentifierResolver::tryResolveIdentifierFromStorage( */ QueryTreeNodePtr result_expression; - bool match_full_identifier = false; const auto & identifier_full_name = identifier_without_column_qualifier.getFullName(); - ColumnNodePtr result_column_node; - bool can_resolve_directly_from_storage = false; if (auto it = table_expression_data.column_name_to_column_node.find(identifier_full_name); it != table_expression_data.column_name_to_column_node.end()) { - can_resolve_directly_from_storage = true; - result_column_node = it->second; + result_expression = it->second; } - /// Check if it's a dynamic subcolumn - else if (table_expression_data.supports_subcolumns) + /// Check if it's a subcolumn + else { - auto [column_name, dynamic_subcolumn_name] = Nested::splitName(identifier_full_name); - auto jt = table_expression_data.column_name_to_column_node.find(column_name); - if (jt != table_expression_data.column_name_to_column_node.end() && jt->second->getColumnType()->hasDynamicSubcolumns()) + if (auto subcolumn_info = table_expression_data.tryGetSubcolumnInfo(identifier_full_name)) { - if (auto dynamic_subcolumn_type = jt->second->getColumnType()->tryGetSubcolumnType(dynamic_subcolumn_name)) - { - result_column_node = std::make_shared(NameAndTypePair{identifier_full_name, dynamic_subcolumn_type}, jt->second->getColumnSource()); - can_resolve_directly_from_storage = true; - } + if (table_expression_data.supports_subcolumns) + result_expression = std::make_shared(NameAndTypePair{identifier_full_name, subcolumn_info->subcolumn_type}, subcolumn_info->column_node->getColumnSource()); + else + result_expression = wrapExpressionNodeInSubcolumn(subcolumn_info->column_node, String(subcolumn_info->subcolumn_name), scope.context); } } - - if (can_resolve_directly_from_storage) - { - match_full_identifier = true; - result_expression = result_column_node; - } - else - { - auto it = table_expression_data.column_name_to_column_node.find(identifier_without_column_qualifier.at(0)); - if (it != table_expression_data.column_name_to_column_node.end()) - result_expression = it->second; - } - bool clone_is_needed = true; String table_expression_source = table_expression_data.table_expression_description; if (!table_expression_data.table_expression_name.empty()) table_expression_source += " with name " + table_expression_data.table_expression_name; - if (result_expression && !match_full_identifier && identifier_without_column_qualifier.isCompound()) - { - size_t identifier_bind_size = identifier_column_qualifier_parts + 1; - result_expression = tryResolveIdentifierFromCompoundExpression(identifier, - identifier_bind_size, - result_expression, - table_expression_source, - scope, - can_be_not_found); - if (can_be_not_found && !result_expression) - return {}; - clone_is_needed = false; - } - if (!result_expression) { /// Here we try to create Nested from Array columns with the `identifier` prefix. 
diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index a89a57f29b46..26d34be495f3 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -4810,6 +4810,7 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table } } + table_expression_data.column_name_to_column_node = std::move(column_name_to_column_node); for (auto & [alias_column_to_resolve_name, alias_column_to_resolve] : alias_columns_to_resolve) { /** Alias column could be potentially resolved during resolve of other ALIAS column. @@ -4817,10 +4818,10 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table * * During resolve of alias_value_1, alias_value_2 column will be resolved. */ - alias_column_to_resolve = column_name_to_column_node[alias_column_to_resolve_name]; + alias_column_to_resolve = table_expression_data.column_name_to_column_node[alias_column_to_resolve_name]; IdentifierResolveScope & alias_column_resolve_scope = createIdentifierResolveScope(alias_column_to_resolve, &scope /*parent_scope*/); - alias_column_resolve_scope.column_name_to_column_node = std::move(column_name_to_column_node); + alias_column_resolve_scope.table_expression_data_for_alias_resolution = &table_expression_data; alias_column_resolve_scope.context = scope.context; /// Initialize aliases in alias column scope @@ -4834,11 +4835,8 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table auto & resolved_expression = alias_column_to_resolve->getExpression(); if (!resolved_expression->getResultType()->equals(*alias_column_to_resolve->getResultType())) resolved_expression = buildCastFunction(resolved_expression, alias_column_to_resolve->getResultType(), scope.context, true); - column_name_to_column_node = std::move(alias_column_resolve_scope.column_name_to_column_node); - column_name_to_column_node[alias_column_to_resolve_name] = alias_column_to_resolve; + table_expression_data.column_name_to_column_node[alias_column_to_resolve_name] = alias_column_to_resolve; } - - table_expression_data.column_name_to_column_node = std::move(column_name_to_column_node); } else if (query_node || union_node) { diff --git a/src/Analyzer/Resolve/TableExpressionData.h b/src/Analyzer/Resolve/TableExpressionData.h index bbedad69cfe4..4d8c89ac3f47 100644 --- a/src/Analyzer/Resolve/TableExpressionData.h +++ b/src/Analyzer/Resolve/TableExpressionData.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -49,7 +50,8 @@ struct AnalysisTableExpressionData bool canBindIdentifier(IdentifierView identifier_view) const { - return column_identifier_first_parts.contains(identifier_view.at(0)); + return column_identifier_first_parts.contains(identifier_view.at(0)) || column_name_to_column_node.contains(identifier_view.at(0)) + || tryGetSubcolumnInfo(identifier_view.getFullName()); } [[maybe_unused]] void dump(WriteBuffer & buffer) const @@ -78,6 +80,28 @@ struct AnalysisTableExpressionData return buffer.str(); } + + struct SubcolumnInfo + { + ColumnNodePtr column_node; + std::string_view subcolumn_name; + DataTypePtr subcolumn_type; + }; + + std::optional tryGetSubcolumnInfo(std::string_view full_identifier_name) const + { + for (auto [column_name, subcolumn_name] : Nested::getAllColumnAndSubcolumnPairs(full_identifier_name)) + { + auto it = column_name_to_column_node.find(column_name); + if (it != column_name_to_column_node.end()) + { + if (auto subcolumn_type = 
it->second->getResultType()->tryGetSubcolumnType(subcolumn_name)) + return SubcolumnInfo{it->second, subcolumn_name, subcolumn_type}; + } + } + + return std::nullopt; + } }; } diff --git a/src/Analyzer/traverseQueryTree.h b/src/Analyzer/traverseQueryTree.h new file mode 100644 index 000000000000..da2d3f1a5f95 --- /dev/null +++ b/src/Analyzer/traverseQueryTree.h @@ -0,0 +1,74 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/// Traverse query tree in depth-first manner, applying `func` to each node. +/// `should_visit_predicate` is called for each child node to determine whether to visit it or not. +/// If the node is a TableFunctionNode, its arguments that are not resolved are skipped during traversal. +/// +/// Note: This function implements non-recursive traversal to avoid stack overflow on deep trees. +/// If you need recursive traversal or you need to use query context, consider using InDepthQueryTreeVisitorWithContext. +/// This function is suitable for simple use cases where you just need to apply a function to each node. +/// +/// @param node The root node of the query tree to traverse. +/// @param should_visit_predicate A callable that takes (parent_node, child_node) and returns a bool indicating whether to visit the child node. +/// @param func A callable that takes (current_node) to be applied to each visited node. +template +void traverseQueryTree(const QueryTreeNodePtr & node, ShouldVisitPredicate should_visit_predicate, Func func) +{ + QueryTreeNodes nodes_to_process = { node }; + + while (!nodes_to_process.empty()) + { + auto current_node = nodes_to_process.back(); + nodes_to_process.pop_back(); + + func(current_node); + + if (auto * table_function_node = current_node->as()) + { + for (const auto & child : current_node->getChildren()) + { + if (!child) + continue; + + if (child == table_function_node->getArgumentsNode()) + { + const auto & unresolved_indexes = table_function_node->getUnresolvedArgumentIndexes(); + const auto & arguments_nodes = table_function_node->getArguments().getNodes(); + + for (size_t index = 0; index < arguments_nodes.size(); ++index) + { + const auto & argument_node = arguments_nodes[index]; + if (std::find(unresolved_indexes.begin(), unresolved_indexes.end(), index) == unresolved_indexes.end()) + { + nodes_to_process.push_back(argument_node); + } + } + } + else + { + if (should_visit_predicate(current_node, child)) + nodes_to_process.push_back(child); + } + } + } + else + { + for (const auto & child : current_node->getChildren()) + { + if (!child) + continue; + + if (should_visit_predicate(current_node, child)) + nodes_to_process.push_back(child); + } + } + } +} + +} diff --git a/src/Backups/BackupInfo.cpp b/src/Backups/BackupInfo.cpp index 6e582fb77146..c50351a621ff 100644 --- a/src/Backups/BackupInfo.cpp +++ b/src/Backups/BackupInfo.cpp @@ -1,10 +1,16 @@ #include + +#include +#include +#include +#include #include #include #include #include #include #include +#include namespace DB @@ -29,6 +35,16 @@ BackupInfo BackupInfo::fromString(const String & str) } +namespace +{ + /// Check if an AST node is a key-value assignment (e.g., url='...' 
parsed as equals(url, '...')) + bool isKeyValueArg(const ASTPtr & ast) + { + const auto * func = ast->as(); + return func && func->name == "equals"; + } +} + ASTPtr BackupInfo::toAST() const { auto func = std::make_shared(); @@ -39,7 +55,7 @@ ASTPtr BackupInfo::toAST() const auto list = std::make_shared(); func->arguments = list; func->children.push_back(list); - list->children.reserve(args.size() + !id_arg.empty()); + list->children.reserve(args.size() + kv_args.size() + !id_arg.empty()); if (!id_arg.empty()) list->children.push_back(std::make_shared(id_arg)); @@ -47,6 +63,9 @@ ASTPtr BackupInfo::toAST() const for (const auto & arg : args) list->children.push_back(std::make_shared(arg)); + for (const auto & kv_arg : kv_args) + list->children.push_back(kv_arg); + if (function_arg) list->children.push_back(function_arg); @@ -85,6 +104,14 @@ BackupInfo BackupInfo::fromAST(const IAST & ast) for (; index < args_size; ++index) { const auto & elem = list->children[index]; + + /// Check for key-value arguments (e.g., url='...' parsed as equals(url, '...')) + if (isKeyValueArg(elem)) + { + res.kv_args.push_back(elem); + continue; + } + const auto * lit = elem->as(); if (!lit) { @@ -127,4 +154,32 @@ void BackupInfo::copyS3CredentialsTo(BackupInfo & dest) const dest_args[2] = args[2]; } +NamedCollectionPtr BackupInfo::getNamedCollection(ContextPtr context) const +{ + if (id_arg.empty()) + return nullptr; + + /// Load named collections (both from config and SQL-defined) + NamedCollectionFactory::instance().loadIfNot(); + + auto collection = NamedCollectionFactory::instance().tryGet(id_arg); + if (!collection) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no named collection `{}`", id_arg); + + /// Check access rights for the named collection + context->checkAccess(AccessType::NAMED_COLLECTION, id_arg); + + /// Apply key-value overrides from the query (e.g., url='...', blob_path='...') + if (!kv_args.empty()) + { + auto mutable_collection = collection->duplicate(); + auto params_from_query = getParamsMapFromAST(kv_args, context); + for (const auto & [key, value] : params_from_query) + mutable_collection->setOrUpdate(key, value.safeGet(), {}); + collection = std::move(mutable_collection); + } + + return collection; +} + } diff --git a/src/Backups/BackupInfo.h b/src/Backups/BackupInfo.h index 08218ece35a5..d3c56655040e 100644 --- a/src/Backups/BackupInfo.h +++ b/src/Backups/BackupInfo.h @@ -1,7 +1,9 @@ #pragma once +#include #include #include +#include namespace DB @@ -16,6 +18,7 @@ struct BackupInfo String id_arg; std::vector args; ASTPtr function_arg; + ASTs kv_args; String toString() const; static BackupInfo fromString(const String & str); @@ -26,6 +29,11 @@ struct BackupInfo String toStringForLogging() const; void copyS3CredentialsTo(BackupInfo & dest) const; + + /// Gets the named collection specified by id_arg, checks access rights, + /// and applies any key-value overrides from kv_args. + /// Returns nullptr if id_arg is empty (i.e., no named collection is used). 
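Returning briefly to src/Analyzer/traverseQueryTree.h introduced above: the header documents the traversal contract but ships no usage example. A minimal sketch under the stated signature; the predicate and callback here are illustrative and not taken from the PR:

```cpp
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/traverseQueryTree.h>

/// Illustrative use of the new helper: count column nodes in a resolved query tree,
/// skipping subtrees under any UNION node.
size_t countColumnNodes(const DB::QueryTreeNodePtr & root)
{
    size_t count = 0;
    DB::traverseQueryTree(
        root,
        /* should_visit_predicate = */
        [](const DB::QueryTreeNodePtr & parent, const DB::QueryTreeNodePtr & /*child*/)
        {
            return parent->getNodeType() != DB::QueryTreeNodeType::UNION;
        },
        /* func = */
        [&](const DB::QueryTreeNodePtr & node)
        {
            if (node->getNodeType() == DB::QueryTreeNodeType::COLUMN)
                ++count;
        });
    return count;
}
```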
+ NamedCollectionPtr getNamedCollection(ContextPtr context) const; }; } diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 5a7e6e9e62a0..5dd6ce6a39fa 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -60,6 +60,7 @@ namespace DB namespace Setting { + extern const SettingsUInt64 readonly; extern const SettingsBool s3_disable_checksum; } @@ -70,6 +71,7 @@ namespace ServerSetting namespace ErrorCodes { + extern const int ACCESS_DENIED; extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int QUERY_WAS_CANCELLED; @@ -388,6 +390,12 @@ struct BackupsWorker::BackupStarter backup_info = BackupInfo::fromAST(*backup_query->backup_name); backup_name_for_logging = backup_info.toStringForLogging(); is_internal_backup = backup_settings.internal; + + /// The "internal" option can only be used by a query that was initiated by another query (e.g., ON CLUSTER query). + /// It should not be allowed for an initial query explicitly specified by a user. + if (is_internal_backup && (query_context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY)) + throw Exception(ErrorCodes::ACCESS_DENIED, "Setting 'internal' cannot be set explicitly"); + on_cluster = !backup_query->cluster.empty() || is_internal_backup; if (!backup_settings.backup_uuid) @@ -452,13 +460,24 @@ struct BackupsWorker::BackupStarter cluster = backup_context->getCluster(backup_query->cluster); backup_settings.cluster_host_ids = cluster->getHostIDs(); } + + /// Check access rights before opening the backup destination (e.g., S3). + /// This ensures we fail fast with a proper ACCESS_DENIED error instead of trying to connect to external storage first. + /// For ON CLUSTER queries, access rights are checked in executeDDLQueryOnCluster() before distributing the query. + if (!on_cluster) + { + backup_query->setCurrentDatabase(backup_context->getCurrentDatabase()); + auto required_access = BackupUtils::getRequiredAccessToBackup(backup_query->elements); + query_context->checkAccess(required_access); + } + backup_coordination = backups_worker.makeBackupCoordination(on_cluster, backup_settings, backup_context); backup_coordination->startup(); chassert(!backup); backup = backups_worker.openBackupForWriting(backup_info, backup_settings, backup_coordination, backup_context); - backups_worker.doBackup(backup, backup_query, backup_id, backup_settings, backup_coordination, backup_context, query_context, + backups_worker.doBackup(backup, backup_query, backup_id, backup_settings, backup_coordination, backup_context, on_cluster, cluster); if (!is_internal_backup) @@ -603,7 +622,6 @@ void BackupsWorker::doBackup( const BackupSettings & backup_settings, std::shared_ptr backup_coordination, ContextMutablePtr context, - const ContextPtr & query_context, bool on_cluster, const ClusterPtr & cluster) { @@ -618,17 +636,13 @@ void BackupsWorker::doBackup( bool is_internal_backup = backup_settings.internal; - /// Checks access rights if this is not ON CLUSTER query. - /// (If this is ON CLUSTER query executeDDLQueryOnCluster() will check access rights later.) - auto required_access = BackupUtils::getRequiredAccessToBackup(backup_query->elements); - if (!on_cluster) - query_context->checkAccess(required_access); - maybeSleepForTesting(); /// Write the backup. if (on_cluster && !is_internal_backup) { + auto required_access = BackupUtils::getRequiredAccessToBackup(backup_query->elements); + /// Send the BACKUP query to other hosts. 
backup_settings.copySettingsToQuery(*backup_query); sendQueryToOtherHosts(*backup_query, cluster, backup_settings.shard_num, backup_settings.replica_num, @@ -829,6 +843,19 @@ struct BackupsWorker::RestoreStarter backup_info = BackupInfo::fromAST(*restore_query->backup_name); backup_name_for_logging = backup_info.toStringForLogging(); is_internal_restore = restore_settings.internal; + + /// The "internal" option can only be used by a query that was initiated by another query (e.g., ON CLUSTER query). + /// It should not be allowed for an initial query explicitly specified by a user. + if (is_internal_restore && (query_context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY)) + throw Exception(ErrorCodes::ACCESS_DENIED, "Setting 'internal' cannot be set explicitly"); + + /// RESTORE is a write operation, it should be forbidden in strict readonly mode (readonly=1). + /// Note: readonly=2 allows changing settings but still restricts writes - however it's set automatically + /// by the HTTP interface for GET requests (to protect against accidental writes), so we only block readonly=1 + /// which is explicitly set by the user to enforce read-only mode. + if (query_context->getSettingsRef()[Setting::readonly] == 1) + throw Exception(ErrorCodes::ACCESS_DENIED, "Cannot execute RESTORE in readonly mode"); + on_cluster = !restore_query->cluster.empty() || is_internal_restore; if (!restore_settings.restore_uuid) diff --git a/src/Backups/BackupsWorker.h b/src/Backups/BackupsWorker.h index 011f0107eae8..559408c868cf 100644 --- a/src/Backups/BackupsWorker.h +++ b/src/Backups/BackupsWorker.h @@ -85,7 +85,6 @@ class BackupsWorker const BackupSettings & backup_settings, std::shared_ptr backup_coordination, ContextMutablePtr context, - const ContextPtr & query_context, bool on_cluster, const ClusterPtr & cluster); diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index b7e880c90160..dde3f73af81f 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -6,14 +6,14 @@ #if USE_AZURE_BLOB_STORAGE #include -#include #include +#include +#include #include #include #include #include -#include #endif @@ -50,28 +50,31 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) auto creator_fn = []([[maybe_unused]] BackupFactory::CreateParams params) -> std::unique_ptr { #if USE_AZURE_BLOB_STORAGE - const String & id_arg = params.backup_info.id_arg; const auto & args = params.backup_info.args; String blob_path; AzureBlobStorage::ConnectionParams connection_params; - auto request_settings = AzureBlobStorage::getRequestSettings(params.context->getSettingsRef()); - if (!id_arg.empty()) + if (auto collection = params.backup_info.getNamedCollection(params.context)) { - const auto & config = params.context->getConfigRef(); - auto config_prefix = "named_collections." 
+ id_arg; + String connection_url = collection->getAnyOrDefault({"connection_string", "storage_account_url"}, ""); + String container_name = collection->get("container"); + blob_path = collection->getOrDefault("blob_path", ""); - if (!config.has(config_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg); - - connection_params = + auto get_optional = [&](const char * key) -> std::optional { - .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), - .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), - .client_options = AzureBlobStorage::getClientOptions(params.context, params.context->getSettingsRef(), *request_settings, /*for_disk=*/ true), + return collection->has(key) ? std::optional(collection->get(key)) : std::nullopt; }; + connection_params = getAzureConnectionParams( + connection_url, + container_name, + get_optional("account_name"), + get_optional("account_key"), + get_optional("client_id"), + get_optional("tenant_id"), + params.context); + if (args.size() > 1) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]"); @@ -87,20 +90,19 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) auto container_name = args[1].safeGet(); blob_path = args[2].safeGet(); - AzureBlobStorage::processURL(connection_url, container_name, connection_params.endpoint, connection_params.auth_method); - connection_params.client_options = AzureBlobStorage::getClientOptions(params.context, params.context->getSettingsRef(), *request_settings, /*for_disk=*/ true); + connection_params = getAzureConnectionParams( + connection_url, container_name, std::nullopt, std::nullopt, std::nullopt, std::nullopt, params.context); } else if (args.size() == 5) { - connection_params.endpoint.storage_account_url = args[0].safeGet(); - connection_params.endpoint.container_name = args[1].safeGet(); + auto connection_url = args[0].safeGet(); + auto container_name = args[1].safeGet(); blob_path = args[2].safeGet(); - auto account_name = args[3].safeGet(); auto account_key = args[4].safeGet(); - connection_params.auth_method = std::make_shared(account_name, account_key); - connection_params.client_options = AzureBlobStorage::getClientOptions(params.context, params.context->getSettingsRef(), *request_settings, /*for_disk=*/ true); + connection_params = getAzureConnectionParams( + connection_url, container_name, account_name, account_key, std::nullopt, std::nullopt, params.context); } else { diff --git a/src/Backups/registerBackupEngineS3.cpp b/src/Backups/registerBackupEngineS3.cpp index 8c0feb17e89a..b0a3b6a668d0 100644 --- a/src/Backups/registerBackupEngineS3.cpp +++ b/src/Backups/registerBackupEngineS3.cpp @@ -6,12 +6,12 @@ #if USE_AWS_S3 #include #include +#include +#include #include #include -#include -#include - #include +#include namespace DB::S3AuthSetting { @@ -54,7 +54,6 @@ void registerBackupEngineS3(BackupFactory & factory) auto creator_fn = []([[maybe_unused]] const BackupFactory::CreateParams & params) -> std::unique_ptr { #if USE_AWS_S3 - const String & id_arg = params.backup_info.id_arg; const auto & args = params.backup_info.args; String s3_uri; @@ -63,22 +62,16 @@ void registerBackupEngineS3(BackupFactory & factory) String role_arn; String role_session_name; - if (!id_arg.empty()) + if (auto collection = params.backup_info.getNamedCollection(params.context)) { - const auto & config = params.context->getConfigRef(); - auto 
config_prefix = "named_collections." + id_arg; - - if (!config.has(config_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg); - - s3_uri = config.getString(config_prefix + ".url"); - access_key_id = config.getString(config_prefix + ".access_key_id", ""); - secret_access_key = config.getString(config_prefix + ".secret_access_key", ""); - role_arn = config.getString(config_prefix + ".role_arn", ""); - role_session_name = config.getString(config_prefix + ".role_session_name", ""); - - if (config.has(config_prefix + ".filename")) - s3_uri = std::filesystem::path(s3_uri) / config.getString(config_prefix + ".filename"); + s3_uri = collection->get("url"); + access_key_id = collection->getOrDefault("access_key_id", ""); + secret_access_key = collection->getOrDefault("secret_access_key", ""); + role_arn = collection->getOrDefault("role_arn", ""); + role_session_name = collection->getOrDefault("role_session_name", ""); + + if (collection->has("filename")) + s3_uri = std::filesystem::path(s3_uri) / collection->get("filename"); if (args.size() > 1) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup S3 requires 1 or 2 arguments: named_collection, [filename]"); diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 74d39468c0d4..b52c5b0a2d10 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -88,6 +88,7 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; extern const int BAD_ARGUMENTS; extern const int EMPTY_DATA_PASSED; + extern const int LOGICAL_ERROR; } Connection::~Connection() @@ -1297,6 +1298,11 @@ Packet Connection::receivePacket() { try { + /// We are trying to send something to already disconnected connection, + /// this means that we continue using Connection after exception. + if (!in) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Connection to {} is terminated", getDescription()); + Packet res; /// Have we already read packet type? diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index b93ca0987b56..7ca7701a1e83 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -2187,4 +2187,15 @@ void ColumnObject::repairDuplicatesInDynamicPathsAndSharedData(size_t offset) shared_data = std::move(new_shared_data); } +void ColumnObject::validateDynamicPathsSizes() const +{ + size_t expected_size = shared_data->size(); + for (const auto & [path, column] : dynamic_paths) + { + if (column->size() != expected_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected size of dynamic path {}: {} != {}", path, column->size(), expected_size); + } + +} + } diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index fc1426a3876e..2421cf7934b8 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -289,6 +289,8 @@ class ColumnObject final : public COWHelper, ColumnO /// offset argument - is the offset from which we should check for duplicates. 
void repairDuplicatesInDynamicPathsAndSharedData(size_t offset = 0); + void validateDynamicPathsSizes() const; + private: class SortedPathsIterator; diff --git a/src/Common/AllocatorWithMemoryTracking.h b/src/Common/AllocatorWithMemoryTracking.h index ff83e925822c..7e60e3504650 100644 --- a/src/Common/AllocatorWithMemoryTracking.h +++ b/src/Common/AllocatorWithMemoryTracking.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -18,6 +19,14 @@ template struct AllocatorWithMemoryTracking { using value_type = T; + /// Allocator is stateless and thus always equal to another allocator. + using is_always_equal = std::true_type; + /// When propagate_on_container_move_assignment::value is: + /// true: The container will move the allocator from the source to the destination during move assignment + /// false (default): The container keeps its original allocator + /// For a stateless allocator like this one, this option doesn't make a lot of sense and needed only + /// to workaround a compilation error in our version of boost::container::devector. + using propagate_on_container_move_assignment = std::true_type; AllocatorWithMemoryTracking() = default; @@ -54,13 +63,13 @@ struct AllocatorWithMemoryTracking }; template -bool operator==(const AllocatorWithMemoryTracking &, const AllocatorWithMemoryTracking &) +constexpr bool operator==(const AllocatorWithMemoryTracking &, const AllocatorWithMemoryTracking &) { return true; } template -bool operator!=(const AllocatorWithMemoryTracking &, const AllocatorWithMemoryTracking &) +constexpr bool operator!=(const AllocatorWithMemoryTracking &, const AllocatorWithMemoryTracking &) { return false; } diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp index c83f7b893050..476738471079 100644 --- a/src/Common/FailPoint.cpp +++ b/src/Common/FailPoint.cpp @@ -116,10 +116,22 @@ static struct InitFiu REGULAR(slowdown_parallel_replicas_local_plan_read) \ ONCE(iceberg_writes_cleanup) \ REGULAR(sleep_in_logs_flush) \ + ONCE(database_replicated_drop_before_removing_keeper_failed) \ + ONCE(database_replicated_drop_after_removing_keeper_failed) \ + PAUSEABLE_ONCE(mt_mutate_task_pause_in_prepare) \ + PAUSEABLE(rmt_mutate_task_pause_in_prepare) \ + PAUSEABLE(rmt_merge_selecting_task_pause_when_scheduled) \ + PAUSEABLE_ONCE(smt_mutate_task_pause_in_prepare) \ + PAUSEABLE_ONCE(smt_merge_selecting_task_pause_when_scheduled) \ + ONCE(shared_set_full_update_fails_when_initializing) \ + PAUSEABLE(after_kill_part_pause) \ + ONCE(parallel_replicas_reading_response_timeout) \ + ONCE(database_iceberg_gcs) \ + REGULAR(rmt_delay_execute_drop_range) \ + REGULAR(rmt_delay_commit_part) \ ONCE(smt_commit_exception_before_op) \ ONCE(backup_add_empty_memory_table) \ - REGULAR(refresh_task_stop_racing_for_running_refresh) \ - ONCE(database_iceberg_gcs) + REGULAR(refresh_task_stop_racing_for_running_refresh) namespace FailPoints diff --git a/src/Common/MemoryWorker.cpp b/src/Common/MemoryWorker.cpp index f19508b6a331..241d6d218515 100644 --- a/src/Common/MemoryWorker.cpp +++ b/src/Common/MemoryWorker.cpp @@ -32,7 +32,6 @@ namespace DB namespace ErrorCodes { extern const int FILE_DOESNT_EXIST; - extern const int LOGICAL_ERROR; } #if defined(OS_LINUX) @@ -223,13 +222,21 @@ std::string_view sourceToString(MemoryWorker::MemoryUsageSource source) /// - reading from cgroups' pseudo-files (fastest and most accurate) /// - reading jemalloc's resident stat (doesn't take into account allocations that didn't use jemalloc) /// Also, different tick rates are used because not all 
options are equally fast -MemoryWorker::MemoryWorker(uint64_t period_ms_, bool correct_tracker_, bool use_cgroup, std::shared_ptr page_cache_) +MemoryWorker::MemoryWorker( + MemoryWorkerConfig config, + std::shared_ptr page_cache_) : log(getLogger("MemoryWorker")) - , period_ms(period_ms_) - , correct_tracker(correct_tracker_) + , rss_update_period_ms(config.rss_update_period_ms) + , correct_tracker(config.correct_tracker) + , purge_total_memory_threshold_ratio(config.purge_total_memory_threshold_ratio) + , purge_dirty_pages_threshold_ratio(config.purge_dirty_pages_threshold_ratio) , page_cache(page_cache_) { - if (use_cgroup) +#if USE_JEMALLOC + page_size = pagesize_mib.getValue(); +#endif + + if (config.use_cgroup) { #if defined(OS_LINUX) try @@ -245,8 +252,8 @@ MemoryWorker::MemoryWorker(uint64_t period_ms_, bool correct_tracker_, bool use_ cgroups_reader = ICgroupsReader::createCgroupsReader(version, cgroup_path); source = MemoryUsageSource::Cgroups; - if (period_ms == 0) - period_ms = cgroups_memory_usage_tick_ms; + if (rss_update_period_ms == 0) + rss_update_period_ms = cgroups_memory_usage_tick_ms; return; } @@ -261,8 +268,8 @@ MemoryWorker::MemoryWorker(uint64_t period_ms_, bool correct_tracker_, bool use_ static constexpr uint64_t jemalloc_memory_usage_tick_ms{100}; source = MemoryUsageSource::Jemalloc; - if (period_ms == 0) - period_ms = jemalloc_memory_usage_tick_ms; + if (rss_update_period_ms == 0) + rss_update_period_ms = jemalloc_memory_usage_tick_ms; #endif } @@ -276,79 +283,114 @@ void MemoryWorker::start() if (source == MemoryUsageSource::None) return; + const std::string purge_dirty_pages_info = purge_dirty_pages_threshold_ratio > 0 || purge_total_memory_threshold_ratio > 0 + ? fmt::format( + "enabled (total memory threshold ratio: {}, dirty pages threshold ratio: {}, page size: {})", + purge_total_memory_threshold_ratio, + purge_dirty_pages_threshold_ratio, + page_size) + : "disabled"; + LOG_INFO( - getLogger("MemoryWorker"), - "Starting background memory thread with period of {}ms, using {} as source", - period_ms, - sourceToString(source)); - background_thread = ThreadFromGlobalPool([this] { backgroundThread(); }); + log, + "Starting background memory thread with period of {}ms, using {} as source, purging dirty pages {}", + rss_update_period_ms, + sourceToString(source), + purge_dirty_pages_info); + + update_resident_memory_thread = ThreadFromGlobalPool([this] { updateResidentMemoryThread(); }); + +#if USE_JEMALLOC + purge_dirty_pages_thread = ThreadFromGlobalPool([this] { purgeDirtyPagesThread(); }); +#endif } MemoryWorker::~MemoryWorker() { { - std::unique_lock lock(mutex); + std::scoped_lock lock(rss_update_mutex, purge_dirty_pages_mutex); shutdown = true; } - cv.notify_all(); - if (background_thread.joinable()) - background_thread.join(); + rss_update_cv.notify_all(); + purge_dirty_pages_cv.notify_all(); + + if (update_resident_memory_thread.joinable()) + update_resident_memory_thread.join(); + +#if USE_JEMALLOC + if (purge_dirty_pages_thread.joinable()) + purge_dirty_pages_thread.join(); +#endif } -uint64_t MemoryWorker::getMemoryUsage() +uint64_t MemoryWorker::getMemoryUsage(bool log_error) { switch (source) { case MemoryUsageSource::Cgroups: - return cgroups_reader != nullptr ? 
cgroups_reader->readMemoryUsage() : 0; + { + if (cgroups_reader != nullptr) + return cgroups_reader->readMemoryUsage(); + [[fallthrough]]; + } case MemoryUsageSource::Jemalloc: #if USE_JEMALLOC epoch_mib.setValue(0); return resident_mib.getValue(); #else - return 0; + [[fallthrough]]; #endif case MemoryUsageSource::None: - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Trying to fetch memory usage while no memory source can be used"); + { + if (log_error) + LOG_ERROR(getLogger("MemoryWorker"), "Trying to fetch memory usage while no memory source can be used"); + return 0; + } } } -void MemoryWorker::backgroundThread() +void MemoryWorker::updateResidentMemoryThread() { setThreadName("MemoryWorker"); - std::chrono::milliseconds chrono_period_ms{period_ms}; + std::chrono::milliseconds chrono_period_ms{rss_update_period_ms}; [[maybe_unused]] bool first_run = true; - std::unique_lock lock(mutex); + std::unique_lock rss_update_lock(rss_update_mutex); + while (true) { - cv.wait_for(lock, chrono_period_ms, [this] { return shutdown; }); + rss_update_cv.wait_for(rss_update_lock, chrono_period_ms, [this] { return shutdown; }); if (shutdown) return; Stopwatch total_watch; - Int64 resident = getMemoryUsage(); + Int64 resident = getMemoryUsage(first_run); MemoryTracker::updateRSS(resident); if (page_cache) page_cache->autoResize(std::max(resident, total_memory_tracker.get()), total_memory_tracker.getHardLimit()); #if USE_JEMALLOC - if (resident > total_memory_tracker.getHardLimit()) + const auto memory_tracker_limit = total_memory_tracker.getHardLimit(); + + const bool needs_purge + = (purge_total_memory_threshold_ratio > 0 && resident > memory_tracker_limit * purge_total_memory_threshold_ratio) + || (purge_dirty_pages_threshold_ratio > 0 + && pdirty_mib.getValue() * page_size > memory_tracker_limit * purge_dirty_pages_threshold_ratio); + + bool is_purge_enabled = false; + if (needs_purge && purge_dirty_pages.compare_exchange_strong(is_purge_enabled, true, std::memory_order_relaxed)) { - Stopwatch purge_watch; - purge_mib.run(); - ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge); - ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, purge_watch.elapsedMicroseconds()); + purge_dirty_pages_cv.notify_all(); } /// update MemoryTracker with `allocated` information from jemalloc when: /// - it's a first run of MemoryWorker (MemoryTracker could've missed some allocation before its initialization) /// - MemoryTracker stores a negative value /// - `correct_tracker` is set to true - if (unlikely(first_run || total_memory_tracker.get() < 0)) + if (first_run || total_memory_tracker.get() < 0) [[unlikely]] MemoryTracker::updateAllocated(resident, /*log_change=*/true); else if (correct_tracker) MemoryTracker::updateAllocated(resident, /*log_change=*/false); @@ -358,7 +400,7 @@ void MemoryWorker::backgroundThread() /// resident memory can be much larger than the actual allocated memory /// so we rather ignore the potential difference caused by allocated memory /// before MemoryTracker initialization - if (unlikely(total_memory_tracker.get() < 0) || correct_tracker) + if (total_memory_tracker.get() < 0 || correct_tracker) [[unlikely]] MemoryTracker::updateAllocated(resident, /*log_change=*/false); #endif @@ -368,4 +410,34 @@ void MemoryWorker::backgroundThread() } } +void MemoryWorker::purgeDirtyPagesThread() +{ +#if USE_JEMALLOC + /// Instead of having completely separate logic for purging dirty pages, + /// we rely on the main thread to notify us when we need to purge dirty 
pages. + /// We do it to avoid reading RSS value in both threads. Even though they are fairly + /// fast, they are still not free. + /// So we keep the work of reading current RSS in one thread which allows us to keep the low period time for it. + setThreadName("MemoryWorker"); + + std::unique_lock purge_dirty_pages_lock(purge_dirty_pages_mutex); + + while (true) + { + purge_dirty_pages_cv.wait(purge_dirty_pages_lock, [this] { return shutdown || purge_dirty_pages.load(std::memory_order_relaxed); }); + if (shutdown) + return; + + bool is_purge_enabled = true; + if (!purge_dirty_pages.compare_exchange_strong(is_purge_enabled, false, std::memory_order_relaxed)) + continue; + + Stopwatch purge_watch; + purge_mib.run(); + ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge); + ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, purge_watch.elapsedMicroseconds()); + } +#endif +} + } diff --git a/src/Common/MemoryWorker.h b/src/Common/MemoryWorker.h index 8eac77c3c908..28f6455f852a 100644 --- a/src/Common/MemoryWorker.h +++ b/src/Common/MemoryWorker.h @@ -30,6 +30,14 @@ struct ICgroupsReader virtual std::string dumpAllStats() = 0; }; +struct MemoryWorkerConfig +{ + uint64_t rss_update_period_ms = 0; + double purge_dirty_pages_threshold_ratio = 0.0; + double purge_total_memory_threshold_ratio = 0.0; + bool correct_tracker = false; + bool use_cgroup = true; +}; /// Correct MemoryTracker based on external information (e.g. Cgroups or stats.resident from jemalloc) /// The worker spawns a background thread which periodically reads current resident memory from the source, @@ -38,7 +46,7 @@ struct ICgroupsReader class MemoryWorker { public: - MemoryWorker(uint64_t period_ms_, bool correct_tracker_, bool use_cgroup, std::shared_ptr page_cache_); + MemoryWorker(MemoryWorkerConfig config, std::shared_ptr page_cache_); enum class MemoryUsageSource : uint8_t { @@ -53,21 +61,31 @@ class MemoryWorker ~MemoryWorker(); private: - uint64_t getMemoryUsage(); + uint64_t getMemoryUsage(bool log_error); - void backgroundThread(); + void updateResidentMemoryThread(); + [[maybe_unused]] void purgeDirtyPagesThread(); - ThreadFromGlobalPool background_thread; + ThreadFromGlobalPool update_resident_memory_thread; + ThreadFromGlobalPool purge_dirty_pages_thread; - std::mutex mutex; - std::condition_variable cv; + std::mutex rss_update_mutex; + std::condition_variable rss_update_cv; + std::mutex purge_dirty_pages_mutex; + std::condition_variable purge_dirty_pages_cv; bool shutdown = false; LoggerPtr log; - uint64_t period_ms; + uint64_t rss_update_period_ms; + bool correct_tracker = false; + std::atomic purge_dirty_pages = false; + double purge_total_memory_threshold_ratio; + double purge_dirty_pages_threshold_ratio; + uint64_t page_size = 0; + MemoryUsageSource source{MemoryUsageSource::None}; std::shared_ptr cgroups_reader; @@ -77,9 +95,11 @@ class MemoryWorker #if USE_JEMALLOC JemallocMibCache epoch_mib{"epoch"}; JemallocMibCache resident_mib{"stats.resident"}; + JemallocMibCache pagesize_mib{"arenas.page"}; #define STRINGIFY_HELPER(x) #x #define STRINGIFY(x) STRINGIFY_HELPER(x) + JemallocMibCache pdirty_mib{"stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".pdirty"}; JemallocMibCache purge_mib{"arena." 
STRINGIFY(MALLCTL_ARENAS_ALL) ".purge"}; #undef STRINGIFY #undef STRINGIFY_HELPER diff --git a/src/Common/Scheduler/SchedulerRoot.h b/src/Common/Scheduler/SchedulerRoot.h index 7650780dc597..93b7e1a5082c 100644 --- a/src/Common/Scheduler/SchedulerRoot.h +++ b/src/Common/Scheduler/SchedulerRoot.h @@ -250,9 +250,11 @@ class SchedulerRoot final : public ISchedulerNode } Resource * current = nullptr; // round-robin pointer - std::unordered_map children; // resources by pointer std::atomic stop_flag = false; EventQueue events; + /// Resources by pointer. Must be destroyed before the "events", + /// because the descructor of ISchedulerNode might access the mutex in that queue. + std::unordered_map children; ThreadFromGlobalPool scheduler; }; diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index e5875fb0c968..66162ecd536e 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -125,10 +125,10 @@ class ThreadGroup std::vector getInvolvedThreadIds() const; size_t getPeakThreadsUsage() const; - UInt64 getThreadsTotalElapsedMs() const; + UInt64 getGroupElapsedMs() const; void linkThread(UInt64 thread_id); - void unlinkThread(UInt64 elapsed_thread_counter_ms); + void unlinkThread(); private: mutable std::mutex mutex; @@ -145,7 +145,8 @@ class ThreadGroup /// Peak threads count in the group size_t peak_threads_usage TSA_GUARDED_BY(mutex) = 0; - UInt64 elapsed_total_threads_counter_ms TSA_GUARDED_BY(mutex) = 0; + Stopwatch effective_group_stopwatch TSA_GUARDED_BY(mutex) = Stopwatch(STOPWATCH_DEFAULT_CLOCK, 0, /* is running */ false); + UInt64 elapsed_group_ms TSA_GUARDED_BY(mutex) = 0; static ThreadGroupPtr create(ContextPtr context); }; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index d7dd7013d86e..55501ea1d568 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -351,7 +351,7 @@ void ZooKeeper::flushWriteBuffer() void ZooKeeper::cancelWriteBuffer() noexcept { if (compressed_out) - compressed_out->cancel(); + compressed_out->cancel(); if (out) out->cancel(); } @@ -585,7 +585,6 @@ void ZooKeeper::connect( throw; } - connected = true; if (use_compression) { compressed_in.emplace(*in); @@ -593,6 +592,8 @@ void ZooKeeper::connect( } original_index.store(node.original_index); + + connected = true; break; } catch (...) diff --git a/src/Compression/CompressionCodecDeflateQpl.cpp b/src/Compression/CompressionCodecDeflateQpl.cpp index 6a45b4420d2c..c025b41315aa 100644 --- a/src/Compression/CompressionCodecDeflateQpl.cpp +++ b/src/Compression/CompressionCodecDeflateQpl.cpp @@ -435,7 +435,7 @@ inline void touchBufferWithZeroFilling(char * buffer, UInt32 buffer_size) } } -void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const +UInt32 CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const { /// QPL library is using AVX-512 with some shuffle operations. /// Memory sanitizer don't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle. 
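The signature change that starts here, and repeats for every codec below, makes doDecompressData return the number of bytes actually written instead of void. A sketch of how a caller could use that value; the wrapper is illustrative and is written as if it were a new member of ICompressionCodec, since doDecompressData is a protected virtual member and the real entry point is ICompressionCodec::decompress:

```cpp
/// Hypothetical helper (not in the PR): with the written size reported, the generic
/// decompression path can reject truncated or corrupted input instead of silently
/// leaving the tail of `dest` uninitialized.
UInt32 ICompressionCodec::decompressAndCheckSize(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
{
    UInt32 bytes_written = doDecompressData(source, source_size, dest, uncompressed_size);
    if (bytes_written != uncompressed_size)
        throw Exception(ErrorCodes::CANNOT_DECOMPRESS,
                        "Decompressed {} bytes, expected {}", bytes_written, uncompressed_size);
    return bytes_written;
}
```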
@@ -457,7 +457,7 @@ void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 so } else sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); - return; + return uncompressed_size; } case CodecMode::Asynchronous: { @@ -466,11 +466,11 @@ void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 so res = hw_codec->doDecompressDataAsynchronous(source, source_size, dest, uncompressed_size); if (res == HardwareCodecDeflateQpl::RET_ERROR) sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); - return; + return uncompressed_size; } case CodecMode::SoftwareFallback: sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); - return; + return uncompressed_size; } } diff --git a/src/Compression/CompressionCodecDeflateQpl.h b/src/Compression/CompressionCodecDeflateQpl.h index b85c52521725..3ecfbb49eaf1 100644 --- a/src/Compression/CompressionCodecDeflateQpl.h +++ b/src/Compression/CompressionCodecDeflateQpl.h @@ -117,7 +117,7 @@ class CompressionCodecDeflateQpl final : public ICompressionCodec UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; + UInt32 doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; /// Flush result for previous asynchronous decompression requests on asynchronous mode. void flushAsynchronousDecompressRequests() override; diff --git a/src/Compression/CompressionCodecDelta.cpp b/src/Compression/CompressionCodecDelta.cpp index 79d7b76f3e28..89fdbfd4d4d2 100644 --- a/src/Compression/CompressionCodecDelta.cpp +++ b/src/Compression/CompressionCodecDelta.cpp @@ -21,7 +21,7 @@ class CompressionCodecDelta : public ICompressionCodec protected: UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; + UInt32 doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override { return uncompressed_size + 2; } @@ -47,6 +47,7 @@ namespace ErrorCodes extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; extern const int ILLEGAL_CODEC_PARAMETER; extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } CompressionCodecDelta::CompressionCodecDelta(UInt8 delta_bytes_size_) @@ -88,8 +89,9 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest) } template -void decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 output_size) +UInt32 decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 output_size) { + const char * const original_dest = dest; const char * const output_end = dest + output_size; if (source_size % sizeof(T) != 0) @@ -107,6 +109,8 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest, source += sizeof(T); dest += sizeof(T); } + + return dest - original_dest; } } @@ -118,7 +122,7 @@ UInt32 CompressionCodecDelta::doCompressData(const char * source, UInt32 source_ dest[1] = bytes_to_skip; /// unused (backward compatibility) memcpy(&dest[2], source, bytes_to_skip); size_t start_pos = 2 + bytes_to_skip; - switch (delta_bytes_size) // NOLINT(bugprone-switch-missing-default-case) + switch 
(delta_bytes_size) { case 1: compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); @@ -132,17 +136,19 @@ UInt32 CompressionCodecDelta::doCompressData(const char * source, UInt32 source_ case 8: compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot compress to delta-encoded data. Invalid byte size {}", UInt32{delta_bytes_size}); } return 1 + 1 + source_size; } -void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const +UInt32 CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const { if (source_size < 2) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress delta-encoded data. File has wrong header"); if (uncompressed_size == 0) - return; + return 0; UInt8 bytes_size = source[0]; @@ -157,20 +163,19 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_ memcpy(dest, &source[2], bytes_to_skip); UInt32 source_size_no_header = source_size - bytes_to_skip - 2; - switch (bytes_size) // NOLINT(bugprone-switch-missing-default-case) + switch (bytes_size) { case 1: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); case 2: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); case 4: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); case 8: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); + default: + /// This should be unreachable due to the check above + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot decompress delta-encoded data. 
File has unknown byte size {}", UInt32{bytes_size}); } } diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index e765ee43badd..667cfc216e48 100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -131,7 +131,7 @@ class CompressionCodecDoubleDelta : public ICompressionCodec UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; + UInt32 doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; @@ -365,16 +365,17 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) } template -void decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 output_size) +UInt32 decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 output_size) { static_assert(is_unsigned_v, "ValueType must be unsigned."); using UnsignedDeltaType = ValueType; const char * source_end = source + source_size; const char * output_end = dest + output_size; + const char * const original_dest = dest; if (source + sizeof(UInt32) > source_end) - return; + return 0; const UInt32 items_count = unalignedLoadLittleEndian(source); source += sizeof(items_count); @@ -384,7 +385,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest, // decoding first item if (source + sizeof(ValueType) > source_end || items_count < 1) - return; + return 0; prev_value = unalignedLoadLittleEndian(source); if (dest + sizeof(prev_value) > output_end) @@ -396,7 +397,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest, // decoding second item if (source + sizeof(UnsignedDeltaType) > source_end || items_count < 2) - return; + return dest - original_dest; prev_delta = unalignedLoadLittleEndian(source); prev_value = prev_value + static_cast(prev_delta); @@ -441,6 +442,8 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest, prev_delta = curr_value - prev_value; prev_value = curr_value; } + + return dest - original_dest; } UInt8 getDataBytesSize(const IDataType * column_type) @@ -498,7 +501,7 @@ UInt32 CompressionCodecDoubleDelta::doCompressData(const char * source, UInt32 s size_t start_pos = 2 + bytes_to_skip; UInt32 compressed_size = 0; - switch (data_bytes_size) // NOLINT(bugprone-switch-missing-default-case) + switch (data_bytes_size) { case 1: compressed_size = compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); @@ -512,19 +515,21 @@ UInt32 CompressionCodecDoubleDelta::doCompressData(const char * source, UInt32 s case 8: compressed_size = compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot compress to double delta-encoded data. 
Invalid byte size {}", UInt32{data_bytes_size}); } return 1 + 1 + compressed_size + UInt32(bytes_to_skip); } -void CompressionCodecDoubleDelta::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const +UInt32 CompressionCodecDoubleDelta::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const { if (source_size < 2) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress double-delta encoded data. File has wrong header"); UInt8 bytes_size = source[0]; - if (bytes_size == 0) + if (bytes_size != 1 && bytes_size != 2 && bytes_size != 4 && bytes_size != 8) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress double-delta encoded data. File has wrong header"); UInt8 bytes_to_skip = uncompressed_size % bytes_size; @@ -535,20 +540,20 @@ void CompressionCodecDoubleDelta::doDecompressData(const char * source, UInt32 s memcpy(dest, &source[2], bytes_to_skip); UInt32 source_size_no_header = source_size - bytes_to_skip - 2; - switch (bytes_size) // NOLINT(bugprone-switch-missing-default-case) + switch (bytes_size) { case 1: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); case 2: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); case 4: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); case 8: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); + default: + /// This should be unreachable due to the check above + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Cannot decompress with codec 'double-delta'. File has incorrect width ({})", UInt32{bytes_size}); } } diff --git a/src/Compression/CompressionCodecEncrypted.cpp b/src/Compression/CompressionCodecEncrypted.cpp index 39522a3b811b..b663f9894945 100644 --- a/src/Compression/CompressionCodecEncrypted.cpp +++ b/src/Compression/CompressionCodecEncrypted.cpp @@ -537,7 +537,7 @@ UInt32 CompressionCodecEncrypted::doCompressData(const char * source, UInt32 sou return safe_cast(out_size); } -void CompressionCodecEncrypted::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const +UInt32 CompressionCodecEncrypted::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const { /// The key is needed for decrypting. That's why it is read at the beginning of process. 
UInt64 key_id; @@ -566,6 +566,8 @@ void CompressionCodecEncrypted::doDecompressData(const char * source, UInt32 sou throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't decrypt data, out length after decryption {} is wrong, expected {}", out_len, ciphertext_size - tag_size); + + return out_len; } } diff --git a/src/Compression/CompressionCodecEncrypted.h b/src/Compression/CompressionCodecEncrypted.h index 229eccf032b9..c3d19436c1ec 100644 --- a/src/Compression/CompressionCodecEncrypted.h +++ b/src/Compression/CompressionCodecEncrypted.h @@ -121,7 +121,7 @@ class CompressionCodecEncrypted final : public ICompressionCodec /// Decrypt data with chosen method /// Throws exception if decryption is impossible or size of decrypted text is incorrect - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; + UInt32 doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; private: EncryptionMethod encryption_method; }; diff --git a/src/Compression/CompressionCodecFPC.cpp b/src/Compression/CompressionCodecFPC.cpp index 1b35a0612681..dd4df9be6dcc 100644 --- a/src/Compression/CompressionCodecFPC.cpp +++ b/src/Compression/CompressionCodecFPC.cpp @@ -33,7 +33,7 @@ class CompressionCodecFPC : public ICompressionCodec protected: UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; + UInt32 doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; @@ -243,7 +243,7 @@ class FPCOperation public: FPCOperation(std::span destination, UInt8 compression_level) - : dfcm_predictor(1u << compression_level), fcm_predictor(1u << compression_level), chunk{}, result{destination} + : dfcm_predictor(1u << compression_level), fcm_predictor(1u << compression_level), chunk{}, result{destination}, result_initial_pointer{result.data()} { } @@ -261,7 +261,7 @@ class FPCOperation return initial_size - result.size(); } - void decode(std::span values, size_t decoded_size) && + UInt32 decode(std::span values, size_t decoded_size) && { size_t read_bytes = 0; @@ -273,6 +273,8 @@ class FPCOperation read_bytes += decodeChunk(values.subspan(read_bytes), chunk_view); exportChunk(chunk_view); } + + return result.data() - result_initial_pointer; } private: @@ -455,6 +457,8 @@ class FPCOperation std::array chunk{}; std::span result{}; + /// Pointer to the start of the result buffer (used to calculate the final size) + std::span::const_pointer result_initial_pointer{}; }; } @@ -479,7 +483,7 @@ UInt32 CompressionCodecFPC::doCompressData(const char * source, UInt32 source_si throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with codec 'FPC'. File has incorrect float width"); } -void CompressionCodecFPC::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const +UInt32 CompressionCodecFPC::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const { if (source_size < HEADER_SIZE) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress FPC-encoded data. 
File has wrong header"); @@ -495,11 +499,9 @@ void CompressionCodecFPC::doDecompressData(const char * source, UInt32 source_si switch (compressed_float_width) { case sizeof(Float64): - FPCOperation(destination, compressed_level).decode(src, uncompressed_size); - break; + return FPCOperation(destination, compressed_level).decode(src, uncompressed_size); case sizeof(Float32): - FPCOperation(destination, compressed_level).decode(src, uncompressed_size); - break; + return FPCOperation(destination, compressed_level).decode(src, uncompressed_size); default: throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress FPC-encoded data. File has incorrect float width"); } diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index 34065da4d741..e93be2be8407 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -26,7 +26,7 @@ class CompressionCodecGCD : public ICompressionCodec protected: /// 1 byte (`gcd_bytes_size` value) + 1 byte (`bytes_to_skip` value) + `bytes_to_skip` bytes (trash) + `gcd_bytes_size` bytes (gcd value) + (`source_size` - `bytes_to_skip`) bytes (data) UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; + UInt32 doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; bool isCompression() const override { return false; } @@ -47,6 +47,7 @@ namespace ErrorCodes extern const int CANNOT_DECOMPRESS; extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } CompressionCodecGCD::CompressionCodecGCD(UInt8 gcd_bytes_size_) @@ -131,8 +132,9 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest) } template -void decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 output_size) +UInt32 decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 output_size) { + const char * original_dest = dest; if (source_size % sizeof(T) != 0) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress GCD-encoded data, data size {} is not aligned to {}", source_size, sizeof(T)); @@ -153,7 +155,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest, throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress GCD-encoded data"); memcpy(dest, source, source_size - sizeof(T)); - return; + return source_size - sizeof(T); } while (source < source_end) @@ -166,6 +168,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest, dest += sizeof(T); } chassert(source == source_end); + return dest - original_dest; } } @@ -177,7 +180,7 @@ UInt32 CompressionCodecGCD::doCompressData(const char * source, UInt32 source_si dest[1] = bytes_to_skip; /// unused (backward compatibility) memcpy(&dest[2], source, bytes_to_skip); size_t start_pos = 2 + bytes_to_skip; - switch (gcd_bytes_size) // NOLINT(bugprone-switch-missing-default-case) + switch (gcd_bytes_size) { case 1: compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); @@ -197,17 +200,19 @@ UInt32 CompressionCodecGCD::doCompressData(const char * source, UInt32 source_si case 32: compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, 
&dest[start_pos]); break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot compress to GCD-encoded data. Invalid byte size {}", UInt32{gcd_bytes_size}); } return 2 + gcd_bytes_size + source_size; } -void CompressionCodecGCD::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const +UInt32 CompressionCodecGCD::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const { if (source_size < 2) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress GCD-encoded data. File has wrong header"); if (uncompressed_size == 0) - return; + return 0; UInt8 bytes_size = source[0]; @@ -226,26 +231,23 @@ void CompressionCodecGCD::doDecompressData(const char * source, UInt32 source_si memcpy(dest, &source[2], bytes_to_skip); UInt32 source_size_no_header = source_size - bytes_to_skip - 2; - switch (bytes_size) // NOLINT(bugprone-switch-missing-default-case) + switch (bytes_size) { case 1: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); case 2: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); case 4: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); case 8: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); case 16: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); case 32: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); + default: + /// This should be unreachable due to the check above + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot decompress GCD-encoded data. 
File has unknown byte size {}", UInt32{bytes_size}); } } diff --git a/src/Compression/CompressionCodecGorilla.cpp b/src/Compression/CompressionCodecGorilla.cpp index 5693d2205d44..acad0f69f9e9 100644 --- a/src/Compression/CompressionCodecGorilla.cpp +++ b/src/Compression/CompressionCodecGorilla.cpp @@ -116,7 +116,7 @@ class CompressionCodecGorilla : public ICompressionCodec UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; + UInt32 doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; @@ -141,6 +141,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; extern const int ILLEGAL_CODEC_PARAMETER; + extern const int LOGICAL_ERROR; } namespace @@ -270,12 +271,13 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest, } template -void decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) +UInt32 decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) { + const char * const original_dest = dest; const char * const source_end = source + source_size; if (source + sizeof(UInt32) > source_end) - return; + return 0; const UInt32 items_count = unalignedLoadLittleEndian(source); source += sizeof(items_count); @@ -284,7 +286,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest, // decoding first item if (source + sizeof(T) > source_end || items_count < 1) - return; + return 0; if (static_cast(items_count) * sizeof(T) > dest_size) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress Gorilla-encoded data: corrupted input data."); @@ -341,6 +343,8 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest, prev_xored_info = curr_xored_info; prev_value = curr_value; } + + return dest - original_dest; } UInt8 getDataBytesSize(const IDataType * column_type) @@ -398,7 +402,7 @@ UInt32 CompressionCodecGorilla::doCompressData(const char * source, UInt32 sourc UInt32 result_size = 0; const UInt32 compressed_size = getMaxCompressedDataSize(source_size); - switch (data_bytes_size) // NOLINT(bugprone-switch-missing-default-case) + switch (data_bytes_size) { case 1: result_size = compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos], compressed_size); @@ -412,12 +416,14 @@ UInt32 CompressionCodecGorilla::doCompressData(const char * source, UInt32 sourc case 8: result_size = compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos], compressed_size); break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot compress to Gorilla-encoded data. Invalid byte size {}", UInt32{data_bytes_size}); } return 2 + bytes_to_skip + result_size; } -void CompressionCodecGorilla::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const +UInt32 CompressionCodecGorilla::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const { if (source_size < 2) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress Gorilla-encoded data. 
File has wrong header"); @@ -438,20 +444,16 @@ void CompressionCodecGorilla::doDecompressData(const char * source, UInt32 sourc memcpy(dest, &source[2], bytes_to_skip); UInt32 source_size_no_header = source_size - bytes_to_skip - 2; UInt32 uncompressed_size_left = uncompressed_size - bytes_to_skip; - switch (bytes_size) // NOLINT(bugprone-switch-missing-default-case) + switch (bytes_size) { case 1: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], uncompressed_size_left); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], uncompressed_size_left); case 2: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], uncompressed_size_left); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], uncompressed_size_left); case 4: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], uncompressed_size_left); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], uncompressed_size_left); case 8: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], uncompressed_size_left); - break; + return bytes_to_skip + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], uncompressed_size_left); default: throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress Gorilla-encoded data. File has wrong header"); } diff --git a/src/Compression/CompressionCodecLZ4.cpp b/src/Compression/CompressionCodecLZ4.cpp index 4cb424b318cc..2b91e1070587 100644 --- a/src/Compression/CompressionCodecLZ4.cpp +++ b/src/Compression/CompressionCodecLZ4.cpp @@ -38,7 +38,7 @@ class CompressionCodecLZ4 : public ICompressionCodec String getDescription() const override { return "Extremely fast; good compression; balanced speed and efficiency."; } private: - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; + UInt32 doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; @@ -97,12 +97,15 @@ UInt32 CompressionCodecLZ4::doCompressData(const char * source, UInt32 source_si return LZ4_compress_default(source, dest, source_size, LZ4_COMPRESSBOUND(source_size)); } -void CompressionCodecLZ4::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const +UInt32 CompressionCodecLZ4::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const { bool success = LZ4::decompress(source, dest, source_size, uncompressed_size, lz4_stat); if (!success) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress LZ4-encoded data"); + + /// LZ4::decompress only returns true when the dest buffer has been fully written (uncompressed_size) + return uncompressed_size; } void registerCodecLZ4(CompressionCodecFactory & factory) diff --git a/src/Compression/CompressionCodecMultiple.cpp b/src/Compression/CompressionCodecMultiple.cpp index aae52a2d0512..d44482b64948 100644 --- a/src/Compression/CompressionCodecMultiple.cpp +++ b/src/Compression/CompressionCodecMultiple.cpp @@ -65,7 +65,7 @@ UInt32 CompressionCodecMultiple::doCompressData(const char 
* source, UInt32 sour return static_cast(1 + codecs.size() + source_size); } -void CompressionCodecMultiple::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 decompressed_size) const +UInt32 CompressionCodecMultiple::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 decompressed_size) const { if (source_size < 1 || !source[0]) throw Exception(decompression_error_code, "Wrong compression methods list"); @@ -113,10 +113,12 @@ void CompressionCodecMultiple::doDecompressData(const char * source, UInt32 sour codec->decompress(compressed_buf.data(), source_size, uncompressed_buf.data()); uncompressed_buf.swap(compressed_buf); + /// The call to decompress will validate uncompressed_size (same readDecompressedBlockSize call as here) source_size = uncompressed_size; } memcpy(dest, compressed_buf.data(), decompressed_size); + return decompressed_size; } std::vector CompressionCodecMultiple::getCodecsBytesFromData(const char * source) diff --git a/src/Compression/CompressionCodecMultiple.h b/src/Compression/CompressionCodecMultiple.h index 2c2573bacf54..3459958d94f2 100644 --- a/src/Compression/CompressionCodecMultiple.h +++ b/src/Compression/CompressionCodecMultiple.h @@ -23,7 +23,7 @@ class CompressionCodecMultiple final : public ICompressionCodec UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 decompressed_size) const override; + UInt32 doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 decompressed_size) const override; bool isCompression() const override; bool isGenericCompression() const override { return false; } diff --git a/src/Compression/CompressionCodecNone.cpp b/src/Compression/CompressionCodecNone.cpp index e52a601700e2..d09ab0b8179e 100644 --- a/src/Compression/CompressionCodecNone.cpp +++ b/src/Compression/CompressionCodecNone.cpp @@ -27,13 +27,14 @@ UInt32 CompressionCodecNone::doCompressData(const char * source, UInt32 source_s return source_size; } -void CompressionCodecNone::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const +UInt32 CompressionCodecNone::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const { if (source_size != uncompressed_size) throw Exception(decompression_error_code, "Wrong data for compression codec NONE: source_size ({}) != uncompressed_size ({})", source_size, uncompressed_size); memcpy(dest, source, uncompressed_size); + return uncompressed_size; } void registerCodecNone(CompressionCodecFactory & factory) diff --git a/src/Compression/CompressionCodecNone.h b/src/Compression/CompressionCodecNone.h index 89cf12bbe826..00d92586a7be 100644 --- a/src/Compression/CompressionCodecNone.h +++ b/src/Compression/CompressionCodecNone.h @@ -19,7 +19,7 @@ class CompressionCodecNone final : public ICompressionCodec protected: UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; + UInt32 doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; bool isCompression() const override { return false; } bool isGenericCompression() const override { return false; } diff --git a/src/Compression/CompressionCodecT64.cpp b/src/Compression/CompressionCodecT64.cpp 
index b724e8510b7d..5d2276e85914 100644 --- a/src/Compression/CompressionCodecT64.cpp +++ b/src/Compression/CompressionCodecT64.cpp @@ -43,7 +43,7 @@ class CompressionCodecT64 : public ICompressionCodec protected: UInt32 doCompressData(const char * src, UInt32 src_size, char * dst) const override; - void doDecompressData(const char * src, UInt32 src_size, char * dst, UInt32 uncompressed_size) const override; + UInt32 doDecompressData(const char * src, UInt32 src_size, char * dst, UInt32 uncompressed_size) const override; UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override { @@ -549,12 +549,14 @@ UInt32 compressData(const char * src, UInt32 bytes_size, char * dst) } template -void decompressData(const char * src, UInt32 bytes_size, char * dst, UInt32 uncompressed_size) +UInt32 decompressData(const char * src, UInt32 bytes_size, char * dst, UInt32 uncompressed_size) { using MinMaxType = std::conditional_t, Int64, UInt64>; static constexpr const UInt32 matrix_size = 64; static constexpr const UInt32 header_size = 2 * sizeof(UInt64); + + const char * const original_dst = dst; if (bytes_size < header_size) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress T64-encoded data, data size ({}) is less than the size of T64 header", @@ -583,7 +585,7 @@ void decompressData(const char * src, UInt32 bytes_size, char * dst, UInt32 unco T min_value = static_cast(min); for (UInt32 i = 0; i < num_elements; ++i, dst += sizeof(T)) unalignedStore(dst, min_value); - return; + return dst - original_dst; } UInt32 src_shift = sizeof(UInt64) * num_bits; @@ -634,7 +636,10 @@ void decompressData(const char * src, UInt32 bytes_size, char * dst, UInt32 unco reverseTranspose(src, buf, num_bits, tail); restoreUpperBits(buf, upper_min, upper_max, sign_bit, tail); store(buf, dst, tail); + dst += tail * sizeof(T); } + + return dst - original_dst; } template @@ -646,12 +651,12 @@ UInt32 compressData(const char * src, UInt32 src_size, char * dst, Variant varia } template -void decompressData(const char * src, UInt32 src_size, char * dst, UInt32 uncompressed_size, Variant variant) +UInt32 decompressData(const char * src, UInt32 src_size, char * dst, UInt32 uncompressed_size, Variant variant) { if (variant == Variant::Bit) - decompressData(src, src_size, dst, uncompressed_size); + return decompressData(src, src_size, dst, uncompressed_size); else - decompressData(src, src_size, dst, uncompressed_size); + return decompressData(src, src_size, dst, uncompressed_size); } } @@ -688,7 +693,7 @@ UInt32 CompressionCodecT64::doCompressData(const char * src, UInt32 src_size, ch throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with T64 codec"); } -void CompressionCodecT64::doDecompressData(const char * src, UInt32 src_size, char * dst, UInt32 uncompressed_size) const +UInt32 CompressionCodecT64::doDecompressData(const char * src, UInt32 src_size, char * dst, UInt32 uncompressed_size) const { if (!src_size) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress T64-encoded data"); @@ -703,34 +708,24 @@ void CompressionCodecT64::doDecompressData(const char * src, UInt32 src_size, ch switch (baseType(saved_type_id)) { case TypeIndex::Int8: - decompressData(src, src_size, dst, uncompressed_size, saved_variant); - return; + return decompressData(src, src_size, dst, uncompressed_size, saved_variant); case TypeIndex::Int16: - decompressData(src, src_size, dst, uncompressed_size, saved_variant); - return; + return decompressData(src, src_size, dst, uncompressed_size, saved_variant); case 
TypeIndex::Int32: - decompressData(src, src_size, dst, uncompressed_size, saved_variant); - return; + return decompressData(src, src_size, dst, uncompressed_size, saved_variant); case TypeIndex::Int64: - decompressData(src, src_size, dst, uncompressed_size, saved_variant); - return; + return decompressData(src, src_size, dst, uncompressed_size, saved_variant); case TypeIndex::UInt8: - decompressData(src, src_size, dst, uncompressed_size, saved_variant); - return; + return decompressData(src, src_size, dst, uncompressed_size, saved_variant); case TypeIndex::UInt16: - decompressData(src, src_size, dst, uncompressed_size, saved_variant); - return; + return decompressData(src, src_size, dst, uncompressed_size, saved_variant); case TypeIndex::UInt32: - decompressData(src, src_size, dst, uncompressed_size, saved_variant); - return; + return decompressData(src, src_size, dst, uncompressed_size, saved_variant); case TypeIndex::UInt64: - decompressData(src, src_size, dst, uncompressed_size, saved_variant); - return; + return decompressData(src, src_size, dst, uncompressed_size, saved_variant); default: - break; + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress T64-encoded data"); } - - throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress T64-encoded data"); } uint8_t CompressionCodecT64::getMethodByte() const diff --git a/src/Compression/CompressionCodecZSTD.cpp b/src/Compression/CompressionCodecZSTD.cpp index 7aecb652efc5..fb3eaaa54b63 100644 --- a/src/Compression/CompressionCodecZSTD.cpp +++ b/src/Compression/CompressionCodecZSTD.cpp @@ -57,12 +57,13 @@ UInt32 CompressionCodecZSTD::doCompressData(const char * source, UInt32 source_s } -void CompressionCodecZSTD::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const +UInt32 CompressionCodecZSTD::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const { size_t res = ZSTD_decompress(dest, uncompressed_size, source, source_size); if (ZSTD_isError(res)) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress ZSTD-encoded data: {}", std::string(ZSTD_getErrorName(res))); + return res; } CompressionCodecZSTD::CompressionCodecZSTD(int level_, int window_log_) diff --git a/src/Compression/CompressionCodecZSTD.h b/src/Compression/CompressionCodecZSTD.h index f363f1af69e1..eeaabca94130 100644 --- a/src/Compression/CompressionCodecZSTD.h +++ b/src/Compression/CompressionCodecZSTD.h @@ -24,7 +24,7 @@ class CompressionCodecZSTD : public ICompressionCodec UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; + UInt32 doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; bool isCompression() const override { return true; } bool isGenericCompression() const override { return true; } diff --git a/src/Compression/ICompressionCodec.cpp b/src/Compression/ICompressionCodec.cpp index aac0d66dd474..da5f74f320d3 100644 --- a/src/Compression/ICompressionCodec.cpp +++ b/src/Compression/ICompressionCodec.cpp @@ -116,9 +116,16 @@ UInt32 ICompressionCodec::decompress(const char * source, UInt32 source_size, ch throw Exception(decompression_error_code, "Can't decompress data with codec byte {} using codec with byte {}", method, our_method); UInt32 decompressed_size = readDecompressedBlockSize(source); - 
doDecompressData(&source[header_size], source_size - header_size, dest, decompressed_size); - - return decompressed_size; + UInt32 final_decompressed_size = doDecompressData(&source[header_size], source_size - header_size, dest, decompressed_size); + if (decompressed_size != final_decompressed_size) + throw Exception( + decompression_error_code, + "Can't decompress data: The size after decompression ({}) is different than the expected size ({}) for codec '{}'", + final_decompressed_size, + decompressed_size, + getCodecDesc()->formatForErrorMessage()); + + return final_decompressed_size; } UInt32 ICompressionCodec::readCompressedBlockSize(const char * source) const diff --git a/src/Compression/ICompressionCodec.h b/src/Compression/ICompressionCodec.h index 551f6db55191..46e6565dd461 100644 --- a/src/Compression/ICompressionCodec.h +++ b/src/Compression/ICompressionCodec.h @@ -141,7 +141,7 @@ class ICompressionCodec : private boost::noncopyable virtual UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const = 0; /// Actually decompress data without header - virtual void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const = 0; + virtual UInt32 doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const = 0; /// Construct and set codec description from codec name and arguments. Must be called in codec constructor. void setCodecDescription(const String & name, const ASTs & arguments = {}); diff --git a/src/Core/BackgroundSchedulePool.cpp b/src/Core/BackgroundSchedulePool.cpp index 08c2f22864f7..eb34e5e69e35 100644 --- a/src/Core/BackgroundSchedulePool.cpp +++ b/src/Core/BackgroundSchedulePool.cpp @@ -266,8 +266,14 @@ void BackgroundSchedulePool::join() shutdown = true; /// Unlock threads - tasks_cond_var.notify_all(); - delayed_tasks_cond_var.notify_all(); + { + std::lock_guard tasks_lock(tasks_mutex); + tasks_cond_var.notify_all(); + } + { + std::lock_guard tasks_lock(delayed_tasks_mutex); + delayed_tasks_cond_var.notify_all(); + } /// Join all worker threads to avoid any recursive calls to schedule()/scheduleAfter() from the task callbacks { diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 623d2f5cd882..9b740b2df509 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -318,20 +318,19 @@ const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool c std::optional Block::findSubcolumnByName(const std::string & name) const { - auto [name_in_storage, subcolumn_name] = Nested::splitName(name); - if (subcolumn_name.empty()) - return std::nullopt; - - const auto * column = findByName(name_in_storage, false); - if (!column) - return std::nullopt; + for (auto [column_name, subcolumn_name] : Nested::getAllColumnAndSubcolumnPairs(name)) + { + const auto * column = findByName(column_name, false); + if (!column) + continue; - auto subcolumn_type = column->type->tryGetSubcolumnType(subcolumn_name); - auto subcolumn = column->type->tryGetSubcolumn(subcolumn_name, column->column); - if (!subcolumn_type || !subcolumn) - return std::nullopt; + auto subcolumn_type = column->type->tryGetSubcolumnType(subcolumn_name); + auto subcolumn = column->type->tryGetSubcolumn(subcolumn_name, column->column); + if (subcolumn_type && subcolumn) + return ColumnWithTypeAndName(subcolumn, subcolumn_type, name); + } - return ColumnWithTypeAndName(subcolumn, subcolumn_type, name); + return std::nullopt; } std::optional Block::findColumnOrSubcolumnByName(const std::string & 
name) const diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp index f227384a1f09..493783927b7e 100644 --- a/src/Core/ServerSettings.cpp +++ b/src/Core/ServerSettings.cpp @@ -1037,6 +1037,12 @@ The policy on how to perform a scheduling of CPU slots specified by `concurrent_ DECLARE(UInt64, memory_worker_period_ms, 0, R"( Tick period of background memory worker which corrects memory tracker memory usages and cleans up unused pages during higher memory usage. If set to 0, default value will be used depending on the memory usage source )", 0) \ + DECLARE(Double, memory_worker_purge_dirty_pages_threshold_ratio, 0.2, R"( + The threshold ratio for jemalloc dirty pages relative to the memory available to ClickHouse server. When dirty pages size exceeds this ratio, the background memory worker forces purging of dirty pages. If set to 0, forced purging based on dirty pages ratio is disabled. + )", 0) \ + DECLARE(Double, memory_worker_purge_total_memory_threshold_ratio, 0.9, R"( + The threshold ratio for purging jemalloc relative to the memory available to ClickHouse server. When total memory usage exceeds this ratio, the background memory worker forces purging of dirty pages. If set to 0, forced purging based on total memory is disabled. + )", 0) \ DECLARE(Bool, memory_worker_correct_memory_tracker, 0, R"( Whether background memory worker should correct internal memory tracker based on the information from external sources like jemalloc and cgroups )", 0) \ diff --git a/src/Core/tests/gtest_BackgroundSchedulePool.cpp b/src/Core/tests/gtest_BackgroundSchedulePool.cpp index e949fb2fafb2..6eb37d79bccd 100644 --- a/src/Core/tests/gtest_BackgroundSchedulePool.cpp +++ b/src/Core/tests/gtest_BackgroundSchedulePool.cpp @@ -54,8 +54,10 @@ TEST(BackgroundSchedulePool, ScheduleAfter) }); ASSERT_EQ(task->activateAndSchedule(), true); - std::unique_lock lock(mutex); - condvar.wait(lock, [&] { return counter == ITERATIONS; }); + { + std::unique_lock lock(mutex); + condvar.wait(lock, [&] { return counter == ITERATIONS; }); + } ASSERT_EQ(counter, ITERATIONS); diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index 58929b99daa6..bb671740b71e 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -499,12 +499,6 @@ static DataTypePtr createObject(const ASTPtr & arguments, const DataTypeObject:: if (typed_paths.contains(path_with_type->path)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Found duplicated path with type: {}", path_with_type->path); - for (const auto & [path, _] : typed_paths) - { - if (path.starts_with(path_with_type->path + ".") || path_with_type->path.starts_with(path + ".")) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Found incompatible typed paths: {} and {}. 
One of them is a prefix of the other", path, path_with_type->path); - } - typed_paths.emplace(path_with_type->path, data_type); } else if (object_type_argument->skip_path) diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index d6380d2fdbca..b4a0747d8b7f 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -1,10 +1,10 @@ #include #include -#include -#include -#include #include +#include +#include +#include #include #include @@ -19,6 +19,7 @@ #include #include +#include namespace DB { @@ -27,6 +28,7 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int SIZES_OF_ARRAYS_DONT_MATCH; + extern const int BAD_ARGUMENTS; } namespace Nested @@ -61,6 +63,41 @@ std::pair splitName(std::string_view name, b return {name.substr(0, idx), name.substr(idx + 1)}; } +std::vector> getAllColumnAndSubcolumnPairs(std::string_view name) +{ + std::vector> pairs; + auto idx = name.find_first_of('.'); + while (idx != std::string::npos) + { + std::string_view column_name = name.substr(0, idx); + std::string_view subcolumn_name = name.substr(idx + 1); + if (!column_name.empty() && !subcolumn_name.empty()) + pairs.emplace_back(column_name, subcolumn_name); + idx = name.find_first_of('.', idx + 1); + } + + return pairs; +} + +std::pair getColumnAndSubcolumnPair(std::string_view name, const NameSet & storage_columns) +{ + for (auto [storage_column_name, subcolumn_name] : Nested::getAllColumnAndSubcolumnPairs(name)) + { + if (storage_columns.contains(String(storage_column_name))) + return {storage_column_name, subcolumn_name}; + } + + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Column or subcolumn '{}' is not found, there are only columns: {}", + name, + boost::join(storage_columns, ", ")); +} + +std::string_view getColumnFromSubcolumn(std::string_view name, const NameSet & storage_columns) +{ + return getColumnAndSubcolumnPair(name, storage_columns).first; +} std::string extractTableName(const std::string & nested_name) { diff --git a/src/DataTypes/NestedUtils.h b/src/DataTypes/NestedUtils.h index 894af62092ba..03d5ac883fa6 100644 --- a/src/DataTypes/NestedUtils.h +++ b/src/DataTypes/NestedUtils.h @@ -22,6 +22,21 @@ namespace Nested std::pair splitName(const std::string & name, bool reverse = false); std::pair splitName(std::string_view name, bool reverse = false); + /// Returns all possible pairs of column + subcolumn for specified name. + /// For example: + /// "a.b.c.d" -> ("a", "b.c.d"), ("a.b", "c.d"), ("a.b.c", "d") + std::vector> getAllColumnAndSubcolumnPairs(std::string_view name); + + /// Given all existing columns, return specific pair of column and subcolumn from specified name. + /// For example: + /// Columns: "a.x", "b", "c". Name: "a.x.y.z". Result: ("a.x", "y.z"). + std::pair getColumnAndSubcolumnPair(std::string_view name, const NameSet & storage_columns); + + /// Given all existing columns, return column name of the subcolumn with specified name. + /// For example: + /// Columns: "a.x", "b", "c". Name: "a.x.y.z". Result: "a.x". + std::string_view getColumnFromSubcolumn(std::string_view name, const NameSet & storage_columns); + /// Returns the prefix of the name to the first '.'. Or the name is unchanged if there is no dot. 
std::string extractTableName(const std::string & nested_name); diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp index 037237157bfa..86b90026f76d 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.cpp +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -233,11 +233,19 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams( null_map->push_back(0); } } + else if (is_null_map_subcolumn) + { + null_map->push_back(static_cast(1)); + } else { variant_column->insertDefault(); } } + else if (is_null_map_subcolumn) + { + null_map->push_back(static_cast(1)); + } else { variant_column->insertDefault(); diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 276dc87b413b..7071bc2dfee7 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -21,6 +21,8 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_READ_ALL_DATA; + extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; } void SerializationNullable::enumerateStreams( @@ -139,6 +141,16 @@ void SerializationNullable::deserializeBinaryBulkWithMultipleStreams( settings.path.back() = Substream::NullableElements; nested->deserializeBinaryBulkWithMultipleStreams(col.getNestedColumnPtr(), rows_offset, limit, settings, state, cache); settings.path.pop_back(); + + auto null_map = col.getNullMapColumnPtr(); + auto nested_column = col.getNestedColumnPtr(); + if (null_map->size() != nested_column->size()) + throw Exception( + settings.native_format ? ErrorCodes::INCORRECT_DATA : ErrorCodes::LOGICAL_ERROR, + "Sizes of nested column and null map of Nullable column are not equal after deserialization (null map size = {}, nested " + "column size = {})", + null_map->size(), + nested_column->size()); } diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index a9030100796c..e164e10d6bd1 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -806,6 +806,7 @@ void SerializationObject::serializeBinaryBulkWithMultipleStreams( return; } + column_object.validateDynamicPathsSizes(); const auto & dynamic_paths = column_object.getDynamicPaths(); const auto & shared_data = column_object.getSharedDataPtr(); @@ -1037,6 +1038,20 @@ void SerializationObject::deserializeBinaryBulkWithMultipleStreams( settings.path.pop_back(); settings.path.pop_back(); + /// Verify that all typed paths, dynamic paths and shared data has consistent sizes + size_t expected_size = shared_data->size(); + for (const auto & [path, path_column] : typed_paths) + { + if (path_column->size() != expected_size) + throw Exception(settings.native_format ? ErrorCodes::INCORRECT_DATA : ErrorCodes::LOGICAL_ERROR, "Unexpected size of typed path {}: {}. Expected size {}", path, path_column->size(), expected_size); + } + + for (const auto & [path, path_column] : dynamic_paths) + { + if (path_column->size() != expected_size) + throw Exception(settings.native_format ? ErrorCodes::INCORRECT_DATA : ErrorCodes::LOGICAL_ERROR, "Unexpected size of dynamic path {}: {}. 
Expected size {}", path, path_column->size(), expected_size); + } + column_object.repairDuplicatesInDynamicPathsAndSharedData(shared_data_previous_size); } diff --git a/src/DataTypes/Serializations/SerializationObjectHelpers.cpp b/src/DataTypes/Serializations/SerializationObjectHelpers.cpp index 8bee9306e919..71905982a674 100644 --- a/src/DataTypes/Serializations/SerializationObjectHelpers.cpp +++ b/src/DataTypes/Serializations/SerializationObjectHelpers.cpp @@ -226,6 +226,9 @@ void deserializeIndexesAndCollectPathsImpl(ColumnString & paths_column, ReadBuff T index; readBinaryLittleEndian(index, istr); + if (index >= paths.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Object path index is out of range: {} >= {}", static_cast(index), paths.size()); + const String & path = paths[index]; offset += path.size(); offsets.push_back(offset); @@ -282,7 +285,6 @@ void deserializeIndexesAndCollectPaths(IColumn & paths_column, ReadBuffer & istr default: throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected column type of paths indexes: {}", indexes_type->getName()); } - } } diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 108c80576e77..da4d5109f88a 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH; extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; } @@ -801,6 +802,14 @@ void SerializationTuple::deserializeBinaryBulkWithMultipleStreams( for (size_t i = 0; i < elems.size(); ++i) elems[i]->deserializeBinaryBulkWithMultipleStreams(column_tuple.getColumnPtr(i), rows_offset, limit, settings, tuple_state->states[i], cache); + /// Verify that all Tuple elements have the same size. + size_t expected_size = column_tuple.getColumn(0).size(); + for (size_t i = 1; i < elems.size(); ++i) + { + if (column_tuple.getColumn(i).size() != expected_size) + throw Exception(settings.native_format ? ErrorCodes::INCORRECT_DATA : ErrorCodes::LOGICAL_ERROR, "Unexpected size of tuple element {}: {}. Expected size: {}", i, column_tuple.getColumn(i).size(), expected_size); + } + typeid_cast(*mutable_column).addSize(column_tuple.getColumn(0).size()); } diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 71e8efcfd5f4..8bf1a829abe2 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -607,6 +607,10 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( col.getVariantPtrByLocalDiscriminator(i), variant_rows_offsets[i], variant_limits[i], settings, variant_state->variant_states[i], cache); settings.path.pop_back(); + + /// Verify that we deserialized data of this variant. + if (variant_limits[i] && col.getVariantPtrByLocalDiscriminator(i)->empty()) + throw Exception(settings.native_format ? 
ErrorCodes::INCORRECT_DATA : ErrorCodes::LOGICAL_ERROR, "Variant {} is empty, but expected to be read {} values", variant_names[i], variant_limits[i]); } settings.path.pop_back(); diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index c736639608f6..3a0aecd573a8 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1,6 +1,8 @@ +#include #include #include +#include #include #include @@ -104,6 +106,7 @@ namespace ErrorCodes extern const int QUERY_IS_PROHIBITED; extern const int SUPPORT_IS_DISABLED; extern const int ASYNC_LOAD_CANCELED; + extern const int SYNTAX_ERROR; } namespace FailPoints @@ -126,6 +129,21 @@ static inline String getHostID(ContextPtr global_context, const UUID & db_uuid, return Cluster::Address::toString(getFQDNOrHostName(), port) + ':' + toString(db_uuid); } +// Return +static inline std::tuple parseHostID(const String & content) +{ + auto pos = content.find_last_of(':'); + if (pos == std::string::npos || pos + 1 >= content.size()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid host ID '{}'", content); + + auto [address, port] = Cluster::Address::fromString(content.substr(0, pos)); + UUID db_uuid; + if (!tryParse(db_uuid, content.substr(pos + 1))) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid host ID '{}'", content); + + return {address, port, db_uuid}; +} + static inline UInt64 getMetadataHash(const String & table_name, const String & metadata) { SipHash hash; @@ -511,10 +529,39 @@ void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(LoadingStrictnessL if (replica_host_id != host_id && replica_host_id != host_id_default) { - throw Exception( - ErrorCodes::REPLICA_ALREADY_EXISTS, - "Replica {} of shard {} of replicated database at {} already exists. Replica host ID: '{}', current host ID: '{}'", - replica_name, shard_name, zookeeper_path, replica_host_id, host_id); + UUID uuid_in_keeper = UUIDHelpers::Nil; + try + { + uuid_in_keeper = std::get<2>(parseHostID(replica_host_id)); + } + catch (const Exception & e) + { + LOG_WARNING(log, "Failed to parse host_id {} in zookeeper, error {}", replica_host_id, e.what()); + } + + if (uuid_in_keeper != db_uuid) + throw Exception( + ErrorCodes::REPLICA_ALREADY_EXISTS, + "Replica {} of shard {} of replicated database at {} already exists. Replica host ID: '{}', current host ID: '{}'", + replica_name, + shard_name, + zookeeper_path, + replica_host_id, + host_id); + + // After restarting, InterserverIOAddress might change (e.g: config updated, `getFQDNOrHostName` returns a different one) + // If the UUID in the keeper is the same as the current server UUID, we will update the host_id in keeper + LOG_INFO( + log, + "Replicated database replica: {}, shard {}, zk_path: {} already exists with the same UUID, replica host ID: '{}', " + "current host ID: '{}', will set the host_id to the current host ID", + replica_name, + shard_name, + zookeeper_path, + replica_host_id, + host_id); + current_zookeeper->set(replica_path, host_id, -1); + createEmptyLogEntry(current_zookeeper); } /// Before 24.6 we always created host_id with insecure port, even if cluster_auth_info.cluster_secure_connection was true. 
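(Editorial aside on the host ID handling in the hunk above: a replica registers itself in Keeper as "host:port:database-UUID" (see getHostID), so the database UUID can be recovered by splitting on the last ':' and the remaining prefix handed to Cluster::Address::fromString. The snippet below is a simplified, self-contained illustration of that parsing idea only; it uses plain std::string parts instead of the Cluster::Address/UUID helpers used in the actual change, and tryParseHostIdParts is a made-up name.)

#include <optional>
#include <string>
#include <tuple>

/// Simplified, hypothetical illustration of parsing a "host:port:uuid" host ID.
/// Splitting on the *last* ':' keeps the "host:port" prefix intact for further parsing.
static std::optional<std::tuple<std::string, std::string, std::string>>
tryParseHostIdParts(const std::string & content)
{
    auto uuid_pos = content.find_last_of(':');
    if (uuid_pos == std::string::npos || uuid_pos + 1 >= content.size())
        return std::nullopt;                        /// malformed: no UUID part

    std::string address_and_port = content.substr(0, uuid_pos);
    std::string uuid = content.substr(uuid_pos + 1);

    auto port_pos = address_and_port.find_last_of(':');
    if (port_pos == std::string::npos || port_pos + 1 >= address_and_port.size())
        return std::nullopt;                        /// malformed: no port part

    return std::make_tuple(address_and_port.substr(0, port_pos),
                           address_and_port.substr(port_pos + 1),
                           uuid);
}

With the UUID recovered this way, the registration logic above only raises REPLICA_ALREADY_EXISTS when the UUID stored in Keeper differs from the local db_uuid; if they match, the entry is treated as the same replica whose address changed, and its host_id is simply rewritten.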
@@ -1216,7 +1263,7 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_ } } -BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, QueryFlags flags) +BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, QueryFlags flags, DDLGuardPtr && database_guard) { waitDatabaseStarted(); @@ -1259,7 +1306,7 @@ BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, Contex } - return getQueryStatus(node_path, fs::path(zookeeper_path) / "replicas", query_context, hosts_to_wait); + return getQueryStatus(node_path, fs::path(zookeeper_path) / "replicas", query_context, hosts_to_wait, std::move(database_guard)); } static UUID getTableUUIDIfReplicated(const String & metadata, ContextPtr context) @@ -1432,9 +1479,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep for (const auto & table_name : tables_to_detach) { - DDLGuardPtr table_guard = DatabaseCatalog::instance().getDDLGuard(db_name, table_name); - if (getDatabaseName() != db_name) - throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database was renamed, will retry"); + DDLGuardPtr table_guard = DatabaseCatalog::instance().getDDLGuard(db_name, table_name, this); auto table = tryGetTable(table_name, getContext()); if (!table) @@ -1449,7 +1494,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep String to_name = fmt::format("{}_{}_{}", broken_table_name, max_log_ptr, thread_local_rng() % 1000); LOG_DEBUG(log, "Will RENAME TABLE {} TO {}.{}", backQuoteIfNeed(broken_table_name), backQuoteIfNeed(to_database_name), backQuoteIfNeed(to_name)); assert(db_name < to_database_name); - DDLGuardPtr to_table_guard = DatabaseCatalog::instance().getDDLGuard(to_database_name, to_name); + DDLGuardPtr to_table_guard = DatabaseCatalog::instance().getDDLGuard(to_database_name, to_name, nullptr); auto to_db_ptr = DatabaseCatalog::instance().getDatabase(to_database_name); std::lock_guard lock{metadata_mutex}; @@ -1509,8 +1554,8 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep auto rename_table = [&](String from, String to) { LOG_DEBUG(log, "Will RENAME TABLE {} TO {}", backQuoteIfNeed(from), backQuoteIfNeed(to)); - DDLGuardPtr table_guard = DatabaseCatalog::instance().getDDLGuard(db_name, std::min(from, to)); - DDLGuardPtr to_table_guard = DatabaseCatalog::instance().getDDLGuard(db_name, std::max(from, to)); + DDLGuardPtr table_guard = DatabaseCatalog::instance().getDDLGuard(db_name, std::min(from, to), this); + DDLGuardPtr to_table_guard = DatabaseCatalog::instance().getDDLGuard(db_name, std::max(from, to), this); std::lock_guard lock{metadata_mutex}; UInt64 new_digest = tables_metadata_digest; @@ -2349,13 +2394,13 @@ void registerDatabaseReplicated(DatabaseFactory & factory) } BlockIO DatabaseReplicated::getQueryStatus( - const String & node_path, const String & replicas_path, ContextPtr context_, const Strings & hosts_to_wait) + const String & node_path, const String & replicas_path, ContextPtr context_, const Strings & hosts_to_wait, DDLGuardPtr && database_guard) { BlockIO io; if (context_->getSettingsRef()[Setting::distributed_ddl_task_timeout] == 0) return io; - auto source = std::make_shared(node_path, replicas_path, context_, hosts_to_wait); + auto source = std::make_shared(node_path, replicas_path, context_, hosts_to_wait, std::move(database_guard)); io.pipeline = QueryPipeline(std::move(source)); if 
(context_->getSettingsRef()[Setting::distributed_ddl_output_mode] == DistributedDDLOutputMode::NONE diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 0100723e28c5..4348a30caecb 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -71,7 +71,7 @@ class DatabaseReplicated : public DatabaseAtomic /// Try to execute DLL query on current host as initial query. If query is succeed, /// then it will be executed on all replicas. - BlockIO tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, QueryFlags flags) override; + BlockIO tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, QueryFlags flags, DDLGuardPtr && database_guard) override; bool canExecuteReplicatedMetadataAlter() const override; @@ -182,7 +182,7 @@ class DatabaseReplicated : public DatabaseAtomic void reinitializeDDLWorker(); static BlockIO - getQueryStatus(const String & node_path, const String & replicas_path, ContextPtr context, const Strings & hosts_to_wait); + getQueryStatus(const String & node_path, const String & replicas_path, ContextPtr context, const Strings & hosts_to_wait, DDLGuardPtr && database_guard); String zookeeper_path; String shard_name; diff --git a/src/Databases/IDatabase.cpp b/src/Databases/IDatabase.cpp index 2ee26f233f9b..5619947ab550 100644 --- a/src/Databases/IDatabase.cpp +++ b/src/Databases/IDatabase.cpp @@ -204,7 +204,7 @@ void IDatabase::stopReplication() throw Exception(ErrorCodes::LOGICAL_ERROR, "Database engine {} does not run a replication thread", getEngineName()); } -BlockIO IDatabase::tryEnqueueReplicatedDDL(const ASTPtr & /*query*/, ContextPtr /*query_context*/, [[maybe_unused]] QueryFlags flags) /// NOLINT +BlockIO IDatabase::tryEnqueueReplicatedDDL(const ASTPtr & /*query*/, ContextPtr /*query_context*/, [[maybe_unused]] QueryFlags flags, DDLGuardPtr && /*database_guard*/) /// NOLINT { throw Exception(ErrorCodes::LOGICAL_ERROR, "Database engine {} does not have replicated DDL queue", getEngineName()); } diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 9f4fa18cc71a..86e69842dd15 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -425,7 +426,7 @@ class IDatabase : public std::enable_shared_from_this virtual bool shouldReplicateQuery(const ContextPtr & /*query_context*/, const ASTPtr & /*query_ptr*/) const { return false; } - virtual BlockIO tryEnqueueReplicatedDDL(const ASTPtr & /*query*/, ContextPtr /*query_context*/, [[maybe_unused]] QueryFlags flags); + virtual BlockIO tryEnqueueReplicatedDDL(const ASTPtr & /*query*/, ContextPtr /*query_context*/, [[maybe_unused]] QueryFlags flags, DDLGuardPtr && /*database_guard*/); /// Returns CREATE TABLE queries and corresponding tables prepared for writing to a backup. 
virtual std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & context) const; diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index bd21af241fb2..c49ccf79284a 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -508,11 +508,12 @@ void registerDatabaseMaterializedPostgreSQL(DatabaseFactory & factory) { auto * engine_define = args.create_query.storage; const ASTFunction * engine = engine_define->engine; - ASTs & engine_args = engine->arguments->children; - const String & engine_name = engine_define->engine->name; if (!engine->arguments) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine `{}` must have arguments", engine_name); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine `MaterializedPostgreSQL` must have arguments"); + + ASTs & engine_args = engine->arguments->children; + const String & engine_name = engine_define->engine->name; StoragePostgreSQL::Configuration configuration; diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index 424926ec86d9..379fd82df776 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -532,11 +532,12 @@ void registerDatabasePostgreSQL(DatabaseFactory & factory) { auto * engine_define = args.create_query.storage; const ASTFunction * engine = engine_define->engine; - ASTs & engine_args = engine->arguments->children; - const String & engine_name = engine_define->engine->name; if (!engine->arguments) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine `{}` must have arguments", engine_name); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine `PostgreSQL` must have arguments"); + + ASTs & engine_args = engine->arguments->children; + const String & engine_name = engine_define->engine->name; auto use_table_cache = false; StoragePostgreSQL::Configuration configuration; diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index dac01956733c..6c46e0f7366c 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -331,6 +331,12 @@ class DiskEncrypted : public IDisk bool supportsChmod() const override { return delegate->supportsChmod(); } bool isSymlinkSupported() const override { return delegate->isSymlinkSupported(); } + bool isReadOnly() const override { return delegate->isReadOnly(); } + bool isWriteOnce() const override { return delegate->isWriteOnce(); } + bool isPlain() const override { return delegate->isPlain(); } + + ObjectStoragePtr getObjectStorage() override { return delegate->getObjectStorage(); } + SyncGuardPtr getDirectorySyncGuard(const String & path) const override; std::shared_ptr createEncryptedTransaction() const diff --git a/src/Functions/IFunctionCustomWeek.h b/src/Functions/IFunctionCustomWeek.h index fc9b5a31dd29..99941e5c186c 100644 --- a/src/Functions/IFunctionCustomWeek.h +++ b/src/Functions/IFunctionCustomWeek.h @@ -3,6 +3,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -29,25 +32,47 @@ class IFunctionCustomWeek : public IFunction bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } + bool canBeExecutedOnDefaultArguments() const override + { + /// String default is empty (not parseable as DateTime), so avoid executing on LC 
default dictionary key + if constexpr (Transform::value_may_be_string) + return false; + + return true; + } + bool hasInformationAboutMonotonicity() const override { return true; } Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override { + const IDataType * type_ptr = &type; + + if (const auto * lc_type = checkAndGetDataType(type_ptr)) + type_ptr = lc_type->getDictionaryType().get(); + + if (const auto * nullable_type = checkAndGetDataType(type_ptr)) + type_ptr = nullable_type->getNestedType().get(); + + const IFunction::Monotonicity is_not_monotonic; + + /// Parsing of String arguments is not monotonic w.r.t. String ordering + if (checkAndGetDataType(type_ptr)) + return is_not_monotonic; + if constexpr (std::is_same_v) return {.is_monotonic = true, .is_always_monotonic = true}; + if (left.isNull() || right.isNull()) + return is_not_monotonic; + const IFunction::Monotonicity is_monotonic = {.is_monotonic = true}; - const IFunction::Monotonicity is_not_monotonic; /// This method is called only if the function has one argument. Therefore, we do not care about the non-local time zone. const DateLUTImpl & date_lut = DateLUT::instance(); - if (left.isNull() || right.isNull()) - return {}; - /// The function is monotonous on the [left, right] segment, if the factor transformation returns the same values for them. - if (checkAndGetDataType(&type)) + if (checkAndGetDataType(type_ptr)) { return Transform::FactorTransform::execute(UInt16(left.safeGet()), date_lut) == Transform::FactorTransform::execute(UInt16(right.safeGet()), date_lut) @@ -55,9 +80,16 @@ class IFunctionCustomWeek : public IFunction : is_not_monotonic; } - if (checkAndGetDataType(&type)) + if (checkAndGetDataType(type_ptr)) { + return Transform::FactorTransform::execute(Int32(left.safeGet()), date_lut) + == Transform::FactorTransform::execute(Int32(right.safeGet()), date_lut) + ? is_monotonic + : is_not_monotonic; + } + if (checkAndGetDataType(type_ptr)) + { const auto & left_date_time = left.safeGet(); TransformDateTime64 transformer_left(left_date_time.getScale()); @@ -70,10 +102,15 @@ class IFunctionCustomWeek : public IFunction : is_not_monotonic; } - return Transform::FactorTransform::execute(UInt32(left.safeGet()), date_lut) - == Transform::FactorTransform::execute(UInt32(right.safeGet()), date_lut) - ? is_monotonic - : is_not_monotonic; + if (checkAndGetDataType(type_ptr)) + { + return Transform::FactorTransform::execute(UInt32(left.safeGet()), date_lut) + == Transform::FactorTransform::execute(UInt32(right.safeGet()), date_lut) + ? is_monotonic + : is_not_monotonic; + } + + return is_not_monotonic; } protected: diff --git a/src/Functions/URL/domain.h b/src/Functions/URL/domain.h index 7b711aee646e..a3d11b0497ac 100644 --- a/src/Functions/URL/domain.h +++ b/src/Functions/URL/domain.h @@ -26,6 +26,9 @@ inline std::string_view checkAndReturnHost(const Pos & pos, const Pos & dot_pos, /// @return empty string view if the host is not valid (i.e. it does not have dot, or there no symbol after dot). 
inline std::string_view getURLHostRFC(const char * data, size_t size) { + if (size < 2) + return std::string_view{}; + Pos pos = data; Pos end = data + size; diff --git a/src/Functions/randomStringUTF8.cpp b/src/Functions/randomStringUTF8.cpp index 03697ff7c89e..dfc767027453 100644 --- a/src/Functions/randomStringUTF8.cpp +++ b/src/Functions/randomStringUTF8.cpp @@ -68,9 +68,14 @@ class FunctionRandomStringUTF8 : public IFunction const IColumn & col_length = *arguments[0].column; size_t total_codepoints = 0; + const size_t max_total_codepoints = 1ULL << 29; for (size_t row_num = 0; row_num < input_rows_count; ++row_num) { size_t codepoints = col_length.getUInt(row_num); + + if (codepoints > max_total_codepoints - total_codepoints) + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size in function {}", getName()); + total_codepoints += codepoints; } @@ -78,9 +83,6 @@ class FunctionRandomStringUTF8 : public IFunction * per generated code point ~= 3.85. So, reserving for coefficient 4 will not be an overhead */ - if (total_codepoints > (1 << 29)) - throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size in function {}", getName()); - size_t max_byte_size = total_codepoints * 4 + input_rows_count; data_to.resize(max_byte_size); @@ -117,7 +119,7 @@ class FunctionRandomStringUTF8 : public IFunction size_t codepoints = col_length.getUInt(row_num); auto * pos = data_to.data() + offset; - for (size_t i = 0; i < codepoints; i +=2) + for (size_t i = 0; i < codepoints; i += 2) { UInt64 rand = rng(); /// that's the bottleneck diff --git a/src/IO/Archives/ZipArchiveWriter.cpp b/src/IO/Archives/ZipArchiveWriter.cpp index ac72c3ded02e..a9ca9e6f05f0 100644 --- a/src/IO/Archives/ZipArchiveWriter.cpp +++ b/src/IO/Archives/ZipArchiveWriter.cpp @@ -385,7 +385,7 @@ int ZipArchiveWriter::compressionMethodToInt(const String & compression_method_) String ZipArchiveWriter::intToCompressionMethod(int compression_method_) { - switch (compression_method_) // NOLINT(bugprone-switch-missing-default-case) + switch (compression_method_) { case MZ_COMPRESS_METHOD_STORE: return kStore; case MZ_COMPRESS_METHOD_DEFLATE: return kDeflate; @@ -393,14 +393,15 @@ String ZipArchiveWriter::intToCompressionMethod(int compression_method_) case MZ_COMPRESS_METHOD_LZMA: return kLzma; case MZ_COMPRESS_METHOD_ZSTD: return kZstd; case MZ_COMPRESS_METHOD_XZ: return kXz; + default: + throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", compression_method_); } - throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", compression_method_); } /// Checks that a passed compression method can be used. void ZipArchiveWriter::checkCompressionMethodIsEnabled(int compression_method_) { - switch (compression_method_) // NOLINT(bugprone-switch-missing-default-case) + switch (compression_method_) { case MZ_COMPRESS_METHOD_STORE: [[fallthrough]]; case MZ_COMPRESS_METHOD_DEFLATE: @@ -417,8 +418,10 @@ void ZipArchiveWriter::checkCompressionMethodIsEnabled(int compression_method_) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "bzip2 compression method is disabled"); #endif } + + default: + throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", compression_method_); } - throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", compression_method_); } /// Checks that encryption is enabled. 
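(Editorial aside before the next file: two related conventions recur across the compression codec and archive changes above and end with the ZipArchiveWriter switch just shown. First, every switch over an encoded byte width or method gets an explicit default: that throws, replacing the NOLINT(bugprone-switch-missing-default-case) suppression. Second, doDecompressData now returns the number of bytes it wrote so that ICompressionCodec::decompress can compare it against the size recorded in the block header. The sketch below is a minimal, self-contained illustration of that combination under assumed names; decodeForWidth and checkedDecode are made up for the example and are not the ClickHouse API.)

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <stdexcept>
#include <string>

/// Hypothetical width-dispatched decoder: every case reports how many bytes it wrote,
/// and an unexpected width throws instead of silently falling through.
static uint32_t decodeForWidth(uint8_t byte_width, const char * src, uint32_t src_size,
                               char * dest, uint32_t dest_size)
{
    switch (byte_width)
    {
        case 1:
        case 2:
        case 4:
        case 8:
        {
            /// Stand-in for a per-type decode step: copy what fits and report it.
            uint32_t written = std::min(src_size, dest_size);
            std::memcpy(dest, src, written);
            return written;
        }
        default:
            throw std::logic_error("unexpected byte width " + std::to_string(byte_width));
    }
}

/// Hypothetical caller mirroring the new ICompressionCodec::decompress check: the number
/// of bytes produced by the codec must match the size recorded in the block header.
static void checkedDecode(uint8_t byte_width, const char * src, uint32_t src_size,
                          char * dest, uint32_t expected_size)
{
    uint32_t written = decodeForWidth(byte_width, src, src_size, dest, expected_size);
    if (written != expected_size)
        throw std::runtime_error("decompressed size mismatch: got " + std::to_string(written)
                                 + ", expected " + std::to_string(expected_size));
}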
diff --git a/src/IO/Protobuf/ProtobufZeroCopyOutputStreamFromWriteBuffer.cpp b/src/IO/Protobuf/ProtobufZeroCopyOutputStreamFromWriteBuffer.cpp index 2dccea140120..876dbb1b1404 100644 --- a/src/IO/Protobuf/ProtobufZeroCopyOutputStreamFromWriteBuffer.cpp +++ b/src/IO/Protobuf/ProtobufZeroCopyOutputStreamFromWriteBuffer.cpp @@ -27,6 +27,7 @@ ProtobufZeroCopyOutputStreamFromWriteBuffer::ProtobufZeroCopyOutputStreamFromWri bool ProtobufZeroCopyOutputStreamFromWriteBuffer::Next(void ** data, int * size) { + out->nextIfAtEnd(); *data = out->position(); *size = static_cast(out->available()); out->position() += *size; diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index 93562e7bfed6..0e7f3a496a59 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include @@ -56,7 +57,7 @@ ssize_t ReadBufferFromPocoSocketBase::socketReceiveBytesImpl(char * ptr, size_t if (async_callback) { socket.setBlocking(false); - SCOPE_EXIT(socket.setBlocking(true)); + SCOPE_EXIT_SAFE(socket.setBlocking(true)); bool secure = socket.secure(); bytes_read = socket.impl()->receiveBytes(ptr, static_cast(size)); diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index d1554262f7e9..eb1ed86eee29 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -417,12 +417,12 @@ std::unique_ptr ReadWriteBufferFromHTTP::initialize() /// Having `200 OK` instead of `206 Partial Content` is acceptable in case we retried with range.begin == 0. if (getOffset() != 0) { - /// Retry 200OK + /// Retry 200 OK if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_OK) { String explanation = fmt::format( "Cannot read with range: [{}, {}] (response status: {}, reason: {}), will retry", - *read_range.begin, read_range.end ? toString(*read_range.end) : "-", + getOffset(), read_range.end ? toString(*read_range.end) : "-", toString(response.getStatus()), response.getReason()); /// it is retriable error @@ -436,7 +436,7 @@ std::unique_ptr ReadWriteBufferFromHTTP::initialize() throw Exception( ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, "Cannot read with range: [{}, {}] (response status: {}, reason: {})", - *read_range.begin, + getOffset(), read_range.end ? toString(*read_range.end) : "-", toString(response.getStatus()), response.getReason()); diff --git a/src/IO/WriteBufferFromPocoSocket.cpp b/src/IO/WriteBufferFromPocoSocket.cpp index 9f4746fde70c..c7eb5724e374 100644 --- a/src/IO/WriteBufferFromPocoSocket.cpp +++ b/src/IO/WriteBufferFromPocoSocket.cpp @@ -1,6 +1,7 @@ #include #include +#include #include @@ -47,7 +48,7 @@ ssize_t WriteBufferFromPocoSocket::socketSendBytesImpl(const char * ptr, size_t { socket.setBlocking(false); /// Set socket to blocking mode at the end. 
- SCOPE_EXIT(socket.setBlocking(true)); + SCOPE_EXIT_SAFE(socket.setBlocking(true)); bool secure = socket.secure(); res = socket.impl()->sendBytes(ptr, static_cast(size)); diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 7a7a4d476d8a..300a43f3d779 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1006,12 +1006,17 @@ ColumnsWithTypeAndName ActionsDAG::evaluatePartialResult( bool has_all_arguments = true; for (size_t i = 0; i < arguments.size(); ++i) { - arguments[i] = node_to_column[node->children[i]]; + const auto * child = node->children[i]; + if (auto it = node_to_column.find(child); it != node_to_column.end()) + arguments[i] = it->second; + else + arguments[i] = ColumnWithTypeAndName{nullptr, child->result_type, child->result_name}; + if (!arguments[i].column) has_all_arguments = false; + if (!has_all_arguments && throw_on_error) - throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, - "Not found column {}", node->children[i]->result_name); + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Not found column {}", child->result_name); } if (node->type == ActionsDAG::ActionType::INPUT && throw_on_error) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index a723cdb81084..796bb4aa9103 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -46,8 +46,10 @@ namespace CurrentMetrics { extern const Metric FilesystemCacheDownloadQueueElements; extern const Metric FilesystemCacheReserveThreads; + extern const Metric FilesystemCacheSizeLimit; } + namespace DB { @@ -164,6 +166,8 @@ FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & s if (settings[FileCacheSetting::enable_filesystem_query_cache_limit]) query_limit = std::make_unique(); + + CurrentMetrics::add(CurrentMetrics::FilesystemCacheSizeLimit, settings[FileCacheSetting::max_size]); } const FileCache::UserInfo & FileCache::getCommonUser() diff --git a/src/Interpreters/Cache/IFileCachePriority.cpp b/src/Interpreters/Cache/IFileCachePriority.cpp index 280af0c942a2..97b864641568 100644 --- a/src/Interpreters/Cache/IFileCachePriority.cpp +++ b/src/Interpreters/Cache/IFileCachePriority.cpp @@ -2,10 +2,6 @@ #include #include -namespace CurrentMetrics -{ - extern const Metric FilesystemCacheSizeLimit; -} namespace DB { @@ -18,7 +14,6 @@ namespace ErrorCodes IFileCachePriority::IFileCachePriority(size_t max_size_, size_t max_elements_) : max_size(max_size_), max_elements(max_elements_) { - CurrentMetrics::add(CurrentMetrics::FilesystemCacheSizeLimit, max_size_); } IFileCachePriority::Entry::Entry( diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 902c6a551460..455ffe68508c 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -972,7 +972,13 @@ struct ContextSharedPart : boost::noncopyable /// Stop trace collector if any trace_collector.reset(); + } + + { /// Stop zookeeper connection + std::lock_guard lock(zookeeper_mutex); + if (zookeeper) + zookeeper->finalize("shutdown"); zookeeper.reset(); } diff --git a/src/Interpreters/DDLGuard.h b/src/Interpreters/DDLGuard.h new file mode 100644 index 000000000000..e42a908ecc1e --- /dev/null +++ b/src/Interpreters/DDLGuard.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +/// Allows executing DDL query only in one thread. 
+/// Puts an element into the map, locks the table's mutex, and counts how many threads run a parallel query on the table; +/// when the counter reaches 0, the destructor erases the element. +/// If the element already exists in the map, waits until the DDL query finishes in the other thread. +class DDLGuard +{ +public: + struct Entry + { + std::unique_ptr mutex; + UInt32 counter; + }; + + /// Element name -> (mutex, counter). + /// NOTE: using std::map here (and not std::unordered_map) to avoid iterator invalidation on insertion. + using Map = std::map; + + DDLGuard( + Map & map_, + SharedMutex & db_mutex_, + std::unique_lock guards_lock_, + const String & elem, + const String & database_name); + ~DDLGuard(); + + /// Unlocks table name, keeps holding read lock for database name + void releaseTableLock() noexcept; + +private: + Map & map; + SharedMutex & db_mutex; + Map::iterator it; + std::unique_lock guards_lock; + std::unique_lock table_lock; + bool table_lock_removed = false; + bool is_database_guard = false; +}; + +using DDLGuardPtr = std::unique_ptr; + +} diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 727a05a22f4a..be910cf3de23 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1017,15 +1017,24 @@ std::vector DatabaseCatalog::getDependentViews(const StorageID & sour return view_dependencies.getDependencies(source_table_id); } -DDLGuardPtr DatabaseCatalog::getDDLGuard(const String & database, const String & table) +DDLGuardPtr DatabaseCatalog::getDDLGuard(const String & database, const String & table, const IDatabase * expected_database) { if (database.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot obtain lock for empty database"); - std::unique_lock lock(ddl_guards_mutex); - /// TSA does not support unique_lock - auto db_guard_iter = TSA_SUPPRESS_WARNING_FOR_WRITE(ddl_guards).try_emplace(database).first; - DatabaseGuard & db_guard = db_guard_iter->second; - return std::make_unique(db_guard.table_guards, db_guard.database_ddl_mutex, std::move(lock), table, database); + + DDLGuardPtr guard; + { + std::unique_lock lock(ddl_guards_mutex); + /// TSA does not support unique_lock + auto db_guard_iter = TSA_SUPPRESS_WARNING_FOR_WRITE(ddl_guards).try_emplace(database).first; + DatabaseGuard & db_guard = db_guard_iter->second; + guard = std::make_unique(db_guard.table_guards, db_guard.database_ddl_mutex, std::move(lock), table, database); + } + + if (expected_database && expected_database != tryGetDatabase(database).get()) + throw Exception(ErrorCodes::UNFINISHED, "The database {} was dropped or renamed concurrently", database); + + return guard; } DatabaseCatalog::DatabaseGuard & DatabaseCatalog::getDatabaseGuard(const String & database) diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index c95aaf4654d9..ee426b888688 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -37,45 +38,6 @@ using Databases = std::map>; using DiskPtr = std::shared_ptr; using TableNamesSet = std::unordered_set; -/// Allows executing DDL query only in one thread. -/// Puts an element into the map, locks tables's mutex, counts how much threads run parallel query on the table, -/// when counter is 0 erases element in the destructor. -/// If the element already exists in the map, waits when ddl query will be finished in other thread. 
-class DDLGuard -{ -public: - struct Entry - { - std::unique_ptr mutex; - UInt32 counter; - }; - - /// Element name -> (mutex, counter). - /// NOTE: using std::map here (and not std::unordered_map) to avoid iterator invalidation on insertion. - using Map = std::map; - - DDLGuard( - Map & map_, - SharedMutex & db_mutex_, - std::unique_lock guards_lock_, - const String & elem, - const String & database_name); - ~DDLGuard(); - - /// Unlocks table name, keeps holding read lock for database name - void releaseTableLock() noexcept; - -private: - Map & map; - SharedMutex & db_mutex; - Map::iterator it; - std::unique_lock guards_lock; - std::unique_lock table_lock; - bool table_lock_removed = false; - bool is_database_guard = false; -}; - -using DDLGuardPtr = std::unique_ptr; class FutureSetFromSubquery; using FutureSetFromSubqueryPtr = std::shared_ptr; @@ -150,7 +112,7 @@ class DatabaseCatalog : boost::noncopyable, WithMutableContext void loadMarkedAsDroppedTables(); /// Get an object that protects the table from concurrently executing multiple DDL operations. - DDLGuardPtr getDDLGuard(const String & database, const String & table); + DDLGuardPtr getDDLGuard(const String & database, const String & table, const IDatabase * expected_database); /// Get an object that protects the database from concurrent DDL queries all tables in the database std::unique_lock getExclusiveDDLGuardForDatabase(const String & database); diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp index c2671c8e9d2b..a5bb8677ad26 100644 --- a/src/Interpreters/HashJoin/HashJoin.cpp +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -774,7 +774,7 @@ bool HashJoin::addBlockToJoin(const Block & block, ScatteredBlock::Selector sele all_values_unique); if (flag_per_row) - used_flags->reinit, MapsAll>>(&stored_columns->columns); + used_flags->reinit, MapsAll>>(&stored_columns->columns, stored_columns->selector); }); } diff --git a/src/Interpreters/HashJoin/JoinUsedFlags.h b/src/Interpreters/HashJoin/JoinUsedFlags.h index 1ad3a6616e1f..a0e695609cac 100644 --- a/src/Interpreters/HashJoin/JoinUsedFlags.h +++ b/src/Interpreters/HashJoin/JoinUsedFlags.h @@ -45,13 +45,21 @@ class JoinUsedFlags } template - void reinit(const Columns * columns) + void reinit(const Columns * columns, const ScatteredBlock::Selector & selector) { if constexpr (MapGetter::flagged) { assert(per_row_flags[columns].size() <= columns->at(0)->size()); need_flags = true; per_row_flags[columns] = std::vector(columns->at(0)->size()); + + /// Mark all rows outside of selector as used. 
+ /// We should not emit them in the RIGHT/FULL JOIN result, + /// since they belong to another shard, which will handle flags for these rows + for (auto & flag : per_row_flags[columns]) + flag.store(true); + for (size_t index : selector) + per_row_flags[columns][index].store(false); } } diff --git a/src/Interpreters/InsertDependenciesBuilder.cpp b/src/Interpreters/InsertDependenciesBuilder.cpp index c6de824fd252..d37382333256 100644 --- a/src/Interpreters/InsertDependenciesBuilder.cpp +++ b/src/Interpreters/InsertDependenciesBuilder.cpp @@ -1437,7 +1437,7 @@ void InsertDependenciesBuilder::logQueryView(StorageID view_id, std::exception_p const auto & view_type = view_types.at(view_id); const auto & inner_table_id = inner_tables.at(view_id); - UInt64 elapsed_ms = thread_group->getThreadsTotalElapsedMs(); + UInt64 elapsed_ms = thread_group->getGroupElapsedMs(); UInt64 min_query_duration = settings[Setting::log_queries_min_query_duration_ms].totalMilliseconds(); if (min_query_duration && elapsed_ms <= min_query_duration) diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 2685d27d28df..b639f0c34208 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -131,9 +131,9 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name); if (database->shouldReplicateQuery(getContext(), query_ptr)) { - auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name); + auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name, database.get()); guard->releaseTableLock(); - return database->tryEnqueueReplicatedDDL(query_ptr, getContext(), {}); + return database->tryEnqueueReplicatedDDL(query_ptr, getContext(), {}, std::move(guard)); } if (!table) diff --git a/src/Interpreters/InterpreterCreateIndexQuery.cpp b/src/Interpreters/InterpreterCreateIndexQuery.cpp index 6ab6ded5f0cc..cce6a31b3916 100644 --- a/src/Interpreters/InterpreterCreateIndexQuery.cpp +++ b/src/Interpreters/InterpreterCreateIndexQuery.cpp @@ -75,9 +75,9 @@ BlockIO InterpreterCreateIndexQuery::execute() DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name); if (database->shouldReplicateQuery(getContext(), query_ptr)) { - auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name); + auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name, database.get()); guard->releaseTableLock(); - return database->tryEnqueueReplicatedDDL(query_ptr, current_context, {}); + return database->tryEnqueueReplicatedDDL(query_ptr, current_context, {}, std::move(guard)); } StoragePtr table = DatabaseCatalog::instance().getTable(table_id, current_context); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 0b5dd67d2cb6..273b76768fcb 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -193,7 +193,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) { String database_name = create.getDatabase(); - auto guard = DatabaseCatalog::instance().getDDLGuard(database_name, ""); + auto guard = DatabaseCatalog::instance().getDDLGuard(database_name, "", nullptr); /// Database can be created before or it can be created concurrently in another thread, while 
we were waiting in DDLGuard if (DatabaseCatalog::instance().isDatabaseExist(database_name)) @@ -1529,10 +1529,10 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) auto database = DatabaseCatalog::instance().tryGetDatabase(database_name); if (database && database->shouldReplicateQuery(getContext(), query_ptr)) { - auto guard = DatabaseCatalog::instance().getDDLGuard(database_name, create.getTable()); + auto guard = DatabaseCatalog::instance().getDDLGuard(database_name, create.getTable(), database.get()); create.setDatabase(database_name); guard->releaseTableLock(); - return database->tryEnqueueReplicatedDDL(query_ptr, getContext(), QueryFlags{ .internal = internal, .distributed_backup_restore = is_restore_from_backup }); + return database->tryEnqueueReplicatedDDL(query_ptr, getContext(), QueryFlags{ .internal = internal, .distributed_backup_restore = is_restore_from_backup }, std::move(guard)); } if (!create.cluster.empty()) @@ -1544,7 +1544,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) /// For short syntax of ATTACH query we have to lock table name here, before reading metadata /// and hold it until table is attached if (likely(need_ddl_guard)) - ddl_guard = DatabaseCatalog::instance().getDDLGuard(database_name, create.getTable()); + ddl_guard = DatabaseCatalog::instance().getDDLGuard(database_name, create.getTable(), database.get()); bool if_not_exists = create.if_not_exists; @@ -1722,10 +1722,10 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (database && database->shouldReplicateQuery(getContext(), query_ptr)) { chassert(!ddl_guard); - auto guard = DatabaseCatalog::instance().getDDLGuard(create.getDatabase(), create.getTable()); + auto guard = DatabaseCatalog::instance().getDDLGuard(create.getDatabase(), create.getTable(), database.get()); assertOrSetUUID(create, database); guard->releaseTableLock(); - return database->tryEnqueueReplicatedDDL(query_ptr, getContext(), QueryFlags{ .internal = internal, .distributed_backup_restore = is_restore_from_backup }); + return database->tryEnqueueReplicatedDDL(query_ptr, getContext(), QueryFlags{ .internal = internal, .distributed_backup_restore = is_restore_from_backup }, std::move(guard)); } if (!create.cluster.empty()) @@ -1786,7 +1786,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, } if (!ddl_guard && likely(need_ddl_guard)) - ddl_guard = DatabaseCatalog::instance().getDDLGuard(create.getDatabase(), create.getTable()); + ddl_guard = DatabaseCatalog::instance().getDDLGuard(create.getDatabase(), create.getTable(), nullptr); String data_path; DatabasePtr database; diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 0b36f6db458b..53cacbb28fca 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -80,9 +80,9 @@ BlockIO InterpreterDeleteQuery::execute() DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name); if (database->shouldReplicateQuery(getContext(), query_ptr)) { - auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name); + auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name, database.get()); guard->releaseTableLock(); - return database->tryEnqueueReplicatedDDL(query_ptr, getContext(), {}); + return database->tryEnqueueReplicatedDDL(query_ptr, getContext(), {}, std::move(guard)); } auto table_lock = 
table->lockForShare(getContext()->getCurrentQueryId(), settings[Setting::lock_acquire_timeout]); diff --git a/src/Interpreters/InterpreterDropIndexQuery.cpp b/src/Interpreters/InterpreterDropIndexQuery.cpp index 9e04f6bdfc06..4584fd8ad968 100644 --- a/src/Interpreters/InterpreterDropIndexQuery.cpp +++ b/src/Interpreters/InterpreterDropIndexQuery.cpp @@ -45,9 +45,9 @@ BlockIO InterpreterDropIndexQuery::execute() DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name); if (database->shouldReplicateQuery(getContext(), query_ptr)) { - auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name); + auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name, database.get()); guard->releaseTableLock(); - return database->tryEnqueueReplicatedDDL(query_ptr, current_context, {}); + return database->tryEnqueueReplicatedDDL(query_ptr, current_context, {}, std::move(guard)); } StoragePtr table = DatabaseCatalog::instance().getTable(table_id, current_context); diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 4e2dd6cb83a6..377cf03f0613 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -143,7 +143,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ContextPtr & context_, AS throw Exception(ErrorCodes::UNKNOWN_TABLE, "Temporary table {} doesn't exist", backQuoteIfNeed(table_id.table_name)); } - auto ddl_guard = (!query.no_ddl_lock ? DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name) : nullptr); + auto ddl_guard = (!query.no_ddl_lock ? DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name, nullptr) : nullptr); /// If table was already dropped by anyone, an exception will be thrown auto [database, table] = query.if_exists ? DatabaseCatalog::instance().tryGetDatabaseAndTable(table_id, context_) @@ -227,7 +227,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ContextPtr & context_, AS query_to_send.if_empty = false; - return database->tryEnqueueReplicatedDDL(new_query_ptr, context_, {}); + return database->tryEnqueueReplicatedDDL(new_query_ptr, context_, {}, std::move(ddl_guard)); } if (query.kind == ASTDropQuery::Kind::Detach) @@ -412,7 +412,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, return {}; const auto & database_name = query.getDatabase(); - auto ddl_guard = DatabaseCatalog::instance().getDDLGuard(database_name, ""); + auto ddl_guard = DatabaseCatalog::instance().getDDLGuard(database_name, "", nullptr); database = tryGetDatabase(database_name, query.if_exists); if (!database) @@ -695,7 +695,7 @@ AccessRightsElements InterpreterDropQuery::getRequiredAccessForDDLOnCluster() co void InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind kind, ContextPtr global_context, ContextPtr current_context, const StorageID & target_table_id, bool sync, bool ignore_sync_setting, bool need_ddl_guard) { - auto ddl_guard = (need_ddl_guard ? DatabaseCatalog::instance().getDDLGuard(target_table_id.database_name, target_table_id.table_name) : nullptr); + auto ddl_guard = (need_ddl_guard ? DatabaseCatalog::instance().getDDLGuard(target_table_id.database_name, target_table_id.table_name, nullptr) : nullptr); if (DatabaseCatalog::instance().tryGetTable(target_table_id, current_context)) { /// We create and execute `drop` query for internal table. 
diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index 097de8a0f041..05db47057ebf 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -74,7 +74,7 @@ BlockIO InterpreterRenameQuery::execute() /// Must do it in consistent order. for (auto & table_guard : table_guards) - table_guard.second = database_catalog.getDDLGuard(table_guard.first.database_name, table_guard.first.table_name); + table_guard.second = database_catalog.getDDLGuard(table_guard.first.database_name, table_guard.first.table_name, nullptr); if (rename.database) return executeToDatabase(rename, descriptions); @@ -126,7 +126,7 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c UniqueTableName to(elem.to_database_name, elem.to_table_name); ddl_guards[from]->releaseTableLock(); ddl_guards[to]->releaseTableLock(); - return database->tryEnqueueReplicatedDDL(query_ptr, getContext(), {}); + return database->tryEnqueueReplicatedDDL(query_ptr, getContext(), {}, std::move(ddl_guards[from])); } StorageID from_table_id{elem.from_database_name, elem.from_table_name}; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index fe8cb8286989..6327f3d65298 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -1011,7 +1011,7 @@ void InterpreterSystemQuery::restoreDatabaseReplica(ASTSystemQuery & query) StoragePtr InterpreterSystemQuery::doRestartReplica(const StorageID & replica, ContextMutablePtr system_context, bool throw_on_error) { LOG_TRACE(log, "Restarting replica {}", replica); - auto table_ddl_guard = DatabaseCatalog::instance().getDDLGuard(replica.getDatabaseName(), replica.getTableName()); + auto table_ddl_guard = DatabaseCatalog::instance().getDDLGuard(replica.getDatabaseName(), replica.getTableName(), nullptr); auto restart_replica_lock = DatabaseCatalog::instance().tryGetLockForRestartReplica(replica.getDatabaseName()); if (!restart_replica_lock) @@ -1467,7 +1467,7 @@ void InterpreterSystemQuery::loadOrUnloadPrimaryKeysImpl(bool load) void InterpreterSystemQuery::syncReplicatedDatabase(ASTSystemQuery & query) { const auto database_name = query.getDatabase(); - auto guard = DatabaseCatalog::instance().getDDLGuard(database_name, ""); + auto guard = DatabaseCatalog::instance().getDDLGuard(database_name, "", nullptr); auto database = DatabaseCatalog::instance().getDatabase(database_name); if (auto * ptr = typeid_cast(database.get())) diff --git a/src/Interpreters/InterpreterUndropQuery.cpp b/src/Interpreters/InterpreterUndropQuery.cpp index 1a1a7558bc83..0e6f3d68fde8 100644 --- a/src/Interpreters/InterpreterUndropQuery.cpp +++ b/src/Interpreters/InterpreterUndropQuery.cpp @@ -52,7 +52,7 @@ BlockIO InterpreterUndropQuery::executeToTable(ASTUndropQuery & query) query.setDatabase(table_id.database_name); } - auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name); + auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name, nullptr); auto database = DatabaseCatalog::instance().getDatabase(table_id.database_name); if (database->getEngineName() == "Replicated") diff --git a/src/Interpreters/InterpreterUpdateQuery.cpp b/src/Interpreters/InterpreterUpdateQuery.cpp index e0c68e75af93..23572be20757 100644 --- a/src/Interpreters/InterpreterUpdateQuery.cpp +++ b/src/Interpreters/InterpreterUpdateQuery.cpp @@ -105,9 +105,9 
@@ BlockIO InterpreterUpdateQuery::execute() DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name); if (database->shouldReplicateQuery(getContext(), query_ptr)) { - auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name); + auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name, database.get()); guard->releaseTableLock(); - return database->tryEnqueueReplicatedDDL(query_ptr, getContext(), {}); + return database->tryEnqueueReplicatedDDL(query_ptr, getContext(), {}, std::move(guard)); } MutationCommands commands; diff --git a/src/Interpreters/JoinUtils.cpp b/src/Interpreters/JoinUtils.cpp index 84a9dea15331..7af15e834671 100644 --- a/src/Interpreters/JoinUtils.cpp +++ b/src/Interpreters/JoinUtils.cpp @@ -234,7 +234,7 @@ void removeColumnNullability(ColumnWithTypeAndName & column) if (column.column && column.column->isNullable()) { - column.column = column.column->convertToFullColumnIfConst(); + column.column = column.column->convertToFullIfNeeded(); const auto * nullable_col = checkAndGetColumn(column.column.get()); if (!nullable_col) { diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 825e993f2e3a..3a37e9e8fa14 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -557,8 +557,8 @@ static void validateUpdateColumns( /// Check if we have a subcolumn of this column as a key column. for (const auto & key_column : key_columns) { - auto [key_column_name, key_subcolumn_name] = Nested::splitName(key_column); - if (key_column_name == column_name && ordinary_storage_column->type->hasSubcolumn(key_subcolumn_name)) + auto column = storage_columns.getColumnOrSubcolumn(GetColumnsOptions::All, key_column); + if (column.isSubcolumn() && column_name == column.getNameInStorage()) throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE column {} because its subcolumn {} is a key column", backQuote(column_name), backQuote(key_column)); } } diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 51d05824927c..19e3216066fc 100644 --- a/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -48,8 +48,12 @@ void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) visitChildren(describe_query->table_expression); else if (auto * create_user_query = dynamic_cast(ast.get())) { - ASTPtr names = create_user_query->names; - visitChildren(names); + if (create_user_query->names) + { + ASTPtr names = create_user_query->names; + visitChildren(names); + } + visitChildren(ast); } else visitChildren(ast); diff --git a/src/Interpreters/ReplicatedDatabaseQueryStatusSource.cpp b/src/Interpreters/ReplicatedDatabaseQueryStatusSource.cpp index f28e6ea2f873..b3bd2624c2f0 100644 --- a/src/Interpreters/ReplicatedDatabaseQueryStatusSource.cpp +++ b/src/Interpreters/ReplicatedDatabaseQueryStatusSource.cpp @@ -21,9 +21,10 @@ extern const int LOGICAL_ERROR; } ReplicatedDatabaseQueryStatusSource::ReplicatedDatabaseQueryStatusSource( - const String & zk_node_path, const String & zk_replicas_path, ContextPtr context_, const Strings & hosts_to_wait) + const String & zk_node_path, const String & zk_replicas_path, ContextPtr context_, const Strings & hosts_to_wait, DDLGuardPtr && database_guard_) : DistributedQueryStatusSource( zk_node_path, zk_replicas_path, std::make_shared(getSampleBlock()), 
context_, hosts_to_wait, "ReplicatedDatabaseQueryStatusSource") + , database_guard(std::move(database_guard_)) { } diff --git a/src/Interpreters/ReplicatedDatabaseQueryStatusSource.h b/src/Interpreters/ReplicatedDatabaseQueryStatusSource.h index 226706d3d431..1c536115fe67 100644 --- a/src/Interpreters/ReplicatedDatabaseQueryStatusSource.h +++ b/src/Interpreters/ReplicatedDatabaseQueryStatusSource.h @@ -11,7 +11,7 @@ class ReplicatedDatabaseQueryStatusSource final : public DistributedQueryStatusS { public: ReplicatedDatabaseQueryStatusSource( - const String & zk_node_path, const String & zk_replicas_path, ContextPtr context_, const Strings & hosts_to_wait); + const String & zk_node_path, const String & zk_replicas_path, ContextPtr context_, const Strings & hosts_to_wait, DDLGuardPtr && database_guard_); String getName() const override { return "ReplicatedDatabaseQueryStatus"; } @@ -26,6 +26,8 @@ class ReplicatedDatabaseQueryStatusSource final : public DistributedQueryStatusS private: static Block getSampleBlock(); + /// A kind of read lock for the database which prevents dropping the database (and its metadata from zk that we use for getting the query status) + DDLGuardPtr database_guard; }; diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 34660ff31135..d47d06d5c19b 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -115,10 +115,10 @@ size_t ThreadGroup::getPeakThreadsUsage() const return peak_threads_usage; } -UInt64 ThreadGroup::getThreadsTotalElapsedMs() const +UInt64 ThreadGroup::getGroupElapsedMs() const { std::lock_guard lock(mutex); - return elapsed_total_threads_counter_ms; + return elapsed_group_ms; } void ThreadGroup::linkThread(UInt64 thread_id) @@ -126,16 +126,21 @@ void ThreadGroup::linkThread(UInt64 thread_id) std::lock_guard lock(mutex); thread_ids.insert(thread_id); + if (active_thread_count == 0) + effective_group_stopwatch.restart(); + ++active_thread_count; peak_threads_usage = std::max(peak_threads_usage, active_thread_count); } -void ThreadGroup::unlinkThread(UInt64 elapsed_thread_counter_ms) +void ThreadGroup::unlinkThread() { std::lock_guard lock(mutex); chassert(active_thread_count > 0); --active_thread_count; - elapsed_total_threads_counter_ms += elapsed_thread_counter_ms; + + if (active_thread_count == 0) + elapsed_group_ms += effective_group_stopwatch.elapsedMilliseconds(); } ThreadGroupPtr ThreadGroup::createForQuery(ContextPtr query_context_, std::function fatal_error_callback_) @@ -386,7 +391,7 @@ void ThreadStatus::detachFromGroup() /// Extract MemoryTracker out from query and user context memory_tracker.setParent(&total_memory_tracker); - thread_group->unlinkThread(thread_attach_time.elapsedMilliseconds()); + thread_group->unlinkThread(); thread_group.reset(); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 8ba17a0e937a..05768ab19284 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1229,19 +1229,25 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select { for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();) { - auto [column_name, subcolumn_name] = Nested::splitName(*it); - - if (column_name == pair.name) + bool found = false; + for (auto [column_name, subcolumn_name] : Nested::getAllColumnAndSubcolumnPairs(*it)) { - if (auto subcolumn_type = pair.type->tryGetSubcolumnType(subcolumn_name)) + if (column_name == 
pair.name) { - source_columns.emplace_back(*it, subcolumn_type); - it = unknown_required_source_columns.erase(it); - continue; + if (auto subcolumn_type = pair.type->tryGetSubcolumnType(subcolumn_name)) + { + source_columns.emplace_back(*it, subcolumn_type); + it = unknown_required_source_columns.erase(it); + found = true; + break; + } } } - ++it; + if (found) + continue; + else + ++it; } } } diff --git a/src/Interpreters/createSubcolumnsExtractionActions.cpp b/src/Interpreters/createSubcolumnsExtractionActions.cpp index e054fe12e348..ad0508818938 100644 --- a/src/Interpreters/createSubcolumnsExtractionActions.cpp +++ b/src/Interpreters/createSubcolumnsExtractionActions.cpp @@ -18,19 +18,24 @@ ActionsDAG createSubcolumnsExtractionActions(const Block & available_columns, co std::unordered_map input_nodes; for (const auto & required_column : required_columns) { - auto subcolumn = available_columns.findSubcolumnByName(required_column); - if (!available_columns.has(required_column) && subcolumn) + if (available_columns.has(required_column)) + continue; + + for (auto [column_name, subcolumn_name] : Nested::getAllColumnAndSubcolumnPairs(required_column)) { - auto [column_name, subcolumn_name] = Nested::splitName(required_column); + const auto * column = available_columns.findByName(column_name); + if (!column || !column->type->tryGetSubcolumnType(subcolumn_name)) + continue; + const ActionsDAG::Node * column_input_node; /// Check if we don't have input with this column yet. - if (auto it = input_nodes.find(column_name); it == input_nodes.end()) + if (auto it = input_nodes.find(column->name); it == input_nodes.end()) { - const auto * node = &extract_subcolumns_dag.addInput(available_columns.getByName(column_name)); + const auto * node = &extract_subcolumns_dag.addInput(available_columns.getByName(column->name)); extract_subcolumns_dag.getOutputs().push_back(node); - input_nodes[column_name] = node; + input_nodes[column->name] = node; } - column_input_node = input_nodes[column_name]; + column_input_node = input_nodes[column->name]; /// Create the second argument of getSubcolumn function with string /// containing subcolumn name and add it to the ActionsDAG. @@ -48,6 +53,8 @@ ActionsDAG createSubcolumnsExtractionActions(const Block & available_columns, co /// Create an alias for getSubcolumn function so it has the name of the subcolumn. 
const auto & alias_node = extract_subcolumns_dag.addAlias(function_node, required_column); extract_subcolumns_dag.getOutputs().push_back(&alias_node); + + break; } } diff --git a/src/Interpreters/tests/gtest_mark_ranges_memory_tracking.cpp b/src/Interpreters/tests/gtest_mark_ranges_memory_tracking.cpp new file mode 100644 index 000000000000..1e1d884140b3 --- /dev/null +++ b/src/Interpreters/tests/gtest_mark_ranges_memory_tracking.cpp @@ -0,0 +1,45 @@ +#include + +#include +#include +#include +#include +#include + +#include + +using namespace DB; + +TEST(MarkRanges, MemoryTracking) +{ + MainThreadStatus::getInstance(); + total_memory_tracker.resetCounters(); + CurrentThread::get().memory_tracker.resetCounters(); + + total_memory_tracker.setHardLimit(1_KiB); + CurrentThread::get().memory_tracker.setHardLimit(1_KiB); + + SCOPE_EXIT_SAFE(total_memory_tracker.setHardLimit(0)); + SCOPE_EXIT_SAFE(CurrentThread::get().memory_tracker.setHardLimit(0)); + + constexpr size_t num_ranges = 1'000'000; + std::uniform_int_distribution dist(0, 1'000'000); + + MarkRanges ranges; + + try + { + for (size_t i = 0; i < num_ranges; ++i) + { + size_t begin = dist(thread_local_rng); + size_t end = begin + dist(thread_local_rng) % 1000 + 1; + ranges.emplace_back(begin, end); + } + } + catch (DB::Exception &) + { + return; + } + + FAIL() << "Expected memory limit exception was not thrown"; +} diff --git a/src/Parsers/FunctionSecretArgumentsFinder.h b/src/Parsers/FunctionSecretArgumentsFinder.h index b614dcb393f6..2485a9ec5f7e 100644 --- a/src/Parsers/FunctionSecretArgumentsFinder.h +++ b/src/Parsers/FunctionSecretArgumentsFinder.h @@ -163,6 +163,10 @@ class FunctionSecretArgumentsFinder { findURLSecretArguments(); } + else if (function->name() == "redis") + { + findRedisFunctionSecretArguments(); + } else if (function->name() == "ytsaurus") { findYTsaurusStorageTableEngineSecretArguments(); @@ -217,7 +221,7 @@ class FunctionSecretArgumentsFinder result.replacement = std::move(uri); } - void findRedisSecretArguments() + void findRedisTableEngineSecretArguments() { /// Redis does not have URL/address argument, /// only 'host:port' and separate "password" argument. @@ -606,7 +610,7 @@ class FunctionSecretArgumentsFinder } else if (engine_name == "Redis") { - findRedisSecretArguments(); + findRedisTableEngineSecretArguments(); } else if (engine_name == "YTsaurus") { @@ -702,6 +706,12 @@ class FunctionSecretArgumentsFinder markSecretArgument(url_arg_idx + 4); } + void findRedisFunctionSecretArguments() + { + // redis(host:port, key, structure, db_index, password, pool_size) + markSecretArgument(4); + } + void findYTsaurusStorageTableEngineSecretArguments() { // YTsaurus('base_uri', 'yt_path', 'auth_token') @@ -783,6 +793,12 @@ class FunctionSecretArgumentsFinder const String & engine_name = function->name(); if (engine_name == "S3") { + if (isNamedCollectionName(0)) + { + /// BACKUP ... TO S3(named_collection, ..., secret_access_key = 'secret_access_key', ...) + findSecretNamedArgument("secret_access_key", 1); + return; + } /// BACKUP ... 
TO S3(url, [aws_access_key_id, aws_secret_access_key]) markSecretArgument(2); } diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 43f53f9387f2..64547828612e 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -19,7 +19,6 @@ #include #include - namespace DB { namespace Setting diff --git a/src/Processors/QueryPlan/resolveStorages.cpp b/src/Processors/QueryPlan/resolveStorages.cpp index 38ea216b5ec6..411df98ecc32 100644 --- a/src/Processors/QueryPlan/resolveStorages.cpp +++ b/src/Processors/QueryPlan/resolveStorages.cpp @@ -44,6 +44,7 @@ namespace Setting extern const SettingsSetOperationMode except_default_mode; extern const SettingsSetOperationMode intersect_default_mode; extern const SettingsSetOperationMode union_default_mode; + extern const SettingsSeconds lock_acquire_timeout; } namespace ErrorCodes @@ -186,6 +187,8 @@ static QueryPlanResourceHolder replaceReadingFromTable(QueryPlan::Node & node, Q select_query_info.table_expression_modifiers = reading_from_table_function->getTableExpressionModifiers(); } + auto table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef()[Setting::lock_acquire_timeout]); + ASTPtr query; bool is_storage_merge = typeid_cast(storage.get()); if (storage->isRemote() || is_storage_merge) @@ -255,6 +258,10 @@ static QueryPlanResourceHolder replaceReadingFromTable(QueryPlan::Node & node, Q node.step = std::make_unique(reading_plan.getCurrentHeader(), std::move(converting_actions)); node.children = {reading_plan.getRootNode()}; + reading_plan.addInterpreterContext(context); + reading_plan.addStorageHolder(std::move(storage)); + reading_plan.addTableLock(std::move(table_lock)); + auto nodes_and_resource = QueryPlan::detachNodesAndResources(std::move(reading_plan)); nodes.splice(nodes.end(), std::move(nodes_and_resource.first)); diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index e69b785fd2ad..1ced06f0a955 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -248,6 +248,8 @@ void RemoteSource::onCancel() noexcept void RemoteSource::onUpdatePorts() { + if (isCancelled()) + return; if (getPort().isFinished()) query_executor->finish(); } diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index bd5d95e28e4c..b3ac7228bca5 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -811,7 +811,7 @@ void RemoteQueryExecutor::finish() * - received an unknown packet from one replica; * then you do not need to read anything. 
*/ - if (!isQueryPending() || hasThrownException()) + if (!isQueryPending() || hasThrownException() || was_cancelled) return; /// To make sure finish is only called once diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 9100f5660629..2bfb9dda0f35 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -619,13 +619,8 @@ bool ColumnsDescription::hasSubcolumn(const String & column_name) const return true; /// Check for dynamic subcolumns - auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); - auto it = columns.get<1>().find(ordinary_column_name); - if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns() && !dynamic_subcolumn_name.empty()) - { - if (auto /*dynamic_subcolumn_type*/ _ = it->type->tryGetSubcolumnType(dynamic_subcolumn_name)) - return true; - } + if (tryGetDynamicSubcolumn(column_name)) + return true; return false; } @@ -701,13 +696,11 @@ std::optional ColumnsDescription::tryGetColumn(const GetColumns if (jt != subcolumns.get<0>().end()) return *jt; - /// Check for dynamic subcolumns. - auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); - it = columns.get<1>().find(ordinary_column_name); - if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) + if (options.with_dynamic_subcolumns) { - if (auto dynamic_subcolumn_type = it->type->tryGetSubcolumnType(dynamic_subcolumn_name)) - return NameAndTypePair(ordinary_column_name, dynamic_subcolumn_name, it->type, dynamic_subcolumn_type); + /// Check for dynamic subcolumns. + if (auto dynamic_subcolumn = tryGetDynamicSubcolumn(column_name)) + return dynamic_subcolumn; } } @@ -800,15 +793,6 @@ bool ColumnsDescription::hasColumnOrSubcolumn(GetColumnsOptions::Kind kind, cons if ((it != columns.get<1>().end() && (defaultKindToGetKind(it->default_desc.kind) & kind)) || hasSubcolumn(column_name)) return true; - /// Check for dynamic subcolumns. 
- auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); - it = columns.get<1>().find(ordinary_column_name); - if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) - { - if (auto /*dynamic_subcolumn_type*/ _ = it->type->hasSubcolumn(dynamic_subcolumn_name)) - return true; - } - return false; } @@ -963,6 +947,22 @@ std::vector ColumnsDescription::getAllRegisteredNames() const return names; } +std::optional ColumnsDescription::tryGetDynamicSubcolumn(const String & column_name) const +{ + for (auto [ordinary_column_name, dynamic_subcolumn_name] : Nested::getAllColumnAndSubcolumnPairs(column_name)) + { + auto it = columns.get<1>().find(String(ordinary_column_name)); + if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) + { + if (auto dynamic_subcolumn_type = it->type->tryGetSubcolumnType(dynamic_subcolumn_name)) + return NameAndTypePair(String(ordinary_column_name), String(dynamic_subcolumn_name), it->type, dynamic_subcolumn_type); + } + } + + return std::nullopt; +} + + void getDefaultExpressionInfoInto(const ASTColumnDeclaration & col_decl, const DataTypePtr & data_type, DefaultExpressionsInfo & info) { if (!col_decl.default_expression) diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 23a14fa92d3e..597080d49786 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -55,6 +55,13 @@ struct GetColumnsOptions GetColumnsOptions(Kind kind_) : kind(kind_) {} /// NOLINT(google-explicit-constructor) GetColumnsOptions & withSubcolumns(bool value = true) + { + with_subcolumns = value; + with_dynamic_subcolumns = value; + return *this; + } + + GetColumnsOptions & withRegularSubcolumns(bool value = true) { with_subcolumns = value; return *this; @@ -76,6 +83,7 @@ struct GetColumnsOptions VirtualsKind virtuals_kind = VirtualsKind::None; bool with_subcolumns = false; + bool with_dynamic_subcolumns = false; bool with_extended_objects = false; }; @@ -267,6 +275,8 @@ class ColumnsDescription : public IHints<> void addSubcolumns(const String & name_in_storage, const DataTypePtr & type_in_storage); void removeSubcolumns(const String & name_in_storage); + + std::optional tryGetDynamicSubcolumn(const String & column_name) const; }; class ASTColumnDeclaration; diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index bf2fa8136576..9d0e0bf9dd43 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -728,6 +728,15 @@ void DataPartStorageOnDiskBase::remove( disk->removeSharedRecursive( fs::path(to) / "", !can_remove_description->can_remove_anything, can_remove_description->files_not_to_remove); } + catch (const fs::filesystem_error & e) + { + if (e.code() == std::errc::no_such_file_or_directory) + { + /// If the directory was already removed (e.g. by clearOldTemporaryDirectories), nothing to do. + } + else + throw; + } catch (...) { LOG_ERROR( @@ -748,7 +757,9 @@ void DataPartStorageOnDiskBase::remove( try { disk->moveDirectory(from, to); - part_dir = part_dir_without_slash; + /// NOTE: we intentionally don't update part_dir here because it would cause a data race + /// with concurrent readers (e.g. system.parts table queries calling getFullPath()). + /// The part is being removed anyway, so the path doesn't need to be updated. 
} catch (const Exception & e) { @@ -859,7 +870,19 @@ void DataPartStorageOnDiskBase::clearDirectory( if (checksums.empty() || incomplete_temporary_part) { /// If the part is not completely written, we cannot use fast path by listing files. - disk->removeSharedRecursive(fs::path(dir) / "", !can_remove_shared_data, names_not_to_remove); + try + { + disk->removeSharedRecursive(fs::path(dir) / "", !can_remove_shared_data, names_not_to_remove); + } + catch (const fs::filesystem_error & e) + { + if (e.code() == std::errc::no_such_file_or_directory) + { + /// If the directory was already removed (e.g. by clearOldTemporaryDirectories), nothing to do. + } + else + throw; + } return; } @@ -895,7 +918,19 @@ void DataPartStorageOnDiskBase::clearDirectory( /// Recursive directory removal does many excessive "stat" syscalls under the hood. LOG_ERROR(log, "Cannot quickly remove directory {} by removing files; fallback to recursive removal. Reason: {}", fullPath(disk, dir), getCurrentExceptionMessage(false)); - disk->removeSharedRecursive(fs::path(dir) / "", !can_remove_shared_data, names_not_to_remove); + try + { + disk->removeSharedRecursive(fs::path(dir) / "", !can_remove_shared_data, names_not_to_remove); + } + catch (const fs::filesystem_error & e) + { + if (e.code() == std::errc::no_such_file_or_directory) + { + /// If the directory was already removed (e.g. by clearOldTemporaryDirectories), nothing to do. + } + else + throw; + } } } diff --git a/src/Storages/MergeTree/MarkRange.h b/src/Storages/MergeTree/MarkRange.h index 9ecc2bfb9576..3ea0c97b2db6 100644 --- a/src/Storages/MergeTree/MarkRange.h +++ b/src/Storages/MergeTree/MarkRange.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -30,7 +31,7 @@ struct MarkRange bool operator<(const MarkRange & rhs) const; }; -struct MarkRanges : public std::deque +struct MarkRanges : public std::deque> { enum class SearchAlgorithm : uint8_t { @@ -39,7 +40,7 @@ struct MarkRanges : public std::deque GenericExclusionSearch, }; - using std::deque::deque; /// NOLINT(modernize-type-traits) + using std::deque>::deque; /// NOLINT(modernize-type-traits) size_t getNumberOfMarks() const; bool isOneRangeForWholePart(size_t num_marks_in_part) const; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index af064aa83ddd..5b8df976d475 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -275,7 +275,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu key_columns.insert(name); /// If we don't have this column in storage columns, it must be a subcolumn of one of the storage columns. else - key_columns.insert(Nested::splitName(name).first); + key_columns.insert(String(Nested::getColumnFromSubcolumn(name, storage_columns))); } /// Force sign column for Collapsing mode @@ -331,8 +331,9 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu const auto & column_name = index_columns.front(); if (storage_columns.contains(column_name)) global_ctx->skip_indexes_by_column[column_name].push_back(index); + /// If we don't have this column in storage columns, it must be a subcolumn of one of the storage columns. 
else - global_ctx->skip_indexes_by_column[Nested::splitName(column_name).first].push_back(index); + global_ctx->skip_indexes_by_column[String(Nested::getColumnFromSubcolumn(column_name, storage_columns))].push_back(index); } else { @@ -342,7 +343,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu key_columns.insert(index_column); /// If we don't have this column in storage columns, it must be a subcolumn of one of the storage columns. else - key_columns.insert(Nested::splitName(index_column).first); + key_columns.insert(String(Nested::getColumnFromSubcolumn(index_column, storage_columns))); } global_ctx->merging_skip_indexes.push_back(index); @@ -351,9 +352,13 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu for (const auto * projection : global_ctx->projections_to_rebuild) { - Names projection_columns_vec = projection->getRequiredColumns(); - std::copy(projection_columns_vec.cbegin(), projection_columns_vec.cend(), - std::inserter(key_columns, key_columns.end())); + for (const auto & column : projection->getRequiredColumns()) + { + if (projection->with_parent_part_offset && column == "_part_offset") + continue; + + key_columns.insert(column); + } } /// TODO: also force "summing" and "aggregating" columns to make Horizontal merge only for such columns @@ -887,16 +892,12 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::prepareProjectionsToMergeAndRe } -void MergeTask::ExecuteAndFinalizeHorizontalPart::calculateProjections(const Block & block) const +void MergeTask::ExecuteAndFinalizeHorizontalPart::calculateProjections(const Block & block, UInt64 starting_offset) const { for (size_t i = 0, size = global_ctx->projections_to_rebuild.size(); i < size; ++i) { const auto & projection = *global_ctx->projections_to_rebuild[i]; - Block block_with_required_columns; - for (const auto & name : projection.getRequiredColumns()) - if (name != "_part_offset") - block_with_required_columns.insert(block.getByName(name)); - Block block_to_squash = projection.calculate(block_with_required_columns, global_ctx->context); + Block block_to_squash = projection.calculate(block, starting_offset, global_ctx->context); /// Avoid replacing the projection squash header if nothing was generated (it used to return an empty block) if (block_to_squash.rows() == 0) return; @@ -1012,6 +1013,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl() const } } + size_t starting_offset = global_ctx->rows_written; global_ctx->rows_written += block.rows(); const_cast(*global_ctx->to).write(block); @@ -1021,7 +1023,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl() const block, MergeTreeData::getMinMaxColumnsNames(global_ctx->metadata_snapshot->getPartitionKey())); } - calculateProjections(block); + calculateProjections(block, starting_offset); UInt64 result_rows = 0; UInt64 result_bytes = 0; diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 5df407ce0843..208a0cc1985a 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -314,7 +314,7 @@ class MergeTask ExecuteAndFinalizeHorizontalPartSubtasks::const_iterator subtasks_iterator = subtasks.begin(); void prepareProjectionsToMergeAndRebuild() const; - void calculateProjections(const Block & block) const; + void calculateProjections(const Block & block, UInt64 starting_offset) const; void finalizeProjections() const; void constructTaskForProjectionPartsMerge() const; bool executeMergeProjections() const; diff 
--git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index e680bed5f391..a6181c302d5a 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -175,6 +175,8 @@ void updateTTL( subquery->buildSetInplace(context); auto ttl_column = ITTLAlgorithm::executeExpressionAndGetColumn(expr_and_set.expression, block, ttl_entry.result_column); + /// In some cases block can contain Sparse columns (for example, during direct deserialization into Sparse in input formats). + ttl_column = ttl_column->convertToFullColumnIfSparse(); if (const ColumnUInt16 * column_date = typeid_cast(ttl_column.get())) { @@ -806,7 +808,7 @@ MergeTreeTemporaryPartPtr MergeTreeDataWriter::writeTempPartImpl( Block projection_block; { ProfileEventTimeIncrement watch(ProfileEvents::MergeTreeDataWriterProjectionsCalculationMicroseconds); - projection_block = projection.calculate(block, context, perm_ptr); + projection_block = projection.calculate(block, 0, context, perm_ptr); LOG_DEBUG( log, "Spent {} ms calculating projection {} for the part {}", watch.elapsed() / 1000, projection.name, new_data_part->name); } diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index c52ab6725abc..ad75db7166fd 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -212,11 +212,11 @@ MergeTreeRangeReader::Stream::Stream(size_t from_mark, size_t to_mark, size_t cu size_t marks_count = index_granularity->getMarksCount(); if (from_mark >= marks_count) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying create stream to read from mark №{} but total marks count is {}", - toString(current_mark), toString(marks_count)); + toString(from_mark), toString(marks_count)); if (last_mark > marks_count) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying create stream to read to mark №{} but total marks count is {}", - toString(current_mark), toString(marks_count)); + toString(last_mark), toString(marks_count)); } void MergeTreeRangeReader::Stream::checkNotFinished() const diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 7ff964cba8a0..7938bc0b4422 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -23,6 +23,11 @@ namespace constexpr auto DATA_FILE_EXTENSION = ".bin"; } +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + MergeTreeReaderWide::MergeTreeReaderWide( MergeTreeDataPartInfoForReaderPtr data_part_info_, NamesAndTypesList columns_, @@ -324,7 +329,21 @@ ReadBuffer * MergeTreeReaderWide::getStream( auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(name_and_type, substream_path, checksums); if (!stream_name) + { + /// We allow missing streams only for columns/subcolumns that are not present in this part. 
+ auto column = data_part_info_for_read->getColumnsDescription().tryGetColumn(GetColumnsOptions::AllPhysical, name_and_type.getNameInStorage()); + if (column && (!name_and_type.isSubcolumn() || column->type->hasSubcolumn(name_and_type.getSubcolumnName()))) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Stream {} for column {} with type {} is not found", + ISerialization::getFileNameForStream(name_and_type.type->getName(), substream_path), + name_and_type.name, + column->type->getName()); + } + return nullptr; + } auto it = streams.find(*stream_name); if (it == streams.end()) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index ba505319c03a..02f24454ef5b 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1369,13 +1369,14 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() if (ctx->count_lightweight_deleted_rows) existing_rows_count += MutationHelpers::getExistingRowsCount(cur_block); + UInt64 starting_offset = (*ctx->mutate_entry)->rows_written; for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { Chunk squashed_chunk; { ProfileEventTimeIncrement projection_watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); - Block block_to_squash = ctx->projections_to_build[i]->calculate(cur_block, ctx->context); + Block block_to_squash = ctx->projections_to_build[i]->calculate(cur_block, starting_offset, ctx->context); /// Everything is deleted by lighweight delete if (block_to_squash.rows() == 0) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index b55f2e3c8aee..96f934fc6de1 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -52,6 +52,7 @@ namespace FailPoints extern const char replicated_merge_tree_insert_quorum_fail_0[]; extern const char replicated_merge_tree_commit_zk_fail_when_recovering_from_hw_fault[]; extern const char replicated_merge_tree_insert_retry_pause[]; + extern const char rmt_delay_commit_part[]; } namespace ErrorCodes @@ -592,14 +593,34 @@ bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::Mutabl int error = 0; /// Set a special error code if the block is duplicate - /// And remove attaching_ prefix if (deduplicate && deduplicated) { error = ErrorCodes::INSERT_WAS_DEDUPLICATED; - if (!endsWith(part->getDataPartStorage().getRelativePath(), "detached/attaching_" + part->name + "/")) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected relative path for a deduplicated part: {}", part->getDataPartStorage().getRelativePath()); - fs::path new_relative_path = fs::path("detached") / part->getNewName(part->info); - part->renameTo(new_relative_path, false); + + const auto & relative_path = part->getDataPartStorage().getRelativePath(); + const auto part_dir = fs::path(relative_path).parent_path().filename().string(); + + if (relative_path.ends_with("detached/attaching_" + part->name + "/")) + { + /// Part came from ATTACH PART - rename back to detached/ (remove attaching_ prefix) + fs::path new_relative_path = fs::path("detached") / part->getNewName(part->info); + part->renameTo(new_relative_path, false); + } + else if (part_dir.starts_with("tmp_restore_" + part->name)) + { + /// Part came from RESTORE with a temporary directory. + /// Just remove the temporary part since it's a duplicate. 
+ LOG_DEBUG(log, "Removing deduplicated part {} from temporary path {}", part->name, relative_path); + part->removeIfNeeded(); + } + else + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Unexpected deduplicated part with relative path '{}' and part directory '{}'. " + "Expected relative path to end with 'detached/attaching_{}/' or part directory to start with 'tmp_restore_{}'.", + relative_path, part_dir, part->name, part->name); + } } PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, watch.elapsed(), profile_events_scope.getSnapshot()), ExecutionStatus(error)); return deduplicated; @@ -950,9 +971,10 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: fiu_do_on(FailPoints::replicated_merge_tree_commit_zk_fail_after_op, { zookeeper->forceFailureAfterOperation(); }); + fiu_do_on(FailPoints::rmt_delay_commit_part, { sleepForSeconds(5); }); + Coordination::Responses responses; Coordination::Error multi_code = zookeeper->tryMultiNoThrow(ops, responses, /* check_session_valid */ true); /// 1 RTT - if (multi_code == Coordination::Error::ZOK) { part->new_part_was_committed_to_zookeeper_after_rename_on_disk = true; diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp index 6d0392cd6a89..7271e52b5be1 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.cpp +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -104,14 +104,14 @@ ObjectStoragePtr StorageAzureConfiguration::createObjectStorage(ContextPtr conte /*common_key_prefix*/ ""); } -static AzureBlobStorage::ConnectionParams getConnectionParams( +AzureBlobStorage::ConnectionParams getAzureConnectionParams( const String & connection_url, const String & container_name, const std::optional & account_name, const std::optional & account_key, const std::optional & client_id, const std::optional & tenant_id, - const ContextPtr & local_context) + ContextPtr local_context) { AzureBlobStorage::ConnectionParams connection_params; auto request_settings = AzureBlobStorage::getRequestSettings(local_context->getSettingsRef()); @@ -196,9 +196,8 @@ void StorageAzureConfiguration::fromNamedCollection(const NamedCollection & coll } setPartitionColumnsInDataFile(collection.getOrDefault("partition_columns_in_data_file", getPartitionStrategyType() != PartitionStrategyFactory::StrategyType::HIVE)); - blobs_paths = {blob_path}; - connection_params = getConnectionParams(connection_url, container_name, account_name, account_key, client_id, tenant_id, context); + connection_params = getAzureConnectionParams(connection_url, container_name, account_name, account_key, client_id, tenant_id, context); } ASTPtr StorageAzureConfiguration::extractExtraCredentials(ASTs & args) @@ -530,7 +529,7 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, } blobs_paths = {blob_path}; - connection_params = getConnectionParams(connection_url, container_name, account_name, account_key, client_id, tenant_id, context); + connection_params = getAzureConnectionParams(connection_url, container_name, account_name, account_key, client_id, tenant_id, context); } void StorageAzureConfiguration::addStructureAndFormatToArgsIfNeeded( diff --git a/src/Storages/ObjectStorage/Azure/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h index 76c6ec2cfb0a..134d0deedc18 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.h +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -12,6 +12,16 @@ namespace DB { class BackupFactory; +/// Constructs Azure connection 
parameters from individual components. +AzureBlobStorage::ConnectionParams getAzureConnectionParams( + const String & connection_url, + const String & container_name, + const std::optional & account_name, + const std::optional & account_key, + const std::optional & client_id, + const std::optional & tenant_id, + ContextPtr context); + class StorageAzureConfiguration : public StorageObjectStorageConfiguration { friend class BackupReaderAzureBlobStorage; diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 1aefb9867e90..b32bba1ca37f 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -746,7 +746,7 @@ Field DeltaLakeMetadata::getFieldValue(const String & value, DataTypePtr data_ty { ReadBufferFromString in(value); DateTime64 time = 0; - readDateTime64Text(time, 6, in, assert_cast(data_type.get())->getTimeZone()); + readDateTime64Text(time, 6, in, assert_cast(check_type.get())->getTimeZone()); return time; } diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp index 2a5fbaffcac3..ac44fd993691 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp @@ -269,7 +269,7 @@ bool ObjectStorageQueueIFileMetadata::trySetProcessing() { file_status->onProcessing(); } - else + else if (file_state != FileStatus::State::None) { LOG_TEST(log, "Updating state of {} from {} to {}", path, file_status->state.load(), file_state); file_status->updateState(file_state); diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.cpp index de66996258ae..50947d528f85 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.cpp @@ -280,6 +280,7 @@ std::pair ObjectStorag const size_t max_num_tries = 100; Coordination::Error code; + std::string failed_path; for (size_t i = 0; i < max_num_tries; ++i) { std::optional processed_node; @@ -404,7 +405,8 @@ std::pair ObjectStorag /// 6. check processed node version did not change auto failed_idx = zkutil::getFailedOpIndex(code, responses); - LOG_DEBUG(log, "Code: {}, failed idx: {}, failed path: {}", code, failed_idx, requests[failed_idx]->getPath()); + failed_path = requests[failed_idx]->getPath(); + LOG_DEBUG(log, "Code: {}, failed idx: {}, failed path: {}", code, failed_idx, failed_path); if (has_request_failed(failed_path_doesnt_exist_idx)) return {false, FileStatus::State::Failed}; @@ -431,10 +433,9 @@ std::pair ObjectStorag LOG_DEBUG(log, "Retrying setProcessing because processing node id path is unexpectedly missing or was created (error code: {})", code); } - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Failed to set file processing within {} retries, last error: {}", - max_num_tries, code); + LOG_WARNING(log, "Failed to set file processing within {} retries, last error {} for path {}", max_num_tries, code, failed_path); + chassert(false); /// Catch in CI. 
+ return {false, FileStatus::State::None}; } void ObjectStorageQueueOrderedFileMetadata::prepareProcessedAtStartRequests( diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 5ace34b71049..3b6873a7fe41 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -307,6 +307,7 @@ ProjectionDescription::getProjectionFromAST(const ASTPtr & definition_ast, const result.sample_block.erase("_part_offset"); result.sample_block.insert(std::move(new_column)); result.with_parent_part_offset = true; + std::erase_if(result.required_columns, [](const String & s) { return s.contains("_part_offset"); }); } auto block = result.sample_block; @@ -321,7 +322,8 @@ ProjectionDescription::getProjectionFromAST(const ASTPtr & definition_ast, const /// Subcolumns can be used in projection only when the original column is used. if (columns.hasSubcolumn(column_with_type_name.name)) { - if (!block.has(Nested::splitName(column_with_type_name.name).first)) + auto subcolumn = columns.getColumnOrSubcolumn(GetColumnsOptions::All, column_with_type_name.name); + if (!block.has(subcolumn.getNameInStorage())) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Projections cannot contain individual subcolumns: {}", column_with_type_name.name); /// Also remove this subcolumn from the required columns as we have the original column. std::erase_if(result.required_columns, [&](const String & column_name){ return column_name == column_with_type_name.name; }); @@ -435,7 +437,8 @@ void ProjectionDescription::recalculateWithNewColumns(const ColumnsDescription & *this = getProjectionFromAST(definition_ast, new_columns, query_context); } -Block ProjectionDescription::calculate(const Block & block, ContextPtr context, const IColumnPermutation * perm_ptr) const +Block ProjectionDescription::calculate( + const Block & block, UInt64 starting_offset, ContextPtr context, const IColumnPermutation * perm_ptr) const { auto mut_context = Context::createCopy(context); /// We ignore aggregate_functions_null_for_empty cause it changes aggregate function types. 
@@ -457,31 +460,34 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context, makeASTFunction("equals", std::make_shared(RowExistsColumn::name), std::make_shared(1))); } - /// Create "_part_offset" column when needed for projection with parent part offsets + /// Only keep required columns - Block source_block = block; + Block source_block; + for (const auto & column : required_columns) + source_block.insert(block.getByName(column)); + + /// Create "_part_offset" column when needed for projection with parent part offsets if (with_parent_part_offset) { chassert(sample_block.has("_parent_part_offset")); - - /// Add "_part_offset" column if not present (needed for insertions but not mutations - materialize projections) - if (!source_block.has("_part_offset")) + chassert(!source_block.has("_part_offset")); + auto uint64 = std::make_shared(); + auto column = uint64->createColumn(); + auto & offset = assert_cast(*column).getData(); + offset.resize_exact(block.rows()); + if (perm_ptr) { - auto uint64 = std::make_shared(); - auto column = uint64->createColumn(); - auto & offset = assert_cast(*column).getData(); - offset.resize_exact(block.rows()); - if (perm_ptr) - { - for (size_t i = 0; i < block.rows(); ++i) - offset[(*perm_ptr)[i]] = i; - } - else - { - iota(offset.data(), offset.size(), UInt64(0)); - } - - source_block.insert({std::move(column), std::move(uint64), "_part_offset"}); + /// Insertion path + chassert(starting_offset == 0); + for (size_t i = 0; i < block.rows(); ++i) + offset[(*perm_ptr)[i]] = i; } + else + { + /// Rebuilding path + iota(offset.data(), offset.size(), starting_offset); + } + + source_block.insert({std::move(column), std::move(uint64), "_part_offset"}); } auto builder = InterpreterSelectQuery( diff --git a/src/Storages/ProjectionsDescription.h b/src/Storages/ProjectionsDescription.h index 493537d7c94e..bbc29014dd20 100644 --- a/src/Storages/ProjectionsDescription.h +++ b/src/Storages/ProjectionsDescription.h @@ -103,6 +103,11 @@ struct ProjectionDescription * @brief Calculates the projection result for a given input block. * * @param block The input block used to evaluate the projection. + * @param starting_offset The absolute starting row index of the current `block` within the + * source data part. It is used to calculate the value of the virtual `_part_offset` + * column (i.e., `_part_offset = starting_offset + row_index`). This column is + * essential for mapping projection rows back to their original positions in the + * parent part during merge or mutation. * @param context The query context. A copy will be made internally with adjusted settings. * @param perm_ptr Optional pointer to a permutation vector. If provided, it is used to map * the output rows back to their original order in the parent block. This is necessary * * @return The resulting block after executing the projection query. 
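 *
 * Illustrative sketch of the formula above (hypothetical values, not taken from real data):
 *   Rebuild path:   a 3-row block taken from absolute offset 100 of the parent part, perm_ptr == nullptr,
 *                   starting_offset = 100  ->  _part_offset = {100, 101, 102}   (starting_offset + row_index)
 *   Insertion path: starting_offset == 0, permutation {2, 0, 1},
 *                   offset[(*perm_ptr)[i]] = i  ->  _part_offset = {1, 2, 0}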
*/ - Block calculate(const Block & block, ContextPtr context, const IColumnPermutation * perm_ptr = nullptr) const; + Block calculate(const Block & block, UInt64 starting_offset, ContextPtr context, const IColumnPermutation * perm_ptr = nullptr) const; String getDirectoryName() const { return name + ".proj"; } }; diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index afd146173228..7d532c02cce0 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -660,16 +661,21 @@ void registerStorageGenerateRandom(StorageFactory & factory) if (!engine_args.empty()) { - const auto & ast_literal = engine_args[0]->as(); - if (!ast_literal.value.isNull()) - random_seed = checkAndGetLiteralArgument(ast_literal, "random_seed"); + engine_args[0] = evaluateConstantExpressionAsLiteral(engine_args[0], args.getLocalContext()); + random_seed = checkAndGetLiteralArgument(engine_args[0], "random_seed"); } if (engine_args.size() >= 2) - max_string_length = checkAndGetLiteralArgument(engine_args[1], "max_string_length"); + { + engine_args[1] = evaluateConstantExpressionAsLiteral(engine_args[1], args.getLocalContext()); + max_string_length = checkAndGetLiteralArgument(engine_args[1], "max_string_length"); + } if (engine_args.size() == 3) + { + engine_args[2] = evaluateConstantExpressionAsLiteral(engine_args[2], args.getLocalContext()); max_array_length = checkAndGetLiteralArgument(engine_args[2], "max_array_length"); + } return std::make_shared(args.table_id, args.columns, args.comment, max_array_length, max_string_length, random_seed); }); diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 49b976e22b55..71e83abf0a17 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -544,7 +544,7 @@ StorageMaterializedView::prepareRefresh(bool append, ContextMutablePtr refresh_c auto inner_table_id = getTargetTableId(); StorageID target_table = inner_table_id; - auto select_query = getInMemoryMetadataPtr()->getSelectQuery().select_query; + auto select_query = getInMemoryMetadataPtr()->getSelectQuery().select_query->clone(); InterpreterSetQuery::applySettingsFromQuery(select_query, refresh_context); if (!append) @@ -558,7 +558,8 @@ StorageMaterializedView::prepareRefresh(bool append, ContextMutablePtr refresh_c /// Pre-check the permissions. Would be awkward if we create a temporary table and can't drop it.
refresh_context->checkAccess(AccessType::DROP_TABLE | AccessType::CREATE_TABLE | AccessType::SELECT | AccessType::INSERT, db_name); - auto create_query = std::dynamic_pointer_cast(db->getCreateTableQuery(inner_table_id.table_name, getContext())); + auto create_query + = std::dynamic_pointer_cast(db->getCreateTableQuery(inner_table_id.table_name, getContext())->clone()); create_query->setTable(new_table_name); create_query->setDatabase(db_name); create_query->create_or_replace = true; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 49b5fb2ff86b..596f0965ced0 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -227,9 +227,11 @@ ColumnsDescription StorageMerge::getColumnsDescriptionFromSourceTablesImpl( if (!t) return false; - if (auto id = t->getStorageID(); !access->isGranted(AccessType::SHOW_TABLES, id.database_name, id.table_name)) + const auto storage_id = t->getStorageID(); + if (!access->isGranted(AccessType::SHOW_TABLES, storage_id.database_name, storage_id.table_name)) return false; + access->checkAccess(AccessType::SHOW_COLUMNS, storage_id.database_name, storage_id.table_name); auto structure = t->getInMemoryMetadataPtr()->getColumns(); String prev_column_name; for (const ColumnDescription & column : structure) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 916efd99d39d..4b04fe1915f1 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -33,6 +33,7 @@ #include #include +#include #include #include @@ -127,9 +128,10 @@ #include #include +#include +#include #include #include -#include #include #include @@ -259,6 +261,8 @@ namespace FailPoints extern const char zero_copy_unlock_zk_fail_before_op[]; extern const char zero_copy_unlock_zk_fail_after_op[]; extern const char rmt_lightweight_update_sleep_after_block_allocation[]; + extern const char rmt_merge_selecting_task_pause_when_scheduled[]; + extern const char rmt_delay_execute_drop_range[]; } namespace ErrorCodes @@ -2778,6 +2782,8 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) { LOG_TRACE(log, "Executing DROP_RANGE {}", entry.new_part_name); + fiu_do_on(FailPoints::rmt_delay_execute_drop_range, { sleepForSeconds(10); }); + auto drop_range_info = MergeTreePartInfo::fromPartName(entry.new_part_name, format_version); /// Wait for loading of outdated parts because DROP_RANGE @@ -8937,12 +8943,13 @@ void StorageReplicatedMergeTree::clearLockedBlockNumbersInPartition( LOG_WARNING(log, new_version_warning, paths_to_get[i], result.data); Stopwatch time_waiting; - const auto & stop_waiting = [this, &time_waiting]() - { - auto timeout = getContext()->getSettingsRef()[Setting::lock_acquire_timeout].value.seconds(); - return partial_shutdown_called || (timeout < time_waiting.elapsedSeconds()); - }; - zookeeper.waitForDisappear(paths_to_get[i], stop_waiting); + const Int64 timeout = getContext()->getSettingsRef()[Setting::lock_acquire_timeout].value.totalSeconds(); + const auto & stop_waiting + = [this, &time_waiting, &timeout]() { return partial_shutdown_called || (timeout < Int64(time_waiting.elapsedSeconds())); }; + + if (!zookeeper.waitForDisappear(paths_to_get[i], stop_waiting)) + throw Exception( + ErrorCodes::TIMEOUT_EXCEEDED, "Path {} does not disappear, timed out after {} seconds", paths_to_get[i], timeout); } } } diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 
dceb291d5d50..4db9bcd2cfef 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -49,7 +49,7 @@ static String getEngineFull(const ContextPtr & ctx, const DatabasePtr & database while (true) { String name = database->getDatabaseName(); - guard = DatabaseCatalog::instance().getDDLGuard(name, ""); + guard = DatabaseCatalog::instance().getDDLGuard(name, "", nullptr); /// Ensure that the database was not renamed before we acquired the lock auto locked_database = DatabaseCatalog::instance().tryGetDatabase(name); diff --git a/src/Storages/tests/gtest_delta_kernel.cpp b/src/Storages/tests/gtest_delta_kernel.cpp index bd1900304887..a3e36c53f6e6 100644 --- a/src/Storages/tests/gtest_delta_kernel.cpp +++ b/src/Storages/tests/gtest_delta_kernel.cpp @@ -1,9 +1,10 @@ #include "config.h" +#include + #if USE_DELTA_KERNEL_RS #include -#include #include #include #include @@ -69,3 +70,19 @@ TEST_F(DeltaKernelTest, ExpressionVisitor) } #endif + +#if USE_PARQUET + +#include +#include +#include +#include + +/// Regression test for segfault +TEST(DeltaLakeMetadata, GetFieldValueNullableDateTime64) +{ + auto nullable_datetime64_type = std::make_shared(std::make_shared(6, "UTC")); + ASSERT_NO_THROW(DB::DeltaLakeMetadata::getFieldValue("2024-01-15 10:30:45.123456", nullable_datetime64_type)); +} + +#endif diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index 407fc2d4a7bd..de88fc85b46c 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -1,22 +1,13 @@ #!/usr/bin/env python3 -import gzip import io -import json -import logging import os -import random -import threading import time import pytest from azure.storage.blob import BlobServiceClient -import helpers.client -from helpers.cluster import ClickHouseCluster, ClickHouseInstance -from helpers.mock_servers import start_mock_servers -from helpers.network import PartitionManager -from helpers.test_tools import exec_query_with_retry +from helpers.cluster import ClickHouseCluster def generate_cluster_def(port): @@ -98,6 +89,15 @@ def cluster(): cluster.shutdown() +backup_id_counter = 0 + + +def new_backup_name(): + global backup_id_counter + backup_id_counter += 1 + return f"backup{backup_id_counter}" + + def azure_query( node, query, expect_error=False, try_num=10, settings={}, query_on_retry=None ): @@ -278,6 +278,98 @@ def test_backup_restore_with_named_collection_azure_conf2(cluster): ) +def test_backup_restore_with_sql_named_collection_azure(cluster): + """Test backup using a named collection created via SQL (not XML config).""" + node = cluster.instances["node"] + port = cluster.env_variables["AZURITE_PORT"] + + # Create a named collection via SQL + azure_query( + node, + f""" + CREATE NAMED COLLECTION IF NOT EXISTS sql_azure_backup_collection AS + connection_string = '{cluster.env_variables['AZURITE_CONNECTION_STRING']}', + container = 'cont' + """, + ) + + try: + azure_query(node, "DROP TABLE IF EXISTS test_sql_nc_backup") + azure_query( + node, + f"CREATE TABLE test_sql_nc_backup (key UInt64, data String) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_sql_nc_backup.csv', 'CSV')", + ) + azure_query( + node, + "INSERT INTO test_sql_nc_backup SETTINGS azure_truncate_on_insert = 1 VALUES (1, 'a')", + ) + + backup_name = new_backup_name() + backup_destination = 
f"AzureBlobStorage(sql_azure_backup_collection, '{backup_name}')" + azure_query( + node, + f"BACKUP TABLE test_sql_nc_backup TO {backup_destination}", + ) + print(get_azure_file_content(f"{backup_name}/.backup", port)) + azure_query(node, "DROP TABLE IF EXISTS test_sql_nc_backup_restored") + azure_query( + node, + f"RESTORE TABLE test_sql_nc_backup AS test_sql_nc_backup_restored FROM {backup_destination};", + ) + assert ( + azure_query(node, "SELECT * from test_sql_nc_backup_restored") == "1\ta\n" + ) + finally: + azure_query(node, "DROP NAMED COLLECTION IF EXISTS sql_azure_backup_collection") + + +def test_backup_restore_with_sql_named_collection_azure_with_overrides(cluster): + """Test backup using a SQL named collection with key-value overrides.""" + node = cluster.instances["node"] + port = cluster.env_variables["AZURITE_PORT"] + + # Create a named collection via SQL with placeholder blob_path + azure_query( + node, + f""" + CREATE NAMED COLLECTION IF NOT EXISTS sql_azure_backup_override AS + connection_string = '{cluster.env_variables['AZURITE_CONNECTION_STRING']}', + container = 'cont', + blob_path = 'placeholder' + """, + ) + + try: + azure_query(node, "DROP TABLE IF EXISTS test_sql_nc_override_backup") + azure_query( + node, + f"CREATE TABLE test_sql_nc_override_backup (key UInt64, data String) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_sql_nc_override.csv', 'CSV')", + ) + azure_query( + node, + "INSERT INTO test_sql_nc_override_backup SETTINGS azure_truncate_on_insert = 1 VALUES (2, 'b')", + ) + + backup_name = new_backup_name() + # Override the blob_path via key-value argument + backup_destination = f"AzureBlobStorage(sql_azure_backup_override, blob_path='{backup_name}')" + azure_query( + node, + f"BACKUP TABLE test_sql_nc_override_backup TO {backup_destination}", + ) + print(get_azure_file_content(f"{backup_name}/.backup", port)) + azure_query(node, "DROP TABLE IF EXISTS test_sql_nc_override_restored") + azure_query( + node, + f"RESTORE TABLE test_sql_nc_override_backup AS test_sql_nc_override_restored FROM {backup_destination};", + ) + assert ( + azure_query(node, "SELECT * from test_sql_nc_override_restored") == "2\tb\n" + ) + finally: + azure_query(node, "DROP NAMED COLLECTION IF EXISTS sql_azure_backup_override") + + def test_backup_restore_on_merge_tree(cluster): node = cluster.instances["node"] azure_query( diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index fb985b61ac88..5892c6173ad7 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -1,5 +1,3 @@ -import concurrent - import pytest from helpers.cluster import ClickHouseCluster, ClickHouseInstance diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 6486fa9ae61d..37d397cdc813 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -374,6 +374,55 @@ def test_backup_to_s3_named_collection(): check_backup_and_restore(cluster, storage_policy, backup_destination) +def test_backup_to_s3_named_collection_sql(): + """Test backup using a named collection created via SQL (not XML config).""" + node = cluster.instances["node"] + storage_policy = "default" + backup_name = new_backup_name() + + # Create a 
named collection via SQL + node.query( + f""" + CREATE NAMED COLLECTION IF NOT EXISTS sql_named_collection_s3_backup AS + url = 'http://minio1:9001/root/data/backups', + access_key_id = 'minio', + secret_access_key = '{minio_secret_key}' + """ + ) + + try: + backup_destination = f"S3(sql_named_collection_s3_backup, '{backup_name}')" + check_backup_and_restore(cluster, storage_policy, backup_destination) + finally: + node.query("DROP NAMED COLLECTION IF EXISTS sql_named_collection_s3_backup") + + +def test_backup_to_s3_named_collection_sql_with_overrides(): + """Test backup using a SQL named collection with key-value overrides.""" + node = cluster.instances["node"] + storage_policy = "default" + backup_name = new_backup_name() + + # Create a named collection via SQL with a placeholder URL + node.query( + f""" + CREATE NAMED COLLECTION IF NOT EXISTS sql_named_collection_s3_backup_override AS + url = 'http://minio1:9001/root/data/placeholder', + access_key_id = 'minio', + secret_access_key = '{minio_secret_key}' + """ + ) + + try: + # Override the URL via key-value argument + backup_destination = f"S3(sql_named_collection_s3_backup_override, url='http://minio1:9001/root/data/backups/{backup_name}')" + check_backup_and_restore(cluster, storage_policy, backup_destination) + finally: + node.query( + "DROP NAMED COLLECTION IF EXISTS sql_named_collection_s3_backup_override" + ) + + def test_backup_to_s3_multipart(): storage_policy = "default" backup_name = new_backup_name() diff --git a/tests/integration/test_dirty_pages_force_purge/__init__.py b/tests/integration/test_dirty_pages_force_purge/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integration/test_dirty_pages_force_purge/configs/overrides.yaml b/tests/integration/test_dirty_pages_force_purge/configs/overrides.yaml new file mode 100644 index 000000000000..195236e51dde --- /dev/null +++ b/tests/integration/test_dirty_pages_force_purge/configs/overrides.yaml @@ -0,0 +1,3 @@ +--- +max_server_memory_usage: 4Gi +memory_worker_purge_dirty_pages_threshold_ratio: 0.2 diff --git a/tests/integration/test_dirty_pages_force_purge/test.py b/tests/integration/test_dirty_pages_force_purge/test.py new file mode 100644 index 000000000000..24f5a18fab58 --- /dev/null +++ b/tests/integration/test_dirty_pages_force_purge/test.py @@ -0,0 +1,69 @@ +import time + +import pytest + +from helpers.cluster import ClickHouseCluster + +# Jemalloc configuration used in this test disables automated dirty pages purge, +# to simplify its accumulation, and verify that they'll be cleaned up by the +# MemoryWorker +MALLOC_CONF = "background_thread:false,dirty_decay_ms:-1,muzzy_decay_ms:0,oversize_threshold:0" +PEAK_MEMORY_UPPER_BOUND = 3 * 1024 * 1024 * 1024 + +PEAK_MEMORY_COUNTER_PATHS = [ + "/sys/fs/cgroup/memory/memory.max_usage_in_bytes", # cgroup v1 + "/sys/fs/cgroup/memory.peak", # cgroup v2 +] + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/overrides.yaml"], + env_variables={"MALLOC_CONF": MALLOC_CONF}, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_dirty_pages_force_purge(start_cluster): + if node.is_built_with_sanitizer(): + pytest.skip("Jemalloc disabled in sanitizer builds") + + purges = "" + for _ in range(100): + node.query(""" + SELECT arrayMap(x -> randomPrintableASCII(40), range(4096)) + FROM numbers(2048) + FORMAT Null + """) + + purges = node.query("SELECT value 
from system.events where event = 'MemoryAllocatorPurge'") + if purges: + break + + time.sleep(0.2) + + if not purges: + raise TimeoutError("Timed out waiting for MemoryAllocatorPurge event") + + for path in PEAK_MEMORY_COUNTER_PATHS: + try: + peak_memory = int(node.exec_in_container(["cat", path])) + break + except Exception as ex: + if not str(ex).lower().strip().endswith("no such file or directory"): + raise + else: + raise RuntimeError("Failed to find peak memory counter") + + # Assert peak memory usage is lower than expected peak, which is noticeably lower + # than max_server_memory_usage, to guarantee that purge has been cause by dirty pages + # volume, not the memory tracker limit + assert(peak_memory < PEAK_MEMORY_UPPER_BOUND) diff --git a/tests/integration/test_filesystem_cache/config.d/filesystem_caches.xml b/tests/integration/test_filesystem_cache/config.d/filesystem_caches.xml new file mode 100644 index 000000000000..faf4d4f40517 --- /dev/null +++ b/tests/integration/test_filesystem_cache/config.d/filesystem_caches.xml @@ -0,0 +1,20 @@ + + + + cache + hdd_blob + /cache1/ + 1234 + SLRU + 1 + + + cache + hdd_blob + /cache1/ + 1234 + SLRU + 1 + + + diff --git a/tests/integration/test_filesystem_cache/test_size_limit_metric.py b/tests/integration/test_filesystem_cache/test_size_limit_metric.py new file mode 100644 index 000000000000..e4babdefbb8a --- /dev/null +++ b/tests/integration/test_filesystem_cache/test_size_limit_metric.py @@ -0,0 +1,47 @@ +import logging +import os +import random +import time +import uuid + +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.mock_servers import start_mock_servers, start_s3_mock +from helpers.utility import SafeThread, generate_values, replace_config + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node_test_size_limit_metric", + main_configs=[ + "config.d/filesystem_caches_path.xml", + "config.d/filesystem_caches.xml", + ], + user_configs=[ + "users.d/cache_on_write_operations.xml", + ], + stay_alive=True, + ) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def test_cache_size_limit_metric(cluster): + node = cluster.instances["node_test_size_limit_metric"] + assert 1234 == int( + node.query( + "SELECT value FROM system.metrics WHERE name = 'FilesystemCacheSizeLimit'" + ) + ) diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index 0c1ba8e29dff..b65fdede5665 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -332,6 +332,7 @@ def test_create_table(): f"Kafka() SETTINGS kafka_broker_list = '127.0.0.1', kafka_topic_list = 'topic', kafka_group_name = 'group', kafka_format = 'JSONEachRow', kafka_security_protocol = 'sasl_ssl', kafka_sasl_mechanism = 'PLAIN', kafka_sasl_username = 'user', kafka_sasl_password = '{password}', format_avro_schema_registry_url = 'http://schema_user:{password}@'", f"Kafka() SETTINGS kafka_broker_list = '127.0.0.1', kafka_topic_list = 'topic', kafka_group_name = 'group', kafka_format = 'JSONEachRow', kafka_security_protocol = 'sasl_ssl', kafka_sasl_mechanism = 'PLAIN', kafka_sasl_username = 'user', kafka_sasl_password = '{password}', format_avro_schema_registry_url = 'http://schema_user:{password}@domain.com'", 
f"S3('http://minio1:9001/root/data/test5.csv.gz', 'CSV', access_key_id = 'minio', secret_access_key = '{password}', compression_method = 'gzip')", + f"Redis('localhost', 0, '{password}') PRIMARY KEY x;", ] def make_test_case(i): @@ -423,6 +424,7 @@ def make_test_case(i): "CREATE TABLE table44 (`x` int) ENGINE = Kafka SETTINGS kafka_broker_list = '127.0.0.1', kafka_topic_list = 'topic', kafka_group_name = 'group', kafka_format = 'JSONEachRow', kafka_security_protocol = 'sasl_ssl', kafka_sasl_mechanism = 'PLAIN', kafka_sasl_username = 'user', kafka_sasl_password = '[HIDDEN]', format_avro_schema_registry_url = 'http://schema_user:[HIDDEN]@'", "CREATE TABLE table45 (`x` int) ENGINE = Kafka SETTINGS kafka_broker_list = '127.0.0.1', kafka_topic_list = 'topic', kafka_group_name = 'group', kafka_format = 'JSONEachRow', kafka_security_protocol = 'sasl_ssl', kafka_sasl_mechanism = 'PLAIN', kafka_sasl_username = 'user', kafka_sasl_password = '[HIDDEN]', format_avro_schema_registry_url = 'http://schema_user:[HIDDEN]@domain.com'", "CREATE TABLE table46 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test5.csv.gz', 'CSV', access_key_id = 'minio', secret_access_key = '[HIDDEN]', compression_method = 'gzip')", + "CREATE TABLE table47 (`x` int) ENGINE = Redis('localhost', 0, '[HIDDEN]') PRIMARY KEY x", ], must_not_contain=[password], ) @@ -555,6 +557,7 @@ def test_table_functions(): f"gcs('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')", f"deltaLakeAzure('{azure_storage_account_url}', 'cont', 'test_simple_6.csv', '{azure_account_name}', '{azure_account_key}', 'CSV', 'none', 'auto')", f"hudi('http://minio1:9001/root/data/test7.csv', 'minio', '{password}')", + f"redis('localhost', 'key', 'key Int64', 0, '{password}')" ] def make_test_case(i): @@ -653,6 +656,7 @@ def make_test_case(i): "CREATE TABLE tablefunc55 (`x` int) AS gcs('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')", f"CREATE TABLE tablefunc56 (`x` int) AS deltaLakeAzure('{azure_storage_account_url}', 'cont', 'test_simple_6.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none', 'auto')", "CREATE TABLE tablefunc57 (`x` int) AS hudi('http://minio1:9001/root/data/test7.csv', 'minio', '[HIDDEN]')", + "CREATE TABLE tablefunc58 (`x` int) AS redis('localhost', 'key', 'key Int64', 0, '[HIDDEN]')", ], must_not_contain=[password], ) @@ -864,6 +868,151 @@ def test_backup_to_s3(): node.query("DROP TABLE IF EXISTS temptbl2") +def test_backup_table_s3_named_collection(): + """Test that secrets in S3 named collection backups are masked in system.backups and logs.""" + password = new_password() + + setup_queries = [ + "CREATE TABLE backup_test_s3_nc (x int) ENGINE = MergeTree ORDER BY x", + "INSERT INTO backup_test_s3_nc SELECT * FROM numbers(10)", + ] + + for query in setup_queries: + node.query_and_get_answer_with_error(query) + + # Create named collection for S3 backup + node.query( + f"CREATE NAMED COLLECTION IF NOT EXISTS s3_backup_nc AS " + f"url = 'http://minio1:9001/root/data/backups/nc_backup_test_base', " + f"access_key_id = 'minio', " + f"secret_access_key = '{password}'" + ) + + # Test 1: Using named collection directly + base_backup = "S3(s3_backup_nc)" + inc_backup = "S3(s3_backup_nc, 'nc_backup_test_incremental')" + + node.query_and_get_answer_with_error(f"BACKUP TABLE backup_test_s3_nc TO {base_backup} ASYNC")[0] + + inc_backup_query_output = node.query_and_get_answer_with_error( + f"BACKUP TABLE backup_test_s3_nc TO {inc_backup} SETTINGS async=1, base_backup={base_backup}" + )[0] + inc_backup_id = 
TSV.toMat(inc_backup_query_output)[0][0] + names_in_system_backups_output, _ = node.query_and_get_answer_with_error( + f"SELECT base_backup_name, name FROM system.backups where id = '{inc_backup_id}'" + ) + + base_backup_name, name = TSV.toMat(names_in_system_backups_output)[0] + + assert password not in base_backup_name + assert password not in name + + # Test 2: Using named collection with secret_access_key override + password2 = new_password() + base_backup2 = f"S3(s3_backup_nc, 'nc_backup_test_base2', secret_access_key = '{password2}')" + inc_backup2 = f"S3(s3_backup_nc, 'nc_backup_test_incremental2', secret_access_key = '{password2}')" + + node.query_and_get_answer_with_error(f"BACKUP TABLE backup_test_s3_nc TO {base_backup2} ASYNC")[0] + + inc_backup_query_output2 = node.query_and_get_answer_with_error( + f"BACKUP TABLE backup_test_s3_nc TO {inc_backup2} SETTINGS async=1, base_backup={base_backup2}" + )[0] + inc_backup_id2 = TSV.toMat(inc_backup_query_output2)[0][0] + names_in_system_backups_output2, _ = node.query_and_get_answer_with_error( + f"SELECT base_backup_name, name FROM system.backups where id = '{inc_backup_id2}'" + ) + + base_backup_name2, name2 = TSV.toMat(names_in_system_backups_output2)[0] + + assert password2 not in base_backup_name2 + assert password2 not in name2 + + # Check logs don't contain secrets and key-value args are masked + check_logs( + must_contain=[ + "BACKUP TABLE backup_test_s3_nc TO S3(s3_backup_nc, 'nc_backup_test_base2', secret_access_key = '[HIDDEN]')", + ], + must_not_contain=[password, password2], + ) + + node.query("DROP TABLE IF EXISTS backup_test_s3_nc") + node.query("DROP NAMED COLLECTION IF EXISTS s3_backup_nc") + + +def test_backup_table_azure_named_collection(): + """Test that secrets in Azure named collection backups are masked in system.backups and logs.""" + azure_storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"] + azure_account_name = "devstoreaccount1" + azure_account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" + + setup_queries = [ + "CREATE TABLE backup_test_az_nc (x int) ENGINE = MergeTree ORDER BY x", + "INSERT INTO backup_test_az_nc SELECT * FROM numbers(10)", + ] + + for query in setup_queries: + node.query_and_get_answer_with_error(query) + + # Create named collection for Azure backup (using storage_account_url variant) + node.query( + f"CREATE NAMED COLLECTION IF NOT EXISTS azure_backup_nc AS " + f"storage_account_url = '{azure_storage_account_url}', " + f"container = 'cont', " + f"account_name = '{azure_account_name}', " + f"account_key = '{azure_account_key}'" + ) + + # Test 1: Using named collection directly + base_backup = "AzureBlobStorage(azure_backup_nc, 'az_nc_backup_test_base')" + inc_backup = "AzureBlobStorage(azure_backup_nc, 'az_nc_backup_test_incremental')" + + node.query_and_get_answer_with_error(f"BACKUP TABLE backup_test_az_nc TO {base_backup} ASYNC")[0] + + inc_backup_query_output = node.query_and_get_answer_with_error( + f"BACKUP TABLE backup_test_az_nc TO {inc_backup} SETTINGS async=1, base_backup={base_backup}" + )[0] + inc_backup_id = TSV.toMat(inc_backup_query_output)[0][0] + names_in_system_backups_output, _ = node.query_and_get_answer_with_error( + f"SELECT base_backup_name, name FROM system.backups where id = '{inc_backup_id}'" + ) + + base_backup_name, name = TSV.toMat(names_in_system_backups_output)[0] + + assert azure_account_key not in base_backup_name + assert azure_account_key not in name + + # Test 2: Using named 
collection with account_key override + password2 = new_password() + base_backup2 = f"AzureBlobStorage(azure_backup_nc, 'az_nc_backup_test_base2', account_key = '{password2}')" + inc_backup2 = f"AzureBlobStorage(azure_backup_nc, 'az_nc_backup_test_incremental2', account_key = '{password2}')" + + node.query_and_get_answer_with_error(f"BACKUP TABLE backup_test_az_nc TO {base_backup2} ASYNC")[0] + + inc_backup_query_output2 = node.query_and_get_answer_with_error( + f"BACKUP TABLE backup_test_az_nc TO {inc_backup2} SETTINGS async=1, base_backup={base_backup2}" + )[0] + inc_backup_id2 = TSV.toMat(inc_backup_query_output2)[0][0] + names_in_system_backups_output2, _ = node.query_and_get_answer_with_error( + f"SELECT base_backup_name, name FROM system.backups where id = '{inc_backup_id2}'" + ) + + base_backup_name2, name2 = TSV.toMat(names_in_system_backups_output2)[0] + + assert password2 not in base_backup_name2 + assert password2 not in name2 + + # Check logs don't contain secrets and key-value args are masked + check_logs( + must_contain=[ + "BACKUP TABLE backup_test_az_nc TO AzureBlobStorage(azure_backup_nc, 'az_nc_backup_test_base2', account_key = '[HIDDEN]')", + ], + must_not_contain=[azure_account_key, password2], + ) + + node.query("DROP TABLE IF EXISTS backup_test_az_nc") + node.query("DROP NAMED COLLECTION IF EXISTS azure_backup_nc") + + def test_on_cluster(): password = new_password() diff --git a/tests/integration/test_projection_rebuild_with_required_columns/test.py b/tests/integration/test_projection_rebuild_with_required_columns/test.py index 27728c75b7b7..d0fdf03e402e 100644 --- a/tests/integration/test_projection_rebuild_with_required_columns/test.py +++ b/tests/integration/test_projection_rebuild_with_required_columns/test.py @@ -19,12 +19,21 @@ def started_cluster(): def test_projection_rebuild_uses_only_required_columns(started_cluster): + # Here we check that projection rebuild does not create too many temporary parts. + # The size of temporary projection part is limited by min_insert_block_size_bytes/min_insert_block_size_rows, so they are changed in the config. 
+ + node1.query("drop table if exists tab") node1.query("create table tab (x UInt64, y UInt64, data String codec(NONE), v UInt8, projection p (select _part_offset order by y)) engine = ReplacingMergeTree(v) order by x settings allow_part_offset_column_in_projections=1, deduplicate_merge_projection_mode='rebuild';") - node1.query("insert into tab select number, number, rightPad('', 100, 'a'), 0 from numbers(30000);") + # Here we expect 3 parts to be inserted, contrilled by max_block_size=min_insert_block_size_rows=10000 + node1.query("insert into tab select number, number, rightPad('', 100, 'a'), 0 from numbers(30000) settings max_block_size=10000;") + # Here we merge parts, and projections should be rebuild + # Initially we kept `data` column in projection squash, ~10 temporary parts were created by min_insert_block_size_bytes limit node1.query("optimize table tab final settings mutations_sync=2, alter_sync=2;") node1.query("system flush logs;") uuid = node1.query("select uuid from system.tables where table = 'tab';").strip() - cnt = node1.query("select count() from system.text_log where query_id = '{}::all_1_1_2' and message like '%Reading%from part p_%from the beginning of the part%'".format(uuid)) - assert (cnt == '2\n') - + cnt = node1.query("select count() from system.text_log where query_id like '{}::all_%_2' and message like '%Reading%from part p_%from the beginning of the part%'".format(uuid)) + # One projection part per source part + assert (cnt == '3\n') + # Here we check that _parent_part_offset is calculated properly. It was fixed in https://github.com/ClickHouse/ClickHouse/pull/93827 + assert(node1.query("select min(_parent_part_offset), max(_parent_part_offset) from mergeTreeProjection(default, tab, 'p')") == '0\t29999\n') diff --git a/tests/integration/test_prometheus_protocols/test_write_read.py b/tests/integration/test_prometheus_protocols/test_write_read.py index e890d0558ab0..a7c198ca16f4 100644 --- a/tests/integration/test_prometheus_protocols/test_write_read.py +++ b/tests/integration/test_prometheus_protocols/test_write_read.py @@ -21,9 +21,8 @@ ) -# Data are inserted via RemoteWrite protocol to ClickHouse, -# we need to wait a bit until we get some data. -def wait_for_data(): +# Waits until Prometheus scrapes some data and sends it to ClickHouse via the RemoteWrite protocol. +def wait_for_scraped_data(): start_time = time.monotonic() assert_eq_with_retry( node, "SELECT count() > 0 FROM timeSeriesData(prometheus)", "1" @@ -40,6 +39,18 @@ def wait_for_data(): ) +# Sends lots of data to ClickHouse via the RemoteWrite protocol. +def send_big_data(metric_name="big_data", start_time=1724112000, end_time=1724115600, count=75000): + time_series = [] + step = (end_time - start_time) / count + for i in range(0, count): + timestamp = start_time + i * step + value = i + time_series.append(({"__name__": metric_name}, {timestamp: value})) + protobuf = convert_time_series_to_protobuf(time_series) + send_protobuf_to_remote_write(node.ip_address, 9093, "/write", protobuf) + + # Executes a query in the "prometheus_reader" service. This service uses the RemoteRead protocol to get data from ClickHouse. 
def execute_query_in_prometheus_reader(query, timestamp): return execute_query_via_http_api( @@ -83,13 +94,14 @@ def start_cluster(): try: cluster.start() node.query("CREATE TABLE prometheus ENGINE=TimeSeries") + wait_for_scraped_data() + send_big_data() yield cluster finally: cluster.shutdown() def test_handle_normal_scrape(): - wait_for_data() query = "up" evaluation_time = time.time() result = execute_query_in_prometheus(query, evaluation_time) @@ -103,8 +115,6 @@ def test_handle_normal_scrape(): def test_remote_read_auth(): - wait_for_data() - read_request = convert_read_request_to_protobuf( "^up$", time.time() - 300, time.time() ) @@ -128,3 +138,19 @@ def test_remote_read_auth(): read_request, ) assert auth_fail_response.status_code == requests.codes.forbidden + + +def test_remote_read_big_data(): + read_request = convert_read_request_to_protobuf( + "^big_data$", 1724112000, 1724115600 + ) + + read_response = receive_protobuf_from_remote_read( + node.ip_address, + 9093, + "read_auth_ok", + read_request) + + assert len(read_response.results) == 1 + assert len(read_response.results[0].timeseries) == 1 + assert len(read_response.results[0].timeseries[0].samples) == 75000 diff --git a/tests/integration/test_race_condition_for_replicated_merge_tree/__init__.py b/tests/integration/test_race_condition_for_replicated_merge_tree/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integration/test_race_condition_for_replicated_merge_tree/configs/users.xml b/tests/integration/test_race_condition_for_replicated_merge_tree/configs/users.xml new file mode 100644 index 000000000000..2daf5e5b0c06 --- /dev/null +++ b/tests/integration/test_race_condition_for_replicated_merge_tree/configs/users.xml @@ -0,0 +1,17 @@ + + + + 1 + 1 + 0 + 0 + 2 + 3 + + + + + default + + + diff --git a/tests/integration/test_race_condition_for_replicated_merge_tree/test.py b/tests/integration/test_race_condition_for_replicated_merge_tree/test.py new file mode 100644 index 000000000000..171ce6a4d4a9 --- /dev/null +++ b/tests/integration/test_race_condition_for_replicated_merge_tree/test.py @@ -0,0 +1,173 @@ +import pytest +import threading +import time +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "node1", + with_zookeeper=True, + user_configs=["configs/users.xml"], + stay_alive=True, + macros={"shard": "s1", "replica": "r1"}, +) +node2 = cluster.add_instance( + "node2", + with_zookeeper=True, + user_configs=["configs/users.xml"], + stay_alive=True, + macros={"shard": "s1", "replica": "r2"}, +) +node3 = cluster.add_instance( + "node3", + with_zookeeper=True, + user_configs=["configs/users.xml"], + stay_alive=True, + macros={"shard": "s1", "replica": "r3"}, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def get_last_block_number_for_partition(node, table_uuid, partition): + query = f""" + SELECT + toInt64(replaceRegexpOne(name, 'block-', '')) as block_number + FROM system.zookeeper + WHERE path = '/clickhouse/tables/{table_uuid}/s1/block_numbers/{partition}' + AND name LIKE 'block-%' + ORDER BY block_number DESC + LIMIT 1 + """ + result = node.query(query).strip() + if result == "": + return None + return int(result) + + +def wait_for_queue_to_process(node, database, table, timeout=60): + """Poll until replication queue is empty.""" + start = time.time() + while time.time() - start < timeout: + result = node.query( + f""" + 
SELECT queue_size, absolute_delay + FROM system.replicas + WHERE database = '{database}' AND table = '{table}' + """ + ) + + if result: + queue_size, delay = map(int, result.strip().split("\t")) + if queue_size == 0 and delay == 0: + print(f"Queue processed on {node.name}") + return True + + time.sleep(0.5) + + raise TimeoutError(f"Queue not processed within {timeout}s") + + +def test_partition_move_drop_race(started_cluster): + for node in [node1, node2, node3]: + node.query(f"DROP DATABASE IF EXISTS test_db SYNC") + + node1.query("SYSTEM ENABLE FAILPOINT rmt_delay_execute_drop_range") + node1.query("SYSTEM ENABLE FAILPOINT rmt_delay_commit_part") + + for node in [node1, node2, node3]: + node.query( + """ + CREATE DATABASE test_db + ENGINE = Replicated('/test/db', '{shard}', '{replica}') + """ + ) + + node1.query( + f""" + CREATE TABLE test_db.tbl (id UInt32, val String) + ENGINE = ReplicatedMergeTree + PARTITION BY id % 10 ORDER BY id + SETTINGS old_parts_lifetime = 1 + """ + ) + + for node in [node1, node2, node3]: + assert node.query_with_retry( + "SELECT count() FROM system.tables WHERE database='test_db' AND name='tbl'", + check_callback=lambda x: x.strip() == "1", + ) + + table_uuid = node1.query( + "SELECT uuid FROM system.tables WHERE database='test_db' AND name='tbl'" + ).strip() + + node2.query(f"SYSTEM STOP FETCHES test_db.tbl") + node3.query(f"SYSTEM STOP FETCHES test_db.tbl") + + prev_block_number = get_last_block_number_for_partition(node1, table_uuid, 0) + + exception_holder = [None] + def drop_op(): + try: + # Wait until INSERT query allocates the block number + timeout = 60 # seconds + start_time = time.time() + + while True: + current_block_number = get_last_block_number_for_partition( + node1, table_uuid, 0 + ) + if current_block_number != prev_block_number: + break + + if time.time() - start_time > timeout: + raise TimeoutError( + f"Timeout waiting for block number to change after {timeout}s. 
" + f"Previous: {prev_block_number}, Current: {current_block_number}" + ) + time.sleep(0.5) + + node1.query(f"ALTER TABLE test_db.tbl DROP PARTITION 0") + except Exception as e: + exception_holder[0] = e + + t = threading.Thread(target=drop_op) + t.start() + + node1.query(f"INSERT INTO test_db.tbl VALUES (0, 'a'), (10, 'b'), (20, 'c')") + t.join() + + if exception_holder[0]: + raise exception_holder[0] + + wait_for_queue_to_process(node1, "test_db", "tbl") + node3.query(f"SYSTEM START FETCHES test_db.tbl") + wait_for_queue_to_process(node3, "test_db", "tbl") + + errors = [] + for node in [node1, node2, node3]: + lost = int( + node.query( + "SELECT lost_part_count FROM system.replicas WHERE database = 'test_db' AND table = 'tbl'" + ).strip() + ) + + print(f"{node.name} lost_parts: {lost}") + + if lost > 0: + errors.append(f"{node.name} has {lost} lost parts") + + if errors: + pytest.fail(f"Race: {'; '.join(errors)}") + + for node in [node1, node2, node3]: + node.query(f"DROP DATABASE IF EXISTS test_db SYNC") diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py index 39057083ce16..793cd27c4f1f 100644 --- a/tests/integration/test_storage_iceberg/test.py +++ b/tests/integration/test_storage_iceberg/test.py @@ -3925,7 +3925,6 @@ def check_validity_and_get_prunned_files(select_expression): ) - def test_iceberg_write_minmax(started_cluster): instance = started_cluster.instances["node1"] TABLE_NAME = "test_iceberg_write_minmax_" + get_uuid_str() diff --git a/tests/integration/test_table_functions_access_rights/test.py b/tests/integration/test_table_functions_access_rights/test.py index b6e6f0c1f03c..ce63280dbb91 100644 --- a/tests/integration/test_table_functions_access_rights/test.py +++ b/tests/integration/test_table_functions_access_rights/test.py @@ -36,7 +36,8 @@ def cleanup_after_test(): def test_merge(): - select_query = "SELECT * FROM merge('default', 'table[0-9]+') ORDER BY x" + merge_spec = "merge('default', 'table[0-9]+')" + select_query = f"SELECT * FROM {merge_spec} ORDER BY x" assert instance.query(select_query) == "1\n2\n" instance.query("CREATE USER A") @@ -62,6 +63,20 @@ def test_merge(): in instance.query_and_get_error(select_query, user="A") ) + instance.query("REVOKE ALL ON default.* FROM A") + describe_query = f"DESCRIBE TABLE {merge_spec}" + assert ( + "Either there is no database, which matches regular expression `default`, or there are no tables in the database matches `default`, which fit tables expression: table[0-9]+" + in instance.query_and_get_error(describe_query, user="A") + ) + instance.query("GRANT SHOW TABLES ON default.table1 TO A") + assert ( + "it's necessary to have the grant SHOW COLUMNS ON default.table1" + in instance.query_and_get_error(describe_query, user="A") + ) + instance.query("GRANT SHOW COLUMNS ON default.table1 TO A") + assert instance.query(describe_query) == "x\tUInt32\t\t\t\t\t\n" + def test_view_if_permitted(): assert ( diff --git a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py index 05bcd6208731..b11b7acc8f53 100644 --- a/tests/integration/test_throttling/test.py +++ b/tests/integration/test_throttling/test.py @@ -16,6 +16,7 @@ # - and that max_backup_bandwidth from the query will override setting from the user profile import time +import uuid import pytest @@ -24,11 +25,28 @@ cluster = ClickHouseCluster(__file__) -def elapsed(func, *args, **kwargs): - start = time.time() - ret = func(*args, **kwargs) - end = time.time() - return ret, end - start 
+def elapsed(node, query, **kwargs): + # Generate a unique query_id to reliably find this query in logs + query_id = f"elapsed_{uuid.uuid4().hex}" + + # Execute the query with the unique query_id + ret = node.query(query, query_id=query_id, **kwargs) + + # Flush logs to ensure the query appears in system.query_log + node.query("SYSTEM FLUSH LOGS query_log") + + # Get the server-side query duration from system.query_log using the query_id + duration_result = node.query( + f""" + SELECT query_duration_ms / 1000.0 as duration + FROM system.query_log + WHERE type = 'QueryFinish' + AND query_id = '{query_id}' + LIMIT 1 + """ + ) + duration = float(duration_result.strip()) + return ret, duration node = cluster.add_instance( @@ -281,7 +299,7 @@ def test_backup_throttling(policy, backup_storage, mode, setting, value, should_ insert into data select * from numbers(1e6); """ ) - _, took = elapsed(node.query, f"backup table data to {next_backup_name(backup_storage)}") + _, took = elapsed(node, f"backup table data to {next_backup_name(backup_storage)}") assert_took(took, should_take) @@ -297,7 +315,7 @@ def test_backup_throttling_override(): backup_name = next_backup_name("local") _, took = elapsed( - node.query, + node, f"backup table data to {backup_name}", settings={ "max_backup_bandwidth": "500K", @@ -365,7 +383,7 @@ def test_read_throttling(policy, mode, setting, value, should_take): insert into data select * from numbers(1e6); """ ) - _, took = elapsed(node.query, f"select * from data") + _, took = elapsed(node, f"select * from data") assert_took(took, should_take) @@ -378,7 +396,7 @@ def test_remote_read_throttling_reload(): """ ) # without bandwidth limit - _, took = elapsed(node.query, f"select * from data") + _, took = elapsed(node, f"select * from data") assert_took(took, 0) # add bandwidth limit and reload config on fly @@ -388,7 +406,7 @@ def test_remote_read_throttling_reload(): node.query("SYSTEM RELOAD CONFIG") # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds - _, took = elapsed(node.query, f"select * from data") + _, took = elapsed(node, f"select * from data") assert_took(took, 3) # update bandwidth back to 0 @@ -397,8 +415,8 @@ def test_remote_read_throttling_reload(): ) node.query("SYSTEM RELOAD CONFIG") - _, took = elapsed(node.query, f"select * from data") - assert took < 1 + _, took = elapsed(node, f"select * from data") + assert took < 3 def test_local_read_throttling_reload(): node.query( @@ -409,7 +427,7 @@ def test_local_read_throttling_reload(): """ ) # without bandwidth limit - _, took = elapsed(node.query, f"select * from data") + _, took = elapsed(node, f"select * from data") assert_took(took, 0) # add bandwidth limit and reload config on fly @@ -419,7 +437,7 @@ def test_local_read_throttling_reload(): node.query("SYSTEM RELOAD CONFIG") # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds - _, took = elapsed(node.query, f"select * from data") + _, took = elapsed(node, f"select * from data") assert_took(took, 3) # update bandwidth back to 0 @@ -428,8 +446,8 @@ def test_local_read_throttling_reload(): ) node.query("SYSTEM RELOAD CONFIG") - _, took = elapsed(node.query, f"select * from data") - assert took < 1 + _, took = elapsed(node, f"select * from data") + assert took < 3 @pytest.mark.parametrize( "policy,mode,setting,value,should_take", @@ -488,7 +506,7 @@ def test_write_throttling(policy, mode, setting, value, should_take): create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings 
min_bytes_for_wide_part=1e9, storage_policy='{policy}'; """ ) - _, took = elapsed(node.query, f"insert into data select * from numbers(1e6)") + _, took = elapsed(node, f"insert into data select * from numbers(1e6)") assert_took(took, should_take) @@ -501,7 +519,7 @@ def test_remote_write_throttling_reload(): """ ) # without bandwidth limit - _, took = elapsed(node.query, f"insert into data select * from numbers(1e6)") + _, took = elapsed(node, f"insert into data select * from numbers(1e6)") assert_took(took, 0) # add bandwidth limit and reload config on fly @@ -511,7 +529,7 @@ def test_remote_write_throttling_reload(): node.query("SYSTEM RELOAD CONFIG") # writing 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds - _, took = elapsed(node.query, f"insert into data select * from numbers(1e6)") + _, took = elapsed(node, f"insert into data select * from numbers(1e6)") assert_took(took, 3) # update bandwidth back to 0 @@ -520,8 +538,8 @@ def test_remote_write_throttling_reload(): ) node.query("SYSTEM RELOAD CONFIG") - _, took = elapsed(node.query, f"insert into data select * from numbers(1e6)") - assert took < 1 + _, took = elapsed(node, f"insert into data select * from numbers(1e6)") + assert took < 3 def test_local_write_throttling_reload(): node.query( @@ -532,7 +550,7 @@ def test_local_write_throttling_reload(): """ ) # without bandwidth limit - _, took = elapsed(node.query, f"insert into data select * from numbers(1e6)") + _, took = elapsed(node, f"insert into data select * from numbers(1e6)") assert_took(took, 0) # add bandwidth limit and reload config on fly @@ -542,7 +560,7 @@ def test_local_write_throttling_reload(): node.query("SYSTEM RELOAD CONFIG") # writing 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds - _, took = elapsed(node.query, f"insert into data select * from numbers(1e6)") + _, took = elapsed(node, f"insert into data select * from numbers(1e6)") assert_took(took, 3) # update bandwidth back to 0 @@ -551,8 +569,8 @@ def test_local_write_throttling_reload(): ) node.query("SYSTEM RELOAD CONFIG") - _, took = elapsed(node.query, f"insert into data select * from numbers(1e6)") - assert took < 1 + _, took = elapsed(node, f"insert into data select * from numbers(1e6)") + assert took < 3 def test_max_mutations_bandwidth_for_server(): node.query( @@ -563,7 +581,7 @@ def test_max_mutations_bandwidth_for_server(): ) node.query("insert into data select * from numbers(1e6)") _, took = elapsed( - node.query, + node, "alter table data update key = -key where 1 settings mutations_sync = 1", ) # reading 1e6*8 bytes with 1M/s bandwidth should take (8-1)/1=7 seconds @@ -578,6 +596,6 @@ def test_max_merges_bandwidth_for_server(): """ ) node.query("insert into data select * from numbers(1e6)") - _, took = elapsed(node.query, "optimize table data final") + _, took = elapsed(node, "optimize table data final") # reading 1e6*8 bytes with 1M/s bandwidth should take (8-1)/1=7 seconds assert_took(took, 7) diff --git a/tests/queries/0_stateless/01798_uniq_theta_sketch.oldanalyzer.reference b/tests/queries/0_stateless/01798_uniq_theta_sketch.oldanalyzer.reference index 0455a06036f0..744e7977fca7 100644 --- a/tests/queries/0_stateless/01798_uniq_theta_sketch.oldanalyzer.reference +++ b/tests/queries/0_stateless/01798_uniq_theta_sketch.oldanalyzer.reference @@ -2,7 +2,7 @@ uniqTheta many agrs 10 10 100 100 1000 1000 17 10 10 100 100 610 610 766 52 10 10 100 100 608 608 766 -5 10 10 100 100 608 608 765 +5 10 10 100 100 609 609 765 9 10 10 100 100 608 608 765 13 10 10 
100 100 607 607 765 46 10 10 100 100 607 607 765 diff --git a/tests/queries/0_stateless/01798_uniq_theta_sketch.reference b/tests/queries/0_stateless/01798_uniq_theta_sketch.reference index e1f77d593d57..4bba5ffb944d 100644 --- a/tests/queries/0_stateless/01798_uniq_theta_sketch.reference +++ b/tests/queries/0_stateless/01798_uniq_theta_sketch.reference @@ -2,7 +2,7 @@ uniqTheta many agrs 10 10 100 100 1000 1000 17 10 10 100 100 610 610 766 52 10 10 100 100 608 608 766 -5 10 10 100 100 608 608 765 +5 10 10 100 100 609 609 765 9 10 10 100 100 608 608 765 13 10 10 100 100 607 607 765 46 10 10 100 100 607 607 765 diff --git a/tests/queries/0_stateless/01883_subcolumns_distributed.reference b/tests/queries/0_stateless/01883_subcolumns_distributed.reference index 459f90ada981..ebf283ad196c 100644 --- a/tests/queries/0_stateless/01883_subcolumns_distributed.reference +++ b/tests/queries/0_stateless/01883_subcolumns_distributed.reference @@ -1,2 +1,4 @@ 3 0 bbb ccc 3 0 bbb ccc +3 0 bbb ccc +3 0 bbb ccc diff --git a/tests/queries/0_stateless/01883_subcolumns_distributed.sql b/tests/queries/0_stateless/01883_subcolumns_distributed.sql index 05bab51018f1..72730e796c36 100644 --- a/tests/queries/0_stateless/01883_subcolumns_distributed.sql +++ b/tests/queries/0_stateless/01883_subcolumns_distributed.sql @@ -17,7 +17,10 @@ DROP TABLE t_subcolumns_local; -- StripeLog doesn't support subcolumns. CREATE TABLE t_subcolumns_local (arr Array(UInt32), n Nullable(String), t Tuple(s1 String, s2 String)) ENGINE = StripeLog; -SELECT arr.size0, n.null, t.s1, t.s2 FROM t_subcolumns_dist; -- { serverError UNKNOWN_IDENTIFIER } +INSERT INTO t_subcolumns_local VALUES ([1, 2, 3], 'aaa', ('bbb', 'ccc')); + +SELECT arr.size0, n.null, t.s1, t.s2 FROM t_subcolumns_dist SETTINGS enable_analyzer=1; +SELECT arr.size0, n.null, t.s1, t.s2 FROM t_subcolumns_dist SETTINGS enable_analyzer=0; -- {serverError UNKNOWN_IDENTIFIER} DROP TABLE t_subcolumns_local; DROP TABLE t_subcolumns_dist; diff --git a/tests/queries/0_stateless/02886_missed_json_subcolumns.reference b/tests/queries/0_stateless/02886_missed_json_subcolumns.reference deleted file mode 100644 index 263917c002b1..000000000000 --- a/tests/queries/0_stateless/02886_missed_json_subcolumns.reference +++ /dev/null @@ -1,7 +0,0 @@ -4 1 -{"id":1,"n":"aaa","obj.k4":null} -{"id":2,"n":"bbb","obj.k4":null} -{"id":3,"n":"ccc","obj.k4":null} -{"id":4,"n":"ddd","obj.k4":null} -4 1 -4 1 diff --git a/tests/queries/0_stateless/02886_missed_json_subcolumns.sql b/tests/queries/0_stateless/02886_missed_json_subcolumns.sql deleted file mode 100644 index 9984809ce217..000000000000 --- a/tests/queries/0_stateless/02886_missed_json_subcolumns.sql +++ /dev/null @@ -1,29 +0,0 @@ -DROP TABLE IF EXISTS t_missed_subcolumns; - -SET allow_experimental_object_type = 1; - -CREATE TABLE t_missed_subcolumns (id UInt64, n String, obj Object(Nullable('json'))) -ENGINE = MergeTree ORDER BY id; - -INSERT INTO t_missed_subcolumns VALUES (1, 'aaa', '{"k1": {"k2": "foo"}, "k3": 5}'); -INSERT INTO t_missed_subcolumns VALUES (2, 'bbb', '{"k1": {"k2": "fee"}, "k3": 4}'); -INSERT INTO t_missed_subcolumns VALUES (3, 'ccc', '{"k1": {"k2": "foo", "k4": "baz"}, "k3": 4}'); -INSERT INTO t_missed_subcolumns VALUES (4, 'ddd', '{"k1": {"k2": "foo"}, "k3": 4}'); - -OPTIMIZE TABLE t_missed_subcolumns FINAL; - -SELECT count(), min(id) FROM t_missed_subcolumns; - -SELECT * FROM t_missed_subcolumns WHERE obj.k4 = 5 ORDER BY id FORMAT JSONEachRow; - -SELECT * FROM t_missed_subcolumns WHERE obj.k1.k3 = 'fee' ORDER BY id FORMAT 
JSONEachRow; - -SELECT id, n, obj.k4 FROM t_missed_subcolumns ORDER BY id FORMAT JSONEachRow; - -ALTER TABLE t_missed_subcolumns DELETE WHERE obj.k4 = 5; -SELECT count(), min(id) FROM t_missed_subcolumns; - -DELETE FROM t_missed_subcolumns WHERE obj.k1.k3 = 'fee'; -SELECT count(), min(id) FROM t_missed_subcolumns; - -DROP TABLE IF EXISTS t_missed_subcolumns; diff --git a/tests/queries/0_stateless/03221_merge_profile_events.reference b/tests/queries/0_stateless/03221_merge_profile_events.reference index 6558ad3e5811..c0fdf2217462 100644 --- a/tests/queries/0_stateless/03221_merge_profile_events.reference +++ b/tests/queries/0_stateless/03221_merge_profile_events.reference @@ -1,3 +1,3 @@ -Horizontal 1 20000 3 0 480000 1 1 1 1 1 1 -Vertical 1 20000 1 2 480000 1 1 1 1 1 1 1 1 -Vertical 2 400000 2 3 8000000 1 1 1 1 1 1 1 1 1 1 1 1 +Horizontal 1 20000 3 0 480000 1 1 1 1 1 +Vertical 1 20000 1 2 480000 1 1 1 1 1 1 1 +Vertical 2 400000 2 3 8000000 1 1 1 1 1 1 1 1 1 1 1 diff --git a/tests/queries/0_stateless/03221_merge_profile_events.sql b/tests/queries/0_stateless/03221_merge_profile_events.sql index 8ebaf7121904..ad9702844def 100644 --- a/tests/queries/0_stateless/03221_merge_profile_events.sql +++ b/tests/queries/0_stateless/03221_merge_profile_events.sql @@ -23,7 +23,6 @@ SELECT ProfileEvents['MergeExecuteMilliseconds'] > 0, ProfileEvents['MergeHorizontalStageTotalMilliseconds'] > 0, ProfileEvents['MergeHorizontalStageExecuteMilliseconds'] > 0, - ProfileEvents['UserTimeMicroseconds'] > 0, ProfileEvents['OSCPUVirtualTimeMicroseconds'] > 0, FROM system.part_log WHERE database = currentDatabase() AND table = 't_merge_profile_events_1' AND event_type = 'MergeParts' AND part_name = 'all_1_2_1'; @@ -54,7 +53,6 @@ SELECT ProfileEvents['MergeHorizontalStageExecuteMilliseconds'] > 0, ProfileEvents['MergeVerticalStageTotalMilliseconds'] > 0, ProfileEvents['MergeVerticalStageExecuteMilliseconds'] > 0, - ProfileEvents['UserTimeMicroseconds'] > 0, ProfileEvents['OSCPUVirtualTimeMicroseconds'] > 0, FROM system.part_log WHERE database = currentDatabase() AND table = 't_merge_profile_events_2' AND event_type = 'MergeParts' AND part_name = 'all_1_2_1'; @@ -89,7 +87,6 @@ SELECT ProfileEvents['MergeProjectionStageExecuteMilliseconds'] > 0, ProfileEvents['MergeExecuteMilliseconds'] <= duration_ms, ProfileEvents['MergeTotalMilliseconds'] <= duration_ms, - ProfileEvents['UserTimeMicroseconds'] > 0, ProfileEvents['OSCPUVirtualTimeMicroseconds'] > 0, FROM system.part_log WHERE database = currentDatabase() AND table = 't_merge_profile_events_3' AND event_type = 'MergeParts' AND part_name = 'all_1_2_1'; diff --git a/tests/queries/0_stateless/03300_generate_random_const_expr_params.reference b/tests/queries/0_stateless/03300_generate_random_const_expr_params.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03300_generate_random_const_expr_params.sql b/tests/queries/0_stateless/03300_generate_random_const_expr_params.sql new file mode 100644 index 000000000000..ff910ee409fb --- /dev/null +++ b/tests/queries/0_stateless/03300_generate_random_const_expr_params.sql @@ -0,0 +1,4 @@ +CREATE TABLE t0 (c0 Int32) ENGINE = GenerateRandom(rand()); -- { serverError BAD_ARGUMENTS } +CREATE TABLE t1 (c0 Int32) ENGINE = GenerateRandom(now() % 1073741824); +CREATE TABLE t2 (c0 Int32) ENGINE = GenerateRandom(1 + 1); +CREATE TABLE t4 (c0 Int32) ENGINE = GenerateRandom(now() % 1073741824, 1+1, '123'::UInt64); diff --git 
a/tests/queries/0_stateless/03628_subcolumns_of_columns_with_dot_in_name.reference b/tests/queries/0_stateless/03628_subcolumns_of_columns_with_dot_in_name.reference new file mode 100644 index 000000000000..872d8bf4e09c --- /dev/null +++ b/tests/queries/0_stateless/03628_subcolumns_of_columns_with_dot_in_name.reference @@ -0,0 +1,20 @@ +42 +42 +42 +42 +42 +{"a":42,"b":42} 42 42 +{"a":42,"b":42} 42 42 +{"a":42,"b":42} 42 42 +{"a":42,"b":42} 42 42 +{"a":43,"b":43} 43 43 +{"a":42,"b":42} 42 42 +{"a":43,"b":43} 43 43 +{"a":42,"b":42} 42 42 +{"a":43,"b":43} 43 43 +(42) 42 +(42) 42 +(42) 42 +(43) 43 +(42) 42 +(43) 43 diff --git a/tests/queries/0_stateless/03628_subcolumns_of_columns_with_dot_in_name.sql b/tests/queries/0_stateless/03628_subcolumns_of_columns_with_dot_in_name.sql new file mode 100644 index 000000000000..d4564159ba7e --- /dev/null +++ b/tests/queries/0_stateless/03628_subcolumns_of_columns_with_dot_in_name.sql @@ -0,0 +1,57 @@ +drop table if exists test; +create table test (`my.json` JSON) engine=Memory; +insert into test select '{"a" : 42}'; +select my.json.a from test settings enable_analyzer=1; +select `my.json`.a from test settings enable_analyzer=1; +select my.json.a from test settings enable_analyzer=0; +select `my.json`.a from test settings enable_analyzer=0; +drop table test; + +select `t.t`.a from format(JSONEachRow, '`t.t` Tuple(a UInt32)', '{"t.t" : {"a" : 42}}'); + +create table test +( + `my.json` JSON(a UInt32), + a1 UInt32 materialized my.json.a, + a2 UInt32 default my.json.a, + b1 UInt32 materialized my.json.b, + b2 UInt32 default my.json.b, + index idx1 my.json.a type minmax, + index idx2 my.json.b::Int64 type minmax, + projection prj1 (select my.json, my.json.a, my.json.b order by my.json.a, my.json.b::Int32) +) engine=MergeTree order by (my.json.a, my.json.b::Int32, my.json.a + 42, my.json.b::Int32 + 42); +insert into test (my.json) select '{"a" : 42, "b" : 42}'; +select * from test; +select * from test order by my.json.a; +select * from test order by my.json.b::Int32; +insert into test (my.json) select '{"a" : 43, "b" : 43}'; +optimize table test final; +select * from test; +select * from test order by my.json.a; +select * from test order by my.json.b::Int32; + +alter table test modify column my.json JSON(a UInt32, b UInt32); -- {serverError ALTER_OF_COLUMN_IS_FORBIDDEN} +alter table test update `my.json` = '{}' where 1; -- {serverError CANNOT_UPDATE_COLUMN} + +drop table test; + +create table test +( + `my.tuple` Tuple(a UInt32), + a1 UInt32 materialized my.tuple.a, + a2 UInt32 default my.tuple.a, + index idx1 my.tuple.a type minmax, + projection prj1 (select my.tuple, my.tuple.a order by my.tuple.a) +) engine=MergeTree order by (my.tuple.a, my.tuple.a + 42); +insert into test (my.tuple) select tuple(42); +select * from test; +select * from test order by my.tuple.a; +insert into test (my.tuple) select tuple(43); +optimize table test final; +select * from test; +select * from test order by my.tuple.a; + +alter table test modify column my.tuple Tuple(a UInt32, b UInt32); -- {serverError ALTER_OF_COLUMN_IS_FORBIDDEN} +alter table test update `my.tuple` = tuple(0, 0) where 1; -- {serverError CANNOT_UPDATE_COLUMN} + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03722_random_utf8_bug.reference b/tests/queries/0_stateless/03722_random_utf8_bug.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03722_random_utf8_bug.sql b/tests/queries/0_stateless/03722_random_utf8_bug.sql new 
file mode 100644 index 000000000000..7b08f562389c --- /dev/null +++ b/tests/queries/0_stateless/03722_random_utf8_bug.sql @@ -0,0 +1 @@ +select randomStringUTF8(18446744073709551615-1000+number*2003) from numbers(2); -- { serverError TOO_LARGE_STRING_SIZE } diff --git a/tests/queries/0_stateless/03766_subcolumns_resolution_in_aliases.reference b/tests/queries/0_stateless/03766_subcolumns_resolution_in_aliases.reference new file mode 100644 index 000000000000..cf3f99ff1e63 --- /dev/null +++ b/tests/queries/0_stateless/03766_subcolumns_resolution_in_aliases.reference @@ -0,0 +1,23 @@ +0 43 +QUERY id: 0 + PROJECTION COLUMNS + ab UInt64 + bcd UInt64 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: ab, result_type: UInt64, source_id: 3 + EXPRESSION + FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: a.b, result_type: Dynamic, source_id: 3 + CONSTANT id: 7, constant_value: \'UInt64\', constant_value_type: String + COLUMN id: 8, column_name: bcd, result_type: UInt64, source_id: 3 + EXPRESSION + FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 10, nodes: 2 + COLUMN id: 11, column_name: b.c.d, result_type: Dynamic, source_id: 3 + CONSTANT id: 12, constant_value: \'UInt64\', constant_value_type: String + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.test diff --git a/tests/queries/0_stateless/03766_subcolumns_resolution_in_aliases.sql b/tests/queries/0_stateless/03766_subcolumns_resolution_in_aliases.sql new file mode 100644 index 000000000000..19b4df723b02 --- /dev/null +++ b/tests/queries/0_stateless/03766_subcolumns_resolution_in_aliases.sql @@ -0,0 +1,9 @@ +set enable_analyzer=1; + +drop table if exists test; +create table test (a JSON, `b.c` JSON, ab UInt64 alias a.b, bcd UInt64 alias b.c.d) engine=MergeTree order by tuple(); +insert into test select '{"a" : 42}', '{"d" : 43}'; +select ab, bcd from test; +explain query tree select ab, bcd from test; +drop table test; + diff --git a/tests/queries/0_stateless/03773_create_user_param_auth_methods.reference b/tests/queries/0_stateless/03773_create_user_param_auth_methods.reference new file mode 100644 index 000000000000..910de1b18dd3 --- /dev/null +++ b/tests/queries/0_stateless/03773_create_user_param_auth_methods.reference @@ -0,0 +1 @@ +localhost 9000 0 0 0 diff --git a/tests/queries/0_stateless/03773_create_user_param_auth_methods.sql b/tests/queries/0_stateless/03773_create_user_param_auth_methods.sql new file mode 100644 index 000000000000..d76a5be7600d --- /dev/null +++ b/tests/queries/0_stateless/03773_create_user_param_auth_methods.sql @@ -0,0 +1,12 @@ +-- Tags: no-fasttest, no-parallel, no-replicated-database +-- Tag no-replicated-database: ON CLUSTER is not allowed +-- Test for issue #92010: Query parameters in authentication methods with ON CLUSTER + +DROP USER IF EXISTS user_param_auth_03773; + +SET param_password='test_password_03773'; + +-- Before fix: This would fail with UNKNOWN_QUERY_PARAMETER on remote nodes +CREATE USER user_param_auth_03773 ON CLUSTER test_shard_localhost IDENTIFIED WITH plaintext_password BY {password:String}; + +DROP USER IF EXISTS user_param_auth_03773; diff --git a/tests/queries/0_stateless/03773_deserialise_malformed_aggregate_state.reference b/tests/queries/0_stateless/03773_deserialise_malformed_aggregate_state.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git 
a/tests/queries/0_stateless/03773_deserialise_malformed_aggregate_state.sql b/tests/queries/0_stateless/03773_deserialise_malformed_aggregate_state.sql new file mode 100644 index 000000000000..a8cf6ac6d4a4 --- /dev/null +++ b/tests/queries/0_stateless/03773_deserialise_malformed_aggregate_state.sql @@ -0,0 +1,13 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/93026 +SELECT hex(groupConcatMerge(',', 10)(state)) +FROM +( + SELECT CAST(unhex('01580180808080108A80808010'), 'AggregateFunction(groupConcat(\',\', 10), String)') AS state +); -- { serverError BAD_ARGUMENTS } + +-- Check for non-monotonic offsets +SELECT hex(groupConcatMerge(',', 10)(state)) +FROM +( + SELECT CAST(unhex('0141010100'), 'AggregateFunction(groupConcat(\',\', 10), String)') AS state +); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/03773_group_concat_overflow.reference b/tests/queries/0_stateless/03773_group_concat_overflow.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03773_group_concat_overflow.sql b/tests/queries/0_stateless/03773_group_concat_overflow.sql new file mode 100644 index 000000000000..1aa88c3dbf63 --- /dev/null +++ b/tests/queries/0_stateless/03773_group_concat_overflow.sql @@ -0,0 +1,8 @@ +-- Check if we catch overflow on num_rows + +-- data_size = 4, data = AAAA, num_rows = 2^63 +SELECT hex(groupConcatMerge(',', 10)(state)) +FROM +( + SELECT CAST(unhex('044141414180808080808080808001'), 'AggregateFunction(groupConcat(\',\', 10), String)') AS state +) -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/03773_nullable_sparse_join.reference b/tests/queries/0_stateless/03773_nullable_sparse_join.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03773_nullable_sparse_join.sql b/tests/queries/0_stateless/03773_nullable_sparse_join.sql new file mode 100644 index 000000000000..a361267bb344 --- /dev/null +++ b/tests/queries/0_stateless/03773_nullable_sparse_join.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +SET max_partitions_per_insert_block=99999999; +SET compatibility='23.3'; + +CREATE TABLE t1 (`c1` String, `c3` String, `c2` DateTime) ENGINE = MergeTree PARTITION BY toYYYYMM(c2) ORDER BY c1; +CREATE TABLE t2 (`c4` Int64) ENGINE = MergeTree ORDER BY c4; + +INSERT INTO t1 SELECT * FROM generateRandom() LIMIT 9; +INSERT INTO t2 SELECT * FROM generateRandom() LIMIT 9; + +SELECT lo.c4 FROM t1 AS l INNER JOIN t2 AS lo ON toInt64OrNull(l.c3) = lo.c4 FORMAT NULL; + +DROP TABLE t1; +DROP TABLE t2; diff --git a/tests/queries/0_stateless/03777_analyzer_unused_columns_removal_correlated_subquery.reference b/tests/queries/0_stateless/03777_analyzer_unused_columns_removal_correlated_subquery.reference new file mode 100644 index 000000000000..93effd44c582 --- /dev/null +++ b/tests/queries/0_stateless/03777_analyzer_unused_columns_removal_correlated_subquery.reference @@ -0,0 +1,65 @@ +QUERY id: 0 + PROJECTION COLUMNS + avg_yearly Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: divide, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: Decimal(38, 2) + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: l_extendedprice, result_type: Decimal(15, 2), source_id: 7 + CONSTANT id: 8, constant_value: Float64_7, constant_value_type: Float64 + JOIN TREE + QUERY id: 7, alias: __table1, is_subquery: 1 
+ PROJECTION COLUMNS + l_quantity Decimal(15, 2) + l_extendedprice Decimal(15, 2) + p_partkey Int32 + PROJECTION + LIST id: 9, nodes: 3 + COLUMN id: 10, column_name: l_quantity, result_type: Decimal(15, 2), source_id: 11 + COLUMN id: 12, column_name: l_extendedprice, result_type: Decimal(15, 2), source_id: 11 + COLUMN id: 13, column_name: p_partkey, result_type: Int32, source_id: 14 + JOIN TREE + JOIN id: 15, strictness: ALL, kind: INNER + LEFT TABLE EXPRESSION + TABLE id: 11, alias: __table2, table_name: default.lineitem + RIGHT TABLE EXPRESSION + TABLE id: 14, alias: __table3, table_name: default.part + JOIN EXPRESSION + FUNCTION id: 16, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 18, column_name: p_partkey, result_type: Int32, source_id: 14 + COLUMN id: 19, column_name: l_partkey, result_type: Int32, source_id: 11 + WHERE + FUNCTION id: 20, function_name: less, function_type: ordinary, result_type: Nullable(UInt8) + ARGUMENTS + LIST id: 21, nodes: 2 + COLUMN id: 22, column_name: l_quantity, result_type: Decimal(15, 2), source_id: 7 + QUERY id: 23, alias: __table4, is_subquery: 1, is_correlated: 1 + CORRELATED COLUMNS + LIST id: 24, nodes: 1 + COLUMN id: 25, column_name: p_partkey, result_type: Int32, source_id: 7 + PROJECTION COLUMNS + multiply(0.2, avg(l_quantity)) Float64 + PROJECTION + LIST id: 26, nodes: 1 + FUNCTION id: 27, function_name: multiply, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 28, nodes: 2 + CONSTANT id: 29, constant_value: Float64_0.2, constant_value_type: Float64 + FUNCTION id: 30, function_name: avg, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 31, nodes: 1 + COLUMN id: 32, column_name: l_quantity, result_type: Decimal(15, 2), source_id: 33 + JOIN TREE + TABLE id: 33, alias: __table5, table_name: default.lineitem + WHERE + FUNCTION id: 34, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 35, nodes: 2 + COLUMN id: 36, column_name: l_partkey, result_type: Int32, source_id: 33 + COLUMN id: 25, column_name: p_partkey, result_type: Int32, source_id: 7 diff --git a/tests/queries/0_stateless/03777_analyzer_unused_columns_removal_correlated_subquery.sql b/tests/queries/0_stateless/03777_analyzer_unused_columns_removal_correlated_subquery.sql new file mode 100644 index 000000000000..d6f08fb97303 --- /dev/null +++ b/tests/queries/0_stateless/03777_analyzer_unused_columns_removal_correlated_subquery.sql @@ -0,0 +1,67 @@ +SET enable_analyzer = 1; +SET enable_parallel_replicas = 0; +SET correlated_subqueries_substitute_equivalent_expressions = 0; + +CREATE TABLE lineitem ( + l_orderkey Int32, + l_partkey Int32, + l_suppkey Int32, + l_linenumber Int32, + l_quantity Decimal(15,2), + l_extendedprice Decimal(15,2), + l_discount Decimal(15,2), + l_tax Decimal(15,2), + l_returnflag String, + l_linestatus String, + l_shipdate Date, + l_commitdate Date, + l_receiptdate Date, + l_shipinstruct String, + l_shipmode String, + l_comment String) +ORDER BY (l_orderkey, l_linenumber); +INSERT INTO lineitem SELECT * FROM generateRandom() LIMIT 1; + +CREATE TABLE part ( + p_partkey Int32, + p_name String, + p_mfgr String, + p_brand String, + p_type String, + p_size Int32, + p_container String, + p_retailprice Decimal(15,2), + p_comment String) +ORDER BY (p_partkey); +INSERT INTO part SELECT * FROM generateRandom() LIMIT 1; + + +EXPLAIN QUERY TREE +SELECT + sum(l_extendedprice) / 7.0 AS avg_yearly +FROM + (SELECT * FROM lineitem, part 
WHERE p_partkey = l_partkey) AS lp +WHERE + l_quantity < ( + SELECT + 0.2 * avg(l_quantity) + FROM + lineitem + WHERE + l_partkey = lp.p_partkey + ); + +SELECT + sum(l_extendedprice) / 7.0 AS avg_yearly +FROM + (SELECT * FROM lineitem, part WHERE p_partkey = l_partkey) AS lp +WHERE + l_quantity < ( + SELECT + 0.2 * avg(l_quantity) + FROM + lineitem + WHERE + l_partkey = lp.p_partkey + ) +FORMAT Null; diff --git a/tests/queries/0_stateless/03780_insert_sparse_into_ttl_column.reference b/tests/queries/0_stateless/03780_insert_sparse_into_ttl_column.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03780_insert_sparse_into_ttl_column.sh b/tests/queries/0_stateless/03780_insert_sparse_into_ttl_column.sh new file mode 100755 index 000000000000..871a9fb4d9f2 --- /dev/null +++ b/tests/queries/0_stateless/03780_insert_sparse_into_ttl_column.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query=" +drop table if exists test; +create table test (c0 Int, c1 DateTime) engine=MergeTree order by tuple() ttl c1 SETTINGS ratio_of_defaults_for_sparse_serialization = 0.001; +insert into test (c0) select * from numbers(10); +insert into function file(${CLICKHOUSE_TEST_UNIQUE_NAME}.csv) select * from test; +insert into test select * from file(${CLICKHOUSE_TEST_UNIQUE_NAME}.csv); +drop table test; +" \ No newline at end of file diff --git a/tests/queries/0_stateless/03780_json_typed_paths_with_same_prefix.reference b/tests/queries/0_stateless/03780_json_typed_paths_with_same_prefix.reference new file mode 100644 index 000000000000..8e5083748216 --- /dev/null +++ b/tests/queries/0_stateless/03780_json_typed_paths_with_same_prefix.reference @@ -0,0 +1 @@ +{"a":42,"a":{"b":43}} 42 43 diff --git a/tests/queries/0_stateless/03780_json_typed_paths_with_same_prefix.sql b/tests/queries/0_stateless/03780_json_typed_paths_with_same_prefix.sql new file mode 100644 index 000000000000..40231d999f7b --- /dev/null +++ b/tests/queries/0_stateless/03780_json_typed_paths_with_same_prefix.sql @@ -0,0 +1,3 @@ +set enable_analyzer=1; +select '{"a" : 42, "a.b" : 43}'::JSON(a UInt32, `a.b` UInt32) as json, json.a, json.a.b; + diff --git a/tests/queries/0_stateless/03784_bad_base_backup.reference b/tests/queries/0_stateless/03784_bad_base_backup.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03784_bad_base_backup.sh b/tests/queries/0_stateless/03784_bad_base_backup.sh new file mode 100755 index 000000000000..d95c3fd6f10e --- /dev/null +++ b/tests/queries/0_stateless/03784_bad_base_backup.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# A test for a race condition in backups. +# It does weird stuff and is expected to produce errors due to concurrently created files. +# But under no circumstances should it crash the server. 
+ +TIMEOUT=10 + +function thread() +{ + I=0 + while true + do + [[ $SECONDS -gt $TIMEOUT ]] && break + I=$((I+1)) + + #rm "${CLICKHOUSE_DATABASE}_${I}/backup0.tar.zst" + + $CLICKHOUSE_CLIENT --query=" + SET allow_suspicious_low_cardinality_types = 1; + + DROP DATABASE IF EXISTS d1_$CLICKHOUSE_DATABASE; + DROP DATABASE IF EXISTS d2_$CLICKHOUSE_DATABASE; + + CREATE DATABASE d1_$CLICKHOUSE_DATABASE ENGINE = Replicated('/clickhouse/databases/d1_$CLICKHOUSE_DATABASE', '{shard}', '{replica}'); + CREATE DATABASE d2_$CLICKHOUSE_DATABASE ENGINE = Replicated('/clickhouse/databases/d2_$CLICKHOUSE_DATABASE', 's0', 'd0'); + + CREATE TABLE d1_$CLICKHOUSE_DATABASE.\"t4\" (\"c0\" Array(UInt64), \"c1\" Nullable(UInt256), \"c2\" Nullable(UInt8), \"c3\" LowCardinality(Nullable(Time))) ENGINE = Set(); + + CREATE TABLE d2_$CLICKHOUSE_DATABASE.\"t9\" (\"c0\" String COMMENT '😉', \"c1\" String, \"c2\" Nullable(UInt256), \"c3\" Nullable(JSON(max_dynamic_types=21))) ENGINE = DeltaLakeLocal('${CLICKHOUSE_USER_FILES_UNIQUE}_${I}', Parquet) SETTINGS iceberg_recent_metadata_file_by_last_updated_ms_field = 0; + + BACKUP DATABASE d2_$CLICKHOUSE_DATABASE TO File('${CLICKHOUSE_DATABASE}_${I}/backup0.tar.zst') SETTINGS query_plan_enable_optimizations = 1, max_network_bandwidth = 32768, hdfs_skip_empty_files = 0, format_binary_max_array_size = 8064, force_remove_data_recursively_on_drop = 1, input_format_orc_filter_push_down = 1, parallel_replicas_index_analysis_only_on_coordinator = 1, max_threads_for_indexes = 13, output_format_decimal_trailing_zeros = 0, remote_filesystem_read_prefetch = 0, output_format_parquet_geometadata = 1, optimize_extract_common_expressions = 0, merge_tree_min_rows_for_seek = 5313, max_bytes_ratio_before_external_group_by = 0.010000, optimize_respect_aliases = 0, use_skip_indexes = 0, join_to_sort_minimum_perkey_rows = 5969, parallel_replicas_for_cluster_engines = 0, rewrite_in_to_join = 0, merge_tree_min_bytes_for_seek = 0, parallel_replica_offset = 6, update_insert_deduplication_token_in_dependent_materialized_views = 0, distributed_aggregation_memory_efficient = 0, max_number_of_partitions_for_independent_aggregation = 4222, implicit_select = 0, max_result_bytes = 0, delta_lake_throw_on_engine_predicate_error = 0, apply_row_policy_after_final = 0, input_format_defaults_for_omitted_fields = 1, filesystem_cache_enable_background_download_during_fetch = 1, output_format_sql_insert_include_column_names = 1, input_format_tsv_detect_header = 1, asterisk_include_alias_columns = 0, show_create_query_identifier_quoting_rule = 'always', log_formatted_queries = 1, database_atomic_wait_for_drop_and_detach_synchronously = 0, hdfs_ignore_file_doesnt_exist = 1, cloud_mode = 1 ASYNC; + + BACKUP TABLE d1_$CLICKHOUSE_DATABASE.\"t4\" TO Memory('${CLICKHOUSE_DATABASE}_${I}/backup2.tar') SETTINGS base_backup = File('${CLICKHOUSE_DATABASE}_${I}/backup0.tar.zst'), async_insert_max_data_size = 5977266, check_table_dependencies = 1, input_format_custom_detect_header = 0, fsync_metadata = 0, group_by_use_nulls = 1 ASYNC FORMAT TabSeparatedRawWithNames; + " + done >/dev/null 2>&1 +} + +thread & +thread & +thread & +thread & + +wait diff --git a/tests/queries/0_stateless/03784_zstd_decompressed_size.reference b/tests/queries/0_stateless/03784_zstd_decompressed_size.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03784_zstd_decompressed_size.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03784_zstd_decompressed_size.sh 
b/tests/queries/0_stateless/03784_zstd_decompressed_size.sh new file mode 100755 index 000000000000..36968d2e9918 --- /dev/null +++ b/tests/queries/0_stateless/03784_zstd_decompressed_size.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +printf '\x41\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x90\x1a\x00\x00\x00\xe8\x03\x00\x00(\xb5/\xfd \x08A\x00\x00SELECT 1' \ + | ${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&decompress=1&http_native_compression_disable_checksumming_on_decompress=1" --data-binary @- | grep -c "The size after decompression" \ No newline at end of file diff --git a/tests/queries/0_stateless/03785_double_delta_width.reference b/tests/queries/0_stateless/03785_double_delta_width.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03785_double_delta_width.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03785_double_delta_width.sh b/tests/queries/0_stateless/03785_double_delta_width.sh new file mode 100755 index 000000000000..45cba8f66a4b --- /dev/null +++ b/tests/queries/0_stateless/03785_double_delta_width.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +printf '\x41\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x94\x1a\x00\x00\x00\xe8\x03\x00\x00(\xb5/\xfd \x08A\x00\x00SELECT 1' \ + | ${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&decompress=1&http_native_compression_disable_checksumming_on_decompress=1" --data-binary @- \ + | grep -c "Cannot decompress double-delta encoded data. File has wrong header" \ No newline at end of file diff --git a/tests/queries/0_stateless/03785_rebuild_projection_with_part_offset.reference b/tests/queries/0_stateless/03785_rebuild_projection_with_part_offset.reference new file mode 100644 index 000000000000..02357561e024 --- /dev/null +++ b/tests/queries/0_stateless/03785_rebuild_projection_with_part_offset.reference @@ -0,0 +1,2 @@ +200000 +200000 diff --git a/tests/queries/0_stateless/03785_rebuild_projection_with_part_offset.sql b/tests/queries/0_stateless/03785_rebuild_projection_with_part_offset.sql new file mode 100644 index 000000000000..04cddd167592 --- /dev/null +++ b/tests/queries/0_stateless/03785_rebuild_projection_with_part_offset.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE test +( + `a` Int32, + `b` Int32, + PROJECTION p + ( + SELECT + a, + b, + _part_offset + ORDER BY b + ) +) +ENGINE = ReplacingMergeTree +ORDER BY a +SETTINGS index_granularity_bytes = 10485760, index_granularity = 8192, deduplicate_merge_projection_mode = 'rebuild'; + +INSERT INTO test SELECT number * 3, rand() FROM numbers(100000); +INSERT INTO test SELECT number * 3 + 1, rand() FROM numbers(100000); +SELECT sum(l._part_offset = r._parent_part_offset) FROM test l JOIN mergeTreeProjection(currentDatabase(), test, p) r USING (a) SETTINGS enable_analyzer = 1; + +OPTIMIZE TABLE test FINAL; + +SELECT sum(l._part_offset = r._parent_part_offset) FROM test l JOIN mergeTreeProjection(currentDatabase(), test, p) r USING (a) SETTINGS enable_analyzer = 1; + +DROP TABLE test; diff --git a/tests/queries/0_stateless/03789_rmv_with_multiple_subqueries.reference b/tests/queries/0_stateless/03789_rmv_with_multiple_subqueries.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ 
b/tests/queries/0_stateless/03789_rmv_with_multiple_subqueries.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03789_rmv_with_multiple_subqueries.sql b/tests/queries/0_stateless/03789_rmv_with_multiple_subqueries.sql new file mode 100644 index 000000000000..c6dbaad00c99 --- /dev/null +++ b/tests/queries/0_stateless/03789_rmv_with_multiple_subqueries.sql @@ -0,0 +1,27 @@ +-- test that multiple refreshes of RMV with multiple subqueries do not leak memory or cause any other general issues with the instance +CREATE TABLE 03789_rmv_target (message String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/03789_rmv_with_multiple_subqueries', 'r1') ORDER BY (); +CREATE MATERIALIZED VIEW 03789_rmv_mv REFRESH EVERY 1 MONTH APPEND TO 03789_rmv_target AS WITH + ( + SELECT 1 + ) AS lower_limit, + ( + SELECT number + FROM numbers(10) + WHERE number = lower_limit + ) AS upper_limit, + result AS + ( + SELECT 'OH NO' AS message + FROM numbers(10) + WHERE (number >= lower_limit) AND (number <= upper_limit) + ) +SELECT * +FROM result; + +SYSTEM REFRESH VIEW 03789_rmv_mv; +SYSTEM WAIT VIEW 03789_rmv_mv; +SYSTEM REFRESH VIEW 03789_rmv_mv; +SYSTEM WAIT VIEW 03789_rmv_mv; + +SYSTEM FLUSH LOGS query_log; +SELECT uniqExact(query) FROM system.query_log WHERE has(databases, currentDatabase()) AND query LIKE '%INSERT%SELECT%' AND type = 'QueryFinish'; \ No newline at end of file diff --git a/tests/queries/0_stateless/03789_to_year_week_monotonicity_key_condition.reference b/tests/queries/0_stateless/03789_to_year_week_monotonicity_key_condition.reference new file mode 100644 index 000000000000..7e87f95616da --- /dev/null +++ b/tests/queries/0_stateless/03789_to_year_week_monotonicity_key_condition.reference @@ -0,0 +1,52 @@ +-- { echo } + +DROP TABLE IF EXISTS t; +CREATE TABLE t (s String) +ENGINE = MergeTree +ORDER BY s; +INSERT INTO t VALUES + ('2020-01-10 00:00:00'), + ('2020-01-2 00:00:00'); +SELECT * FROM t +WHERE toYearWeek(s) = toYearWeek('2020-01-2 00:00:00'); +2020-01-2 00:00:00 +DROP TABLE IF EXISTS t; +CREATE TABLE t (d Date32) +ENGINE = MergeTree +ORDER BY d; +INSERT INTO t VALUES ('2020-12-31'), ('2021-01-01'); +SELECT * FROM t +WHERE toWeek(d) = toWeek(toDate32('2020-12-31')); +2020-12-31 +DROP TABLE IF EXISTS t; +CREATE TABLE t (s String) +ENGINE = MergeTree +ORDER BY s; +INSERT INTO t VALUES + ('2020-02-11 00:00:00'), + ('2020-02-3 00:00:00'); +SELECT * FROM t +WHERE toWeek(s) = toWeek('2020-02-3 00:00:00'); +2020-02-3 00:00:00 +DROP TABLE IF EXISTS t; +CREATE TABLE t (dt DateTime) ENGINE=MergeTree ORDER BY dt SETTINGS index_granularity=1; +INSERT INTO t SELECT toDateTime('2020-01-01 00:00:00') + number * 3600 FROM numbers(24 * 40); +SELECT count() +FROM t +WHERE toWeek(dt) = toWeek(toDateTime('2020-01-15 00:00:00')) SETTINGS force_primary_key = 1, max_rows_to_read = 169; +168 +DROP TABLE IF EXISTS t; +CREATE TABLE t (s LowCardinality(String)) ENGINE = MergeTree ORDER BY s; +INSERT INTO t VALUES ('2020-01-10 00:00:00'), ('2020-01-2 00:00:00'); +SELECT * FROM t WHERE toYearWeek(s) = toYearWeek('2020-01-2 00:00:00'); +2020-01-2 00:00:00 +DROP TABLE IF EXISTS t; +CREATE TABLE t (s Nullable(String)) ENGINE = MergeTree ORDER BY s SETTINGS allow_nullable_key = 1; +INSERT INTO t VALUES ('2020-01-10 00:00:00'), ('2020-01-2 00:00:00'); +SELECT * FROM t WHERE toYearWeek(s) = toYearWeek('2020-01-2 00:00:00'); +2020-01-2 00:00:00 +DROP TABLE IF EXISTS t; +CREATE TABLE t (s LowCardinality(String)) ENGINE = MergeTree ORDER BY s; +INSERT INTO t VALUES ('2020-01-10 00:00:00'), 
('2020-01-2 00:00:00'); +SELECT * FROM t WHERE toYearWeek(s) = toYearWeek('2020-01-2 00:00:00'); +2020-01-2 00:00:00 diff --git a/tests/queries/0_stateless/03789_to_year_week_monotonicity_key_condition.sql b/tests/queries/0_stateless/03789_to_year_week_monotonicity_key_condition.sql new file mode 100644 index 000000000000..fb550e72051f --- /dev/null +++ b/tests/queries/0_stateless/03789_to_year_week_monotonicity_key_condition.sql @@ -0,0 +1,61 @@ +-- { echo } + +DROP TABLE IF EXISTS t; +CREATE TABLE t (s String) +ENGINE = MergeTree +ORDER BY s; + +INSERT INTO t VALUES + ('2020-01-10 00:00:00'), + ('2020-01-2 00:00:00'); + +SELECT * FROM t +WHERE toYearWeek(s) = toYearWeek('2020-01-2 00:00:00'); + +DROP TABLE IF EXISTS t; +CREATE TABLE t (d Date32) +ENGINE = MergeTree +ORDER BY d; + +INSERT INTO t VALUES ('2020-12-31'), ('2021-01-01'); + +SELECT * FROM t +WHERE toWeek(d) = toWeek(toDate32('2020-12-31')); + +DROP TABLE IF EXISTS t; +CREATE TABLE t (s String) +ENGINE = MergeTree +ORDER BY s; + +INSERT INTO t VALUES + ('2020-02-11 00:00:00'), + ('2020-02-3 00:00:00'); + +SELECT * FROM t +WHERE toWeek(s) = toWeek('2020-02-3 00:00:00'); + +DROP TABLE IF EXISTS t; +CREATE TABLE t (dt DateTime) ENGINE=MergeTree ORDER BY dt SETTINGS index_granularity=1; +INSERT INTO t SELECT toDateTime('2020-01-01 00:00:00') + number * 3600 FROM numbers(24 * 40); + +SELECT count() +FROM t +WHERE toWeek(dt) = toWeek(toDateTime('2020-01-15 00:00:00')) SETTINGS force_primary_key = 1, max_rows_to_read = 169; + +DROP TABLE IF EXISTS t; +CREATE TABLE t (s LowCardinality(String)) ENGINE = MergeTree ORDER BY s; +INSERT INTO t VALUES ('2020-01-10 00:00:00'), ('2020-01-2 00:00:00'); + +SELECT * FROM t WHERE toYearWeek(s) = toYearWeek('2020-01-2 00:00:00'); + +DROP TABLE IF EXISTS t; +CREATE TABLE t (s Nullable(String)) ENGINE = MergeTree ORDER BY s SETTINGS allow_nullable_key = 1; +INSERT INTO t VALUES ('2020-01-10 00:00:00'), ('2020-01-2 00:00:00'); + +SELECT * FROM t WHERE toYearWeek(s) = toYearWeek('2020-01-2 00:00:00'); + +DROP TABLE IF EXISTS t; +CREATE TABLE t (s LowCardinality(String)) ENGINE = MergeTree ORDER BY s; +INSERT INTO t VALUES ('2020-01-10 00:00:00'), ('2020-01-2 00:00:00'); + +SELECT * FROM t WHERE toYearWeek(s) = toYearWeek('2020-01-2 00:00:00'); diff --git a/tests/queries/0_stateless/03790_materialized_postgresql_nullptr_dereference.reference b/tests/queries/0_stateless/03790_materialized_postgresql_nullptr_dereference.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03790_materialized_postgresql_nullptr_dereference.sql b/tests/queries/0_stateless/03790_materialized_postgresql_nullptr_dereference.sql new file mode 100644 index 000000000000..464fb212aae3 --- /dev/null +++ b/tests/queries/0_stateless/03790_materialized_postgresql_nullptr_dereference.sql @@ -0,0 +1,6 @@ +-- Tags: no-fasttest +-- depends on libpq + +SET allow_experimental_database_materialized_postgresql = 1; +CREATE DATABASE d03790_materialized_postgresql_nullptr_dereference ENGINE = MaterializedPostgreSQL; -- { serverError BAD_ARGUMENTS } +CREATE DATABASE d03790_materialized_postgresql_nullptr_dereference ENGINE = PostgreSQL; -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/03790_uniqTheta_error.reference b/tests/queries/0_stateless/03790_uniqTheta_error.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03790_uniqTheta_error.sql b/tests/queries/0_stateless/03790_uniqTheta_error.sql new file mode 100644 index 
000000000000..2e9199e98113 --- /dev/null +++ b/tests/queries/0_stateless/03790_uniqTheta_error.sql @@ -0,0 +1,5 @@ +-- Tags: no-fasttest +-- - no-fasttest -- compiled w/o datasketches + +-- Regression for very high error in uniqTheta() due to optimization for u8 keys +select throwIf(stddevSampStable(theta)/avg(theta)>0.1) from (select number%3 key, uniqTheta(generateUUIDv4(number)) theta FROM numbers_mt(9160000) GROUP BY key with totals SETTINGS max_threads = 8) format Null; diff --git a/tests/queries/0_stateless/03791_function_to_subcolumns_optimization_on_subcolumns.reference b/tests/queries/0_stateless/03791_function_to_subcolumns_optimization_on_subcolumns.reference new file mode 100644 index 000000000000..9929f29dbe76 --- /dev/null +++ b/tests/queries/0_stateless/03791_function_to_subcolumns_optimization_on_subcolumns.reference @@ -0,0 +1,4 @@ +SELECT count() AS `count()` +FROM default.test AS __table1 +WHERE __table1.`a.x.size0` != 0 +999 diff --git a/tests/queries/0_stateless/03791_function_to_subcolumns_optimization_on_subcolumns.sql b/tests/queries/0_stateless/03791_function_to_subcolumns_optimization_on_subcolumns.sql new file mode 100644 index 000000000000..9b533101adc4 --- /dev/null +++ b/tests/queries/0_stateless/03791_function_to_subcolumns_optimization_on_subcolumns.sql @@ -0,0 +1,9 @@ +SET enable_analyzer=1; +SET optimize_functions_to_subcolumns=1; +DROP TABLE IF EXISTS test; +CREATE TABLE test (a Tuple(x Array(UInt64))) ENGINE=MergeTree ORDER BY tuple(); +INSERT INTO test SELECT tuple(range(number % 1000)) FROM numbers_mt(1000); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT count() FROM test WHERE notEmpty(a.x); +SELECT count() FROM test WHERE notEmpty(a.x); +DROP TABLE test; + diff --git a/tests/queries/0_stateless/03791_system_parts_race_condition_drop_part.reference b/tests/queries/0_stateless/03791_system_parts_race_condition_drop_part.reference new file mode 100644 index 000000000000..d86bac9de59a --- /dev/null +++ b/tests/queries/0_stateless/03791_system_parts_race_condition_drop_part.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/03791_system_parts_race_condition_drop_part.sh b/tests/queries/0_stateless/03791_system_parts_race_condition_drop_part.sh new file mode 100755 index 000000000000..25001fa4fc8b --- /dev/null +++ b/tests/queries/0_stateless/03791_system_parts_race_condition_drop_part.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# Tags: race, no-parallel +# Test for a race condition between reading system.parts and removing parts. +# The race was in DataPartStorageOnDiskBase::remove() modifying part_dir +# while getFullPath() was reading it concurrently. + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +set -e + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP TABLE IF EXISTS part_race" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE TABLE part_race (x UInt64) ENGINE = MergeTree ORDER BY x PARTITION BY x % 10 + SETTINGS old_parts_lifetime = 0, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0, + cleanup_thread_preferred_points_per_iteration = 0, max_cleanup_delay_period = 0" + +TIMEOUT=30 + +function thread_insert() +{ + local TIMELIMIT=$((SECONDS+TIMEOUT)) + local i=0 + while [ $SECONDS -lt "$TIMELIMIT" ] + do + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "INSERT INTO part_race SELECT $i" 2>/dev/null + ((i++)) || true + done +} + +function thread_drop_partition() +{ + local TIMELIMIT=$((SECONDS+TIMEOUT)) + while [ $SECONDS -lt "$TIMELIMIT" ] + do + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER TABLE part_race DROP PARTITION ID '$((RANDOM % 10))'" 2>/dev/null + sleep 0.0$RANDOM + done +} + +function thread_select_parts() +{ + local TIMELIMIT=$((SECONDS+TIMEOUT)) + while [ $SECONDS -lt "$TIMELIMIT" ] + do + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SELECT name, path FROM system.parts WHERE database = '${CLICKHOUSE_DATABASE}' AND table = 'part_race' FORMAT Null" 2>/dev/null + done +} + +# Start multiple instances of each thread +thread_insert & +thread_insert & + +thread_drop_partition & +thread_drop_partition & + +thread_select_parts & +thread_select_parts & +thread_select_parts & +thread_select_parts & + +wait + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP TABLE part_race" + +echo "OK" diff --git a/tests/queries/0_stateless/03799_any_left_join_isnotnull_crash.reference b/tests/queries/0_stateless/03799_any_left_join_isnotnull_crash.reference new file mode 100644 index 000000000000..2c13876f6cc5 --- /dev/null +++ b/tests/queries/0_stateless/03799_any_left_join_isnotnull_crash.reference @@ -0,0 +1 @@ +{"site":"STORE_A","page_level":1,"count":1} diff --git a/tests/queries/0_stateless/03799_any_left_join_isnotnull_crash.sql b/tests/queries/0_stateless/03799_any_left_join_isnotnull_crash.sql new file mode 100644 index 000000000000..7edb0fff58ad --- /dev/null +++ b/tests/queries/0_stateless/03799_any_left_join_isnotnull_crash.sql @@ -0,0 +1,53 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS AddedToCart; +DROP TABLE IF EXISTS Session; + +CREATE TABLE Session +( + id String, + site Enum8('STORE_A' = 1, 'STORE_B' = 2), + device Enum8('DESKTOP' = 1, 'MOBILE' = 2) +) +ENGINE = MergeTree +ORDER BY id; + +CREATE TABLE AddedToCart +( + sessionId String, + order Int32, + top Nullable(Int32), + screenHeight Nullable(Int32), + screenWidth Nullable(Int32), + isPromotion UInt8, + date DateTime64(3) +) +ENGINE = MergeTree +ORDER BY (sessionId, date); + +INSERT INTO Session (id, site, device) VALUES + ('s1', 'STORE_A', 'DESKTOP'), + ('s2', 'STORE_B', 'MOBILE'); + +INSERT INTO AddedToCart (sessionId, order, top, screenHeight, screenWidth, isPromotion, date) VALUES + ('s1', 1, 100, 400, 1024, 1, parseDateTime64BestEffort('2026-01-19T12:00:00.000Z', 3)), + ('s2', 2, 100, 400, 1024, 1, parseDateTime64BestEffort('2026-01-19T12:00:01.000Z', 3)); + +SELECT + s.site AS site, + if((a.order IS NULL) OR (a.order <= 0) OR (a.order > 30), NULL, accurateCastOrNull(a.order, 'Int32')) AS page_level, + count() AS count +FROM AddedToCart AS a +ANY LEFT JOIN Session AS s ON a.sessionId = s.id +WHERE (a.top IS NOT NULL) + AND (a.screenHeight IS NOT NULL) + AND (a.screenHeight > 0) + AND (a.isPromotion = _CAST(1, 'UInt8')) + AND 
(s.device = 'DESKTOP') + AND isNotNull(s.site) +GROUP BY site, page_level +ORDER BY site ASC, page_level ASC +FORMAT JSONEachRow; + +DROP TABLE AddedToCart; +DROP TABLE Session; diff --git a/tests/queries/0_stateless/03799_backup_restore_security.reference b/tests/queries/0_stateless/03799_backup_restore_security.reference new file mode 100644 index 000000000000..0f956314d345 --- /dev/null +++ b/tests/queries/0_stateless/03799_backup_restore_security.reference @@ -0,0 +1,2 @@ +Test 1 OK +0 diff --git a/tests/queries/0_stateless/03799_backup_restore_security.sh b/tests/queries/0_stateless/03799_backup_restore_security.sh new file mode 100755 index 000000000000..e3e7e67404a3 --- /dev/null +++ b/tests/queries/0_stateless/03799_backup_restore_security.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash + +# Test for security fixes related to BACKUP and RESTORE operations: +# 1. RESTORE should be forbidden in readonly mode +# 2. The 'internal' setting should not be allowed for initial queries +# 3. Permission check should happen before backup destination is opened (e.g., S3 connection) +# +# All tests use fake S3 URLs with invalid credentials. Since all security checks happen +# before any connection attempt, we should always get ACCESS_DENIED errors, not S3 errors. +# We set backup_restore_s3_retry_attempts=0 to avoid retries on connection failure +# when running against a version without the fix. + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +backup_name="${CLICKHOUSE_DATABASE}_03799_backup_security" +user_name="test_03799_user_${CLICKHOUSE_DATABASE}" + +function cleanup() +{ + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_backup_security" + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_restored" + $CLICKHOUSE_CLIENT -q "DROP USER IF EXISTS $user_name" +} +trap cleanup EXIT + +$CLICKHOUSE_CLIENT -q " +DROP TABLE IF EXISTS test_backup_security; +CREATE TABLE test_backup_security (id Int32) ENGINE=MergeTree() ORDER BY id; +INSERT INTO test_backup_security VALUES (1), (2), (3); +" + +# Test 1: RESTORE should be forbidden in readonly mode +# The readonly check happens before any backup destination is accessed. +# We use CLICKHOUSE_CLIENT_BINARY directly to avoid test framework injecting settings that conflict with readonly mode. +$CLICKHOUSE_CLIENT --readonly=1 --backup_restore_s3_retry_attempts=0 -q "RESTORE TABLE test_backup_security AS test_restored FROM S3('http://localhost:11111/test/backups/${backup_name}', 'INVALID_ACCESS_KEY', 'INVALID_SECRET')" 2>&1 | grep -q "ACCESS_DENIED" && echo "Test 1 OK" || echo "Test 1 FAIL" +# Verify that the table was not created +$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.tables WHERE database = currentDatabase() AND name = 'test_restored'" + +# Test 2: The 'internal' setting should not be allowed for initial BACKUP query +# This check happens before any connection attempt. +$CLICKHOUSE_CLIENT -m -q " +BACKUP TABLE test_backup_security TO S3('http://localhost:11111/test/backups/${backup_name}_internal', 'INVALID_ACCESS_KEY', 'INVALID_SECRET') SETTINGS internal=1, backup_restore_s3_retry_attempts=0; -- { serverError ACCESS_DENIED } +" + +# Test 3: The 'internal' setting should not be allowed for initial RESTORE query +# This check happens before any connection attempt. 
+$CLICKHOUSE_CLIENT -m -q " +RESTORE TABLE test_backup_security AS test_restored FROM S3('http://localhost:11111/test/backups/${backup_name}', 'INVALID_ACCESS_KEY', 'INVALID_SECRET') SETTINGS internal=1, backup_restore_s3_retry_attempts=0; -- { serverError ACCESS_DENIED } +" + +# Test 4: User without BACKUP permission should get ACCESS_DENIED (not S3_ERROR) +# This tests that permission check happens before opening backup destination. +# We use S3 with invalid credentials - if we get ACCESS_DENIED instead of S3_ERROR, +# it proves the permission check happens before attempting to connect to S3. +$CLICKHOUSE_CLIENT -q "DROP USER IF EXISTS $user_name" +$CLICKHOUSE_CLIENT -q "CREATE USER $user_name" +$CLICKHOUSE_CLIENT -q "GRANT SELECT ON ${CLICKHOUSE_DATABASE}.test_backup_security TO $user_name" +# User has SELECT but not BACKUP permission - should get ACCESS_DENIED, not S3_ERROR +$CLICKHOUSE_CLIENT --user=$user_name -m -q " +BACKUP TABLE test_backup_security TO S3('http://localhost:11111/test/backups/${CLICKHOUSE_DATABASE}/no_permission_backup', 'INVALID_ACCESS_KEY', 'INVALID_SECRET') SETTINGS backup_restore_s3_retry_attempts=0; -- { serverError ACCESS_DENIED } +" +$CLICKHOUSE_CLIENT -q "DROP USER IF EXISTS $user_name" diff --git a/tests/queries/0_stateless/03799_parallel_right_join_flag_per_row.reference b/tests/queries/0_stateless/03799_parallel_right_join_flag_per_row.reference new file mode 100644 index 000000000000..6b14c324970a --- /dev/null +++ b/tests/queries/0_stateless/03799_parallel_right_join_flag_per_row.reference @@ -0,0 +1,9 @@ +1 10 1 30 +0 0 2 20 +- +1 10 1 30 +0 0 2 20 +- +0 0 2 20 +- +0 0 2 20 diff --git a/tests/queries/0_stateless/03799_parallel_right_join_flag_per_row.sql b/tests/queries/0_stateless/03799_parallel_right_join_flag_per_row.sql new file mode 100644 index 000000000000..f9bbbdfb76e7 --- /dev/null +++ b/tests/queries/0_stateless/03799_parallel_right_join_flag_per_row.sql @@ -0,0 +1,54 @@ +DROP TABLE IF EXISTS t0; +DROP TABLE IF EXISTS t1; + +CREATE TABLE t0 (c0 UInt64, c1 UInt64) ENGINE = MergeTree() ORDER BY (c0); +INSERT INTO t0 VALUES (2, 20), (1, 30); +CREATE TABLE t1 (c0 UInt64, c1 UInt64) ENGINE = MergeTree() ORDER BY (c0); +INSERT INTO t1 VALUES (1, 10); + +SET query_plan_join_swap_table = 0; +SET enable_analyzer = 1; + +SELECT + * +FROM t1 +RIGHT JOIN t0 + ON (t0.c0 = t1.c0) + AND (t0.c1 >= t1.c1) +ORDER BY t0.c0 +SETTINGS join_algorithm='hash'; + +SELECT '-'; + +SELECT + * +FROM t1 +RIGHT JOIN t0 + ON (t0.c0 = t1.c0) + AND (t0.c1 >= t1.c1) +ORDER BY t0.c0 +SETTINGS join_algorithm='parallel_hash'; + + +SELECT '-'; + +SELECT + * +FROM t1 +RIGHT JOIN t0 + ON (t0.c0 = t1.c0) + AND (t0.c1 >= t1.c1) +WHERE t0.c0 = 2 +SETTINGS join_algorithm='parallel_hash', query_plan_filter_push_down = 1; + +SELECT '-'; + + +SELECT + * +FROM t1 +RIGHT JOIN t0 + ON (t0.c0 = t1.c0) + AND (t0.c1 >= t1.c1) +WHERE t0.c0 = 2 +SETTINGS join_algorithm='parallel_hash', query_plan_filter_push_down = 0; diff --git a/tests/queries/0_stateless/03801_domain_empty_string_msan.reference b/tests/queries/0_stateless/03801_domain_empty_string_msan.reference new file mode 100644 index 000000000000..a02e23450817 --- /dev/null +++ b/tests/queries/0_stateless/03801_domain_empty_string_msan.reference @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/tests/queries/0_stateless/03801_domain_empty_string_msan.sql b/tests/queries/0_stateless/03801_domain_empty_string_msan.sql new file mode 100644 index 000000000000..fa8ec2c850f6 --- /dev/null +++ 
b/tests/queries/0_stateless/03801_domain_empty_string_msan.sql @@ -0,0 +1,21 @@ +-- Test for empty and short strings in domain functions to catch use-of-uninitialized-value errors (MSan) + +SELECT domainRFC(''); +SELECT domainRFC('a'); +SELECT domainRFC('/'); +SELECT domainRFC('//'); + +SELECT domainWithoutWWWRFC(''); +SELECT domainWithoutWWWRFC('a'); +SELECT domainWithoutWWWRFC('/'); +SELECT domainWithoutWWWRFC('//'); + +SELECT domain(''); +SELECT domain('a'); +SELECT domain('/'); +SELECT domain('//'); + +SELECT domainWithoutWWW(''); +SELECT domainWithoutWWW('a'); +SELECT domainWithoutWWW('/'); +SELECT domainWithoutWWW('//'); diff --git a/tests/queries/0_stateless/03801_merge_tree_on_readonly_disk.reference b/tests/queries/0_stateless/03801_merge_tree_on_readonly_disk.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03801_merge_tree_on_readonly_disk.sh b/tests/queries/0_stateless/03801_merge_tree_on_readonly_disk.sh new file mode 100755 index 000000000000..aef5f6215bc6 --- /dev/null +++ b/tests/queries/0_stateless/03801_merge_tree_on_readonly_disk.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -nm -q " +create table mt_ro (key Int) order by tuple() settings disk=disk(readonly = 1, type = 's3_plain_rewritable', endpoint = 'http://localhost:11111/test/s3_plain_rewritable_$CLICKHOUSE_DATABASE', access_key_id = clickhouse, secret_access_key = clickhouse); + +insert into mt_ro values (1); -- { serverError TABLE_IS_READ_ONLY } + +drop table mt_ro; +" + +$CLICKHOUSE_CLIENT -nm -q " +create table mt_ro (key Int) order by tuple() settings disk=disk(type='encrypted', readonly=1, key='1234567812345678', disk=disk(readonly = 1, type = 's3_plain_rewritable', endpoint = 'http://localhost:11111/test/s3_plain_rewritable_encrypted_$CLICKHOUSE_DATABASE', access_key_id = clickhouse, secret_access_key = clickhouse)); + +insert into mt_ro values (1); -- { serverError TABLE_IS_READ_ONLY } + +drop table mt_ro; +" diff --git a/tests/queries/0_stateless/03820_plain_rewritable_over_another_disk_with_same_path.reference b/tests/queries/0_stateless/03820_plain_rewritable_over_another_disk_with_same_path.reference new file mode 100644 index 000000000000..03f56863b12a --- /dev/null +++ b/tests/queries/0_stateless/03820_plain_rewritable_over_another_disk_with_same_path.reference @@ -0,0 +1,2 @@ +cache_on_write_operations=0 +cache_on_write_operations=1 diff --git a/tests/queries/0_stateless/03820_plain_rewritable_over_another_disk_with_same_path.sh b/tests/queries/0_stateless/03820_plain_rewritable_over_another_disk_with_same_path.sh new file mode 100755 index 000000000000..dd4662468fe1 --- /dev/null +++ b/tests/queries/0_stateless/03820_plain_rewritable_over_another_disk_with_same_path.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +settings=( + # Not compatible with DC + --write_through_distributed_cache=0 + --read_through_distributed_cache=0 +) + +echo "cache_on_write_operations=0" +endpoint="http://localhost:11111/test/s3_plain_rewritable_${CLICKHOUSE_DATABASE}_cache_on_write_operations_0" +cache_path="${CLICKHOUSE_DATABASE}_cache_disk_cache_on_write_operations_0" +$CLICKHOUSE_CLIENT "${settings[@]}" -nm -q " +drop table if exists mt1; +drop table if exists mt1; +drop table if exists mt3; +drop table if exists mt4; + +create table mt1 (key Int) engine=MergeTree order by () settings disk=disk(cache_on_write_operations=0, type='s3_plain_rewritable', endpoint='$endpoint', access_key_id=clickhouse, secret_access_key=clickhouse); +-- cache disk on top of s3_plain_rewritable is allowed +create table mt2 (key Int) engine=MergeTree order by () settings disk=disk(cache_on_write_operations=0, type='cache', path='$cache_path', max_size=100000, disk=disk(cache_on_write_operations=0, type='s3_plain_rewritable', endpoint='$endpoint', access_key_id=clickhouse, secret_access_key=clickhouse)); +-- encrypted disk with the same path as original s3_plain_rewritable disk is allowed +create table mt3 (key Int) engine=MergeTree order by () settings disk=disk(cache_on_write_operations=0, type='encrypted', key='1234567812345678', disk=disk(cache_on_write_operations=0, type='s3_plain_rewritable', endpoint='$endpoint', access_key_id=clickhouse, secret_access_key=clickhouse)); +-- encrypted disk over cache is allowed +create table mt4 (key Int) engine=MergeTree order by () settings disk=disk(cache_on_write_operations=0, type='encrypted', key='1234567812345678', disk=disk(cache_on_write_operations=0, type='cache', path='$cache_path', max_size=100000, disk=disk(cache_on_write_operations=0, type='s3_plain_rewritable', endpoint='$endpoint', access_key_id=clickhouse, secret_access_key=clickhouse))); + +drop table mt1; +drop table mt2; +drop table mt3; +drop table mt4; +" + +echo "cache_on_write_operations=1" +endpoint="http://localhost:11111/test/s3_plain_rewritable_${CLICKHOUSE_DATABASE}_cache_on_write_operations_1" +cache_path="${CLICKHOUSE_DATABASE}_cache_disk_cache_on_write_operations_1" +$CLICKHOUSE_CLIENT "${settings[@]}" -nm -q " +drop table if exists mt1; +drop table if exists mt1; +drop table if exists mt3; +drop table if exists mt4; + +create table mt1 (key Int) engine=MergeTree order by () settings disk=disk(cache_on_write_operations=1, type='s3_plain_rewritable', endpoint='$endpoint', access_key_id=clickhouse, secret_access_key=clickhouse); +-- cache disk on top of s3_plain_rewritable is allowed +create table mt2 (key Int) engine=MergeTree order by () settings disk=disk(cache_on_write_operations=1, type='cache', path='$cache_path', max_size=100000, disk=disk(cache_on_write_operations=1, type='s3_plain_rewritable', endpoint='$endpoint', access_key_id=clickhouse, secret_access_key=clickhouse)); +-- encrypted disk with the same path as original s3_plain_rewritable disk is allowed +create table mt3 (key Int) engine=MergeTree order by () settings disk=disk(cache_on_write_operations=1, type='encrypted', key='1234567812345678', disk=disk(cache_on_write_operations=1, type='s3_plain_rewritable', endpoint='$endpoint', access_key_id=clickhouse, secret_access_key=clickhouse)); +-- encrypted disk over cache is allowed +create table mt4 (key Int) engine=MergeTree order by () settings disk=disk(cache_on_write_operations=1, type='encrypted', key='1234567812345678', disk=disk(cache_on_write_operations=1, type='cache', 
path='$cache_path', max_size=100000, disk=disk(cache_on_write_operations=1, type='s3_plain_rewritable', endpoint='$endpoint', access_key_id=clickhouse, secret_access_key=clickhouse))); + +drop table mt1; +drop table mt2; +drop table mt3; +drop table mt4; +"
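
Note on the expected durations asserted in the throttling reload tests above: their comments state that moving 1e6*8 bytes under a 2M bandwidth limit "should take (8-2)/2=3 seconds". One way to read that formula is as a token-bucket throttler that admits roughly one second's worth of data (the limit itself) immediately and paces the remainder at the limit. The sketch below only illustrates this arithmetic; the one-second burst allowance and the helper name are assumptions made for illustration (they are not stated in the tests), and "2M" is treated as 2,000,000 bytes for simplicity.

def expected_throttled_seconds(total_bytes, limit_bytes_per_s, burst_seconds=1.0):
    # Hypothetical helper: assumes the throttler admits `burst_seconds` worth of
    # data immediately and paces the rest at `limit_bytes_per_s`.
    burst = limit_bytes_per_s * burst_seconds
    if total_bytes <= burst:
        return 0.0
    return (total_bytes - burst) / limit_bytes_per_s

# 1e6 UInt64 values = 8,000,000 bytes at a 2,000,000 B/s limit:
# (8 - 2) / 2 = 3 seconds, matching the "(8-2)/2=3 seconds" comments above.
assert expected_throttled_seconds(8_000_000, 2_000_000) == 3.0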