diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml
index dfa7c8d0ea..4e7bcf38b9 100644
--- a/.github/workflows/cla.yml
+++ b/.github/workflows/cla.yml
@@ -12,7 +12,7 @@ jobs:
- name: "CLA Assistant"
if: (github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the CLA Document and I hereby sign the CLA') || github.event_name == 'pull_request_target'
# Beta Release
- uses: cla-assistant/github-action@v2.1.3-beta
+ uses: cla-assistant/github-action@v2.4.0
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# the below token should have repo scope and must be manually added by you in the repository's secret
diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml
index 10d0cdb437..a246352abf 100644
--- a/.github/workflows/gh-pages.yml
+++ b/.github/workflows/gh-pages.yml
@@ -8,23 +8,23 @@ on:
jobs:
deploy:
- runs-on: ubuntu-20.04
+ runs-on: ubuntu-22.04
steps:
- name: Git checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
with:
submodules: true # Fetch Hugo themes (true OR recursive)
fetch-depth: 0 # Fetch all history for .GitInfo and .Lastmod
ref: main
- name: Setup Hugo
- uses: peaceiris/actions-hugo@v2
+ uses: peaceiris/actions-hugo@v3
with:
- hugo-version: 'latest'
+ hugo-version: '0.128.2'
extended: true
- name: Cache Hugo modules
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: /tmp/hugo_cache
key: ${{ runner.os }}-hugomod-${{ hashFiles('**/go.sum') }}
@@ -32,12 +32,12 @@ jobs:
${{ runner.os }}-hugomod-
- name: Setup Node
- uses: actions/setup-node@v3
+ uses: actions/setup-node@v4
with:
- node-version: '14'
+ node-version: '20'
- name: Cache dependencies
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: ~/.npm
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
@@ -51,7 +51,7 @@ jobs:
# run: hugo --gc
- name: Deploy
- uses: peaceiris/actions-gh-pages@v3
+ uses: peaceiris/actions-gh-pages@v4
if: github.ref == 'refs/heads/main'
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.gitignore b/.gitignore
index 26d948602c..ae663ecb0b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,13 @@
+docs/
*.DS_Store*
node_modules/
.vscode/*
yarn.lock
.hugo_build.lock
+/.idea/.gitignore
+/.idea/altinityknowledgebase.iml
+/resources/_gen/assets/scss/scss/main.scss_3f90599f3717b4a4920df16fdcadce3d.content
+/resources/_gen/assets/scss/scss/main.scss_3f90599f3717b4a4920df16fdcadce3d.json
+/.idea/modules.xml
+/.idea/vcs.xml
+/.idea/inspectionProfiles/Project_Default.xml
diff --git a/README.md b/README.md
index 1339a1c617..c857183f29 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
Welcome to the Altinity Knowledgebase Repository! This Knowledgebase was established for Altinity Engineers and ClickHouse community members to work together to find common solutions.
-Submissions and merges to this repository are distrubuted at https://kb.altinity.com .
+Submissions and merges to this repository are distributed at https://kb.altinity.com .
This knowledgebase is licensed under Apache 2.0. Contributors who submit to the Altinity Knowledgebase agree to the Altinity Contribution License Agreement.
diff --git a/assets/icons/logo.svg b/assets/icons/logo.svg
index 063493b0af..192e1a9eb5 100644
--- a/assets/icons/logo.svg
+++ b/assets/icons/logo.svg
@@ -1 +1,46 @@
-
\ No newline at end of file
+
diff --git a/assets/scss/_content.scss b/assets/scss/_content.scss
index 7fea32581b..221ddb4e95 100755
--- a/assets/scss/_content.scss
+++ b/assets/scss/_content.scss
@@ -5,13 +5,174 @@ Styles to override the theme.
*/
.td-navbar {
- background: #333;
+ max-width: 1280px;
+ background: #132f48;
}
+
.td-navbar .navbar-brand svg {
margin: -8px 10px 0;
}
+li.nav-item {
+ margin-bottom: 0;
+}
+
+.navbar-dark .navbar-nav .nav-link,
+.navbar-dark .navbar-nav .nav-link:hover,
+.navbar-dark .navbar-nav .nav-link:focus {
+ color: #fff;
+ line-height: 65px;
+ margin-bottom: 0;
+ padding: 0 10px;
+}
+
+.dropdown-toggle::after {
+ position: relative;
+ /* top: 3pt; Uncomment this to lower the icons as requested in comments*/
+ content: "";
+ display: inline-block;
+ /* By using an em scale, the arrows will size with the font */
+ width: 0.5em;
+ height: 0.5em;
+ border-top: 0 none;
+ border-left: 0 none;
+ border-right: 2px solid #fff;
+ border-bottom: 2px solid #fff;
+ transform: rotate(45deg);
+ margin-left: 0.5rem
+}
+
+.dropdown-menu {
+ position: absolute;
+ top: 100%;
+ left: 0;
+ z-index: 1000;
+ display: none;
+ float: left;
+ min-width: 16em;
+ padding: 0 0 1rem;
+ margin: 0;
+ font-size: 1rem;
+ color: #fff;
+ text-align: left;
+ list-style: none;
+ background-color: rgba(19, 47, 72, 0.9);
+ background-clip: padding-box;
+ border: 0px solid transparent;
+ border-radius: 0;
+ box-shadow: none;
+ backdrop-filter: blur(4px);
+}
+
+.dropdown-item,
+.dropdown-item:hover,
+.dropdown-item:focus {
+ display: block;
+ width: 100%;
+ padding: 0.5rem 1.5rem;
+ clear: both;
+ font-weight: 400;
+ color: #fff;
+ text-align: inherit;
+ white-space: nowrap;
+ background-color: transparent;
+ border: 0;
+}
+.dropdown-item:hover,
+.dropdown-item:focus {
+ text-decoration:underline;
+ background: transparent;
+}
+
+.dropdown-menu[data-bs-popper]{
+ margin-top: 0;
+}
+
+@media (max-width: 992px) {
+ .navbar-nav .nav-item{
+ display:none;
+ }
+}
+
+.header-social-wrap a {
+ text-decoration: none;
+}
+
+@media (min-width: 992px) {
+ body>header {
+ position: fixed;
+ top: 0;
+ width: 100%;
+ background: rgba(19, 47, 72, 0.9);
+ z-index:1000;
+ min-height: 65px;
+ backdrop-filter: blur(4px);
+ }
+ .td-navbar {
+ position:relative;
+ margin: 0 auto;
+ padding-left: 5px;
+ padding-right: 5px;
+ background:transparent;
+ min-height: 65px;
+ padding-top:0;
+ padding-bottom:0;
+ }
+ .td-navbar-nav-scroll{
+ width: 100%;
+ }
+
+ .td-navbar .navbar-brand svg {
+ width: 30px;
+ margin: -8px 10px 0 0;
+ height: auto;
+ }
+ .td-navbar .navbar-brand span.font-weight-bold {
+ display:inline-block;
+ vertical-align: 1px;
+ font-size:18px;
+ }
+
+ .td-sidebar {
+ padding-top: 75px;
+ background-color: #e9ecf0;
+ padding-right: 1rem;
+ border-right: 1px solid #dee2e6;
+ }
+
+ .td-sidebar-toc {
+ border-left: 1px solid $border-color;
+
+ @supports (position: sticky) {
+ position: sticky;
+ top: 75px;
+ height: calc(100vh - 85px);
+ overflow-y: auto;
+ }
+ order: 2;
+ padding-top: 5px;
+ padding-bottom: 1.5rem;
+ vertical-align: top;
+ }
+
+
+ .header-social-wrap {
+ height: 65px;
+ display: flex;
+ margin-left: auto;
+ align-items: center;
+ margin-bottom: 0;
+ }
+}
+
+
+
+
+footer.bg-dark {
+ background: #132f48 !important;
+}
+
img {
max-width:100%;
height: auto;
@@ -70,11 +231,11 @@ th {
tr:nth-child(odd) {
background: $td-sidebar-bg-color;
-}
+}
tr:nth-child(even) {
background-color: rgba(233, 236, 240, 0.5);
-}
+}
.feedback--title {
@@ -85,3 +246,92 @@ tr:nth-child(even) {
.feedback--answer {
width: 4em;
}
+
+
+// LEFT SIDEBAR
+
+@media (min-width: 768px){
+ .td-sidebar-nav {
+ min-height: 100%;
+ }}
+
+#m-upgrade_ebook,
+#m-join_slack,
+#m-maintenance_ebook,
+#m-clickhouse_training {
+ font-weight: bold;
+ color: #189DD0;
+ padding-left: 20px !important;
+ font-size: 15px;
+}
+
+#m-upgrade_ebook:hover span,
+#m-join_slack:hover span,
+#m-maintenance_ebook:hover span,
+#m-clickhouse_training:hover span {
+ text-decoration: underline;
+}
+
+#m-clickhouse_training {
+ background:url('data:image/svg+xml,') left 3px no-repeat transparent;
+ background-size: 17px;
+}
+
+#m-contact_us {
+ background:url('data:image/svg+xml,') left 3px no-repeat transparent;
+ background-size: 17px;
+}
+
+#m-join_slack {
+ background:url('data:image/svg+xml,') left 3px no-repeat transparent;
+ background-size: 17px;
+}
+
+#m-maintenance_ebook {
+ background:url('data:image/svg+xml,') left 3px no-repeat transparent;
+ background-size: 17px;
+}
+
+#m-upgrade_ebook {
+ background:url('data:image/svg+xml,') left 3px no-repeat transparent;
+ background-size: 17px;
+}
+
+#m-join_slack-li,
+#m-upgrade_ebook-li {
+ padding-top:20px;
+ border-top: 1px #189DD0 solid;
+ margin-top:20px;
+}
+
+
+
+footer {
+ min-height: auto !important;
+ color: #fff;
+}
+footer a, footer a:hover, footer a:active {
+ color: #fff;
+}
+
+footer .nav li {
+ font-size: 14px;
+ line-height: 1.8;
+}
+
+// Twitter icon fix
+
+footer i.fab.fa-twitter:before{
+ content: ' ';
+ width: 24px;
+ height: 24px;
+ display:inline-block;
+ background: url('data:image/svg+xml,') center bottom no-repeat transparent;
+ background-size: contain;
+ vertical-align: -3px;
+}
+
+footer .footer-inner {
+ max-width: 1280px;
+ margin: 0 auto;
+}
diff --git a/config.toml b/config.toml
index 8b05c360ee..381e637b5a 100644
--- a/config.toml
+++ b/config.toml
@@ -1,6 +1,6 @@
baseURL = "http://kb.altinity.com/"
languageCode = "en-us"
-title = "Altinity Knowledge Base"
+title = "Altinity® Knowledge Base for ClickHouse®"
# theme = ["docsy"]
publishDir = "docs"
enableRobotsTXT = true
@@ -54,8 +54,8 @@ anchor = "smart"
[languages]
[languages.en]
-title = "Altinity Knowledge Base"
-description = "Altinity Knowledge Base"
+title = "Altinity® Knowledge Base for ClickHouse®"
+description = "Altinity® Knowledge Base for ClickHouse®"
languageName = "English"
# Weight used for sorting.
weight = 1
@@ -76,7 +76,7 @@ time_format_blog = "2006.01.02"
[params]
# copyright = " Altinity Inc."
-copyright = " Altinity Inc. Altinity® and Altinity.Cloud® are registered trademarks of Altinity, Inc. ClickHouse® is a registered trademark of ClickHouse, Inc."
+copyright = " Altinity Inc. Altinity®, Altinity.Cloud®, and Altinity Stable® are registered trademarks of Altinity, Inc. ClickHouse® is a registered trademark of ClickHouse, Inc.; Altinity is not affiliated with or associated with ClickHouse, Inc. Kafka, Kubernetes, MySQL, and PostgreSQL are trademarks and property of their respective owners."
privacy_policy = "https://altinity.com/privacy-policy/"
favicon = "/favicon.ico"
@@ -158,28 +158,37 @@ enable = false
[params.links]
# End user relevant links. These will show up on left side of footer and in the community page if you have one.
-[[params.links.user]]
- name ="Twitter"
+[[params.links.developer]]
+ name ="Slack"
+ url = "https://altinity.com/slack"
+ icon = "fab fa-slack"
+ desc = "Join our Slack Community"
+[[params.links.developer]]
+ name ="X"
url = "https://twitter.com/AltinityDB"
icon = "fab fa-twitter"
- desc = "Follow us on Twitter to get the latest news!"
-[[params.links.user]]
+ desc = "Follow us on X to get the latest news!"
+[[params.links.developer]]
+ name = "LinkedIn"
+ url = "https://www.linkedin.com/company/altinity/"
+ icon = "fab fa-linkedin"
+ desc = "Partner with us on LinkedIn."
+[[params.links.developer]]
name = "Youtube"
url = "https://www.youtube.com/channel/UCE3Y2lDKl_ZfjaCrh62onYA"
icon = "fab fa-youtube"
desc = "Watch our videos."
-[[params.links.user]]
- name = "LinkedIn"
- url = "https://www.linkedin.com/company/altinity/"
- icon = "fab fa-linkedin"
- desc = "Partner with us on LinkedIn."
# Developer relevant links. These will show up on right side of footer and in the community page if you have one.
[[params.links.developer]]
name = "GitHub"
- url = "https://github.com/orgs/Altinity/"
+ url = "https://github.com/Altinity/altinityknowledgebase"
icon = "fab fa-github"
- desc = "Development takes place here!"
-
+ desc = "Development takes place here!"
+[[params.links.developer]]
+ name = "Reddit"
+ url = "https://www.reddit.com/r/Clickhouse/"
+ icon = "fab fa-reddit"
+ desc = "Altinity on Reddit"
[outputFormats]
[outputFormats.PRINT]
baseName = "index"
@@ -199,4 +208,4 @@ section = [ "HTML", "print"]
[[module.imports]]
path = "github.com/google/docsy"
[[module.imports]]
- path = "github.com/google/docsy/dependencies"
\ No newline at end of file
+ path = "github.com/google/docsy/dependencies"
diff --git a/content/en/_index.md b/content/en/_index.md
index 92ba47dae1..43b10260ff 100755
--- a/content/en/_index.md
+++ b/content/en/_index.md
@@ -1,7 +1,7 @@
---
-title: "Altinity Knowledge Base"
-linkTitle: "Altinity Knowledge Base"
-description: "Up-to-date ClickHouse knowledge base for every ClickHouse user."
+title: "Altinity® Knowledge Base for ClickHouse®"
+linkTitle: "Altinity® Knowledge Base for ClickHouse®"
+description: "Up-to-date ClickHouse® knowledge base for every ClickHouse user."
keywords:
- ClickHouse Knowledge Base
- Altinity Knowledge Base
@@ -12,7 +12,7 @@ cascade:
_target:
path: "/**"
---
-## Welcome to the Altinity ClickHouse Knowledge Base (KB)
+## Welcome to the Altinity® Knowledge Base (KB) for ClickHouse®
This knowledge base is supported by [Altinity](http://altinity.com/) engineers to provide quick answers to common questions and issues involving ClickHouse.
@@ -21,11 +21,17 @@ The [Altinity Knowledge Base is licensed under Apache 2.0](https://github.com/Al
For more detailed information about Altinity services support, see the following:
* [Altinity](https://altinity.com/): Providers of Altinity.Cloud, providing SOC-2 certified support for ClickHouse.
-* [Altinity ClickHouse Documentation](https://docs.altinity.com): Detailed guides on installing and connecting ClickHouse to other services.
-* [Altinity Resources](https://altinity.com/resources/): News, blog posts, and webinars about ClickHouse and Altinity services.
+* [Altinity.com Documentation](https://docs.altinity.com): Detailed guides on working with:
+ * [Altinity.Cloud](https://docs.altinity.com/altinitycloud/)
+ * [Altinity.Cloud Anywhere](https://docs.altinity.com/altinitycloudanywhere/)
+ * [The Altinity Cloud Manager](https://docs.altinity.com/altinitycloud/quickstartguide/clusterviewexplore/)
+ * [The Altinity Kubernetes Operator for ClickHouse](https://docs.altinity.com/releasenotes/altinity-kubernetes-operator-release-notes/)
+ * [The Altinity Sink Connector for ClickHouse](https://docs.altinity.com/releasenotes/altinity-sink-connector-release-notes/) and
+ * [Altinity Backup for ClickHouse](https://docs.altinity.com/releasenotes/altinity-backup-release-notes/)
+* [Altinity Blog](https://altinity.com/blog/): Blog posts about ClickHouse the database and Altinity services.
The following sites are also useful references regarding ClickHouse:
-* [ClickHouse.tech documentation](https://clickhouse.tech/docs/en/): From Yandex, the creators of ClickHouse
+* [ClickHouse.com documentation](https://clickhouse.com/docs/en/): Official documentation from ClickHouse Inc.
* [ClickHouse at Stackoverflow](https://stackoverflow.com/questions/tagged/clickhouse): Community driven responses to questions regarding ClickHouse
* [Google groups (Usenet) yes we remember it](https://groups.google.com/g/clickhouse): The grandparent of all modern discussion boards.
diff --git a/content/en/altinity-kb-dictionaries/_index.md b/content/en/altinity-kb-dictionaries/_index.md
index 15cc00ca4e..64bcc3214e 100644
--- a/content/en/altinity-kb-dictionaries/_index.md
+++ b/content/en/altinity-kb-dictionaries/_index.md
@@ -6,11 +6,11 @@ keywords:
- clickhouse arrays
- postgresql dictionary
description: >
- All you need to know about creating and using ClickHouse dictionaries.
+ All you need to know about creating and using ClickHouse® dictionaries.
weight: 11
---
-For more information on ClickHouse Dictionaries, see
+For more information on ClickHouse® Dictionaries, see
the presentation [https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup34/clickhouse_integration.pdf](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup34/clickhouse_integration.pdf), slides 82-95, video https://youtu.be/728Yywcd5ys?t=10642
@@ -20,7 +20,3 @@ https://altinity.com/blog/2020/5/19/clickhouse-dictionaries-reloaded
And some videos:
https://www.youtube.com/watch?v=FsVrFbcyb84
-
-Also there 3rd party articles on the same subj.
-https://prog.world/how-to-create-and-use-dictionaries-in-clickhouse/
-
diff --git a/content/en/altinity-kb-dictionaries/altinity-kb-sparse_hashed-vs-hashed.md b/content/en/altinity-kb-dictionaries/altinity-kb-sparse_hashed-vs-hashed.md
index bf27340627..5a2f5ce070 100644
--- a/content/en/altinity-kb-dictionaries/altinity-kb-sparse_hashed-vs-hashed.md
+++ b/content/en/altinity-kb-dictionaries/altinity-kb-sparse_hashed-vs-hashed.md
@@ -1,10 +1,10 @@
---
-title: "SPARSE_HASHED VS HASHED"
-linkTitle: "SPARSE_HASHED VS HASHED"
+title: "SPARSE_HASHED VS HASHED vs HASHED_ARRAY"
+linkTitle: "SPARSE_HASHED VS HASHED vs HASHED_ARRAY"
description: >
- SPARSE_HASHED VS HASHED
+  SPARSE_HASHED vs HASHED vs HASHED_ARRAY
---
-Sparse_hashed layout is supposed to save memory but has some downsides. We can test how much slower SPARSE_HASHED than HASHED is with the following:
+Sparse_hashed and hashed_array layouts are supposed to save memory but have some downsides. We can test them with the following:
```sql
create table orders(id UInt64, price Float64)
@@ -22,6 +22,11 @@ PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000
TABLE orders DB 'default' USER 'default'))
LIFETIME(MIN 0 MAX 0) LAYOUT(SPARSE_HASHED());
+CREATE DICTIONARY orders_hashed_array (id UInt64, price Float64)
+PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000
+TABLE orders DB 'default' USER 'default'))
+LIFETIME(MIN 0 MAX 0) LAYOUT(HASHED_ARRAY());
+
SELECT
name,
type,
@@ -30,26 +35,32 @@ SELECT
formatReadableSize(bytes_allocated) AS RAM
FROM system.dictionaries
WHERE name LIKE 'orders%'
-┌─name──────────┬─type─────────┬─status─┬─element_count─┬─RAM────────┐
-│ orders_sparse │ SparseHashed │ LOADED │ 5000000 │ 84.29 MiB │
-│ orders_hashed │ Hashed │ LOADED │ 5000000 │ 256.00 MiB │
-└───────────────┴──────────────┴────────┴───────────────┴────────────┘
+┌─name────────────────┬─type─────────┬─status─┬─element_count─┬─RAM────────┐
+│ orders_hashed_array │ HashedArray │ LOADED │ 5000000 │ 68.77 MiB │
+│ orders_sparse │ SparseHashed │ LOADED │ 5000000 │ 76.30 MiB │
+│ orders_hashed │ Hashed │ LOADED │ 5000000 │ 256.00 MiB │
+└─────────────────────┴──────────────┴────────┴───────────────┴────────────┘
SELECT sum(dictGet('default.orders_hashed', 'price', toUInt64(number))) AS res
FROM numbers(10000000)
┌─res─┐
│ 0 │
└─────┘
-1 rows in set. Elapsed: 0.279 sec. Processed 10.02 million rows ...
+1 rows in set. Elapsed: 0.546 sec. Processed 10.01 million rows ...
SELECT sum(dictGet('default.orders_sparse', 'price', toUInt64(number))) AS res
FROM numbers(10000000)
┌─res─┐
│ 0 │
└─────┘
-1 rows in set. Elapsed: 1.085 sec. Processed 10.02 million rows ...
-```
+1 rows in set. Elapsed: 1.422 sec. Processed 10.01 million rows ...
-As you can see **SPARSE_HASHED** is memory efficient and use about 3 times less memory (!!!) but is almost 4 times slower. But this is the ultimate case because this test does not read data from the disk (no MergeTree table involved).
+SELECT sum(dictGet('default.orders_hashed_array', 'price', toUInt64(number))) AS res
+FROM numbers(10000000)
+┌─res─┐
+│ 0 │
+└─────┘
+1 rows in set. Elapsed: 0.558 sec. Processed 10.01 million rows ...
+```
-We encourage you to test **SPARSE_HASHED** against your real queries, because it able to save a lot of memory and have larger (in rows) external dictionaries.
+As you can see, **SPARSE_HASHED** is memory efficient and uses about 3 times less memory (!!!) but is almost 3 times slower as well. On the other hand, **HASHED_ARRAY** is even more memory efficient and maintains almost the same performance as the **HASHED** layout.
diff --git a/content/en/altinity-kb-dictionaries/dictionaries-and-arrays.md b/content/en/altinity-kb-dictionaries/dictionaries-and-arrays.md
index db0352e2f2..8cfa4b0027 100644
--- a/content/en/altinity-kb-dictionaries/dictionaries-and-arrays.md
+++ b/content/en/altinity-kb-dictionaries/dictionaries-and-arrays.md
@@ -4,7 +4,7 @@ linkTitle: "Dictionaries & arrays"
description: >
Dictionaries & arrays
---
-## Dictionary with Clickhouse table as a source
+## Dictionary with ClickHouse® table as a source
### Test data
diff --git a/content/en/altinity-kb-dictionaries/dictionary-on-top-tables.md b/content/en/altinity-kb-dictionaries/dictionary-on-top-tables.md
index 8dc6c35ea4..a7fa9c0cf5 100644
--- a/content/en/altinity-kb-dictionaries/dictionary-on-top-tables.md
+++ b/content/en/altinity-kb-dictionaries/dictionary-on-top-tables.md
@@ -1,8 +1,8 @@
---
-title: "Dictionary on the top of the several tables using VIEW"
-linkTitle: "Dictionary on the top of the several tables using VIEW"
+title: "Dictionary on the top of several tables using VIEW"
+linkTitle: "Dictionary on the top of several tables using VIEW"
description: >
- Dictionary on the top of the several tables using VIEW
+ Dictionary on the top of several tables using VIEW
---
```sql
diff --git a/content/en/altinity-kb-dictionaries/dimension_table_desing.md b/content/en/altinity-kb-dictionaries/dimension_table_desing.md
new file mode 100644
index 0000000000..21c5f4bcb8
--- /dev/null
+++ b/content/en/altinity-kb-dictionaries/dimension_table_desing.md
@@ -0,0 +1,161 @@
+---
+title: "Dimension table design "
+linkTitle: "Dimension table design "
+description: >
+ Dimension table design
+---
+## Dimension table design considerations
+
+### Choosing storage Engine
+
+To optimize the performance of reporting queries, dimensional tables should be loaded into RAM as ClickHouse Dictionaries whenever feasible. It's becoming increasingly common to allocate 100-200GB of RAM per server specifically for these Dictionaries. Implementing sharding by tenant can further reduce the size of these dimension tables, enabling a greater portion of them to be stored in RAM and thus enhancing query speed.
+
+Different Dictionary Layouts can take more or less RAM (in trade for speed).
+
+- The cached dictionary layout is ideal for minimizing the amount of RAM required to store dimensional data when the hit ratio is high. This layout allows frequently accessed data to be kept in RAM while less frequently accessed data is stored on disk, thereby optimizing memory usage without sacrificing performance (see the sketch after this list).
+- HASHED_ARRAY or SPARSE_HASHED dictionary layouts take less RAM than HASHED. See tests [here](https://kb.altinity.com/altinity-kb-dictionaries/altinity-kb-sparse_hashed-vs-hashed/).
+- Normalization techniques can be used to lower RAM usage (see below)
+
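+As a hedged illustration of the cached layout mentioned above (a minimal sketch; the dictionary name, source table, lifetime, and cache size are illustrative placeholders):
+
+```sql
+-- CACHE layout keeps only the most recently requested keys in RAM;
+-- SIZE_IN_CELLS bounds the in-memory cache, misses are re-read from the source
+CREATE DICTIONARY dict_customers_cached
+(
+    id UInt64,
+    name String
+)
+PRIMARY KEY id
+SOURCE(CLICKHOUSE(TABLE 'Dim_Customers' DB 'default'))
+LIFETIME(MIN 300 MAX 360)
+LAYOUT(CACHE(SIZE_IN_CELLS 1000000));
+```
+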
+If the amount of data is so high that it does not fit in RAM even after suitable sharding, a disk-based table with an appropriate engine and parameters can be used for accessing dimensional data in report queries.
+
+MergeTree engines (including Replacing or Aggregating) are not tuned by default for point queries due to the high index granularity (8192) and the necessity of using FINAL (or GROUP BY) when accessing mutated data.
+
+When using the MergeTree engine for Dimensions, the table’s index granularity should be lowered to 256. More RAM will be used for the PK, but it’s a reasonable price for reading less data from disk and making report queries faster, and that amount can be reduced by a lightweight PK design (see below).
+
+The `EmbeddedRocksDB` engine could be used as an alternative. It performs much better than ReplacingMergeTree for highly mutated data, as it is tuned by design for random point queries and high-frequency updates. However, EmbeddedRocksDB does not support Replication, so INSERTing data into such tables should be done over a Distributed table with `internal_replication` set to false, which is vulnerable to various desync problems. Some “sync” procedures should be designed, developed, and applied after serious data ingestion incidents (like ETL crashes).
+
+When the Dimension table is built on several incoming event streams, `AggregatingMergeTree` is preferable to `ReplacingMergeTree`, as it allows combining data from different event streams without external ETL processes:
+
+```sql
+CREATE TABLE table_C (
+ id UInt64,
+    colA SimpleAggregateFunction(any, Nullable(UInt32)),
+    colB SimpleAggregateFunction(max, String)
+) ENGINE = AggregatingMergeTree()
+PARTITION BY intDiv(id, 0x800000000000000) /* 32 buckets */
+ORDER BY id;
+
+CREATE MATERIALIZED VIEW mv_A TO table_C AS SELECT id,colA FROM Kafka_A;
+CREATE MATERIALIZED VIEW mv_B TO table_C AS SELECT id,colB FROM Kafka_B;
+```
+
+EmbeddedRocksDB natively supports UPDATEs without any complications with aggregate functions.
+
+For dimensions where some “start date” column is used in filtering, the [Range_Hashed](https://kb.altinity.com/altinity-kb-dictionaries/altinity-kb-range_hashed-example-open-intervals/) dictionary layout can be used if its RAM usage is acceptable. For MergeTree variants, an ASOF JOIN in queries is needed. Such dimensions are the first candidates for placement into RAM.
+
+EmbeddedRocksDB is not suitable here.
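+
+A minimal sketch of such a range dictionary (the table and column names here are illustrative placeholders, not part of the tests below):
+
+```sql
+CREATE DICTIONARY dict_prices
+(
+    id UInt64,
+    start_date Date,
+    end_date Date,
+    price Float64
+)
+PRIMARY KEY id
+SOURCE(CLICKHOUSE(TABLE 'prices' DB 'default'))
+LIFETIME(MIN 300 MAX 360)
+LAYOUT(RANGE_HASHED())
+RANGE(MIN start_date MAX end_date);
+
+-- the point in time is passed as an extra dictGet argument
+SELECT dictGet('dict_prices', 'price', toUInt64(1), toDate('2024-01-15'));
+```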
+
+### Primary Key
+
+To increase query performance, I recommend using a single UInt64 (not String) column for PK, where the upper 32 bits are reserved for tenant_id (shop_id) and the lower 32 bits for actual object_id (like customer_id, product_id, etc.)
+
+That benefits both EmbeddedRocksDB Engine (it can have only one Primary Key column) and ReplacingMergeTree, as FINAL processing will work much faster with a light ORDER BY column of a single UInt64 value.
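+
+A minimal sketch of packing and unpacking such a composite key (the literal shop_id and customer_id values are just for illustration):
+
+```sql
+SELECT
+    bitOr(bitShiftLeft(toUInt64(3648061509), 32), toUInt64(42)) AS packed_id, -- shop_id in the upper 32 bits, customer_id in the lower 32 bits
+    bitShiftRight(packed_id, 32) AS shop_id,                                  -- upper 32 bits back
+    bitAnd(packed_id, 0xFFFFFFFF) AS customer_id;                             -- lower 32 bits back
+```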
+
+### Direct Dictionary and UDFs
+
+To make the SQL code of report queries more readable and manageable, I recommend always using Dictionaries to access dimensions. A `direct dictionary layout` should be used for disk-stored dimensions (EmbeddedRocksDB or *MergeTree).
+
+When ClickHouse® builds a query to a Direct Dictionary, it automatically creates a filter with the list of all needed ID values. There is no need to write code that filters the necessary dimension rows to reduce the hash table on the right side of a join.
+
+Another trick for code manageability is creating an interface function for every dimension that hides all the complexity of managing IDs (packing several values into a single PK value):
+
+```sql
+create or replace function getCustomer as (shop, id, attr) ->
+ dictGetOrNull('dict_Customers', attr, bitOr((bitShiftLeft(toUInt64(shop),32)),id));
+```
+
+It also allows the flexibility of changing dictionary names when testing different types of Engines, or it can be used to spread dimensional data across several dictionaries. For example, the most active tenants can be served by an expensive in-RAM dictionary, while other (less active) tenants are served from disk.
+
+```sql
+create or replace function getCustomer as (shop, id, attr) ->
+ dictGetOrDefault('dict_Customers_RAM', attr, bitOr((bitShiftLeft(toUInt64(shop),32)),id) as key,
+ dictGetOrNull('dict_Customers_MT', attr, key));
+```
+
+We always recommend DENORMALIZATION for Fact tables. However, NORMALIZATION is still a usable approach for reducing the RAM taken by Dimension data stored as dictionaries.
+
+Example of storing a long company name (String) in a separate dictionary:
+
+```sql
+create or replace function getCustomer as (shop, id, attr) ->
+  if(attr='company_name',
+     dictGetOrNull('dict_Company_name', 'name',
+        dictGetOrNull('dict_Customers', 'company_id',
+           bitOr(bitShiftLeft(toUInt64(shop),32), id) as key)),
+     dictGetOrNull('dict_Customers', attr, key)
+  );
+```
+
+Example of combining Hashed and Direct Dictionaries. It allows increasing the dictionary lifetime without losing consistency.
+
+```sql
+CREATE OR REPLACE FUNCTION getProduct AS (shop_id, product_id, attr) ->
+ dictGetOrDefault('hashed_dictionary', attr,(shop_id, product_id),
+ dictGet('direct_dictionary',attr,(shop_id, product_id) )
+ );
+```
+
+### Tests/Examples
+
+EmbeddedRocksDB
+
+```sql
+CREATE TABLE Dim_Customers (
+ id UInt64,
+ name String,
+ new_or_returning bool
+) ENGINE = EmbeddedRocksDB()
+PRIMARY KEY (id);
+
+INSERT INTO Dim_Customers
+SELECT bitShiftLeft(3648061509::UInt64,32)+number,
+ ['Customer A', 'Customer B', 'Customer C', 'Customer D', 'Customer E'][number % 5 + 1],
+ number % 2 = 0
+FROM numbers(100);
+
+CREATE DICTIONARY dict_Customers
+(
+ id UInt64,
+ name String,
+ new_or_returning bool
+)
+PRIMARY KEY id
+LAYOUT(DIRECT())
+SOURCE(CLICKHOUSE(TABLE 'Dim_Customers'));
+
+WITH 3648061509 AS shop_id, 1 AS customer_id
+select dictGetOrNull('dict_Customers', 'name',
+    bitOr(bitShiftLeft(toUInt64(shop_id), 32), customer_id));
+```
+
+ReplacingMergeTree
+
+```sql
+CREATE TABLE Dim_Customers (
+ id UInt64,
+ name String,
+ new_or_returning bool
+) ENGINE = ReplacingMergeTree()
+ORDER BY id
+PARTITION BY intDiv(id, 0x800000000000000) /* 32 buckets by shop_id */
+settings index_granularity=256;
+
+CREATE DICTIONARY dict_Customers
+(
+ id UInt64,
+ name String,
+ new_or_returning bool
+)
+PRIMARY KEY id
+LAYOUT(DIRECT())
+SOURCE(CLICKHOUSE(query 'select * from Dim_Customers FINAL'));
+
+set do_not_merge_across_partitions_select_final=1; -- or place it to profile
+select dictGet('dict_Customers','name',bitShiftLeft(3648061509::UInt64,32)+1);
+```
+
+Tests 1M random reads over 10M entries per shop_id in the Dimension table
+
+- [EmbeddedRocksDB](https://fiddle.clickhouse.com/c304d0cc-f1c2-4323-bd65-ab82165aecb6) - 0.003s
+- [ReplacingMergeTree](https://fiddle.clickhouse.com/093fc133-0685-4c97-aa90-d38200f93f9f) - 0.003s
+
+There is no difference in SELECT on that synthetic test with all MergeTree optimizations applied. The test must be rerun on actual data with the expected update volume. The difference could be seen on a table with high-volume real-time updates.
diff --git a/content/en/altinity-kb-dictionaries/mysql8-source-for-dictionaries.md b/content/en/altinity-kb-dictionaries/mysql8-source-for-dictionaries.md
index 3554bcda00..650519dff6 100644
--- a/content/en/altinity-kb-dictionaries/mysql8-source-for-dictionaries.md
+++ b/content/en/altinity-kb-dictionaries/mysql8-source-for-dictionaries.md
@@ -6,7 +6,7 @@ description: >
---
#### Authorization
-MySQL8 used default authorization plugin `caching_sha2_password`. Unfortunately, `libmysql` which currently used (21.4-) in clickhouse is not.
+MySQL 8 uses the `caching_sha2_password` authentication plugin by default. Unfortunately, the `libmysql` currently used in ClickHouse® (as of 21.4) does not support it.
You can fix it during create custom user with `mysql_native_password` authentication plugin.
diff --git a/content/en/altinity-kb-dictionaries/partial-updates.md b/content/en/altinity-kb-dictionaries/partial-updates.md
index 088e562e84..8efd312c3a 100644
--- a/content/en/altinity-kb-dictionaries/partial-updates.md
+++ b/content/en/altinity-kb-dictionaries/partial-updates.md
@@ -4,7 +4,7 @@ linkTitle: "Partial updates"
description: >
Partial updates
---
-Clickhouse is able to fetch from a source only updated rows. You need to define `update_field` section.
+ClickHouse® is able to fetch only updated rows from a source. You need to define the `update_field` section.
As an example, We have a table in an external source MySQL, PG, HTTP, ... defined with the following code sample:
@@ -36,4 +36,4 @@ LIFETIME(MIN 30 MAX 30)
A dictionary with **update_field** `updated_at` will fetch only updated rows. A dictionary saves the current time (now) time of the last successful update and queries the source `where updated_at >= previous_update - 1` (shift = 1 sec.).
-In case of HTTP source Clickhouse will send get requests with **update_field** as an URL parameter `&updated_at=2020-01-01%2000:01:01`
+In the case of an HTTP source, ClickHouse® will send GET requests with **update_field** as a URL parameter: `&updated_at=2020-01-01%2000:01:01`
diff --git a/content/en/altinity-kb-dictionaries/security-named-collections.md b/content/en/altinity-kb-dictionaries/security-named-collections.md
new file mode 100644
index 0000000000..5c34dd11d8
--- /dev/null
+++ b/content/en/altinity-kb-dictionaries/security-named-collections.md
@@ -0,0 +1,47 @@
+---
+title: "Security named collections"
+linkTitle: "Security named collections"
+description: >
+ Security named collections
+---
+
+
+## Dictionary with ClickHouse® table as a source with named collections
+
+### Data for connecting to external sources can be stored in named collections
+
+```xml
+<clickhouse>
+    <named_collections>
+        <local_host>
+            <host>localhost</host>
+            <port>9000</port>
+            <database>default</database>
+            <user>ch_dict</user>
+            <password>mypass</password>
+        </local_host>
+    </named_collections>
+</clickhouse>
+```
+
+### Dictionary
+
+```sql
+DROP DICTIONARY IF EXISTS named_coll_dict;
+CREATE DICTIONARY named_coll_dict
+(
+ key UInt64,
+ val String
+)
+PRIMARY KEY key
+SOURCE(CLICKHOUSE(NAME local_host TABLE my_table DB default))
+LIFETIME(MIN 1 MAX 2)
+LAYOUT(HASHED());
+
+INSERT INTO my_table(key, val) VALUES(1, 'first row');
+
+SELECT dictGet('named_coll_dict', 'b', 1);
+┌─dictGet('named_coll_dict', 'b', 1)─┐
+│ first row │
+└────────────────────────────────────┘
+```
diff --git a/content/en/altinity-kb-functions/array-like-memory-usage.md b/content/en/altinity-kb-functions/array-like-memory-usage.md
index a88ceec5a1..69dbedf0f5 100644
--- a/content/en/altinity-kb-functions/array-like-memory-usage.md
+++ b/content/en/altinity-kb-functions/array-like-memory-usage.md
@@ -2,12 +2,12 @@
title: "arrayMap, arrayJoin or ARRAY JOIN memory usage"
linkTitle: "arrayMap, arrayJoin or ARRAY JOIN memory usage"
description: >
- Why arrayMap, arrayFilter, arrayJoin use so much memory?
+ Why do arrayMap, arrayFilter, and arrayJoin use so much memory?
---
## arrayMap-like functions memory usage calculation.
-In order to calculate arrayMap or similar array* functions ClickHouse temporarily does arrayJoin-like operation, which in certain conditions can lead to huge memory usage for big arrays.
+In order to calculate arrayMap or similar array* functions, ClickHouse® temporarily does an arrayJoin-like operation, which in certain conditions can lead to huge memory usage for big arrays.
So for example, you have 2 columns:
diff --git a/content/en/altinity-kb-functions/arrayfold.md b/content/en/altinity-kb-functions/arrayfold.md
new file mode 100644
index 0000000000..3a66ce055d
--- /dev/null
+++ b/content/en/altinity-kb-functions/arrayfold.md
@@ -0,0 +1,17 @@
+---
+title: "arrayFold"
+linkTitle: "arrayFold"
+---
+
+## EWMA example
+
+```sql
+WITH
+ [40, 45, 43, 31, 20] AS data,
+ 0.3 AS alpha
+SELECT arrayFold((acc, x) -> arrayPushBack(acc, (alpha * x) + ((1 - alpha) * (acc[-1]))), arrayPopFront(data), [CAST(data[1], 'Float64')]) as ewma
+
+┌─ewma─────────────────────────────────────────────────────────────┐
+│ [40,41.5,41.949999999999996,38.66499999999999,33.06549999999999] │
+└──────────────────────────────────────────────────────────────────┘
+```
diff --git a/content/en/altinity-kb-functions/assumenotnull-and-friends.md b/content/en/altinity-kb-functions/assumenotnull-and-friends.md
index 7ba6ca1ee5..760d2afbbf 100644
--- a/content/en/altinity-kb-functions/assumenotnull-and-friends.md
+++ b/content/en/altinity-kb-functions/assumenotnull-and-friends.md
@@ -89,7 +89,7 @@ Code: 36, e.displayText() = DB::Exception: Unexpected value 0 in enum, Stack tra
```
{{% alert title="Info" color="info" %}}
-Null values in ClickHouse are stored in a separate dictionary: is this value Null. And for faster dispatch of functions there is no check on Null value while function execution, so functions like plus can modify internal column value (which has default value). In normal conditions it’s not a problem because on read attempt, ClickHouse first would check the Null dictionary and return value from column itself for non-Nulls only. And `assumeNotNull` function just ignores this Null dictionary. So it would return only column values, and in certain cases it’s possible to have unexpected results.
+Null values in ClickHouse® are stored in a separate dictionary: is this value Null. And for faster dispatch of functions there is no check on Null value while function execution, so functions like plus can modify internal column value (which has default value). In normal conditions it’s not a problem because on read attempt, ClickHouse first would check the Null dictionary and return value from column itself for non-Nulls only. And `assumeNotNull` function just ignores this Null dictionary. So it would return only column values, and in certain cases it’s possible to have unexpected results.
{{% /alert %}}
If it's possible to have Null values, it's better to use `ifNull` function instead.
diff --git a/content/en/altinity-kb-functions/how-to-encode-decode-quantiletdigest-state.md b/content/en/altinity-kb-functions/how-to-encode-decode-quantiletdigest-state.md
new file mode 100644
index 0000000000..766d864fec
--- /dev/null
+++ b/content/en/altinity-kb-functions/how-to-encode-decode-quantiletdigest-state.md
@@ -0,0 +1,85 @@
+---
+title: "How to encode/decode quantileTDigest states from/to list of centroids"
+linkTitle: "Encoding and Decoding of quantileTDigest states"
+weight: 100
+description: >-
+ A way to export or import quantileTDigest states from/into ClickHouse®
+---
+
+## quantileTDigestState
+
+quantileTDigestState is stored in two parts: a count of centroids in LEB128 format + list of centroids without a delimiter. Each centroid is represented as two Float32 values: Mean & Count.
+
+```sql
+SELECT
+ hex(quantileTDigestState(1)),
+ hex(toFloat32(1))
+
+┌─hex(quantileTDigestState(1))─┬─hex(toFloat32(1))─┐
+│ 010000803F0000803F │ 0000803F │
+└──────────────────────────────┴───────────────────┘
+ 01 0000803F 0000803F
+ ^ ^ ^
+ LEB128 Float32 Mean Float32 Count
+```
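+
+For reference, here is a minimal Python sketch (a side illustration, not part of the UDF approach below) that decodes such a hex dump using only the layout described above:
+
+```python
+import struct
+
+def decode_tdigest_state(raw: bytes):
+    # LEB128 varint: number of centroids
+    n, shift, pos = 0, 0, 0
+    while True:
+        byte = raw[pos]
+        pos += 1
+        n |= (byte & 0x7F) << shift
+        if byte & 0x80 == 0:
+            break
+        shift += 7
+    # each centroid is two little-endian Float32 values: (mean, count)
+    return [struct.unpack_from('<ff', raw, pos + i * 8) for i in range(n)]
+
+print(decode_tdigest_state(bytes.fromhex('010000803F0000803F')))  # [(1.0, 1.0)]
+```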
+
+We need to make two helper `UDF` functions:
+
+```xml
+cat /etc/clickhouse-server/decodeTDigestState_function.xml
+<functions>
+    <function>
+        <type>executable</type>
+        <execute_direct>0</execute_direct>
+        <name>decodeTDigestState</name>
+        <return_type>Array(Tuple(mean Float32, count Float32))</return_type>
+        <argument>
+            <type>AggregateFunction(quantileTDigest, UInt32)</type>
+        </argument>
+        <format>RowBinary</format>
+        <command>cat</command>
+        <send_chunk_header>0</send_chunk_header>
+    </function>
+</functions>
+
+cat /etc/clickhouse-server/encodeTDigestState_function.xml
+<functions>
+    <function>
+        <type>executable</type>
+        <execute_direct>0</execute_direct>
+        <name>encodeTDigestState</name>
+        <return_type>AggregateFunction(quantileTDigest, UInt32)</return_type>
+        <argument>
+            <type>Array(Tuple(mean Float32, count Float32))</type>
+        </argument>
+        <format>RowBinary</format>
+        <command>cat</command>
+        <send_chunk_header>0</send_chunk_header>
+    </function>
+</functions>
+```
+
+These UDFs, `encodeTDigestState` and `decodeTDigestState`, convert a `TDigestState` to `Array(Tuple(Float32, Float32))` and back.
+
+```sql
+SELECT quantileTDigest(CAST(number, 'UInt32')) AS result
+FROM numbers(10)
+
+┌─result─┐
+│ 4 │
+└────────┘
+
+SELECT decodeTDigestState(quantileTDigestState(CAST(number, 'UInt32'))) AS state
+FROM numbers(10)
+
+┌─state─────────────────────────────────────────────────────────┐
+│ [(0,1),(1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1)] │
+└───────────────────────────────────────────────────────────────┘
+
+SELECT finalizeAggregation(encodeTDigestState(CAST('[(0,1),(1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1)]', 'Array(Tuple(Float32, Float32))'))) AS result
+
+┌─result─┐
+│ 4 │
+└────────┘
+```
+
diff --git a/content/en/altinity-kb-functions/kurt_skew_statistics.md b/content/en/altinity-kb-functions/kurt_skew_statistics.md
new file mode 100644
index 0000000000..84cd3b4950
--- /dev/null
+++ b/content/en/altinity-kb-functions/kurt_skew_statistics.md
@@ -0,0 +1,76 @@
+---
+title: "kurt & skew statistical functions in ClickHouse®
+"
+linkTitle: "kurt & skew"
+weight: 100
+description: >-
+  How to make them return the same results as Python's SciPy
+---
+
+```python
+from scipy.stats import skew, kurtosis
+
+# Creating a dataset
+
+dataset = [10,17,71,6,55,38,27,61,48,46,21,38,2,67,35,77,29,31,27,67,81,82,75,81,31,38,68,95,37,34,65,59,81,28,82,80,35,3,97,42,66,28,85,98,45,15,41,61,24,53,97,86,5,65,84,18,9,32,46,52,69,44,78,98,61,64,26,11,3,19,0,90,28,72,47,8,0,74,38,63,88,43,81,61,34,24,37,53,79,72,5,77,58,3,61,56,1,3,5,61]
+
+print(skew(dataset, axis=0, bias=True), skew(dataset))
+
+# -0.05785361619432152 -0.05785361619432152
+```
+```sql
+WITH arrayJoin([10,17,71,6,55,38,27,61,48,46,21,38,2,67,35,77,29,31,27,67,81,82,75,81,31,38,68,95,37,34,65,59,81,28,82,80,35,3,97,42,66,28,85,98,45,15,41,61,24,53,97,86,5,65,84,18,9,32,46,52,69,44,78,98,61,64,26,11,3,19,0,90,28,72,47,8,0,74,38,63,88,43,81,61,34,24,37,53,79,72,5,77,58,3,61,56,1,3,5,61]) AS value
+SELECT skewPop(value) AS ex_1
+
+┌──────────────────ex_1─┐
+│ -0.057853616194321014 │
+└───────────────────────┘
+```
+```python
+print(skew(dataset, bias=False))
+
+# -0.05873838908626328
+```
+```sql
+WITH arrayJoin([10, 17, 71, 6, 55, 38, 27, 61, 48, 46, 21, 38, 2, 67, 35, 77, 29, 31, 27, 67, 81, 82, 75, 81, 31, 38, 68, 95, 37, 34, 65, 59, 81, 28, 82, 80, 35, 3, 97, 42, 66, 28, 85, 98, 45, 15, 41, 61, 24, 53, 97, 86, 5, 65, 84, 18, 9, 32, 46, 52, 69, 44, 78, 98, 61, 64, 26, 11, 3, 19, 0, 90, 28, 72, 47, 8, 0, 74, 38, 63, 88, 43, 81, 61, 34, 24, 37, 53, 79, 72, 5, 77, 58, 3, 61, 56, 1, 3, 5, 61]) AS value
+SELECT
+ skewSamp(value) AS ex_1,
+ (pow(count(), 2) * ex_1) / ((count() - 1) * (count() - 2)) AS G
+
+┌─────────────────ex_1─┬────────────────────G─┐
+│ -0.05698798509149213 │ -0.05873838908626276 │
+└──────────────────────┴──────────────────────┘
+```
+```python
+print(kurtosis(dataset, bias=True, fisher=False), kurtosis(dataset, bias=True, fisher=True), kurtosis(dataset))
+
+# 1.9020275610791184 -1.0979724389208816 -1.0979724389208816
+```
+```sql
+WITH arrayJoin([10, 17, 71, 6, 55, 38, 27, 61, 48, 46, 21, 38, 2, 67, 35, 77, 29, 31, 27, 67, 81, 82, 75, 81, 31, 38, 68, 95, 37, 34, 65, 59, 81, 28, 82, 80, 35, 3, 97, 42, 66, 28, 85, 98, 45, 15, 41, 61, 24, 53, 97, 86, 5, 65, 84, 18, 9, 32, 46, 52, 69, 44, 78, 98, 61, 64, 26, 11, 3, 19, 0, 90, 28, 72, 47, 8, 0, 74, 38, 63, 88, 43, 81, 61, 34, 24, 37, 53, 79, 72, 5, 77, 58, 3, 61, 56, 1, 3, 5, 61]) AS value
+SELECT
+ kurtPop(value) AS pearson,
+ pearson - 3 AS fisher
+
+┌────────────pearson─┬──────────────fisher─┐
+│ 1.9020275610791124 │ -1.0979724389208876 │
+└────────────────────┴─────────────────────┘
+```
+```python
+print(kurtosis(dataset, bias=False))
+
+# -1.0924286152713967
+```
+```sql
+WITH arrayJoin([10, 17, 71, 6, 55, 38, 27, 61, 48, 46, 21, 38, 2, 67, 35, 77, 29, 31, 27, 67, 81, 82, 75, 81, 31, 38, 68, 95, 37, 34, 65, 59, 81, 28, 82, 80, 35, 3, 97, 42, 66, 28, 85, 98, 45, 15, 41, 61, 24, 53, 97, 86, 5, 65, 84, 18, 9, 32, 46, 52, 69, 44, 78, 98, 61, 64, 26, 11, 3, 19, 0, 90, 28, 72, 47, 8, 0, 74, 38, 63, 88, 43, 81, 61, 34, 24, 37, 53, 79, 72, 5, 77, 58, 3, 61, 56, 1, 3, 5, 61]) AS value
+SELECT
+ kurtSamp(value) AS ex_1,
+ (((pow(count(), 2) * (count() + 1)) / (((count() - 1) * (count() - 2)) * (count() - 3))) * ex_1) - ((3 * pow(count() - 1, 2)) / ((count() - 2) * (count() - 3))) AS G
+
+┌──────────────ex_1─┬───────────────────G─┐
+│ 1.864177212613638 │ -1.0924286152714027 │
+└───────────────────┴─────────────────────┘
+```
+
+
+[Google Collab](https://colab.research.google.com/drive/1xoWNi7QAJ9XZtCbmQqJFB8Z_mCreITPW?usp=sharing)
diff --git a/content/en/altinity-kb-integrations/ClickHouse_python_drivers.md b/content/en/altinity-kb-integrations/ClickHouse_python_drivers.md
new file mode 100644
index 0000000000..08c8bc89df
--- /dev/null
+++ b/content/en/altinity-kb-integrations/ClickHouse_python_drivers.md
@@ -0,0 +1,172 @@
+---
+title: "ClickHouse® python drivers"
+linkTitle: "ClickHouse® python drivers"
+weight: 100
+description: >-
+ Python main drivers/clients for ClickHouse®
+---
+
+There are two main Python drivers that can be used with ClickHouse. Each has its own set of features and use cases:
+
+### ClickHouse driver AKA [clickhouse-driver](https://clickhouse-driver.readthedocs.io/en/latest/)
+
+The **`clickhouse-driver`** is a Python library used for interacting with ClickHouse. Here's a summary of its features:
+
+1. **Connectivity**: **`clickhouse-driver`** allows Python applications to connect to ClickHouse servers over the native TCP/IP interface (ports 9000/9440); there is also HTTP interface support, but it is experimental.
+2. **SQL Queries**: It enables executing SQL queries against ClickHouse databases from Python scripts, including data manipulation (insertion, deletion, updating) and data retrieval (select queries).
+3. **Query Parameters**: Supports parameterized queries, which helps in preventing SQL injection attacks and allows for more efficient execution of repeated queries with different parameter values.
+4. **Connection Pooling**: Provides support for connection pooling, which helps manage connections efficiently, especially in high-concurrency applications, by reusing existing connections instead of creating new ones for each query.
+5. **Data Types**: Handles conversion between Python data types and ClickHouse data types, ensuring compatibility and consistency when passing data between Python and ClickHouse.
+6. **Error Handling**: Offers comprehensive error handling mechanisms, including exceptions and error codes, to facilitate graceful error recovery and handling in Python applications.
+7. **Asynchronous Support**: Supports asynchronous execution of queries using `asyncio`, allowing for non-blocking query execution in asynchronous Python applications.
+8. **Customization**: Provides options for customizing connection settings, query execution behavior, and other parameters to suit specific application requirements and performance considerations.
+9. **Compatibility**: Works with various versions of ClickHouse, ensuring compatibility and support for different ClickHouse features and functionalities.
+10. **Documentation and Community**: Offers comprehensive documentation and active community support, including examples, tutorials, and forums, to assist developers in effectively using the library and addressing any issues or questions they may have.
+11. **Supports multiple hosts in the connection string**: https://clickhouse-driver.readthedocs.io/en/latest/features.html#multiple-hosts
+12. **Connection pooling** (aiohttp)
+
+**Python ecosystem libs/modules:**
+
+- Good Pandas/Numpy support: [https://clickhouse-driver.readthedocs.io/en/latest/features.html#numpy-pandas-support](https://clickhouse-driver.readthedocs.io/en/latest/features.html#numpy-pandas-support)
+- Good SQLALchemy support: [https://pypi.org/project/clickhouse-sqlalchemy/](https://pypi.org/project/clickhouse-sqlalchemy/)
+
+This was the first Python driver for ClickHouse. It has a mature codebase. By default clickhouse-driver uses [synchronous code](https://clickhouse-driver.readthedocs.io/en/latest/quickstart.html#async-and-multithreading). There is a wrapper that makes it asynchronous: [https://github.com/long2ice/asynch](https://github.com/long2ice/asynch)
+
+Here is a basic working example from the Altinity repo for ingestion/selection using clickhouse-driver:
+
+[https://github.com/lesandie/clickhouse-tests/blob/main/scripts/test_ch_driver.py](https://github.com/lesandie/clickhouse-tests/blob/main/scripts/test_ch_driver.py)
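+
+As a minimal hedged sketch of the basic synchronous API (the host and credentials below are placeholders):
+
+```python
+from clickhouse_driver import Client
+
+# connect over the native TCP interface (port 9000 by default)
+client = Client(host='localhost', user='default', password='')
+
+# parameterized query: values are passed separately, not interpolated into the SQL string by hand
+rows = client.execute(
+    'SELECT number, number * 2 FROM system.numbers LIMIT %(limit)s',
+    {'limit': 5},
+)
+print(rows)  # list of tuples, e.g. [(0, 0), (1, 2), ...]
+```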
+
+### ClickHouse-connect AKA [clickhouse-connect](https://clickhouse.com/docs/en/integrations/python)
+
+The ClickHouse Connect Python driver is the official Python library supported by ClickHouse, Inc. Here's a summary of its key features:
+
+1. **Connectivity**: allows Python applications to connect to ClickHouse servers over the HTTP interface (ports 8123/8443).
+2. **Compatibility**: The driver is compatible with Python 3.x versions, ensuring that it can be used with modern Python applications without compatibility issues.
+3. **Performance**: The driver is optimized for performance, allowing for efficient communication with ClickHouse databases to execute queries and retrieve results quickly, which is crucial for applications requiring low latency and high throughput.
+4. **Query Execution**: Developers can use the driver to execute SQL queries against ClickHouse databases, including SELECT, INSERT, UPDATE, DELETE, and other SQL operations, enabling them to perform various data manipulation tasks from Python applications.
+5. **Parameterized Queries**: The driver supports parameterized queries, allowing developers to safely pass parameters to SQL queries to prevent SQL injection attacks and improve query performance by reusing query execution plans.
+6. **Data Type Conversion**: The driver automatically handles data type conversion between Python data types and ClickHouse data types, ensuring seamless integration between Python applications and ClickHouse databases without manual data type conversion.
+7. **Error Handling**: The driver provides robust error handling mechanisms, including exceptions and error codes, to help developers handle errors gracefully and take appropriate actions based on the type of error encountered during query execution.
+8. **Limited Asynchronous Support**: Some implementations of the driver offer asynchronous support, allowing developers to execute queries asynchronously to improve concurrency and scalability in asynchronous Python applications using asynchronous I/O frameworks like `asyncio`.
+9. **Configuration Options**: The driver offers various configuration options, such as connection parameters, authentication methods, and connection pooling settings, allowing developers to customize the driver's behavior to suit their specific requirements and environment.
+10. **Documentation and Community**: Offers comprehensive documentation and active community support, including examples, tutorials, and forums, to assist developers in effectively using the library and addressing any issues or questions they may have. [https://clickhouse.com/docs/en/integrations/language-clients/python/intro/](https://clickhouse.com/docs/en/integrations/language-clients/python/intro/)
+11. **Multiple hosts in the connection string are not supported**: https://github.com/ClickHouse/clickhouse-connect/issues/74
+12. **Connection pooling** (urllib3)
+
+**Python ecosystem libs/modules:**
+
+- Good Pandas/Numpy support: [https://clickhouse.com/docs/en/integrations/python#consuming-query-results-with-numpy-pandas-or-arrow](https://clickhouse.com/docs/en/integrations/python#consuming-query-results-with-numpy-pandas-or-arrow)
+- Decent SQLAlchemy 1.3 and 1.4 support (limited feature set)
+
+It is the most recent driver with the latest feature set (query context, query streaming, etc.), and a recent release added an [asyncio wrapper](https://github.com/ClickHouse/clickhouse-connect/releases/tag/v0.7.16).
+
+You can check multiple official examples here:
+
+[https://github.com/ClickHouse/clickhouse-connect/tree/457533df05fa685b2a1424359bea5654240ef971/examples](https://github.com/ClickHouse/clickhouse-connect/tree/457533df05fa685b2a1424359bea5654240ef971/examples)
+
+Also some Altinity examples from repo:
+
+[https://github.com/lesandie/clickhouse-tests/blob/main/scripts/test_ch_connect_asyncio_insert.py](https://github.com/lesandie/clickhouse-tests/blob/main/scripts/test_ch_connect_asyncio_insert.py)
+
+You can clone the repo and use the helper files like `DDL.sql` to set up some tests.
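+
+As a minimal hedged sketch of the basic synchronous API (the host, port, and credentials below are placeholders):
+
+```python
+import clickhouse_connect
+
+# connect over the HTTP interface (port 8123 by default)
+client = clickhouse_connect.get_client(host='localhost', port=8123, username='default', password='')
+
+result = client.query('SELECT database, name FROM system.tables LIMIT 5')
+print(result.result_rows)  # list of row tuples
+```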
+
+
+### Most common use cases:
+
+#### Connection pooler:
+
+- clickhouse-connect can use a connection pooler (based on urllib3): https://clickhouse.com/docs/en/integrations/python#customizing-the-http-connection-pool
+- With clickhouse-driver you can use **aiohttp** (https://docs.aiohttp.org/en/stable/client_advanced.html#limiting-connection-pool-size)
+
+#### Managing ClickHouse `session_id`:
+
+- clickhouse-driver
+  - Because it uses the native interface, `session_id` is managed internally by ClickHouse, so it is very rare (unless using asyncio) to get:
+
+ `Code: 373. DB::Exception: Session is locked by a concurrent client. (SESSION_IS_LOCKED)` .
+
+- clickhouse-connect: How to use clickhouse-connect in a pythonic way and avoid getting `SESSION_IS_LOCKED` exceptions:
+ - [https://clickhouse.com/docs/en/integrations/python#managing-clickhouse-session-ids](https://clickhouse.com/docs/en/integrations/python#managing-clickhouse-session-ids)
+  - If you want to specify a session_id per query, you should be able to use the settings dictionary to pass a `session_id` for each query (note that ClickHouse will automatically generate a `session_id` if none is provided).
+
+ ```python
+ SETTINGS = {"session_id": "dagster-batch" + "-" + f"{time.time()}"}
+ client.query("INSERT INTO table ....", settings=SETTINGS)
+ ```
+
+
+The ClickHouse documentation also explains how to set `session_id` with another approach: [https://clickhouse.com/docs/en/integrations/python#managing-clickhouse-session-ids](https://clickhouse.com/docs/en/integrations/python#managing-clickhouse-session-ids)
+
+[ClickHouse Connect Driver API | ClickHouse Docs](https://clickhouse.com/docs/en/integrations/language-clients/python/driver-api#common-method-arguments)
+
+[Best practices with flask · Issue #73 · ClickHouse/clickhouse-connect](https://github.com/ClickHouse/clickhouse-connect/issues/73#issuecomment-1325280242)
+
+#### Asyncio (asynchronous wrappers)
+
+##### clickhouse-connect
+
+New release with [asyncio wrapper for clickhouse-connect](https://github.com/ClickHouse/clickhouse-connect/releases/tag/v0.7.16)
+
+How the wrapper works: https://clickhouse.com/docs/en/integrations/python#asyncclient-wrapper
+
+Wrapper and connection pooler example:
+
+```python
+import clickhouse_connect
+import asyncio
+from clickhouse_connect.driver.httputil import get_pool_manager
+
+async def main():
+ client = await clickhouse_connect.get_async_client(host='localhost', port=8123, pool_mgr=get_pool_manager())
+ for i in range(100):
+ result = await client.query("SELECT name FROM system.databases")
+ print(result.result_rows)
+
+asyncio.run(main())
+```
+
+`clickhouse-connect` code is synchronous by default, and running synchronous functions in an async application is a workaround that might not be as efficient as using a library/wrapper designed for asynchronous operations from the ground up. So you can use the current wrapper, or you can use another approach with `asyncio` and `concurrent.futures` with a `ThreadPoolExecutor` or `ProcessPoolExecutor`. The Python GIL has a mutex over threads but not over processes, so if you need performance at the cost of using processes instead of threads (not much different for medium workloads) you can use `ProcessPoolExecutor` instead.
+
+Some info about this from the Tinybird team: https://www.tinybird.co/blog-posts/killing-the-processpoolexecutor
+
+For clickhouse-connect:
+
+```python
+import asyncio
+from concurrent.futures import ProcessPoolExecutor
+import clickhouse_connect
+
+# Function to execute a query using clickhouse-connect synchronously
+def execute_query_sync(query):
+ client = clickhouse_connect.get_client() # Adjust connection params as needed
+ result = client.query(query)
+ return result
+
+# Asynchronous wrapper function to run the synchronous function in a process pool
+async def execute_query_async(query):
+ loop = asyncio.get_running_loop()
+ # Use ProcessPoolExecutor to execute the synchronous function
+ with ProcessPoolExecutor() as pool:
+ result = await loop.run_in_executor(pool, execute_query_sync, query)
+ return result
+
+async def main():
+ query = "SELECT * FROM your_table LIMIT 10" # Example query
+ result = await execute_query_async(query)
+ print(result)
+
+# Run the async main function
+if __name__ == '__main__':
+ asyncio.run(main())
+```
+##### clickhouse-driver
+
+`clickhouse-driver` code is also synchronous and suffers from the same problem as `clickhouse-connect`: https://clickhouse-driver.readthedocs.io/en/latest/quickstart.html#async-and-multithreading
+
+So to use an asynchronous approach, it is recommended to use a connection pool and an asyncio wrapper that can hide the complexity of using a `ThreadPoolExecutor`/`ProcessPoolExecutor`, as sketched below.
+
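+A minimal sketch under those assumptions (clickhouse-driver wrapped with a `ThreadPoolExecutor`; the host is a placeholder):
+
+```python
+import asyncio
+from concurrent.futures import ThreadPoolExecutor
+
+from clickhouse_driver import Client
+
+executor = ThreadPoolExecutor(max_workers=4)
+
+def run_query_sync(query: str):
+    # one synchronous client per call; a real app would reuse a client per worker thread
+    client = Client(host='localhost')  # native TCP interface, port 9000
+    return client.execute(query)
+
+async def run_query(query: str):
+    loop = asyncio.get_running_loop()
+    # run the blocking call in the thread pool so the event loop is not blocked
+    return await loop.run_in_executor(executor, run_query_sync, query)
+
+async def main():
+    results = await asyncio.gather(*[run_query('SELECT count() FROM system.tables') for _ in range(4)])
+    print(results)
+
+if __name__ == '__main__':
+    asyncio.run(main())
+```
+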
+- To begin testing such an environment, [aiohttp](https://docs.aiohttp.org/) is a good approach. Here is an example: https://github.com/lesandie/clickhouse-tests/blob/main/scripts/test_aiohttp_inserts.py
+  This simply uses the requests module and aiohttp (you can tune the connection pooler: https://docs.aiohttp.org/en/stable/client_advanced.html#limiting-connection-pool-size)
+
+- `aiochclient` is another good wrapper for the HTTP interface: https://github.com/maximdanilchenko/aiochclient
+- For the native interface you can try https://github.com/long2ice/asynch: `asynch` is an asyncio ClickHouse Python driver with native (TCP) interface support, which reuses most of [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) and complies with [PEP 249](https://www.python.org/dev/peps/pep-0249/).
diff --git a/content/en/altinity-kb-integrations/Spark.md b/content/en/altinity-kb-integrations/Spark.md
index e37f065f58..b1813c40cd 100644
--- a/content/en/altinity-kb-integrations/Spark.md
+++ b/content/en/altinity-kb-integrations/Spark.md
@@ -1,13 +1,9 @@
---
-title: "ClickHouse + Spark"
+title: "ClickHouse® + Spark"
linkTitle: "Spark"
weight: 100
-description: >-
- Spark
---
-## ClickHouse + Spark
-
### jdbc
The trivial & natural way to talk to ClickHouse from Spark is using jdbc. There are 2 jdbc drivers:
@@ -16,7 +12,7 @@ The trivial & natural way to talk to ClickHouse from Spark is using jdbc. There
ClickHouse-Native-JDBC has some hints about integration with Spark even in the main README file.
-'Official' driver does support some conversion of complex data types (Roarring bitmaps) for Spark-Clickhouse integration: https://github.com/ClickHouse/clickhouse-jdbc/pull/596
+'Official' driver does support some conversion of complex data types (Roaring bitmaps) for Spark-ClickHouse integration: https://github.com/ClickHouse/clickhouse-jdbc/pull/596
But proper partitioning of the data (to spark partitions) may be tricky with jdbc.
@@ -54,21 +50,18 @@ Arrays, Higher-order functions, machine learning, integration with lot of differ
## More info + some unordered links (mostly in Chinese / Russian)
-* Spark + ClickHouse: not a fight, but a symbiosis https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/spark_and_clickhouse.pdf (russian)
-* Using a bunch of ClickHouse and Spark in MFI Soft https://www.youtube.com/watch?v=ID8eTnmag0s (russian)
-* Spark read and write ClickHouse https://yerias.github.io/2020/12/08/clickhouse/9/#Jdbc%E6%93%8D%E4%BD%9Cclickhouse
-* Spark reads and writes ClickHouse through jdbc https://blog.katastros.com/a?ID=01800-e40e1b3c-5fa4-4ea0-a3a8-f5e89ef0ce14
-* Spark JDBC write clickhouse operation summary https://www.jianshu.com/p/43f78c8a025b?hmsr=toutiao.io&utm_campaign=toutiao.io&utm_medium=toutiao.io&utm_source=toutiao.io https://toutiao.io/posts/m63yw89/preview
-* Spark-sql is based on Clickhouse's DataSourceV2 data source extension (russian)
+* Spark + ClickHouse: not a fight, but a symbiosis (Russian) https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/spark_and_clickhouse.pdf
+* Using a bunch of ClickHouse and Spark in MFI Soft (Russian) https://www.youtube.com/watch?v=ID8eTnmag0s
+* Spark read and write ClickHouse (Chinese: Spark读写ClickHouse) https://yerias.github.io/2020/12/08/clickhouse/9/#Jdbc%E6%93%8D%E4%BD%9Cclickhouse
+* Spark JDBC write ClickHouse operation summary (Chinese: Spark JDBC 写 ClickHouse 操作总结) https://www.jianshu.com/p/43f78c8a025b?hmsr=toutiao.io&utm_campaign=toutiao.io&utm_medium=toutiao.io&utm_source=toutiao.io
+* Spark-sql is based on ClickHouse's DataSourceV2 data source extension (Chinese: spark-sql基于ClickHouse的DataSourceV2数据源扩展)
https://www.cnblogs.com/mengyao/p/4689866.html
-* Alibaba integration instructions https://www.alibabacloud.com/help/doc-detail/191192.htm
-* Tencent integration instructions https://intl.cloud.tencent.com/document/product/1026/35884
-* Yandex DataProc demo: loading files from S3 to ClickHouse with Spark https://www.youtube.com/watch?v=N3bZW0_rRzI
-* Clickhouse official documentation_Spark JDBC writes some pits of ClickHouse https://blog.csdn.net/weixin_39615984/article/details/111206050
-* ClickHouse data import (Flink, Spark, Kafka, MySQL, Hive) https://zhuanlan.zhihu.com/p/299094269
-* Baifendian Big Data Technical Team: Practice of ClickHouse data synchronization solutionbased on multiple Spark tasks. https://www.6aiq.com/article/1635461873075
-* SPARK-CLICKHOUSE-ES REAL-TIME PROJECT EIGHTH DAY-PRECISE ONE-TIME CONSUMPTION SAVE OFFSET. https://www.freesion.com/article/71421322524/
-* Still struggling with real-time data warehouse selection, Spark + ClickHouse makes yoamazing! https://dbaplus.cn/news-73-3806-1.html
-* HDFS+ClickHouse+Spark: A lightweight big data analysis system from 0 to 1. https://juejin.cn/post/6850418114962653198
-* ClickHouse Clustering for Spark Developer http://blog.madhukaraphatak.com/clickouse-clustering-spark-developer/
-* «Иногда приходится заглядывать в код Spark»: Александр Морозов (SEMrush) об использовании Scala, Spark и ClickHouse. https://habr.com/ru/company/jugru/blog/341288/
+* Alibaba integration instructions (English) https://www.alibabacloud.com/help/doc-detail/191192.htm
+* Tencent integration instructions (English) https://intl.cloud.tencent.com/document/product/1026/35884
+* Yandex DataProc demo: loading files from S3 to ClickHouse with Spark (Russian) https://www.youtube.com/watch?v=N3bZW0_rRzI
+* ClickHouse official documentation_Spark JDBC writes some pits of ClickHouse (Chinese: ClickHouse官方文档_Spark JDBC写ClickHouse的一些坑) https://blog.csdn.net/weixin_39615984/article/details/111206050
+* ClickHouse data import: Flink, Spark, Kafka, MySQL, Hive (Chinese: 篇五|ClickHouse数据导入 Flink、Spark、Kafka、MySQL、Hive) https://zhuanlan.zhihu.com/p/299094269
+* SPARK-CLICKHOUSE-ES REAL-TIME PROJECT EIGHTH DAY-PRECISE ONE-TIME CONSUMPTION SAVE OFFSET. (Chinese: SPARK-CLICKHOUSE-ES实时项目第八天-精确一次性消费保存偏移量) https://www.freesion.com/article/71421322524/
+* HDFS+ClickHouse+Spark: A lightweight big data analysis system from 0 to 1. (Chinese: HDFS+ClickHouse+Spark:从0到1实现一款轻量级大数据分析系统) https://juejin.cn/post/6850418114962653198
+* ClickHouse Clustering for Spark Developer (English) http://blog.madhukaraphatak.com/clickouse-clustering-spark-developer/
+* «Иногда приходится заглядывать в код Spark»: Александр Морозов (SEMrush) об использовании Scala, Spark и ClickHouse. (Russian) https://habr.com/ru/company/jugru/blog/341288/
diff --git a/content/en/altinity-kb-integrations/_index.md b/content/en/altinity-kb-integrations/_index.md
index dc1cd483d1..6951848f56 100644
--- a/content/en/altinity-kb-integrations/_index.md
+++ b/content/en/altinity-kb-integrations/_index.md
@@ -6,6 +6,6 @@ keywords:
- clickhouse bi
- clickhouse kafka
description: >
- Learn how you can integrate cloud services, BI tools, kafka, MySQL, Spark, MindsDB, and more with ClickHouse.
+ Learn how you can integrate cloud services, BI tools, kafka, MySQL, Spark, MindsDB, and more with ClickHouse®
weight: 4
---
diff --git a/content/en/altinity-kb-integrations/altinity-cloud/_index.md b/content/en/altinity-kb-integrations/altinity-cloud/_index.md
index fc03237ffc..94f729920e 100644
--- a/content/en/altinity-kb-integrations/altinity-cloud/_index.md
+++ b/content/en/altinity-kb-integrations/altinity-cloud/_index.md
@@ -1,7 +1,81 @@
---
-title: "Cloud Services"
-linkTitle: "Cloud Services"
+title: "Altinity Cloud Access Management"
+linkTitle: "Altinity Cloud Access Management"
description: >
- Tips and tricks for using ClickHouse with different cloud services.
-weight: 4
+ Enabling access_management for Altinity.Cloud databases.
+weight: 5
+alias: /altinity-kb-integrations/altinity-cloud
---
+Organizations that want to enable administrative users in their Altinity.Cloud ClickHouse® servers can do so by enabling `access_management` manually. This allows for administrative users to be created on the specific ClickHouse Cluster.
+
+{{% alert title="WARNING" color="warning" %}}
+Modifying the ClickHouse cluster settings manually can lead to the cluster not loading or other issues. Change settings only with full consultation with an Altinity.Cloud support team member, and be ready to remove settings if they cause any disruption of service.
+{{% /alert %}}
+
+To add the `access_management` setting to an Altinity.Cloud ClickHouse Cluster:
+
+1. Log into your Altinity.Cloud account.
+1. For the cluster to modify, select **Configure -> Settings**.
+
+ {{< figure src="/assets/altinity-cloud-cluster-settings-configure.png" width="400" title="Cluster setting configure" >}}
+
+1. From the Settings page, select **+ADD SETTING**.
+
+ {{< figure src="/assets/altinity-cloud-cluster-add-setting.png" title="Add cluster setting" >}}
+
+1. Set the following options:
+ 1. **Setting Type**: Select **users.d file**.
+ 1. **Filename**: `access_management.xml`
+ 1. **Contents**: Enter the following to grant the `admin` user and the `clickhouse_operator` user (used by the `clickhouse-operator` that manages the cluster) the ability to set administrative options:
+
+    ```xml
+    <clickhouse>
+        <users>
+            <admin>
+                <access_management>1</access_management>
+            </admin>
+            <clickhouse_operator>
+                <access_management>1</access_management>
+            </clickhouse_operator>
+        </users>
+    </clickhouse>
+    ```
+
+    `access_management=1` means that the `admin` and `clickhouse_operator` users are able to create users and grant them privileges using SQL (see the example after these steps).
+
+1. Select **OK**. The cluster will restart, and users can now be created in the cluster that can be granted administrative access.
+
+1. If you are running ClickHouse 21.9 or above, you can enable storing access management in ZooKeeper. In this case it is automatically propagated to the cluster. This requires yet another configuration file:
+ 1. **Setting Type**: Select **config.d file**
+ 2. **Filename**: `user_directories.xml`
+ 3. **Contents**:
+
+    ```xml
+    <clickhouse>
+        <user_directories>
+            <users_xml>
+                <path>/etc/clickhouse-server/users.xml</path>
+            </users_xml>
+            <replicated>
+                <zookeeper_path>/clickhouse/access/</zookeeper_path>
+            </replicated>
+            <local_directory>
+                <path>/var/lib/clickhouse/access/</path>
+            </local_directory>
+        </user_directories>
+    </clickhouse>
+    ```
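+
+For example, once the cluster has restarted with this setting, an administrative user can be created with plain SQL. A minimal sketch (the user name, password, and database below are placeholders, not part of the procedure above):
+
+```sql
+-- run as the admin user; names and passwords are examples only
+CREATE USER IF NOT EXISTS app_admin IDENTIFIED WITH sha256_password BY 'ChangeMe123!';
+GRANT ALL ON mydb.* TO app_admin WITH GRANT OPTION;
+```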
+
+[//]: # (---)
+
+[//]: # (title: "Cloud Services")
+
+[//]: # (linkTitle: "Cloud Services")
+
+[//]: # (description: >)
+
+[//]: # ( Tips and tricks for using ClickHouse® with different cloud services.)
+
+[//]: # (weight: 4)
+
+[//]: # (---)
diff --git a/content/en/altinity-kb-integrations/altinity-cloud/altinity-cloud-access-management.md b/content/en/altinity-kb-integrations/altinity-cloud/altinity-cloud-access-management.md
index 9803007c8f..b34ea1e85c 100644
--- a/content/en/altinity-kb-integrations/altinity-cloud/altinity-cloud-access-management.md
+++ b/content/en/altinity-kb-integrations/altinity-cloud/altinity-cloud-access-management.md
@@ -4,8 +4,10 @@ linkTitle: "Altinity Cloud Access Management"
description: >
Enabling access_management for Altinity.Cloud databases.
weight: 5
+alias: /altinity-kb-integrations/altinity-cloud
+draft: true
---
-Organizations that want to enable administrative users in their Altinity.Cloud ClickHouse servers can do so by enabling `access_management` manually. This allows for administrative users to be created on the specific ClickHouse Cluster.
+Organizations that want to enable administrative users in their Altinity.Cloud ClickHouse® servers can do so by enabling `access_management` manually. This allows for administrative users to be created on the specific ClickHouse Cluster.
{{% alert title="WARNING" color="warning" %}}
Modifying the ClickHouse cluster settings manually can lead to the cluster not loading or other issues. Change settings only with full consultation with an Altinity.Cloud support team member, and be ready to remove settings if they cause any disruption of service.
@@ -28,7 +30,7 @@ To add the `access_management` setting to an Altinity.Cloud ClickHouse Cluster:
1. **Contents**: Enter the following to allow the `clickhouse_operator` that controls the cluster through the `clickhouse-operator` the ability to set administrative options:
```xml
-
+ 1
@@ -37,7 +39,7 @@ To add the `access_management` setting to an Altinity.Cloud ClickHouse Cluster:
1
-
+
```
access_management=1 means that users `admin`, `clickhouse_operator` are able to create users and grant them privileges using SQL.
@@ -50,7 +52,7 @@ To add the `access_management` setting to an Altinity.Cloud ClickHouse Cluster:
3. **Contents**:
```xml
-
+ /etc/clickhouse-server/users.xml
@@ -58,6 +60,9 @@ To add the `access_management` setting to an Altinity.Cloud ClickHouse Cluster:
/clickhouse/access/
+
+ /var/lib/clickhouse/access/
+
-
+
```
diff --git a/content/en/altinity-kb-integrations/altinity-kb-google-s3-gcs.md b/content/en/altinity-kb-integrations/altinity-kb-google-s3-gcs.md
index b0fa975f78..eb1f5b99c1 100644
--- a/content/en/altinity-kb-integrations/altinity-kb-google-s3-gcs.md
+++ b/content/en/altinity-kb-integrations/altinity-kb-google-s3-gcs.md
@@ -1,9 +1,8 @@
---
title: "Google S3 (GCS)"
linkTitle: "Google S3 (GCS)"
-description: >
- "Google S3 GCS"
---
+
GCS with the table function - seems to work correctly for simple scenarios.
Essentially you can follow the steps from the [Migrating from Amazon S3 to Cloud Storage](https://cloud.google.com/storage/docs/aws-simple-migration).
@@ -11,8 +10,5 @@ Essentially you can follow the steps from the [Migrating from Amazon S3 to Cloud
1. Set up a GCS bucket.
2. This bucket must be set as part of the default project for the account. This configuration can be found in settings -> interoperability.
3. Generate a HMAC key for the account, can be done in settings -> interoperability, in the section for user account access keys.
-4. In ClickHouse, replace the S3 bucket endpoint with the GCS bucket endpoint This must be done with the path-style GCS endpoint: `https://storage.googleapis.com/BUCKET_NAME/OBJECT_NAME`.
+4. In ClickHouse®, replace the S3 bucket endpoint with the GCS bucket endpoint. This must be done with the path-style GCS endpoint: `https://storage.googleapis.com/BUCKET_NAME/OBJECT_NAME` (see the sketch after this list).
5. Replace the aws access key id and aws secret access key with the corresponding parts of the HMAC key.
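+
+A minimal sketch of what the resulting query may look like, using the s3 table function (the bucket, object path, HMAC key id/secret, and format below are placeholders):
+
+```sql
+-- sketch only: bucket, object, credentials, and format are hypothetical
+SELECT count()
+FROM s3(
+    'https://storage.googleapis.com/my-bucket/path/data.csv',
+    'GOOG1EXAMPLEHMACKEYID',
+    'exampleHmacSecretKey',
+    'CSVWithNames'
+);
+```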
-
-
-s3 Disk on the top of GCS and writing to GSC may be NOT working because GCS don't support some of bulk S3 API calls, see https://github.com/ClickHouse/ClickHouse/issues/24246
diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/_index.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/_index.md
index 4a6de0c312..8d663c25f2 100644
--- a/content/en/altinity-kb-integrations/altinity-kb-kafka/_index.md
+++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/_index.md
@@ -8,8 +8,9 @@ description: >
git log -- contrib/librdkafka | git name-rev --stdin
```
-| **ClickHouse version** | **librdkafka version** |
+| **ClickHouse® version** | **librdkafka version** |
| :--- | :--- |
+| 25.3+ ([\#63697](https://github.com/ClickHouse/ClickHouse/issues/63697)) | [2.8.0](https://github.com/confluentinc/librdkafka/blob/v2.8.0/CHANGELOG.md) + few [fixes](https://gist.github.com/filimonov/ad252aa601d4d99fb57d4d76f14aa2bf) |
| 21.10+ ([\#27883](https://github.com/ClickHouse/ClickHouse/pull/27883)) | [1.6.1](https://github.com/edenhill/librdkafka/blob/v1.6.1/CHANGELOG.md) + snappy fixes + boring ssl + illumos_build fixes + edenhill#3279 fix|
| 21.6+ ([\#23874](https://github.com/ClickHouse/ClickHouse/pull/23874)) | [1.6.1](https://github.com/edenhill/librdkafka/blob/v1.6.1/CHANGELOG.md) + snappy fixes + boring ssl + illumos_build fixes|
| 21.1+ ([\#18671](https://github.com/ClickHouse/ClickHouse/pull/18671)) | [1.6.0-RC3](https://github.com/edenhill/librdkafka/blob/v1.6.0-RC3/CHANGELOG.md) + snappy fixes + boring ssl |
diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-adjusting-librdkafka-settings.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-adjusting-librdkafka-settings.md
index 0a693d0648..716bab1e5e 100644
--- a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-adjusting-librdkafka-settings.md
+++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-adjusting-librdkafka-settings.md
@@ -7,42 +7,51 @@ description: >
* To set rdkafka options - add to the `<kafka>` section in `config.xml` or preferably use a separate file in `config.d/`:
* [https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md)
-Some random example:
+Some random example using SSL certificates to authenticate:
```xml
-
- 60000
- 60000
- 10000
- 5000
- 60000
- 20000
- 500
- 20971520
- all
- SSL
- /etc/clickhouse-server/ssl/kafka-ca-qa.crt
- /etc/clickhouse-server/ssl/client_clickhouse_client.pem
- /etc/clickhouse-server/ssl/client_clickhouse_client.key
- pass
-
+
+
+ 60000
+ 60000
+ 10000
+ 5000
+ 60000
+ 20000
+ 500
+ 20971520
+ all
+ SSL
+ /etc/clickhouse-server/ssl/kafka-ca-qa.crt
+ /etc/clickhouse-server/ssl/client_clickhouse_client.pem
+ /etc/clickhouse-server/ssl/client_clickhouse_client.key
+ pass
+
+
```
## Authentication / connectivity
-### Amazon MSK
+Sometimes the consumer group needs to be explicitly allowed in the broker UI config.
+
+### Amazon MSK | SASL/SCRAM
```xml
sasl_ssl
+
+
roottoor
```
+- [Broker ports detail](https://docs.aws.amazon.com/msk/latest/developerguide/port-info.html)
+- [Read more here](https://leftjoin.ru/blog/data-engineering/clickhouse-as-a-consumer-to-amazon-msk/) (in Russian)
+
-### SASL/SCRAM
+### On-prem / self-hosted Kafka broker
```xml
@@ -51,17 +60,18 @@ Some random example:
SCRAM-SHA-512roottoor
+
+ /path/to/cert/fullchain.pem
```
-[https://leftjoin.ru/all/clickhouse-as-a-consumer-to-amazon-msk/](https://leftjoin.ru/all/clickhouse-as-a-consumer-to-amazon-msk/)
### Inline Kafka certs
To connect to some Kafka cloud services you may need to use certificates.
-If needed they can be converted to pem format and inlined into ClickHouse config.xml
+If needed they can be converted to pem format and inlined into ClickHouse® config.xml
Example:
```xml
@@ -81,11 +91,11 @@ Example:
```
-See xml
+See
-[https://help.aiven.io/en/articles/489572-getting-started-with-aiven-kafka](https://help.aiven.io/en/articles/489572-getting-started-with-aiven-kafka)
+- [https://help.aiven.io/en/articles/489572-getting-started-with-aiven-kafka](https://help.aiven.io/en/articles/489572-getting-started-with-aiven-kafka)
-[https://stackoverflow.com/questions/991758/how-to-get-pem-file-from-key-and-crt-files](https://stackoverflow.com/questions/991758/how-to-get-pem-file-from-key-and-crt-files)
+- [https://stackoverflow.com/questions/991758/how-to-get-pem-file-from-key-and-crt-files](https://stackoverflow.com/questions/991758/how-to-get-pem-file-from-key-and-crt-files)
### Azure Event Hub
@@ -105,33 +115,32 @@ See [https://github.com/ClickHouse/ClickHouse/issues/12609](https://github.com/C
```
-### confluent cloud
+### Confluent Cloud / Google Cloud
```xml
-
-
- smallest
- SASL_SSL
- https
- PLAIN
-xmlusername
- password
- probe
-
-
-
+
+
+ smallest
+ SASL_SSL
+
+
+ PLAIN
+ username
+ password
+
+
+
+
```
-
-[https://docs.confluent.io/cloud/current/client-apps/config-client.html](https://docs.confluent.io/cloud/current/client-apps/config-client.html)
+- [https://docs.confluent.io/cloud/current/client-apps/config-client.html](https://docs.confluent.io/cloud/current/client-apps/config-client.html)
+- [https://cloud.google.com/managed-service-for-apache-kafka/docs/authentication-kafka](https://cloud.google.com/managed-service-for-apache-kafka/docs/authentication-kafka)
## How to test connection settings
-Use kafkacat utility - it internally uses same library to access Kafla as clickhouse itself and allows easily to test different settings.
+Use the kafkacat utility - it internally uses the same library to access Kafka as ClickHouse® itself and makes it easy to test different settings.
```bash
-kafkacat -b my_broker:9092 -C -o -10 -t my_topic \
+# Note: Google Cloud and on-prem brokers typically listen on port 9092
+kafkacat -b my_broker:9092 -C -o -10 -t my_topic \
-X security.protocol=SASL_SSL \
-X sasl.mechanisms=PLAIN \
-X sasl.username=uerName \
@@ -139,7 +148,7 @@ kafkacat -b my_broker:9092 -C -o -10 -t my_topic \
```
-# Different configurations for different tables?
+## Different configurations for different tables?
> Is there some more documentation how to use this multiconfiguration for Kafka ?
@@ -148,7 +157,7 @@ https://github.com/ClickHouse/ClickHouse/blob/da4856a2be035260708fe2ba3ffb9e437d
So it loads the main config first, and after that it loads (with overwrites) the configs for all topics **listed in `kafka_topic_list` of the table**.
-Also since v21.12 it's possible to use more straght-forward way using named_collections:
+Also, since v21.12 it's possible to use a more straightforward way using named_collections:
https://github.com/ClickHouse/ClickHouse/pull/31691
So you can say something like
@@ -169,7 +178,28 @@ And after that in configuration:
+
+
+
+
+
+ ...
+ foo.bar
+ foo.bar.group
+
+ ...
+ ...
+ ...
+ ...
+ smallest
+ https
+ probe
+
+
+
+
+
```
The same fragment of code in newer versions:
-https://github.com/ClickHouse/ClickHouse/blob/d19e24f530c30f002488bc136da78f5fb55aedab/src/Storages/Kafka/StorageKafka.cpp#L474-L496
+- https://github.com/ClickHouse/ClickHouse/blob/d19e24f530c30f002488bc136da78f5fb55aedab/src/Storages/Kafka/StorageKafka.cpp#L474-L496
diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-exactly-once-semantics.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-exactly-once-semantics.md
index 3432db9fd5..fa54523134 100644
--- a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-exactly-once-semantics.md
+++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-exactly-once-semantics.md
@@ -4,7 +4,7 @@ linkTitle: "Exactly once semantics"
description: >
Exactly once semantics
---
-EOS consumer (isolation.level=read_committed) is enabled by default since librdkafka 1.2.0, so for ClickHouse - since 20.2
+EOS consumer (isolation.level=read_committed) is enabled by default since librdkafka 1.2.0, so for ClickHouse® - since 20.2
See:
@@ -18,6 +18,6 @@ We need to have something like transactions on ClickHouse side to be able to avo
## block-aggregator by eBay
-Block Aggregator is a data loader that subscribes to Kafka topics, aggregates the Kafka messages into blocks that follow the Clickhouse’s table schemas, and then inserts the blocks into ClickHouse. Block Aggregator provides exactly-once delivery guarantee to load data from Kafka to ClickHouse. Block Aggregator utilizes Kafka’s metadata to keep track of blocks that are intended to send to ClickHouse, and later uses this metadata information to deterministically re-produce ClickHouse blocks for re-tries in case of failures. The identical blocks are guaranteed to be deduplicated by ClickHouse.
+Block Aggregator is a data loader that subscribes to Kafka topics, aggregates the Kafka messages into blocks that follow the ClickHouse’s table schemas, and then inserts the blocks into ClickHouse. Block Aggregator provides exactly-once delivery guarantee to load data from Kafka to ClickHouse. Block Aggregator utilizes Kafka’s metadata to keep track of blocks that are intended to send to ClickHouse, and later uses this metadata information to deterministically re-produce ClickHouse blocks for re-tries in case of failures. The identical blocks are guaranteed to be deduplicated by ClickHouse.
[eBay/block-aggregator](https://github.com/eBay/block-aggregator)
diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-main-parsing-loop.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-main-parsing-loop.md
index 2b9bd975f9..74eeab4326 100644
--- a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-main-parsing-loop.md
+++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-main-parsing-loop.md
@@ -4,7 +4,7 @@ linkTitle: "Kafka main parsing loop"
description: >
Kafka main parsing loop
---
-One of the threads from scheduled_pool (pre 20.9) / `background_message_broker_schedule_pool` (after 20.9) do that in infinite loop:
+One of the threads from scheduled_pool (pre ClickHouse® 20.9) / `background_message_broker_schedule_pool` (after 20.9) does that in an infinite loop:
1. Batch poll (time limit: `kafka_poll_timeout_ms` 500ms, messages limit: `kafka_poll_max_batch_size` 65536)
2. Parse messages.
@@ -31,3 +31,8 @@ You may want to adjust those depending on your scenario:
## See also
[https://github.com/ClickHouse/ClickHouse/pull/11388](https://github.com/ClickHouse/ClickHouse/pull/11388)
+
+## Disable at-least-once delivery
+
+`kafka_commit_every_batch = 1` changes the loop logic mentioned above: the consumed batch is committed to Kafka first, and the block of rows is sent to the Materialized Views only after that. This resembles an at-most-once delivery mode: it prevents creating duplicates, but allows data loss in case of failures.
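+
+A minimal DDL sketch showing where this setting goes (broker, topic, consumer group, and column names are hypothetical):
+
+```sql
+-- sketch only: broker, topic, group, and column are placeholders
+CREATE TABLE events_kafka (message String)
+ENGINE = Kafka
+SETTINGS kafka_broker_list = 'broker:9092',
+         kafka_topic_list = 'events',
+         kafka_group_name = 'clickhouse-events',
+         kafka_format = 'JSONEachRow',
+         kafka_commit_every_batch = 1; -- commit to Kafka before the block reaches the MVs
+```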
+
diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-mv-consuming.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-mv-consuming.md
new file mode 100644
index 0000000000..50c4b2ca6f
--- /dev/null
+++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-mv-consuming.md
@@ -0,0 +1,120 @@
+---
+title: "Multiple MVs attached to Kafka table"
+linkTitle: "Multiple MVs attached to Kafka table"
+description: >
+ How Multiple MVs attached to Kafka table consume and how they are affected by kafka_num_consumers/kafka_thread_per_consumer
+---
+
+A Kafka consumer is a thread inside the Kafka Engine table that is visible to Kafka monitoring tools like kafka-consumer-groups and, in ClickHouse®, in the system.kafka_consumers table.
+
+Having multiple consumers increases ingesting parallelism and can significantly speed up event processing. However, it comes with a trade-off: it's a CPU-intensive task, especially under high event load and/or complicated parsing of incoming data. Therefore, it's crucial to create as many consumers as you really need and ensure you have enough CPU cores to handle them. We don’t recommend creating too many Kafka Engines per server because it could lead to uncontrolled CPU usage in situations like bulk data upload or catching up a huge kafka lag due to excessive parallelism of the ingesting process.
+
+## kafka_thread_per_consumer meaning
+
+Consider a basic pipeline depicted as a Kafka table with 2 MVs attached. The Kafka broker has 2 topics and 4 partitions.
+
+### kafka_thread_per_consumer = 0
+
+The Kafka engine table will act as 2 consumers, but with only 1 insert thread for both of them. It is important to note that the topic needs to have as many partitions as consumers. For this scenario, we use these settings:
+
+```
+kafka_num_consumers = 2
+kafka_thread_per_consumer = 0
+```
+
+The Kafka engine will create 2 streams, 1 for each consumer, and will join them into a union stream, using a single thread `[ 2385 ]` for inserting.
+This is how we can see it in the logs:
+
+```log
+2022.11.09 17:49:34.282077 [ 2385 ] {} StorageKafka (kafka_table): Started streaming to 2 attached views
+```
+
+* How ClickHouse® calculates the number of threads depending on the `thread_per_consumer` setting:
+
+ ```c++
+ auto stream_count = thread_per_consumer ? 1 : num_created_consumers;
+ sources.reserve(stream_count);
+ pipes.reserve(stream_count);
+ for (size_t i = 0; i < stream_count; ++i)
+ {
+ ......
+ }
+ ```
+
+Details:
+
+https://github.com/ClickHouse/ClickHouse/blob/1b49463bd297ade7472abffbc931c4bb9bf213d0/src/Storages/Kafka/StorageKafka.cpp#L834
+
+
+Also, a detailed graph of the pipeline:
+
+
+
+With this approach, even if the number of consumers increased, the Kafka engine will still use only 1 thread to flush. The consuming/processing rate will probably increase a bit, but not linearly. For example, 5 consumers will not consume 5 times faster. Also, a good property of this approach is the `linearization` of INSERTS, which means that the order of the inserts is preserved and sequential. This option is good for small/medium Kafka topics.
+
+
+### kafka_thread_per_consumer = 1
+
+The Kafka engine table will act as 2 consumers, with 1 insert thread per consumer. For this scenario, we use these settings:
+
+```
+kafka_num_consumers = 2
+kafka_thread_per_consumer = 1
+```
+
+Here, the pipeline works like this:
+
+
+
+
+With this approach, the number of consumers remains the same, but each consumer uses its own insert/flush thread, and the consuming/processing rate should increase.
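+
+A minimal DDL sketch of a Kafka table using these settings (broker, topic, consumer group, and column names are hypothetical):
+
+```sql
+-- sketch only: broker, topic, group, and column are placeholders
+CREATE TABLE events_kafka (message String)
+ENGINE = Kafka
+SETTINGS kafka_broker_list = 'broker:9092',
+         kafka_topic_list = 'events',          -- the topic needs at least 2 partitions for 2 consumers
+         kafka_group_name = 'clickhouse-events',
+         kafka_format = 'JSONEachRow',
+         kafka_num_consumers = 2,
+         kafka_thread_per_consumer = 1;        -- set to 0 to flush with a single shared thread
+```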
+
+## Background Pool
+
+In ClickHouse® there is a special thread pool for background processes, such as streaming engines. Its size is controlled by the `background_message_broker_schedule_pool_size` setting and is 16 by default. If you exceed this limit across all tables on the server, you’ll likely encounter continuous Kafka rebalances, which will slow down processing considerably. For a server with a lot of CPU cores, you can increase that limit to a higher value, like 20 or even 40. `background_message_broker_schedule_pool_size = 20` allows you to create 5 Kafka Engine tables with 4 consumers each, where every consumer has its own insert thread. This option is good for large Kafka topics with millions of messages per second.
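+
+To see how close the pool is to saturation on a running server, you can compare the two related metrics (a quick sketch; the dedicated page on `background_message_broker_schedule_pool_size` in this knowledge base covers this in more detail):
+
+```sql
+-- busy tasks vs. total pool size for the background message broker pool
+SELECT metric, value
+FROM system.metrics
+WHERE metric IN ('BackgroundMessageBrokerSchedulePoolTask', 'BackgroundMessageBrokerSchedulePoolSize');
+```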
+
+
+## Multiple Materialized Views
+
+Attaching multiple Materialized Views (MVs) to a Kafka Engine table is useful when you need to apply different transformations to the same topic and store the resulting data in different tables.
+
+(This approach also applies to the other streaming engines - RabbitMQ, s3queue, etc).
+
+All streaming engines begin processing data (reading from the source and producing insert blocks) only after at least one Materialized View is attached to the engine. Multiple Materialized Views can be connected to distribute data across various tables with different transformations. But how does it work when the server starts?
+
+Once the first Materialized View (MV) is loaded, started, and attached to the Kafka/s3queue table, data consumption begins immediately—data is read from the source, pushed to the destination, and the pointers advance to the next position. However, any other MVs that haven't started yet will miss the data consumed by the first MV, leading to some data loss.
+
+This issue worsens with asynchronous table loading. Tables are only loaded upon first access, and the loading process takes time. When multiple MVs direct the data stream to different tables, some tables might be ready sooner than others. As soon as the first table becomes ready, data consumption starts, and any tables still loading will miss the data consumed during that interval, resulting in further data loss for those tables.
+
+
+That means that when you design a pipeline with multiple MVs, `async_load_databases` should be switched off:
+
+```xml
+<async_load_databases>false</async_load_databases>
+```
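+
+On recent ClickHouse versions that expose server settings, you can check the effective value with a query like this (a sketch; the table layout may differ between releases):
+
+```sql
+-- verify that asynchronous database loading is disabled
+SELECT name, value
+FROM system.server_settings
+WHERE name = 'async_load_databases';
+```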
+
+Also, you have to prevent consumption from starting until all MVs are loaded and started. For that, you can add an additional Null table to the MV pipeline, so the Kafka table passes the block to a single Null table first, and only then do the MVs apply their own transformations to the destination tables:
+
+    KafkaTable → dummy_MV → NullTable → [MV1, MV2, …] → [Table1, Table2, …]
+
+```sql
+CREATE TABLE NullTable AS KafkaTable ENGINE = Null;
+
+CREATE MATERIALIZED VIEW dummy_MV TO NullTable AS
+SELECT * FROM KafkaTable
+-- WHERE NOT ignore(throwIf(if(uptime() < 120, 1, 0)))
+WHERE NOT ignore(throwIf(if(uptime() < 120, 1 + sleep(3), 0)));
+```
+
+120 seconds should be enough for loading all MVs.
+
+Using an intermediate Null table is also preferable because it makes it easier to change the MVs (see the sketch after this list):
+
+- drop the dummy_MV to stop consuming
+- make any changes to transforming MVs by drop/recreate
+- create dummy_MV again to resume consuming
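+
+A typical maintenance cycle might look like this (a sketch reusing the names from the example above):
+
+```sql
+-- stop consuming
+DROP VIEW dummy_MV;
+
+-- ... drop / recreate the transforming MVs here ...
+
+-- resume consuming
+CREATE MATERIALIZED VIEW dummy_MV TO NullTable AS
+SELECT * FROM KafkaTable;
+```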
+
+The fix for correctly starting multiple MVs is available starting from version 25.5: https://github.com/ClickHouse/ClickHouse/pull/72123
+
+
+
+
diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-parallel-consuming.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-parallel-consuming.md
index 53c06a8972..4f7b62d5d3 100644
--- a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-parallel-consuming.md
+++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-parallel-consuming.md
@@ -4,7 +4,7 @@ linkTitle: "Kafka parallel consuming"
description: >
Kafka parallel consuming
---
-For very large topics when you need more parallelism (especially on the insert side) you may use several tables with the same pipeline (pre 20.9) or enable `kafka_thread_per_consumer` (after 20.9).
+For very large topics when you need more parallelism (especially on the insert side) you may use several tables with the same pipeline (pre ClickHouse® 20.9) or enable `kafka_thread_per_consumer` (after 20.9).
```ini
kafka_num_consumers = N,
@@ -15,5 +15,7 @@ Notes:
* the inserts will happen in parallel (without that setting inserts happen linearly)
* enough partitions are needed.
+* `kafka_num_consumers` is limited by the number of physical cores (half of the vCPUs). `kafka_disable_num_consumers_limit` can be used to override the limit.
+* `background_message_broker_schedule_pool_size` is 16 by default; you may need to increase it if you use more than 16 consumers.
Increasing `kafka_num_consumers` while keeping `kafka_thread_per_consumer=0` may improve consumption & parsing speed, but flushing & committing still happens in a single thread there (so inserts are linear).
diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-rewind-fast-forward-replay.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-rewind-fast-forward-replay.md
index a2dae43a0d..cc655531d7 100644
--- a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-rewind-fast-forward-replay.md
+++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-rewind-fast-forward-replay.md
@@ -4,10 +4,16 @@ linkTitle: "Rewind / fast-forward / replay"
description: >
Rewind / fast-forward / replay
---
-* Step 1: Detach Kafka tables in ClickHouse
+* Step 1: Detach Kafka tables in ClickHouse®
+ ```
+ DETACH TABLE db.kafka_table_name ON CLUSTER '{cluster}';
+ ```
* Step 2: `kafka-consumer-groups.sh --bootstrap-server kafka:9092 --topic topic:0,1,2 --group id1 --reset-offsets --to-latest --execute`
* More samples: [https://gist.github.com/filimonov/1646259d18b911d7a1e8745d6411c0cc](https://gist.github.com/filimonov/1646259d18b911d7a1e8745d6411c0cc)
-* Step: Attach Kafka tables back
+* Step 3: Attach Kafka tables back
+ ```
+ ATTACH TABLE db.kafka_table_name ON CLUSTER '{cluster}';
+ ```
See also these configuration settings:
@@ -16,3 +22,13 @@ See also these configuration settings:
```xml
<kafka>
    <auto_offset_reset>smallest</auto_offset_reset>
</kafka>
```
+### About Offset Consuming
+
+When a consumer joins the consumer group, the broker checks whether it has a committed offset. If it does, the consumer resumes from that committed offset. Both the ClickHouse and librdkafka documentation state that the default value for `auto_offset_reset` is `largest` (or `latest` in new Kafka versions), but that is not the case if the consumer group is new:
+
+https://github.com/ClickHouse/ClickHouse/blob/f171ad93bcb903e636c9f38812b6aaf0ab045b04/src/Storages/Kafka/StorageKafka.cpp#L506
+
+ `conf.set("auto.offset.reset", "earliest"); // If no offset stored for this group, read all messages from the start`
+
+If there is no offset stored for that particular consumer group, or it is out of range, the consumer will start consuming from the beginning (`earliest`); if there is an offset stored, consumption resumes from it.
+The log retention policy influences which offset values correspond to the `earliest` and `latest` configurations. Consider a scenario where a topic has a retention policy set to 1 hour. Initially, you produce 5 messages, and then, after an hour, you publish 5 more messages. The latest offset will be 10, as expected. However, because Kafka removed the earlier messages, the earliest available offset will not be 0; instead, it will be 5.
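+
+To see which offsets the ClickHouse consumers currently hold, you can query the `system.kafka_consumers` table (available in recent ClickHouse versions; the column names below follow its current layout and may differ between releases):
+
+```sql
+-- partitions assigned to each consumer and their current offsets
+SELECT
+    database,
+    `table`,
+    consumer_id,
+    assignments.topic,
+    assignments.partition_id,
+    assignments.current_offset
+FROM system.kafka_consumers;
+```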
diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/background_message_broker_schedule_pool_size.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/background_message_broker_schedule_pool_size.md
new file mode 100644
index 0000000000..108fc13992
--- /dev/null
+++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/background_message_broker_schedule_pool_size.md
@@ -0,0 +1,131 @@
+---
+title: "Setting the background message broker schedule pool size"
+linkTitle: "Setting the background message broker schedule pool size"
+weight: 100
+description: >-
+ Guide to managing the `background_message_broker_schedule_pool_size` setting for Kafka, RabbitMQ, and NATS table engines in your database.
+---
+
+## Overview
+
+When using Kafka, RabbitMQ, or NATS table engines in ClickHouse®, you may encounter issues related to a saturated background thread pool. One common symptom is a warning similar to the following:
+
+```
+2025.03.14 08:44:26.725868 [ 344 ] {} StorageKafka (events_kafka): [rdk:MAXPOLL] [thrd:main]: Application maximum poll interval (60000ms) exceeded by 159ms (adjust max.poll.interval.ms for long-running message processing): leaving group
+```
+
+This warning typically appears **not because ClickHouse fails to poll**, but because **there are no available threads** in the background pool to handle the polling in time. In rare cases, the same error might also be caused by long flushing operations to Materialized Views (MVs), especially if their logic is complex or chained.
+
+To resolve this, you should monitor and, if needed, increase the value of the `background_message_broker_schedule_pool_size` setting.
+
+---
+
+## Step 1: Check Thread Pool Utilization
+
+Run the following SQL query to inspect the current status of your background message broker thread pool:
+
+```sql
+SELECT
+ (
+ SELECT value
+ FROM system.metrics
+ WHERE metric = 'BackgroundMessageBrokerSchedulePoolTask'
+ ) AS tasks,
+ (
+ SELECT value
+ FROM system.metrics
+ WHERE metric = 'BackgroundMessageBrokerSchedulePoolSize'
+ ) AS pool_size,
+ pool_size - tasks AS free_threads
+```
+
+If you have `metric_log` enabled, you can also monitor the **minimum number of free threads over the day**:
+
+```sql
+SELECT min(CurrentMetric_BackgroundMessageBrokerSchedulePoolSize - CurrentMetric_BackgroundMessageBrokerSchedulePoolTask) AS min_free_threads
+FROM system.metric_log
+WHERE event_date = today()
+```
+
+**If `free_threads` is close to zero or negative**, it means your thread pool is saturated and should be increased.
+
+---
+
+## Step 2: Estimate Required Pool Size
+
+To estimate a reasonable value for `background_message_broker_schedule_pool_size`, run the following query:
+
+```sql
+WITH
+ toUInt32OrDefault(extract(engine_full, 'kafka_num_consumers\s*=\s*(\d+)')) as kafka_num_consumers,
+ extract(engine_full, 'kafka_thread_per_consumer\s*=\s*(\d+|\'true\')') not in ('', '0') as kafka_thread_per_consumer,
+ multiIf(
+ engine = 'Kafka',
+ if(kafka_thread_per_consumer AND kafka_num_consumers > 0, kafka_num_consumers, 1),
+ engine = 'RabbitMQ',
+ 3,
+ engine = 'NATS',
+ 3,
+ 0 /* should not happen */
+ ) as threads_needed
+SELECT
+ ceil(sum(threads_needed) * 1.25)
+FROM
+ system.tables
+WHERE
+ engine in ('Kafka', 'RabbitMQ', 'NATS')
+```
+
+This will return an estimate that includes a 25% buffer to accommodate spikes in load.
+
+---
+
+## Step 3: Apply the New Setting
+
+1. **Create or update** the following configuration file:
+
+ **Path:** `/etc/clickhouse-server/config.d/background_message_broker_schedule_pool_size.xml`
+
+ **Content:**
+ ```xml
+   <clickhouse>
+       <background_message_broker_schedule_pool_size>120</background_message_broker_schedule_pool_size>
+   </clickhouse>
+ ```
+
+ Replace `120` with the value recommended from Step 2 (rounded up if needed).
+
+2. **(Only for ClickHouse versions 23.8 and older)**
+
+ Add the same setting to the default user profile:
+
+ **Path:** `/etc/clickhouse-server/users.d/background_message_broker_schedule_pool_size.xml`
+
+ **Content:**
+ ```xml
+   <clickhouse>
+       <profiles>
+           <default>
+               <background_message_broker_schedule_pool_size>120</background_message_broker_schedule_pool_size>
+           </default>
+       </profiles>
+   </clickhouse>
+ ```
+
+---
+
+## Step 4: Restart ClickHouse
+
+After applying the configuration, restart ClickHouse to apply the changes:
+
+```bash
+sudo systemctl restart clickhouse-server
+```
+
+---
+
+## Summary
+
+A saturated background message broker thread pool can lead to missed Kafka polls and consumer group dropouts. Monitoring your metrics and adjusting `background_message_broker_schedule_pool_size` accordingly ensures stable operation of Kafka, RabbitMQ, and NATS integrations.
+
+If the problem persists even after increasing the pool size, consider investigating slow MV chains or flushing logic as a potential bottleneck.
diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/error-handling.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/error-handling.md
index e9e2f533de..b2ac0fef88 100644
--- a/content/en/altinity-kb-integrations/altinity-kb-kafka/error-handling.md
+++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/error-handling.md
@@ -14,7 +14,7 @@ It's also possible to skip up to N malformed messages for each block, with used
## After 21.6
-It's possible to stream messages which could not be parsed, this behavior could be enabled via setting: `kafka_handle_error_mode='stream'` and clickhouse wil write error and message from Kafka itself to two new virtual columns: `_error, _raw_message`.
+It's possible to stream messages which could not be parsed; this behavior can be enabled via the setting `kafka_handle_error_mode='stream'`, and ClickHouse® will write the error and the raw message from Kafka to two new virtual columns: `_error`, `_raw_message`.
So you can create another Materialized View which collects to a separate table all errors happening while parsing, with all important information like the offset and the content of the message.
@@ -31,7 +31,7 @@ kafka_group_name = 'clickhouse',
kafka_format = 'JSONEachRow',
kafka_handle_error_mode='stream';
-CREATE MATERIALIZED VIEW default.kafka_errors
+CREATE TABLE default.kafka_errors
(
`topic` String,
`partition` Int64,
@@ -41,7 +41,11 @@ CREATE MATERIALIZED VIEW default.kafka_errors
)
ENGINE = MergeTree
ORDER BY (topic, partition, offset)
-SETTINGS index_granularity = 8192 AS
+SETTINGS index_granularity = 8192;
+
+
+CREATE MATERIALIZED VIEW default.kafka_errors_mv TO default.kafka_errors
+AS
SELECT
_topic AS topic,
_partition AS partition,
@@ -52,9 +56,15 @@ FROM default.kafka_engine
WHERE length(_error) > 0
```
+## Since 25.8
+
+A dead letter queue can be used via the setting `kafka_handle_error_mode='dead_letter'`: [https://github.com/ClickHouse/ClickHouse/pull/68873](https://github.com/ClickHouse/ClickHouse/pull/68873)
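+
+A minimal sketch of where this setting goes (broker, topic, consumer group, and column names are hypothetical; see the PR above for details on where the unparsed messages end up):
+
+```sql
+-- sketch only: broker, topic, group, and column are placeholders
+CREATE TABLE events_kafka (message String)
+ENGINE = Kafka
+SETTINGS kafka_broker_list = 'broker:9092',
+         kafka_topic_list = 'events',
+         kafka_group_name = 'clickhouse-events',
+         kafka_format = 'JSONEachRow',
+         kafka_handle_error_mode = 'dead_letter';
+```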
+
+
+

-[https://github.com/ClickHouse/ClickHouse/pull/20249\#issuecomment-779054737](https://github.com/ClickHouse/ClickHouse/pull/20249\#issuecomment-779054737)
+[https://github.com/ClickHouse/ClickHouse/pull/20249](https://github.com/ClickHouse/ClickHouse/pull/20249)
[https://github.com/ClickHouse/ClickHouse/pull/21850](https://github.com/ClickHouse/ClickHouse/pull/21850)
diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/kafka-schema-inference.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/kafka-schema-inference.md
new file mode 100644
index 0000000000..a47e65a6c9
--- /dev/null
+++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/kafka-schema-inference.md
@@ -0,0 +1,88 @@
+---
+title: "Inferring Schema from AvroConfluent Messages in Kafka for ClickHouse®"
+linkTitle: "Schema Inference for Kafka"
+weight: 100
+description: >-
+ Learn how to define Kafka table structures in ClickHouse® by using Avro's schema registry & sample message.
+---
+
+To consume messages from Kafka within ClickHouse®, you need to define the `ENGINE=Kafka` table structure with all the column names and types.
+This task can be particularly challenging when dealing with complex Avro messages, as manually determining the exact schema for
+ClickHouse is both tricky and time-consuming. This complexity is particularly frustrating in the case of Avro formats,
+where the column names and their types are already clearly defined in the schema registry.
+
+Although ClickHouse supports schema inference for files, it does not natively support this for Kafka streams.
+
+Here’s a workaround to infer the schema using AvroConfluent messages:
+
+## Step 1: Capture and Store a Raw Kafka Message
+
+First, create a table in ClickHouse to consume a raw message from Kafka and store it as a file:
+
+```sql
+CREATE TABLE test_kafka (raw String) ENGINE = Kafka
+SETTINGS kafka_broker_list = 'localhost:29092',
+ kafka_topic_list = 'movies-raw',
+ kafka_format = 'RawBLOB', -- Don't try to parse the message, return it 'as is'
+ kafka_group_name = 'tmp_test'; -- Using some dummy consumer group here.
+
+INSERT INTO FUNCTION file('./avro_raw_sample.avro', 'RawBLOB')
+SELECT * FROM test_kafka LIMIT 1
+SETTINGS max_block_size=1, stream_like_engine_allow_direct_select=1;
+
+DROP TABLE test_kafka;
+```
+
+## Step 2: Infer Schema Using the Stored File
+Using the stored raw message, let ClickHouse infer the schema based on the AvroConfluent format and a specified schema registry URL:
+
+```sql
+CREATE TEMPORARY TABLE test AS
+SELECT * FROM file('./avro_raw_sample.avro', 'AvroConfluent')
+SETTINGS format_avro_schema_registry_url='http://localhost:8085';
+
+SHOW CREATE TEMPORARY TABLE test\G;
+```
+The output from the `SHOW CREATE` command will display the inferred schema, for example:
+
+```plaintext
+Row 1:
+──────
+statement: CREATE TEMPORARY TABLE test
+(
+ `movie_id` Int64,
+ `title` String,
+ `release_year` Int64
+)
+ENGINE = Memory
+```
+
+## Step 3: Create the Kafka Table with the Inferred Schema
+Now, use the inferred schema to create the Kafka table:
+
+```sql
+CREATE TABLE movies_kafka
+(
+ `movie_id` Int64,
+ `title` String,
+ `release_year` Int64
+)
+ENGINE = Kafka
+SETTINGS kafka_broker_list = 'localhost:29092',
+ kafka_topic_list = 'movies-raw',
+ kafka_format = 'AvroConfluent',
+ kafka_group_name = 'movies',
+ kafka_schema_registry_url = 'http://localhost:8085';
+```
+
+This approach reduces manual schema definition efforts and enhances data integration workflows by utilizing the schema inference capabilities of ClickHouse for AvroConfluent messages.
+
+## Appendix
+
+**Avro** is a binary serialization format used within Apache Kafka for efficiently serializing data with a compact binary format. It relies on schemas, which define the structure of the serialized data, to ensure robust data compatibility and type safety.
+
+**Schema Registry** is a service that provides a centralized repository for Avro schemas. It helps manage and enforce schemas across applications, ensuring that the data exchanged between producers and consumers adheres to a predefined format, and facilitates schema evolution in a safe manner.
+
+In ClickHouse, the **Avro** format is used for data that contains the schema embedded directly within the file or message. This means the structure of the data is defined and included with the data itself, allowing for self-describing messages. However, embedding the schema within every message is not optimal for streaming large volumes of data, as it increases the workload and network overhead. Repeatedly passing the same schema with each message can be inefficient, particularly in high-throughput environments.
+
+On the other hand, the **AvroConfluent** format in ClickHouse is specifically designed to work with the Confluent Schema Registry. This format expects the schema to be managed externally in a schema registry rather than being embedded within each message. It retrieves schema information from the Schema Registry, which allows for centralized schema management and versioning, facilitating easier schema evolution and enforcement across different applications using Kafka.
diff --git a/content/en/altinity-kb-integrations/altinity-kb-rabbitmq/_index.md b/content/en/altinity-kb-integrations/altinity-kb-rabbitmq/_index.md
new file mode 100644
index 0000000000..93ca8288d0
--- /dev/null
+++ b/content/en/altinity-kb-integrations/altinity-kb-rabbitmq/_index.md
@@ -0,0 +1,28 @@
+---
+title: "RabbitMQ"
+linkTitle: "RabbitMQ"
+description: >
+ RabbitMQ engine in ClickHouse® 24.3+
+---
+
+### Settings
+
+Basic RabbitMQ settings and use cases: https://clickhouse.com/docs/en/engines/table-engines/integrations/rabbitmq
+
+### Latest improvements/fixes
+
+##### (v23.10+)
+
+- **Allow to save unparsed records and errors in RabbitMQ, NATS and FileLog engines**:
+Adds virtual columns `_error` and `_raw_message` (for NATS and RabbitMQ), `_raw_record` (for FileLog) that are filled when ClickHouse fails to parse a new record.
+The behaviour is controlled by the storage settings `nats_handle_error_mode` for NATS, `rabbitmq_handle_error_mode` for RabbitMQ, and `handle_error_mode` for FileLog, similar to `kafka_handle_error_mode`.
+If it is set to `default`, an exception will be thrown when ClickHouse fails to parse a record; if it is set to `stream`, the error and raw record will be saved into the virtual columns.
+Closes [#36035](https://github.com/ClickHouse/ClickHouse/issues/36035) and [#55477](https://github.com/ClickHouse/ClickHouse/pull/55477)
+
+
+##### (v24+)
+
+- [#45350 RabbitMq Storage Engine should NACK messages if exception is thrown during processing](https://github.com/ClickHouse/ClickHouse/issues/45350)
+- [#59775 rabbitmq: fix having neither acked nor nacked messages](https://github.com/ClickHouse/ClickHouse/pull/59775)
+- [#60312 Make rabbitmq nack broken messages](https://github.com/ClickHouse/ClickHouse/pull/60312)
+- [#61320 Fix logical error in RabbitMQ storage with MATERIALIZED columns](https://github.com/ClickHouse/ClickHouse/pull/61320)
diff --git a/content/en/altinity-kb-integrations/altinity-kb-rabbitmq/error-handling.md b/content/en/altinity-kb-integrations/altinity-kb-rabbitmq/error-handling.md
new file mode 100644
index 0000000000..4acbb34434
--- /dev/null
+++ b/content/en/altinity-kb-integrations/altinity-kb-rabbitmq/error-handling.md
@@ -0,0 +1,56 @@
+---
+title: "RabbitMQ Error handling"
+linkTitle: "RabbitMQ Error handling"
+description: >
+ Error handling for RabbitMQ table engine
+---
+
+The same approach as in Kafka, but the virtual columns are different. Check https://clickhouse.com/docs/en/engines/table-engines/integrations/rabbitmq#virtual-columns
+
+```sql
+CREATE TABLE IF NOT EXISTS rabbitmq.broker_errors_queue
+(
+ exchange_name String,
+ channel_id String,
+ delivery_tag UInt64,
+ redelivered UInt8,
+ message_id String,
+ timestamp UInt64
+)
+engine = RabbitMQ
+SETTINGS
+ rabbitmq_host_port = 'localhost:5672',
+ rabbitmq_exchange_name = 'exchange-test', -- required parameter even though this is done via the rabbitmq config
+ rabbitmq_queue_consume = true,
+ rabbitmq_queue_base = 'test-errors',
+ rabbitmq_format = 'JSONEachRow',
+ rabbitmq_username = 'guest',
+ rabbitmq_password = 'guest',
+ rabbitmq_handle_error_mode = 'stream';
+
+CREATE MATERIALIZED VIEW IF NOT EXISTS rabbitmq.broker_errors_mv
+(
+ exchange_name String,
+ channel_id String,
+ delivery_tag UInt64,
+ redelivered UInt8,
+ message_id String,
+ timestamp UInt64,
+ raw_message String,
+ error String
+)
+ENGINE = MergeTree
+ORDER BY (error)
+SETTINGS index_granularity = 8192 AS
+SELECT
+ _exchange_name AS exchange_name,
+ _channel_id AS channel_id,
+ _delivery_tag AS delivery_tag,
+ _redelivered AS redelivered,
+ _message_id AS message_id,
+ _timestamp AS timestamp,
+ _raw_message AS raw_message,
+ _error AS error
+FROM rabbitmq.broker_errors_queue
+WHERE length(_error) > 0
+```
diff --git a/content/en/altinity-kb-integrations/bi-tools.md b/content/en/altinity-kb-integrations/bi-tools.md
index 0928c8b263..ee1d2f09ea 100644
--- a/content/en/altinity-kb-integrations/bi-tools.md
+++ b/content/en/altinity-kb-integrations/bi-tools.md
@@ -7,7 +7,7 @@ description: >
* Superset: [https://superset.apache.org/docs/databases/clickhouse](https://superset.apache.org/docs/databases/clickhouse)
* Metabase: [https://github.com/enqueue/metabase-clickhouse-driver](https://github.com/enqueue/metabase-clickhouse-driver)
* Querybook: [https://www.querybook.org/docs/setup_guide/connect_to_query_engines/\#all-query-engines](https://www.querybook.org/docs/setup_guide/connect_to_query_engines/#all-query-engines)
-* Tableau: [Clickhouse Tableau connector odbc](https://github.com/Altinity/clickhouse-tableau-connector-odbc)
+* Tableau: [Altinity Tableau Connector for ClickHouse®](https://github.com/Altinity/tableau-connector-for-clickhouse) supports both JDBC & ODBC drivers
* Looker: [https://docs.looker.com/setup-and-management/database-config/clickhouse](https://docs.looker.com/setup-and-management/database-config/clickhouse)
* Apache Zeppelin
* SeekTable
diff --git a/content/en/altinity-kb-integrations/catboost-mindsdb-fast.ai.md b/content/en/altinity-kb-integrations/catboost-mindsdb-fast.ai.md
index acecf80f5a..ee41487d1c 100644
--- a/content/en/altinity-kb-integrations/catboost-mindsdb-fast.ai.md
+++ b/content/en/altinity-kb-integrations/catboost-mindsdb-fast.ai.md
@@ -11,7 +11,7 @@ Article is based on feedback provided by one of Altinity clients.
CatBoost:
* It uses gradient boosting - a hard to use technique which can outperform neural networks. Gradient boosting is powerful but it's easy to shoot yourself in the foot using it.
-* The documentation on how to use it is quite lacking. The only good source of information on how to properly configure a model to yield good results is this video: [https://www.youtube.com/watch?v=usdEWSDisS0](https://www.youtube.com/watch?v=usdEWSDisS0) . We had to dig around GitHub issues to find out how to make it work with ClickHouse.
+* The documentation on how to use it is quite lacking. The only good source of information on how to properly configure a model to yield good results is this video: [https://www.youtube.com/watch?v=usdEWSDisS0](https://www.youtube.com/watch?v=usdEWSDisS0) . We had to dig around GitHub issues to find out how to make it work with ClickHouse®.
* CatBoost is fast. Other libraries will take ~5X to ~10X as long to do what CatBoost does.
* CatBoost will do preprocessing out of the box (fills nulls, apply standard scaling, encodes strings as numbers).
* CatBoost has all functions you'd need (metrics, plotters, feature importance)
diff --git a/content/en/altinity-kb-integrations/clickhouse-odbc.md b/content/en/altinity-kb-integrations/clickhouse-odbc.md
index eec3a2e5c6..d8c66cf95f 100644
--- a/content/en/altinity-kb-integrations/clickhouse-odbc.md
+++ b/content/en/altinity-kb-integrations/clickhouse-odbc.md
@@ -1,16 +1,14 @@
---
-title: "ODBC Driver for ClickHouse"
-linkTitle: "ODBC Driver for ClickHouse"
+title: "ODBC Driver for ClickHouse®"
+linkTitle: "ODBC Driver for ClickHouse®"
weight: 100
description: >-
- ODBC Driver for ClickHouse
+ ODBC Driver for ClickHouse®
---
-# ODBC Driver for ClickHouse.
+[ODBC](https://docs.microsoft.com/en-us/sql/odbc/reference/odbc-overview) interface for ClickHouse® RDBMS.
-[ODBC](https://docs.microsoft.com/en-us/sql/odbc/reference/odbc-overview) interface for [ClickHouse](https://clickhouse.yandex) RDBMS.
-
-Licensed under the [Apache 2.0](LICENSE).
+Licensed under the [Apache 2.0](https://github.com/ClickHouse/clickhouse-odbc?tab=Apache-2.0-1-ov-file#readme).
## Installation and usage
@@ -21,7 +19,7 @@ Licensed under the [Apache 2.0](LICENSE).
3. Configure ClickHouse DSN.
Note: that install driver linked against MDAC (which is default for Windows), some non-windows native
-applications (cygwin / msys64 based) may require driver linked agains unixodbc. Build section below.
+applications (cygwin / msys64 based) may require a driver linked against unixodbc. See the Build section below.
### MacOS
@@ -30,7 +28,7 @@ applications (cygwin / msys64 based) may require driver linked agains unixodbc.
```bash
brew install https://raw.githubusercontent.com/proller/homebrew-core/chodbc/Formula/clickhouse-odbc.rb
```
-3. Add clickhouse DSN configuration into ~/.odbc.ini file. ([sample]())
+3. Add ClickHouse DSN configuration into ~/.odbc.ini file. ([sample]())
Note: that install driver linked against iodbc (which is default for Mac), some homebrew applications
(like python) may require unixodbc driver to work properly. In that case see Build section below.
@@ -38,7 +36,7 @@ Note: that install driver linked against iodbc (which is default for Mac), some
### Linux
1. DEB/RPM packaging is not provided yet, please build & install the driver from sources.
-2. Add clickhouse DSN configuration into ~/.odbc.ini file. ([sample]())
+2. Add ClickHouse DSN configuration into ~/.odbc.ini file. ([sample]())
## Configuration
@@ -49,29 +47,29 @@ On Windows you can create/edit DSN using GUI tool through Control Panel.
The list of DSN parameters recognized by the driver is as follows:
-| Parameter | Default value | Description |
-| :-----------------: | :----------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `Url` | empty | URL that points to a running ClickHouse instance, may include username, password, port, database, etc. |
-| `Proto` | deduced from `Url`, or from `Port` and `SSLMode`: `https` if `443` or `8443` or `SSLMode` is not empty, `http` otherwise | Protocol, one of: `http`, `https` |
-| `Server` or `Host` | deduced from `Url` | IP or hostname of a server with a running ClickHouse instance on it |
-| `Port` | deduced from `Url`, or from `Proto`: `8443` if `https`, `8123` otherwise | Port on which the ClickHouse instance is listening |
-| `Path` | `/query` | Path portion of the URL |
-| `UID` or `Username` | `default` | User name |
-| `PWD` or `Password` | empty | Password |
-| `Database` | `default` | Database name to connect to |
-| `Timeout` | `30` | Connection timeout |
-| `SSLMode` | empty | Certificate verification method (used by TLS/SSL connections, ignored in Windows), one of: `allow`, `prefer`, `require`, use `allow` to enable [`SSL_VERIFY_PEER`](https://www.openssl.org/docs/manmaster/man3/SSL_CTX_set_verify.html) TLS/SSL certificate verification mode, [`SSL_VERIFY_PEER \| SSL_VERIFY_FAIL_IF_NO_PEER_CERT`](https://www.openssl.org/docs/manmaster/man3/SSL_CTX_set_verify.html) is used otherwise |
-| `PrivateKeyFile` | empty | Path to private key file (used by TLS/SSL connections), can be empty if no private key file is used |
-| `CertificateFile` | empty | Path to certificate file (used by TLS/SSL connections, ignored in Windows), if the private key and the certificate are stored in the same file, this can be empty if `PrivateKeyFile` is specified |
-| `CALocation` | empty | Path to the file or directory containing the CA/root certificates (used by TLS/SSL connections, ignored in Windows) |
-| `DriverLog` | `on` if `CMAKE_BUILD_TYPE` is `Debug`, `off` otherwise | Enable or disable the extended driver logging |
-| `DriverLogFile` | `\temp\clickhouse-odbc-driver.log` on Windows, `/tmp/clickhouse-odbc-driver.log` otherwise | Path to the extended driver log file (used when `DriverLog` is `on`) |
+| Parameter | Default value | Description |
+| :-----------------: | :----------------------------------------------------------------------------------------------------------------------: |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `Url` | empty | URL that points to a running ClickHouse instance, may include username, password, port, database, etc. |
+| `Proto` | deduced from `Url`, or from `Port` and `SSLMode`: `https` if `443` or `8443` or `SSLMode` is not empty, `http` otherwise | Protocol, one of: `http`, `https` |
+| `Server` or `Host` | deduced from `Url` | IP or hostname of a server with a running ClickHouse instance on it |
+| `Port` | deduced from `Url`, or from `Proto`: `8443` if `https`, `8123` otherwise | Port on which the ClickHouse instance is listening |
+| `Path` | `/query` | Path portion of the URL |
+| `UID` or `Username` | `default` | User name |
+| `PWD` or `Password` | empty | Password |
+| `Database` | `default` | Database name to connect to |
+| `Timeout` | `30` | Connection timeout |
+| `SSLMode` | empty | Certificate verification method (used by TLS/SSL connections, ignored in Windows), one of: `allow`, `prefer`, `require`, use `allow` to enable [SSL_VERIFY_PEER](https://www.openssl.org/docs/manmaster/man3/SSL_CTX_set_verify.html) TLS/SSL certificate verification mode, [SSL_VERIFY_PEER \| SSL_VERIFY_FAIL_IF_NO_PEER_CERT](https://www.openssl.org/docs/manmaster/man3/SSL_CTX_set_verify.html) is used otherwise |
+| `PrivateKeyFile` | empty | Path to private key file (used by TLS/SSL connections), can be empty if no private key file is used |
+| `CertificateFile` | empty | Path to certificate file (used by TLS/SSL connections, ignored in Windows), if the private key and the certificate are stored in the same file, this can be empty if `PrivateKeyFile` is specified |
+| `CALocation` | empty | Path to the file or directory containing the CA/root certificates (used by TLS/SSL connections, ignored in Windows) |
+| `DriverLog` | `on` if `CMAKE_BUILD_TYPE` is `Debug`, `off` otherwise | Enable or disable the extended driver logging |
+| `DriverLogFile` | `\temp\clickhouse-odbc-driver.log` on Windows, `/tmp/clickhouse-odbc-driver.log` otherwise | Path to the extended driver log file (used when `DriverLog` is `on`) |
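+
+For reference, here is a minimal sketch of a DSN entry in `~/.odbc.ini` built from the parameters above (the driver library path and the URL are assumptions - adjust them to your installation):
+
+```bash
+# Append a sample ClickHouse DSN to the user's odbc.ini (values are illustrative)
+cat >> ~/.odbc.ini <<'EOF'
+[clickhouse]
+# Path to the installed clickhouse-odbc driver library (assumption - check your install location)
+Driver   = /usr/local/lib/libclickhouseodbc.so
+Url      = http://localhost:8123
+Database = default
+UID      = default
+PWD      =
+Timeout  = 30
+EOF
+```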
## Troubleshooting & bug reporting
-If some software doesn't work properly with that driver, but works good with other drivers - we will be appritiate if you will be able to collect debug info.
+If some software doesn't work properly with that driver, but works well with other drivers, we would appreciate it if you could collect debug info.
To debug issues with the driver, first things that need to be done are:
- enabling driver manager tracing. Links may contain some irrelevant vendor-specific details.
@@ -142,7 +140,7 @@ brew install git cmake make poco openssl libiodbc # You may use unixodbc INSTEAD
**Note:** usually on Linux you use unixODBC driver manager, and on Mac - iODBC.
In some (rare) cases you may need use other driver manager, please do it only
-if you clearly understand the differencies. Driver should be used with the driver
+if you clearly understand the differences. Driver should be used with the driver
manager it was linked to.
Clone the repo with submodules:
diff --git a/content/en/altinity-kb-integrations/mysql-clickhouse.md b/content/en/altinity-kb-integrations/mysql-clickhouse.md
index 73166c6fb4..34a7d4b9c5 100644
--- a/content/en/altinity-kb-integrations/mysql-clickhouse.md
+++ b/content/en/altinity-kb-integrations/mysql-clickhouse.md
@@ -1,28 +1,36 @@
---
title: "MySQL"
-linkTitle: "Integration Clickhouse with MySQL"
+linkTitle: "Integrating ClickHouse® with MySQL"
weight: 100
-description: >-
- Integration Clickhouse with MySQL
---
### Replication using MaterializeMySQL.
-- https://clickhouse.tech/docs/en/engines/database-engines/materialized-mysql/
+- https://clickhouse.com/docs/en/engines/database-engines/materialized-mysql
- https://translate.google.com/translate?sl=auto&tl=en&u=https://www.jianshu.com/p/d0d4306411b3
- https://raw.githubusercontent.com/ClickHouse/clickhouse-presentations/master/meetup47/materialize_mysql.pdf
-It reads mysql binlog directly and transform queries into something which clickhouse can support. Supports updates and deletes (under the hood implemented via something like ReplacingMergeTree with enforced FINAL and 'deleted' flag). Status is 'experimental', there are quite a lot of known limitations and issues, but some people use it. The original author of that went to another project, and the main team don't have a lot of resource to improve that for now (more important thing in the backlog)
+It reads the MySQL binlog directly and transforms queries into something which ClickHouse® can support. Supports updates and deletes (under the hood implemented via something like ReplacingMergeTree with enforced FINAL and a 'deleted' flag). Status is 'experimental'; there are quite a lot of known limitations and issues, but some people use it. The original author moved to another project, and the main team doesn't have a lot of resources to improve it for now (more important things in the backlog)
The replication happens on the mysql database level.
-### Replication using debezium + Kafka
+### Replication using debezium + Kafka (+ Altinity Sink Connector for ClickHouse)
-Debezium can read the binlog and transform it to Kafka messages. You can later capture the stream of message on ClickHouse side and process it as you like.
-Please remeber that currently Kafka engine supports only at-least-once delivery guarantees.
+Debezium can read the binlog and transform it to Kafka messages.
+You can later capture the stream of messages on the ClickHouse side and process it as you like.
+Please remember that currently Kafka engine supports only at-least-once delivery guarantees.
It's used by several companies, quite nice & flexible. But initial setup may require some efforts.
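+
+A minimal sketch of the ClickHouse side (table/topic names and the extracted fields are hypothetical; the default Debezium envelope with `payload.before/after/op` is assumed):
+
+```bash
+# Consume Debezium JSON from Kafka and materialize it into a ReplacingMergeTree
+clickhouse-client --multiquery <<'SQL'
+CREATE TABLE mysql_cdc_queue (payload String)
+ENGINE = Kafka
+SETTINGS kafka_broker_list = 'kafka:9092',
+         kafka_topic_list  = 'dbserver1.mydb.mytable',
+         kafka_group_name  = 'clickhouse-cdc',
+         kafka_format      = 'JSONAsString';
+
+CREATE TABLE mytable_mirror
+(
+    id       UInt64,
+    value    String,
+    _deleted UInt8,
+    _version UInt64
+)
+ENGINE = ReplacingMergeTree(_version)
+ORDER BY id;
+
+-- Each consumed message is one Debezium envelope; extract the fields we care about
+CREATE MATERIALIZED VIEW mytable_consumer TO mytable_mirror AS
+SELECT
+    JSONExtractUInt(payload, 'payload', 'after', 'id')      AS id,
+    JSONExtractString(payload, 'payload', 'after', 'value') AS value,
+    JSONExtractString(payload, 'payload', 'op') = 'd'       AS _deleted,
+    JSONExtractUInt(payload, 'payload', 'ts_ms')             AS _version
+FROM mysql_cdc_queue;
+SQL
+```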
+#### Altinity Sink Connector for ClickHouse
+
+It can handle the transformation of Debezium messages (with support for DELETEs and UPDATEs) and exactly-once delivery for you.
+
+Links:
+* https://altinity.com/blog/fast-mysql-to-clickhouse-replication-announcing-the-altinity-sink-connector-for-clickhouse
+* https://altinity.com/mysql-to-clickhouse/
+* https://github.com/Altinity/clickhouse-sink-connector
+
#### Same as above but using https://maxwells-daemon.io/ instead of debezium.
Have no experience / feedback there, but should be very similar to debezium.
@@ -32,16 +40,16 @@ Have no experience / feedback there, but should be very similar to debezium.
See https://altinity.com/blog/2018/6/30/realtime-mysql-clickhouse-replication-in-practice
That was done long time ago in altinity for one use-case, and it seem like it was never used outside of that.
-It's a python application with lot of switches which can copy a schema or read binlog from mysql and put it to clickhouse.
+It's a Python application with a lot of switches which can copy a schema or read the binlog from MySQL and load it into ClickHouse.
Not supported currently. But it's just a python, so maybe can be adjusted to different needs.
-### Accessing MySQL data via integration engines from inside clickhouse.
+### Accessing MySQL data via integration engines from inside ClickHouse.
-MySQL [table engine](https://clickhouse.com/docs/en/engines/table-engines/integrations/mysql/) / [table function](https://clickhouse.com/docs/en/sql-reference/table-functions/mysql/), or [MySQL database engine](https://clickhouse.com/docs/en/engines/database-engines/mysql/) - clickhouse just connects to mysql server as a client, and can do normal selects.
+MySQL [table engine](https://clickhouse.com/docs/en/engines/table-engines/integrations/mysql/) / [table function](https://clickhouse.com/docs/en/sql-reference/table-functions/mysql/), or [MySQL database engine](https://clickhouse.com/docs/en/engines/database-engines/mysql/) - ClickHouse just connects to the MySQL server as a client and can do normal selects.
We had webinar about that a year ago: https://www.youtube.com/watch?v=44kO3UzIDLI
-Using that you can easily create some ETL script which will copy the data from mysql to clickhouse regularly, i.e. something like
+Using that, you can easily create an ETL script which will copy the data from MySQL to ClickHouse regularly, i.e. something like
```sql
INSERT INTO clickhouse_table SELECT * FROM mysql_table WHERE id > ...
@@ -49,7 +57,7 @@ INSERT INTO clickhouse_table SELECT * FROM mysql_table WHERE id > ...
Works great if you have append only table in MySQL.
-In newer clickhouse versions you can query this was also sharded / replicated MySQL cluster - see [ExternalDistributed](https://clickhouse.com/docs/en/engines/table-engines/integrations/ExternalDistributed/)
+In newer ClickHouse versions you can also query a sharded / replicated MySQL cluster this way - see [ExternalDistributed](https://clickhouse.com/docs/en/engines/table-engines/integrations/ExternalDistributed/)
### MySQL dictionaries
diff --git a/content/en/altinity-kb-interfaces/_index.md b/content/en/altinity-kb-interfaces/_index.md
index 5fb1e32fa8..e8b5b644f7 100644
--- a/content/en/altinity-kb-interfaces/_index.md
+++ b/content/en/altinity-kb-interfaces/_index.md
@@ -4,6 +4,6 @@ linkTitle: "Interfaces"
keywords:
- clickhouse interface
description: >
- See the frequent questions users have about clickhouse-client.
+ Frequent questions users have about `clickhouse-client`
weight: 9
---
diff --git a/content/en/altinity-kb-interfaces/altinity-kb-clickhouse-client.md b/content/en/altinity-kb-interfaces/altinity-kb-clickhouse-client.md
index 4ffaba09a4..4346abd242 100644
--- a/content/en/altinity-kb-interfaces/altinity-kb-clickhouse-client.md
+++ b/content/en/altinity-kb-interfaces/altinity-kb-clickhouse-client.md
@@ -4,7 +4,7 @@ linkTitle: "clickhouse-client"
keywords:
- clickhouse client
description: >
- ClickHouse client
+ ClickHouse® client
---
Q. How can I input multi-line SQL code? can you guys give me an example?
@@ -50,4 +50,4 @@ Also, it’s possible to have several client config files and pass one of them t
References:
-* [https://clickhouse.tech/docs/en/interfaces/cli/](https://clickhouse.tech/docs/en/interfaces/cli/)
+* [https://clickhouse.com/docs/en/interfaces/cli](https://clickhouse.com/docs/en/interfaces/cli)
diff --git a/content/en/altinity-kb-kubernetes/_index.md b/content/en/altinity-kb-kubernetes/_index.md
index d569a64ee2..09ca08dc0d 100644
--- a/content/en/altinity-kb-kubernetes/_index.md
+++ b/content/en/altinity-kb-kubernetes/_index.md
@@ -1,13 +1,565 @@
---
-title: "Kubernetes"
-linkTitle: "Kubernetes"
+title: "Using the Altinity Kubernetes Operator for ClickHouse®"
+linkTitle: "Using the Altinity Kubernetes Operator for ClickHouse®"
keywords:
- clickhouse in kubernetes
- kubernetes issues
+- Altinity Kubernetes operator for ClickHouse
description: >
- Run ClickHouse in Kubernetes without any issues.
+ Run ClickHouse® in Kubernetes without any issues.
weight: 8
+aliases:
+ /altinity-kb-kubernetes/altinity-kb-possible-issues-with-running-clickhouse-in-k8s/
---
-## clickhouse-backup
+## Useful links
+
+The Altinity Kubernetes Operator for ClickHouse® repo has very useful documentation:
+
+- [Quick Start Guide](https://github.com/Altinity/clickhouse-operator/blob/master/docs/quick_start.md)
+- [Operator Custom Resource Definition explained](https://github.com/Altinity/clickhouse-operator/blob/master/docs/custom_resource_explained.md)
+- [Examples - YAML files to deploy the operator in many common configurations](https://github.com/Altinity/clickhouse-operator/tree/master/docs/chi-examples)
+- [Main documentation](https://github.com/Altinity/clickhouse-operator/tree/master/docs#table-of-contents)
+
+## ClickHouse Operator IP filter
+
+- In the current version of the operator, the default user is limited to the IP addresses of the cluster pods. We plan to add a password option in 0.20.0 and use 'secret' authentication for distributed queries.
+
+## Start/Stop cluster
+
+- To stop or remove a cluster, don't delete the operator itself using:
+
+```bash
+kubectl delete -f https://raw.githubusercontent.com/Altinity/clickhouse-operator/master/deploy/operator/clickhouse-operator-install-bundle.yaml
+```
+
+- Instead, delete the installation: `kubectl delete chi cluster-name` (chi is the short name of the ClickHouseInstallation CRD)
+
+## DELETE PVCs
+
+https://altinity.com/blog/preventing-clickhouse-storage-deletion-with-the-altinity-kubernetes-operator-reclaimpolicy
+
+## Scaling
+
+The best way is to scale the deployments down to 0 replicas, then reboot the node and scale up again:
+
+1. First, check that all your volumes have the Retain reclaim policy:
+
+```bash
+kubectl get pv -o=custom-columns=PV:.metadata.name,NAME:.spec.claimRef.name,POLICY:.spec.persistentVolumeReclaimPolicy
+# Patch a PV if needed
+kubectl patch pv <pv-name> -p '{"spec":{"persistentVolumeReclaimPolicy":"Retain"}}'
+```
+
+```yaml
+spec:
+ templates:
+ volumeClaimTemplates:
+ - name: XXX
+ reclaimPolicy: Retain
+```
+
+2. After that just create a stop.yaml and `kubectl apply -f stop.yaml`
+
+```yaml
+kind: ClickHouseInstallation
+spec:
+ stop: "yes"
+```
+
+3. Reboot kubernetes node
+4. Scale the deployment back up by changing the stop property to `no` and running `kubectl apply -f stop.yaml`
+
+```yaml
+kind: ClickHouseInstallation
+spec:
+ stop: "no"
+```
+
+## Check where pods are executing
+
+```bash
+kubectl get pod -o=custom-columns=NAME:.metadata.name,STATUS:.status.phase,NODE:.spec.nodeName -n zk
+# Check which hosts in which AZs
+kubectl get node -o=custom-columns=NODE:.metadata.name,ZONE:.metadata.labels.'failure-domain\.beta\.kubernetes\.io/zone'
+```
+
+## Check node instance types:
+
+```bash
+kubectl get nodes -o json|jq -Cjr '.items[] | .metadata.name," ",.metadata.labels."beta.kubernetes.io/instance-type"," ",.metadata.labels."beta.kubernetes.io/arch", "\n"'|sort -k3 -r
+
+ip-10-3-9-2.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-9-236.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-9-190.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-9-138.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-9-110.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-8-39.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-8-219.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-8-189.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-13-40.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-12-248.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-12-216.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-12-170.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-11-229.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-11-188.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-11-175.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-10-218.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-10-160.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-10-145.eu-central-1.compute.internal t4g.large arm64
+ip-10-3-9-57.eu-central-1.compute.internal m5.large amd64
+ip-10-3-8-146.eu-central-1.compute.internal m5.large amd64
+ip-10-3-13-1.eu-central-1.compute.internal m5.xlarge amd64
+ip-10-3-11-52.eu-central-1.compute.internal m5.xlarge amd64
+ip-10-3-11-187.eu-central-1.compute.internal m5.xlarge amd64
+ip-10-3-10-217.eu-central-1.compute.internal m5.xlarge amd64
+```
+
+## Search for missing affinity rules:
+
+```bash
+kubectl get pods -o json -n zk |\
+jq -r "[.items[] | {name: .metadata.name,\
+ affinity: .spec.affinity}]"
+[
+ {
+ "name": "zookeeper-0",
+ "affinity": null
+ },
+ . . .
+]
+```
+
+## Storage classes
+
+```bash
+kubectl get pvc -o=custom-columns=NAME:.metadata.name,SIZE:.spec.resources.requests.storage,CLASS:.spec.storageClassName,VOLUME:.spec.volumeName
+...
+NAME SIZE CLASS VOLUME
+datadir-volume-zookeeper-0 25Gi gp2 pvc-9a3...9ee
+
+kubectl get storageclass/gp2
+...
+NAME PROVISIONER RECLAIMPOLICY...
+gp2 (default) ebs.csi.aws.com Delete
+```
+
+## Using CSI driver to protect storage:
+
+```yaml
+allowVolumeExpansion: true
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+ name: gp2-protected
+parameters:
+ encrypted: "true"
+ type: gp2
+provisioner: ebs.csi.aws.com
+reclaimPolicy: Retain
+volumeBindingMode: WaitForFirstConsumer
+```
+
+## Enable Resize of Volumes
+
+The operator does not delete volumes, so those were probably deleted by Kubernetes. In some newer versions there is a feature flag that deletes PVCs attached to a StatefulSet when the StatefulSet is deleted.
+
+Please try the following: use operator 0.20.3 and add the following to the defaults:
+
+```yaml
+ defaults:
+ storageManagement:
+ provisioner: Operator
+```
+
+That enables storage management by the operator instead of the StatefulSet. It allows volumes to be extended without re-creating the StatefulSet: for CSI drivers which support `allowVolumeExpansion` in their storage classes, the volume size can be increased without restarting the ClickHouse StatefulSet pods, because the StatefulSet template doesn't change and there is no need to delete/recreate the StatefulSet.
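+
+A rough sketch of the resize flow after that (namespace, CHI and storage class names are hypothetical):
+
+```bash
+# 1. Make sure the storage class allows expansion
+kubectl get storageclass gp2 -o jsonpath='{.allowVolumeExpansion}'   # expect "true"
+
+# 2. Bump the size in spec.templates.volumeClaimTemplates (e.g. 50Gi -> 100Gi) and re-apply
+kubectl -n my-namespace edit chi my-cluster
+
+# 3. Watch the PVCs grow without the ClickHouse pods being restarted
+kubectl -n my-namespace get pvc -w
+```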
+
+## Change server settings:
+
+https://github.com/Altinity/clickhouse-operator/issues/828
+
+```yaml
+kind: ClickHouseInstallation
+spec:
+ configuration:
+ settings:
+ max_concurrent_queries: 150
+```
+
+Or **edit ClickHouseInstallation:**
+
+```bash
+kubectl -n <namespace> get chi
+
+NAME CLUSTERS HOSTS STATUS HOSTS-COMPLETED AGE
+dnieto-test 1 4 Completed 211d
+mbak-test 1 1 Completed 44d
+rory-backupmar8 1 4 Completed 42h
+
+kubectl -n <namespace> edit ClickHouseInstallation dnieto-test
+```
+
+## Clickhouse-backup for CHOP
+
+Examples of using clickhouse-backup + clickhouse-operator for an EKS cluster which is not managed by `altinity.cloud`.
+
+Main idea: a second container in the ClickHouse pod plus a CronJob which inserts into and polls the `system.backup_actions` table to execute clickhouse-backup commands.
+
+https://github.com/AlexAkulov/clickhouse-backup/blob/master/Examples.md#how-to-use-clickhouse-backup-in-kubernetes
+
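+A hedged sketch of the command flow (assumes clickhouse-backup runs as a sidecar in API/server mode with the `system.backup_actions` integration enabled; pod and backup names are placeholders):
+
+```bash
+# Ask the sidecar to create a backup via the integration table
+kubectl -n my-namespace exec chi-my-cluster-my-cluster-0-0-0 -c clickhouse -- \
+  clickhouse-client -q "INSERT INTO system.backup_actions (command) VALUES ('create backup_2024_01_01')"
+
+# ... then poll until the action is finished
+kubectl -n my-namespace exec chi-my-cluster-my-cluster-0-0-0 -c clickhouse -- \
+  clickhouse-client -q "SELECT * FROM system.backup_actions WHERE command = 'create backup_2024_01_01'"
+```
+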
+## Configurations:
+
+How to modify yaml configs:
+
+https://github.com/Altinity/clickhouse-operator/blob/dc6cdc6f2f61fc333248bb78a8f8efe792d14ca2/tests/e2e/manifests/chi/test-016-settings-04.yaml#L26
+
+## clickhouse-operator install Example:
+
+Use the latest release if possible:
+https://github.com/Altinity/clickhouse-operator/releases
+
+- Number of nodes/replicas: 2 to 3 nodes with 500GB per node minimum
+- Zookeeper: 3 node ensemble
+- Type of instances: m6i.4xlarge to start with and you can go up to m6i.16xlarge
+- Persistent Storage/volumes: EBS gp2 for data and logs and gp3 for zookeeper
+
+### Install operator in namespace
+
+```bash
+#!/bin/bash
+
+# Namespace to install operator into
+OPERATOR_NAMESPACE="${OPERATOR_NAMESPACE:-dnieto-test-chop}"
+# Namespace to install metrics-exporter into
+METRICS_EXPORTER_NAMESPACE="${OPERATOR_NAMESPACE}"
+# Operator's docker image
+OPERATOR_IMAGE="${OPERATOR_IMAGE:-altinity/clickhouse-operator:latest}"
+# Metrics exporter's docker image
+METRICS_EXPORTER_IMAGE="${METRICS_EXPORTER_IMAGE:-altinity/metrics-exporter:latest}"
+
+# Setup clickhouse-operator into specified namespace
+kubectl apply --namespace="${OPERATOR_NAMESPACE}" -f <( \
+ curl -s https://raw.githubusercontent.com/Altinity/clickhouse-operator/master/deploy/operator/clickhouse-operator-install-template.yaml | \
+ OPERATOR_IMAGE="${OPERATOR_IMAGE}" \
+ OPERATOR_NAMESPACE="${OPERATOR_NAMESPACE}" \
+ METRICS_EXPORTER_IMAGE="${METRICS_EXPORTER_IMAGE}" \
+ METRICS_EXPORTER_NAMESPACE="${METRICS_EXPORTER_NAMESPACE}" \
+ envsubst \
+)
+```
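+
+A quick sanity check after the script finishes (a sketch; resource names may differ between releases):
+
+```bash
+# The operator and metrics-exporter pod should be Running in the chosen namespace
+kubectl --namespace="${OPERATOR_NAMESPACE}" get pods
+
+# The ClickHouse CRDs should be registered cluster-wide
+kubectl get crd | grep clickhouse.altinity.com
+```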
+
+### Install zookeeper ensemble
+
+ZooKeeper nodes will be named like `zookeeper-0.zoons`
+
+```bash
+> kubectl create ns zoo3ns
+> kubectl -n zoo3ns apply -f https://raw.githubusercontent.com/Altinity/clickhouse-operator/master/deploy/zookeeper/quick-start-persistent-volume/zookeeper-3-nodes-1GB-for-tests-only.yaml
+
+# Check the names - they should be like:
+#   zookeeper.zoo3ns when using a separate namespace,
+#   or just zookeeper when using the same namespace.
+# ZooKeeper must be accessed via its service, i.e. service_name.namespace
+```
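+
+A quick check that the ensemble is up (a sketch; label and service names follow the example manifest and may differ):
+
+```bash
+kubectl -n zoo3ns get pods,svc
+# Optionally verify DNS resolution of the client service from inside the cluster
+kubectl -n zoo3ns run -it --rm dns-test --image=busybox --restart=Never -- nslookup zookeeper.zoo3ns
+```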
+
+### Deploy test cluster
+
+```bash
+> kubectl -n dnieto-test-chop apply -f dnieto-test-chop.yaml
+```
+
+```yaml
+# dnieto-test-chop.yaml
+apiVersion: "clickhouse.altinity.com/v1"
+kind: "ClickHouseInstallation"
+metadata:
+ name: "dnieto-dev"
+spec:
+ configuration:
+ settings:
+ max_concurrent_queries: "200"
+ merge_tree/ttl_only_drop_parts: "1"
+ profiles:
+ default/queue_max_wait_ms: "10000"
+ readonly/readonly: "1"
+ users:
+ admin/networks/ip:
+ - 0.0.0.0/0
+ - '::/0'
+ admin/password_sha256_hex: ""
+ admin/profile: default
+ admin/access_management: 1
+ zookeeper:
+ nodes:
+ - host: zookeeper.dnieto-test-chop
+ port: 2181
+ clusters:
+ - name: dnieto-dev
+ templates:
+ podTemplate: pod-template-with-volumes
+ serviceTemplate: chi-service-template
+ layout:
+ shardsCount: 1
+ # put the number of desired nodes 3 by default
+ replicasCount: 2
+ templates:
+ podTemplates:
+ - name: pod-template-with-volumes
+ spec:
+ containers:
+ - name: clickhouse
+ image: clickhouse/clickhouse-server:22.3
+ # separate data from logs
+ volumeMounts:
+ - name: data-storage-vc-template
+ mountPath: /var/lib/clickhouse
+ - name: log-storage-vc-template
+ mountPath: /var/log/clickhouse-server
+ serviceTemplates:
+ - name: chi-service-template
+ generateName: "service-{chi}"
+ # type ObjectMeta struct from k8s.io/meta/v1
+ metadata:
+ annotations:
+ # https://kubernetes.io/docs/concepts/services-networking/service/#internal-load-balancer
+ # this tags for elb load balancer
+ #service.beta.kubernetes.io/aws-load-balancer-backend-protocol: tcp
+ #service.beta.kubernetes.io/aws-load-balancer-cross-zone-load-balancing-enabled: "true"
+ #https://kubernetes.io/docs/concepts/services-networking/service/#aws-nlb-support
+ service.beta.kubernetes.io/aws-load-balancer-internal: "true"
+ service.beta.kubernetes.io/aws-load-balancer-type: nlb
+ spec:
+ ports:
+ - name: http
+ port: 8123
+ - name: client
+ port: 9000
+ type: LoadBalancer
+ volumeClaimTemplates:
+ - name: data-storage-vc-template
+ spec:
+ # no storageClassName - means use default storageClassName
+ # storageClassName: default
+ # here if you have a storageClassName defined for gp2 you can use it.
+ # kubectl get storageclass
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 50Gi
+ reclaimPolicy: Retain
+ - name: log-storage-vc-template
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 2Gi
+```
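+
+After applying, a sketch of how to verify the installation (the pod name follows the operator's `chi-{chi}-{cluster}-{shard}-{replica}` convention and is an assumption):
+
+```bash
+# Wait until the installation reports Completed
+kubectl -n dnieto-test-chop get chi
+
+# Check the pods and the LoadBalancer service created from the service template
+kubectl -n dnieto-test-chop get pods,svc -o wide
+
+# Smoke-test a replica
+kubectl -n dnieto-test-chop exec -it chi-dnieto-dev-dnieto-dev-0-0-0 -c clickhouse -- clickhouse-client -q "SELECT version()"
+```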
+
+### Install monitoring:
+
+In order to set up Prometheus as a backend for all the asynchronous_metric_log / metric_log tables and also set up Grafana dashboards:
+
+- https://github.com/Altinity/clickhouse-operator/blob/master/docs/prometheus_setup.md
+- https://github.com/Altinity/clickhouse-operator/blob/master/docs/grafana_setup.md
+- [clickhouse-operator/monitoring_setup.md at master · Altinity/clickhouse-operator](https://github.com/Altinity/clickhouse-operator/blob/master/docs/monitoring_setup.md)
+
+## Extra configs
+
+There is an admin user by default in the deployment that is used for administrative tasks.
+
+## kubectl chi basic commands:
+
+```bash
+> kubectl get crd
+
+NAME CREATED AT
+clickhouseinstallations.clickhouse.altinity.com 2021-10-11T13:46:43Z
+clickhouseinstallationtemplates.clickhouse.altinity.com 2021-10-11T13:46:44Z
+clickhouseoperatorconfigurations.clickhouse.altinity.com 2021-10-11T13:46:44Z
+eniconfigs.crd.k8s.amazonaws.com 2021-10-11T13:41:23Z
+grafanadashboards.integreatly.org 2021-10-11T13:54:37Z
+grafanadatasources.integreatly.org 2021-10-11T13:54:38Z
+grafananotificationchannels.integreatly.org 2022-05-17T14:27:48Z
+grafanas.integreatly.org 2021-10-11T13:54:37Z
+provisioners.karpenter.sh 2022-05-17T14:27:49Z
+securitygrouppolicies.vpcresources.k8s.aws 2021-10-11T13:41:27Z
+volumesnapshotclasses.snapshot.storage.k8s.io 2022-04-22T13:34:20Z
+volumesnapshotcontents.snapshot.storage.k8s.io 2022-04-22T13:34:20Z
+volumesnapshots.snapshot.storage.k8s.io 2022-04-22T13:34:20Z
+
+> kubectl -n test-clickhouse-operator-dnieto2 get chi
+NAME CLUSTERS HOSTS STATUS HOSTS-COMPLETED AGE
+simple-01 70m
+
+> kubectl -n test-clickhouse-operator-dnieto2 describe chi simple-01
+Name: simple-01
+Namespace: test-clickhouse-operator-dnieto2
+Labels:
+Annotations:
+API Version: clickhouse.altinity.com/v1
+Kind: ClickHouseInstallation
+Metadata:
+ Creation Timestamp: 2023-01-09T20:38:06Z
+ Generation: 1
+ Managed Fields:
+ API Version: clickhouse.altinity.com/v1
+ Fields Type: FieldsV1
+ fieldsV1:
+ f:metadata:
+ f:annotations:
+ .:
+ f:kubectl.kubernetes.io/last-applied-configuration:
+ f:spec:
+ .:
+ f:configuration:
+ .:
+ f:clusters:
+ Manager: kubectl-client-side-apply
+ Operation: Update
+ Time: 2023-01-09T20:38:06Z
+ Resource Version: 267483138
+ UID: d7018efa-2b13-42fd-b1c5-b798fc6d0098
+Spec:
+ Configuration:
+ Clusters:
+ Name: simple
+Events:
+
+> kubectl get chi --all-namespaces
+
+NAMESPACE NAME CLUSTERS HOSTS STATUS HOSTS-COMPLETED AGE
+andrey-dev source 1 1 Completed 38d
+eu chi-dnieto-test-common-configd 1 1 Completed 161d
+eu dnieto-test 1 4 Completed 151d
+laszlo-dev node-rescale-2 1 4 Completed 5d13h
+laszlo-dev single 1 1 Completed 5d13h
+laszlo-dev2 zk2 1 1 Completed 52d
+test-clickhouse-operator-dnieto2 simple-01
+
+> kubectl -n test-clickhouse-operator-dnieto2 edit clickhouseinstallations.clickhouse.altinity.com simple-01
+
+# Troubleshoot operator stuff
+> kubectl -n test-clickhouse-operator-ns edit chi
+> kubectl -n test-clickhouse-operator describe chi
+> kubectl -n test-clickhouse-operator get chi -o yaml
+
+# Check operator logs usually located in kube-system or specific namespace
+> kubectl -n test-ns logs chi-operator-pod -f
+
+# Check output to yaml
+> kubectl -n test-ns get services -o yaml
+```
+
+## Problem with DELETE finalizers:
+
+https://github.com/Altinity/clickhouse-operator/issues/830
+
+There's a problem with stuck finalizers that can cause old CHI installations to hang. The sequence of operations looks like this.
+
+1. You delete the existing ClickHouse operator using `kubectl delete -f operator-installation.yaml` with running CHI clusters.
+2. You then drop the namespace where the CHI clusters are running, e.g., `kubectl delete ns my-namespace`
+3. This hangs. You run `kubectl get ns my-namespace -o yaml` and you'll see a message like the following: "message: 'Some content in the namespace has finalizers remaining: finalizer.clickhouseinstallation.altinity.com'"
+
+That means the CHI can't be deleted because the operator that would process its finalizer was deleted out from under it.
+
+The fix is to figure out the CHI name (which should still be visible) and edit it to remove the finalizer reference.
+
+1. `kubectl -n my-namespace get chi`
+2. `kubectl -n my-namespace edit clickhouseinstallations.clickhouse.altinity.com my-clickhouse-cluster`
+
+Remove the finalizer from the metadata, save it, and everything will delete properly.
+
+**`TIP: if you deleted the namespace too and it no longer exists, just recreate it and apply the method above`**
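+
+If editing is awkward, the same result can usually be achieved with a patch (a sketch; namespace and CHI names are placeholders):
+
+```bash
+# Clear the finalizers on the stuck ClickHouseInstallation so deletion can proceed
+kubectl -n my-namespace patch chi my-clickhouse-cluster --type=merge -p '{"metadata":{"finalizers":[]}}'
+```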
+
+## Karpenter scaler
+
+```bash
+> kubectl -n karpenter get all
+NAME READY STATUS RESTARTS AGE
+pod/karpenter-75c8b7667b-vbmj4 1/1 Running 0 16d
+pod/karpenter-75c8b7667b-wszxt 1/1 Running 0 16d
+
+NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
+service/karpenter ClusterIP 172.20.129.188 8080/TCP,443/TCP 16d
+
+NAME READY UP-TO-DATE AVAILABLE AGE
+deployment.apps/karpenter 2/2 2 2 16d
+
+NAME DESIRED CURRENT READY AGE
+replicaset.apps/karpenter-75c8b7667b 2 2 2 16d
+
+> kubectl -n karpenter logs pod/karpenter-75c8b7667b-vbmj4
+
+2023-02-06T06:33:44.269Z DEBUG Successfully created the logger.
+2023-02-06T06:33:44.269Z DEBUG Logging level set to: debug
+{"level":"info","ts":1675665224.2755454,"logger":"fallback","caller":"injection/injection.go:63","msg":"Starting informers..."}
+2023-02-06T06:33:44.376Z DEBUG controller waiting for configmaps {"commit": "f60dacd", "configmaps": ["karpenter-global-settings"]}
+2023-02-06T06:33:44.881Z DEBUG controller karpenter-global-settings config "karpenter-global-settings" config was added or updated: settings.Settings{BatchMaxDuration:v1.Duration{Duration:10000000000}, BatchIdleDuration:v1.Duration{Duration:1000000000}} {"commit": "f60dacd"}
+2023-02-06T06:33:44.881Z DEBUG controller karpenter-global-settings config "karpenter-global-settings" config was added or updated: settings.Settings{ClusterName:"eu", ClusterEndpoint:"https://79974769E264251E43B18AF4CA31CE8C.gr7.eu-central-1.eks.amazonaws.com", DefaultInstanceProfile:"KarpenterNodeInstanceProfile-eu", EnablePodENI:false, EnableENILimitedPodDensity:true, IsolatedVPC:false, NodeNameConvention:"ip-name", VMMemoryOverheadPercent:0.075, InterruptionQueueName:"Karpenter-eu", Tags:map[string]string{}} {"commit": "f60dacd"}
+2023-02-06T06:33:45.001Z DEBUG controller.aws discovered region {"commit": "f60dacd", "region": "eu-central-1"}
+2023-02-06T06:33:45.003Z DEBUG controller.aws unable to detect the IP of the kube-dns service, services "kube-dns" is forbidden: User "system:serviceaccount:karpenter:karpenter" cannot get resource "services" in API group "" in the namespace "kube-system" {"commit": "f60dacd"}
+2023/02/06 06:33:45 Registering 2 clients
+2023/02/06 06:33:45 Registering 2 informer factories
+2023/02/06 06:33:45 Registering 3 informers
+2023/02/06 06:33:45 Registering 6 controllers
+2023-02-06T06:33:45.080Z DEBUG controller.aws discovered version {"commit": "f60dacd", "version": "v0.20.0"}
+2023-02-06T06:33:45.082Z INFO controller Starting server {"commit": "f60dacd", "path": "/metrics", "kind": "metrics", "addr": "[::]:8080"}
+2023-02-06T06:33:45.082Z INFO controller Starting server {"commit": "f60dacd", "kind": "health probe", "addr": "[::]:8081"}
+I0206 06:33:45.182600 1 leaderelection.go:248] attempting to acquire leader lease karpenter/karpenter-leader-election...
+2023-02-06T06:33:45.226Z INFO controller Starting informers... {"commit": "f60dacd"}
+2023-02-06T06:33:45.417Z INFO controller.aws.pricing updated spot pricing with instance types and offerings {"commit": "f60dacd", "instance-type-count": 607, "offering-count": 1400}
+2023-02-06T06:33:47.670Z INFO controller.aws.pricing updated on-demand pricing {"commit": "f60dacd", "instance-type-count": 505}
+```
+
+## Operator Affinities:
+
+
+
+## Deploy operator with clickhouse-keeper
+
+https://github.com/Altinity/clickhouse-operator/issues/959
[setup-example.yaml](https://github.com/Altinity/clickhouse-operator/blob/eb3fc4e28514d0d6ea25a40698205b02949bcf9d/docs/chi-examples/03-persistent-volume-07-do-not-chown.yaml)
+
+## Possible issues with running ClickHouse in K8s
+
+The biggest problem with running ClickHouse® in K8s happens when clickhouse-server can't start for some reason and the pod falls into CrashLoopBackOff, so you can't easily get into the pod to check/fix/restart ClickHouse.
+
+There are multiple possible reasons for this; some of them can be fixed without manual intervention in the pod:
+
+1. Wrong configuration files. Fix: check the templates which are being used for config file generation and fix them.
+2. During an upgrade, some backward-incompatible change prevents ClickHouse from starting. Fix: downgrade and check the backward-incompatible changes for all versions in between.
+
+The next reasons require manual intervention in the pod/volume.
+There are two ways to get access to the data:
+
+1. Change the entrypoint of the ClickHouse pod to something else, so the pod isn't terminated due to the ClickHouse error.
+2. Attach the ClickHouse data volume to some generic pod (like Ubuntu).
+3. An unclean restart produced broken files and/or the on-disk state differs too much from the state in ZooKeeper for replicated tables. Fix: create the `force_restore_data` flag (see the sketch after this list).
+4. Wrong file permissions for ClickHouse files in the pod. Fix: use chown to set the right ownership for files and directories.
+5. Errors in the ClickHouse table schema prevent ClickHouse from starting. Fix: rename the problematic `table.sql` scripts to `table.sql.bak`.
+6. Occasional failure of distributed queries because of a wrong user/password. Due to the nature of k8s with dynamic IP allocations, it's possible that ClickHouse caches a wrong IP -> hostname combination and disallows connections because of a mismatched hostname. Fix: run `SYSTEM DROP DNS CACHE;` or set `<disable_internal_dns_cache>1</disable_internal_dns_cache>` in config.xml.
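+
+A sketch of the manual fixes above, once you have shell access to the pod or to a generic pod with the data volume attached (pod, namespace and table names are placeholders taken from the commands below):
+
+```bash
+# Fix 3: force restore of replicated tables from ZooKeeper/Keeper metadata
+kubectl -n chcluster exec chi-chcluster-2-1-0 -c clickhouse-pod -- touch /var/lib/clickhouse/flags/force_restore_data
+
+# Fix 4: repair ownership of the ClickHouse data directory
+kubectl -n chcluster exec chi-chcluster-2-1-0 -c clickhouse-pod -- chown -R clickhouse:clickhouse /var/lib/clickhouse
+
+# Fix 5: move a broken table definition out of the way so the server can start
+kubectl -n chcluster exec chi-chcluster-2-1-0 -c clickhouse-pod -- \
+  mv /var/lib/clickhouse/metadata/default/table.sql /var/lib/clickhouse/metadata/default/table.sql.bak
+```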
+
+Caveats:
+
+1. Not all configuration/state folders are being covered by persistent volumes. ([geobases](https://clickhouse.tech/docs/en/sql-reference/functions/ym-dict-functions/#multiple-geobases))
+2. The page cache belongs to the k8s node while PVs are mounted into the pod; in case of a fast shutdown there is a possibility of losing some data (needs to be clarified).
+3. Some cloud providers (GKE) can have a slow unlink command, which is important for ClickHouse because it's needed for parts management (`max_part_removal_threads` setting).
+
+Useful commands:
+
+```bash
+kubectl logs chi-chcluster-2-1-0 -c clickhouse-pod -n chcluster --previous
+kubectl describe pod chi-chcluster-2-1-0 -n chcluster
+```
+
+Q. ClickHouse is caching the Kafka pod's IP and trying to connect to the same IP even when there is a new Kafka pod running and the old one is deprecated. Is there some setting where we could refresh the connection?
+
+A. Set `<disable_internal_dns_cache>1</disable_internal_dns_cache>` in config.xml.
+
+### ClickHouse init process failed
+
+It's due to a low value of the `CLICKHOUSE_INIT_TIMEOUT` environment variable. Consider increasing it up to 1 minute.
+[https://github.com/ClickHouse/ClickHouse/blob/9f5cd35a6963cc556a51218b46b0754dcac7306a/docker/server/entrypoint.sh\#L120](https://github.com/ClickHouse/ClickHouse/blob/9f5cd35a6963cc556a51218b46b0754dcac7306a/docker/server/entrypoint.sh#L120)
diff --git a/content/en/altinity-kb-kubernetes/altinity-kb-istio-user-issue-k8s.md b/content/en/altinity-kb-kubernetes/altinity-kb-istio-user-issue-k8s.md
new file mode 100644
index 0000000000..5e162d8377
--- /dev/null
+++ b/content/en/altinity-kb-kubernetes/altinity-kb-istio-user-issue-k8s.md
@@ -0,0 +1,76 @@
+---
+title: "Istio Issues"
+linkTitle: "Istio Issues"
+weight: 100
+description:
+ Working with the popular service mesh
+keywords:
+ - istio
+---
+
+## What is Istio?
+
+Per documentation on [Istio Project's website](https://istio.io/latest/docs/overview/what-is-istio/), Istio is "an open source service mesh that layers transparently onto existing distributed applications. Istio’s powerful features provide a uniform and more efficient way to secure, connect, and monitor services. Istio is the path to load balancing, service-to-service authentication, and monitoring – with few or no service code changes."
+
+Istio works quite well at providing this functionality, and does so through controlling service-to-service communication in a cluster, fine-grained control of traffic behavior, routing rules, load balancing, a policy layer, and a configuration API supporting access controls, rate limiting, etc.
+
+It also provides metrics about all traffic in a cluster. One can get an amazing amount of metrics from it. Datadog even has a provider that when turned on is a bit like a firehose of information.
+
+Istio essentially uses a proxy to intercept all network traffic and provides the ability to configure application-aware features on top of it.
+
+## ClickHouse and Istio
+
+The implications for ClickHouse need to be taken into consideration, however, and this page attempts to address them through real-life scenarios that Altinity devops, infrastructure, and support engineers have had to solve.
+
+### Operator High Level Description
+
+The Altinity ClickHouse Operator, when installed using a deployment, also creates four custom resources:
+
+- clickhouseinstallations.clickhouse.altinity.com (chi)
+- clickhousekeeperinstallations.clickhouse-keeper.altinity.com (chk)
+- clickhouseinstallationtemplates.clickhouse.altinity.com (chit)
+- clickhouseoperatorconfigurations.clickhouse.altinity.com (chopconf)
+
+For the first two, it uses StatefulSets to run both Keeper and ClickHouse clusters. For Keeper, it manages how many replicas are specified, and for ClickHouse, it manages how many replicas and shards are specified.
+
+In managing `ClickHouseInstallations`, it requires that the operator can interact with the database running on clusters it creates using a specific `clickhouse_operator` user and needs network access rules that allow connection to the ClickHouse pods.
+
+Many of the issues with Istio pertain to cases where this becomes a problem, particularly when the IP address of the operator pod changes and it is no longer allowed to connect to the ClickHouse clusters that it manages.
+
+### Issue: Authentication error of clickhouse-operator
+
+This was a ClickHouse cluster running in a Kubernetes setup with Istio.
+
+- The clickhouse operator was unable to query the clickhouse pods because of authentication errors. After a period of time, the operator gave up yet the ClickHouse cluster (ClickHouseInstallation) worked normally.
+- Errors showed `AUTHENTICATION_FAILED` and `connections from :ffff:127.0.0.6 are not allowed` as well as `IP_ADDRESS_NOT_ALLOWED`
+- Also, the `clickhouse_operator` user was correctly configured.
+- On the surface this looked similar to a recent issue described in https://altinity.com/blog/deepseek-clickhouse-and-the-altinity-kubernetes-operator (disabled network access for the default user due to the DeepSeek issue), and one idea was to upgrade the operator (which would have fixed the problem if it concerned the default user).
+- However, the key to this issue is that the problem was with the `clickhouse_operator` user, not the `default` user, hence not due to the aforementioned issue.
+- More consideration was given to how Istio affects which services can connect, which made it clearer that this was an issue with using Istio with the operator rather than with the operator version.
+- The suggestion was to exclude Istio from the clickhouse-operator and the `ClickHouseInstallation`; see this issue: https://github.com/Altinity/clickhouse-operator/issues/1261#issuecomment-1797895080
+- The change required would be something like the following:
+
+```yaml
+---
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: clickhouse-operator
+spec:
+ template:
+ metadata:
+ annotations:
+ sidecar.istio.io/inject: "false"
+
+---
+
+apiVersion: "clickhouse.altinity.com/v1"
+kind: ClickHouseInstallation
+metadata:
+ name: your-chi
+ annotations:
+ sidecar.istio.io/inject: "false"
+
+```
+
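+A quick way to confirm the sidecar is actually gone after the change (a sketch; labels, namespaces and names are assumptions - adjust to your deployment):
+
+```bash
+# Neither the operator pod nor the ClickHouse pods should list an istio-proxy container anymore
+kubectl -n kube-system get pod -l app=clickhouse-operator -o jsonpath='{range .items[*]}{.metadata.name}{": "}{.spec.containers[*].name}{"\n"}{end}'
+kubectl -n my-namespace get pods -l clickhouse.altinity.com/chi=your-chi -o jsonpath='{range .items[*]}{.metadata.name}{": "}{.spec.containers[*].name}{"\n"}{end}'
+```
+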
diff --git a/content/en/altinity-kb-kubernetes/altinity-kb-possible-issues-with-running-clickhouse-in-k8s.md b/content/en/altinity-kb-kubernetes/altinity-kb-possible-issues-with-running-clickhouse-in-k8s.md
index de1c5240ed..c908418304 100644
--- a/content/en/altinity-kb-kubernetes/altinity-kb-possible-issues-with-running-clickhouse-in-k8s.md
+++ b/content/en/altinity-kb-kubernetes/altinity-kb-possible-issues-with-running-clickhouse-in-k8s.md
@@ -1,10 +1,11 @@
---
-title: "Possible issues with running ClickHouse in k8s"
-linkTitle: "Possible issues with running ClickHouse in k8s"
+title: "Possible issues with running ClickHouse® in K8s"
+linkTitle: "Possible issues with running ClickHouse® in K8s"
description: >
- Possible issues with running ClickHouse in k8s
+ Possible issues with running ClickHouse® in K8s
+draft: true
---
-The biggest problem with running ClickHouse in k8s, happens when clickhouse-server can't start for some reason and pod is falling in CrashloopBackOff, so you can't easily get in the pod and check/fix/restart ClickHouse.
+The biggest problem with running ClickHouse® in K8s happens when clickhouse-server can't start for some reason and the pod falls into CrashLoopBackOff, so you can't easily get into the pod to check/fix/restart ClickHouse.
There is multiple possible reasons for this, some of them can be fixed without manual intervention in pod:
@@ -25,7 +26,7 @@ Caveats:
1. Not all configuration/state folders are being covered by persistent volumes. ([geobases](https://clickhouse.tech/docs/en/sql-reference/functions/ym-dict-functions/#multiple-geobases))
2. Page cache belongs to k8s node and pv are being mounted to pod, in case of fast shutdown there is possibility to loss some data(needs to be clarified)
-3. Some cloud providers (GKE) can have slow unlink command, which is important for clickhouse because it's needed for parts management. (`max_part_removal_threads` setting)
+3. Some cloud providers (GKE) can have a slow unlink command, which is important for ClickHouse because it's needed for parts management (`max_part_removal_threads` setting).
Useful commands:
@@ -34,7 +35,7 @@ kubectl logs chi-chcluster-2-1-0 -c clickhouse-pod -n chcluster --previous
kubectl describe pod chi-chcluster-2-1-0 -n chcluster
```
-Q. Clickhouse is caching the Kafka pod's IP and trying to connect to the same ip even when there is a new Kafka pod running and the old one is deprecated. Is there some setting where we could refresh the connection
+Q. ClickHouse is caching the Kafka pod's IP and trying to connect to the same IP even when there is a new Kafka pod running and the old one is deprecated. Is there some setting where we could refresh the connection?
`1` in config.xml
diff --git a/content/en/altinity-kb-queries-and-syntax/_index.md b/content/en/altinity-kb-queries-and-syntax/_index.md
index e771c1d459..46d710897d 100644
--- a/content/en/altinity-kb-queries-and-syntax/_index.md
+++ b/content/en/altinity-kb-queries-and-syntax/_index.md
@@ -5,6 +5,6 @@ keywords:
- clickhouse queries
- clickhouse joins
description: >
- Learn about ClickHouse queries & syntax, including Joins & Window Functions.
+ Learn about ClickHouse® queries & syntax, including Joins & Window Functions.
weight: 1
---
diff --git a/content/en/altinity-kb-queries-and-syntax/altinity-kb-alter-modify-column-is-stuck-the-column-is-inaccessible.md b/content/en/altinity-kb-queries-and-syntax/altinity-kb-alter-modify-column-is-stuck-the-column-is-inaccessible.md
index 45be4c3aaf..ce63579370 100644
--- a/content/en/altinity-kb-queries-and-syntax/altinity-kb-alter-modify-column-is-stuck-the-column-is-inaccessible.md
+++ b/content/en/altinity-kb-queries-and-syntax/altinity-kb-alter-modify-column-is-stuck-the-column-is-inaccessible.md
@@ -6,13 +6,13 @@ description: >
---
## Problem
-You have table:
+You’ve created a table in ClickHouse with the following structure:
```sql
CREATE TABLE modify_column(column_n String) ENGINE=MergeTree() ORDER BY tuple();
```
-Populate it with data:
+You populated the table with some data:
```sql
INSERT INTO modify_column VALUES ('key_a');
@@ -20,13 +20,13 @@ INSERT INTO modify_column VALUES ('key_b');
INSERT INTO modify_column VALUES ('key_c');
```
-Tried to apply alter table query with changing column type:
+Next, you attempted to change the column type using this query:
```sql
ALTER TABLE modify_column MODIFY COLUMN column_n Enum8('key_a'=1, 'key_b'=2);
```
-But it didn’t succeed and you see an error in system.mutations table:
+However, the operation failed, and you encountered an error when inspecting the system.mutations table:
```sql
SELECT *
@@ -51,7 +51,12 @@ latest_fail_time: 2021-03-03 18:38:59
latest_fail_reason: Code: 36, e.displayText() = DB::Exception: Unknown element 'key_c' for type Enum8('key_a' = 1, 'key_b' = 2): while executing 'FUNCTION CAST(column_n :: 0, 'Enum8(\'key_a\' = 1, \'key_b\' = 2)' :: 1) -> cast(column_n, 'Enum8(\'key_a\' = 1, \'key_b\' = 2)') Enum8('key_a' = 1, 'key_b' = 2) : 2': (while reading from part /var/lib/clickhouse/data/default/modify_column/all_3_3_0/): While executing MergeTree (version 21.3.1.6041)
```
-And you can’t query that column anymore:
+The mutation result showed an error indicating that the value 'key_c' was not recognized in the Enum8 definition:
+```sql
+Unknown element 'key_c' for type Enum8('key_a' = 1, 'key_b' = 2)
+```
+
+Now, when trying to query the column, ClickHouse returns an exception and the column becomes inaccessible:
```sql
SELECT column_n
@@ -70,36 +75,54 @@ Received exception from server (version 21.3.1):
Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Unknown element 'key_c' for type Enum8('key_a' = 1, 'key_b' = 2): while executing 'FUNCTION CAST(column_n :: 0, 'Enum8(\'key_a\' = 1, \'key_b\' = 2)' :: 1) -> cast(column_n, 'Enum8(\'key_a\' = 1, \'key_b\' = 2)') Enum8('key_a' = 1, 'key_b' = 2) : 2': (while reading from part /var/lib/clickhouse/data/default/modify_column/all_3_3_0/): While executing MergeTreeThread.
```
-### Solution
+This query results in:
+```sql
+Code: 36. DB::Exception: Unknown element 'key_c' for type Enum8('key_a' = 1, 'key_b' = 2)
+```
-You should do the following:
+### Root Cause
+The failure occurred because the Enum8 type only allows for predefined values. Since 'key_c' wasn't included in the definition, the mutation failed and left the table in an inconsistent state.
+
+### Solution
-Check which mutation is stuck and kill it:
+1. Identify and Terminate the Stuck Mutation
+First, you need to locate the mutation that’s stuck in an incomplete state.
```sql
SELECT * FROM system.mutations WHERE table = 'modify_column' AND is_done=0 FORMAT Vertical;
+```
+
+Once you’ve identified the mutation, terminate it using:
+```sql
KILL MUTATION WHERE table = 'modify_column' AND mutation_id = 'id_of_stuck_mutation';
```
+This will stop the operation and allow you to revert the changes.
-Apply reverting modify column query to convert table to previous column type:
+2. Revert the Column Type
+Next, revert the column back to its original type, which was String, to restore the table’s accessibility:
```sql
ALTER TABLE modify_column MODIFY COLUMN column_n String;
```
-Check if column is accessible now:
+3. Verify the Column is Accessible Again
+To ensure the column is functioning normally, run a simple query to verify its data:
```sql
SELECT column_n, count() FROM modify_column GROUP BY column_n;
```
-Run fixed ALTER MODIFY COLUMN query.
+4. Apply the Correct Column Modification
+Now that the column is accessible, you can safely reapply the ALTER query, but this time include all the required enum values:
```sql
ALTER TABLE modify_column MODIFY COLUMN column_n Enum8('key_a'=1, 'key_b'=2, 'key_c'=3);
```
-You can monitor progress of column type change with system.mutations or system.parts_columns tables:
+5. Monitor Progress
+You can monitor the progress of the column modification using the system.mutations or system.parts_columns tables to ensure everything proceeds as expected:
+
+To track mutation progress:
```sql
SELECT
@@ -107,8 +130,12 @@ SELECT
parts_to_do,
is_done
FROM system.mutations
-WHERE table = 'modify_column'
+WHERE table = 'modify_column';
+```
+
+To review the column's active parts:
+```sql
SELECT
column,
type,
@@ -119,5 +146,5 @@ FROM system.parts_columns
WHERE (table = 'modify_column') AND (column = 'column_n') AND active
GROUP BY
column,
- type
+ type;
```
diff --git a/content/en/altinity-kb-queries-and-syntax/altinity-kb-final-clause-speed.md b/content/en/altinity-kb-queries-and-syntax/altinity-kb-final-clause-speed.md
index ade3331949..64d833fa81 100644
--- a/content/en/altinity-kb-queries-and-syntax/altinity-kb-final-clause-speed.md
+++ b/content/en/altinity-kb-queries-and-syntax/altinity-kb-final-clause-speed.md
@@ -6,18 +6,32 @@ description: >
---
`SELECT * FROM table FINAL`
-* Before 20.5 - always executed in a single thread and slow.
+### History
+
+* Before ClickHouse® 20.5 - always executed in a single thread and slow.
* Since 20.5 - final can be parallel, see [https://github.com/ClickHouse/ClickHouse/pull/10463](https://github.com/ClickHouse/ClickHouse/pull/10463)
-* Since 20.10 - you can use `do_not_merge_across_partitions_select_final` setting.
-* Sinse 22.6 - final even more parallel, see [https://github.com/ClickHouse/ClickHouse/pull/36396](https://github.com/ClickHouse/ClickHouse/pull/36396)
+* Since 20.10 - you can use `do_not_merge_across_partitions_select_final` setting. See [https://github.com/ClickHouse/ClickHouse/pull/15938](https://github.com/ClickHouse/ClickHouse/pull/15938) and [https://github.com/ClickHouse/ClickHouse/issues/11722](https://github.com/ClickHouse/ClickHouse/issues/11722)
+* Since 22.6 - final even more parallel, see [https://github.com/ClickHouse/ClickHouse/pull/36396](https://github.com/ClickHouse/ClickHouse/pull/36396)
+* Since 22.8 - final doesn't read excessive data, see [https://github.com/ClickHouse/ClickHouse/pull/47801](https://github.com/ClickHouse/ClickHouse/pull/47801)
+* Since 23.5 - final use less memory, see [https://github.com/ClickHouse/ClickHouse/pull/50429](https://github.com/ClickHouse/ClickHouse/pull/50429)
+* Since 23.9 - final doesn't read PK columns if unneeded ie only one part in partition, see [https://github.com/ClickHouse/ClickHouse/pull/53919](https://github.com/ClickHouse/ClickHouse/pull/53919)
+* Since 23.12 - final applied only for intersecting ranges of parts, see [https://github.com/ClickHouse/ClickHouse/pull/58120](https://github.com/ClickHouse/ClickHouse/pull/58120)
+* Since 24.1 - final doesn't compare rows from the same part with level > 0, see [https://github.com/ClickHouse/ClickHouse/pull/58142](https://github.com/ClickHouse/ClickHouse/pull/58142)
+* Since 24.1 - final use vertical algorithm (more cache friendly), see [https://github.com/ClickHouse/ClickHouse/pull/54366](https://github.com/ClickHouse/ClickHouse/pull/54366)
+* Since 25.6 - final supports Additional Skip Indexes, see [https://github.com/ClickHouse/ClickHouse/pull/78350](https://github.com/ClickHouse/ClickHouse/pull/78350)
+
+
+### Partitioning
-See [https://github.com/ClickHouse/ClickHouse/pull/15938](https://github.com/ClickHouse/ClickHouse/pull/15938) and [https://github.com/ClickHouse/ClickHouse/issues/11722](https://github.com/ClickHouse/ClickHouse/issues/11722)
+Proper partition design could speed up FINAL processing.
-So it can work in the following way:
+For example, if you have a table with Daily partitioning, you can:
+- After the day ends plus some time interval during which you may still get updates, run `OPTIMIZE TABLE xxx PARTITION 'prev_day' FINAL`
+- or add table settings `min_age_to_force_merge_seconds=86400, min_age_to_force_merge_on_partition_only=1`
+
+In that case, using FINAL with `do_not_merge_across_partitions_select_final` will be cheap or even zero-cost.
-1. Daily partitioning
-2. After day end + some time interval during which you can get some updates - for example at 3am / 6am you do `OPTIMIZE TABLE xxx PARTITION 'prev_day' FINAL`
-3. In that case using that FINAL with `do_not_merge_across_partitions_select_final` will be cheap.
+Example:
```sql
DROP TABLE IF EXISTS repl_tbl;
@@ -81,3 +95,33 @@ SELECT count() FROM repl_tbl FINAL WHERE NOT ignore(*)
/* only 0.35 sec slower, and while partitions have about the same size that extra cost will be about constant */
```
+
+### Light ORDER BY
+
+All columns specified in ORDER BY will be read during FINAL processing, creating additional disk load. Use fewer columns and lighter column types to create faster queries.
+
+Example: UUID vs UInt64
+```sql
+CREATE TABLE uuid_table (id UUID, value UInt64) ENGINE = ReplacingMergeTree() ORDER BY id;
+CREATE TABLE uint64_table (id UInt64,value UInt64) ENGINE = ReplacingMergeTree() ORDER BY id;
+
+INSERT INTO uuid_table SELECT generateUUIDv4(), number FROM numbers(5E7);
+INSERT INTO uint64_table SELECT number, number FROM numbers(5E7);
+
+SELECT sum(value) FROM uuid_table FINAL format JSON;
+SELECT sum(value) FROM uint64_table FINAL format JSON;
+```
+[Results](https://fiddle.clickhouse.com/e2441e5d-ccb6-4f67-bee0-7cc2c4e3f43e):
+```
+ "elapsed": 0.58738197,
+ "rows_read": 50172032,
+ "bytes_read": 1204128768
+
+ "elapsed": 0.189792142,
+ "rows_read": 50057344,
+ "bytes_read": 480675040
+```
+
+
+
+
diff --git a/content/en/altinity-kb-queries-and-syntax/altinity-kb-kill-query.md b/content/en/altinity-kb-queries-and-syntax/altinity-kb-kill-query.md
index 8cb6d48148..255bc07de0 100644
--- a/content/en/altinity-kb-queries-and-syntax/altinity-kb-kill-query.md
+++ b/content/en/altinity-kb-queries-and-syntax/altinity-kb-kill-query.md
@@ -7,12 +7,12 @@ description: >
Unfortunately not all queries can be killed.
`KILL QUERY` only sets a flag that must be checked by the query.
A query pipeline is checking this flag before a switching to next block. If the pipeline has stuck somewhere in the middle it cannot be killed.
-If a query does not stop, the only way to get rid of it is to restart ClickHouse.
+If a query does not stop, the only way to get rid of it is to restart ClickHouse®.
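+
+A minimal sketch of the usual flow (the query_id is a placeholder):
+
+```bash
+# Find the offending query and its query_id
+clickhouse-client -q "SELECT query_id, elapsed, query FROM system.processes"
+
+# Ask ClickHouse to kill it without waiting for confirmation
+clickhouse-client -q "KILL QUERY WHERE query_id = '<query_id>' ASYNC"
+```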
-See also
+See also:
-[https://github.com/ClickHouse/ClickHouse/issues/3964](https://github.com/ClickHouse/ClickHouse/issues/3964)
-[https://github.com/ClickHouse/ClickHouse/issues/1576](https://github.com/ClickHouse/ClickHouse/issues/1576)
+* [https://github.com/ClickHouse/ClickHouse/issues/3964](https://github.com/ClickHouse/ClickHouse/issues/3964)
+* [https://github.com/ClickHouse/ClickHouse/issues/1576](https://github.com/ClickHouse/ClickHouse/issues/1576)
## How to replace a running query
diff --git a/content/en/altinity-kb-queries-and-syntax/altinity-kb-optimize-vs-optimize-final.md b/content/en/altinity-kb-queries-and-syntax/altinity-kb-optimize-vs-optimize-final.md
index 8d525b7bf0..a2f0a245a8 100644
--- a/content/en/altinity-kb-queries-and-syntax/altinity-kb-optimize-vs-optimize-final.md
+++ b/content/en/altinity-kb-queries-and-syntax/altinity-kb-optimize-vs-optimize-final.md
@@ -12,7 +12,7 @@ You have 40 parts in 3 partitions. This unscheduled merge selects some partition
`OPTIMIZE TABLE xyz FINAL` -- initiates a cycle of unscheduled merges.
-ClickHouse merges parts in this table until will remains 1 part in each partition (if a system has enough free disk space). As a result, you get 3 parts, 1 part per partition. In this case, CH rewrites parts even if they are already merged into a single part. It creates a huge CPU / Disk load if the table ( XYZ) is huge. ClickHouse reads / uncompress / merge / compress / writes all data in the table.
+ClickHouse® merges parts in this table until only 1 part remains in each partition (if the system has enough free disk space). As a result, you get 3 parts, 1 part per partition. In this case, ClickHouse rewrites parts even if they are already merged into a single part. It creates a huge CPU / Disk load if the table (XYZ) is huge. ClickHouse reads / uncompresses / merges / compresses / writes all data in the table.
If this table has size 1TB it could take around 3 hours to complete.
diff --git a/content/en/altinity-kb-queries-and-syntax/altinity-kb-parameterized-views.md b/content/en/altinity-kb-queries-and-syntax/altinity-kb-parameterized-views.md
index ab0499f8e3..27b1a53b7b 100644
--- a/content/en/altinity-kb-queries-and-syntax/altinity-kb-parameterized-views.md
+++ b/content/en/altinity-kb-queries-and-syntax/altinity-kb-parameterized-views.md
@@ -4,6 +4,36 @@ linkTitle: "Parameterized views"
description: >
Parameterized views
---
+
+ClickHouse® versions 23.1+ (23.1.6.42, 23.2.5.46, 23.3.1.2823)
+have inbuilt support for [parametrized views](https://clickhouse.com/docs/en/sql-reference/statements/create/view#parameterized-view):
+
+```sql
+CREATE VIEW my_new_view AS
+SELECT *
+FROM deals
+WHERE category_id IN (
+ SELECT category_id
+ FROM deal_categories
+ WHERE category = {category:String}
+)
+
+SELECT * FROM my_new_view(category = 'hot deals');
+```
+### One more example
+
+```sql
+CREATE OR REPLACE VIEW v AS SELECT 1::UInt32 x WHERE x IN ({xx:Array(UInt32)});
+
+select * from v(xx=[1,2,3]);
+┌─x─┐
+│ 1 │
+└───┘
+```
+
+
+## ClickHouse versions pre 23.1
+
Custom settings allows to emulate parameterized views.
You need to enable custom settings and define any prefixes for settings.
diff --git a/content/en/altinity-kb-queries-and-syntax/altinity-kb-possible-deadlock-avoided.-client-should-retry.md b/content/en/altinity-kb-queries-and-syntax/altinity-kb-possible-deadlock-avoided.-client-should-retry.md
index cd8727780c..e5c1be17b0 100644
--- a/content/en/altinity-kb-queries-and-syntax/altinity-kb-possible-deadlock-avoided.-client-should-retry.md
+++ b/content/en/altinity-kb-queries-and-syntax/altinity-kb-possible-deadlock-avoided.-client-should-retry.md
@@ -4,7 +4,7 @@ linkTitle: "Possible deadlock avoided. Client should retry"
description: >
Possible deadlock avoided. Client should retry
---
-In version 19.14 a serious issue was found: a race condition that can lead to server deadlock. The reason for that was quite fundamental, and a temporary workaround for that was added ("possible deadlock avoided").
+In ClickHouse® version 19.14 a serious issue was found: a race condition that can lead to server deadlock. The reason for that was quite fundamental, and a temporary workaround for that was added ("possible deadlock avoided").
Those locks are one of the fundamental things that the core team was actively working on in 2020.
@@ -20,4 +20,8 @@ In 20.6 all table-level locks which were possible to remove were removed, so alt
Typically issue was happening when doing some concurrent select on `system.parts` / `system.columns` / `system.table` with simultaneous table manipulations (doing some kind of ALTERS / TRUNCATES / DROP)I
-If that exception happens often in your use-case: An update is recommended. In the meantime, check which queries are running (especially to system.tables / system.parts and other system tables) and check if killing them / avoiding them helps to solve the issue.
+If that exception happens often in your use case:
- use a recent ClickHouse version
- ensure you use the Atomic engine for the database (not Ordinary); this can be checked in system.databases
+
+Sometimes you can work around the issue by finding the queries which use that table concurrently (especially queries against system.tables / system.parts and other system tables) and killing them (or avoiding them).
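+
+To illustrate both checks, here is a small sketch (the filter patterns are illustrative):
+
+```sql
+-- check that databases use the Atomic engine
+SELECT name, engine FROM system.databases;
+
+-- find concurrent queries touching system tables
+SELECT query_id, elapsed, query
+FROM system.processes
+WHERE query ILIKE '%system.parts%' OR query ILIKE '%system.columns%' OR query ILIKE '%system.tables%';
+
+-- if needed, kill an offending query by its id
+-- KILL QUERY WHERE query_id = '...';
+```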
diff --git a/content/en/altinity-kb-queries-and-syntax/altinity-kb-sample-by.md b/content/en/altinity-kb-queries-and-syntax/altinity-kb-sample-by.md
index 6dfa4cecbe..7f7e010091 100644
--- a/content/en/altinity-kb-queries-and-syntax/altinity-kb-sample-by.md
+++ b/content/en/altinity-kb-queries-and-syntax/altinity-kb-sample-by.md
@@ -8,17 +8,17 @@ The execution pipeline is embedded in the partition reading code.
So that works this way:
-1. ClickHouse does partition pruning based on `WHERE` conditions.
+1. ClickHouse® does partition pruning based on `WHERE` conditions.
2. For every partition, it picks a columns ranges (aka 'marks' / 'granulas') based on primary key conditions.
3. Here the sampling logic is applied: a) in case of `SAMPLE k` (`k` in `0..1` range) it adds conditions `WHERE sample_key < k * max_int_of_sample_key_type` b) in case of `SAMPLE k OFFSET m` it adds conditions `WHERE sample_key BETWEEN m * max_int_of_sample_key_type AND (m + k) * max_int_of_sample_key_type`c) in case of `SAMPLE N` (N>1) if first estimates how many rows are inside the range we need to read and based on that convert it to 3a case (calculate k based on number of rows in ranges and desired number of rows)
4. on the data returned by those other conditions are applied (so here the number of rows can be decreased here)
-[Source Code](https://github.com/ClickHouse/ClickHouse/blob/92c937db8b50844c7216d93c5c398d376e82f6c3/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp#L355)
+* [Source Code](https://github.com/ClickHouse/ClickHouse/blob/92c937db8b50844c7216d93c5c398d376e82f6c3/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp#L355)
## SAMPLE by
-[Docs](https://clickhouse.yandex/docs/en/query_language/select/#select-sample-clause)
-[Source Code](https://github.com/ClickHouse/ClickHouse/blob/92c937db8b50844c7216d93c5c398d376e82f6c3/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp#L355)
+* [Docs](https://clickhouse.yandex/docs/en/query_language/select/#select-sample-clause)
+* [Source Code](https://github.com/ClickHouse/ClickHouse/blob/92c937db8b50844c7216d93c5c398d376e82f6c3/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp#L355)
SAMPLE key
Must be:
@@ -56,4 +56,4 @@ SELECT count() FROM table WHERE ... AND cityHash64(some_high_card_key) % 10 = 0;
SELECT count() FROM table WHERE ... AND rand() % 10 = 0; -- Non-deterministic
```
-ClickHouse will read more data from disk compared to an example with a good SAMPLE key, but it's more universal and can be used if you can't change table ORDER BY key.
\ No newline at end of file
+ClickHouse will read more data from disk compared to an example with a good SAMPLE key, but it's more universal and can be used if you can't change the table's ORDER BY key. (To learn more about ClickHouse internals, [Administrator Training for ClickHouse](https://altinity.com/clickhouse-training/) is available.)
\ No newline at end of file
diff --git a/content/en/altinity-kb-queries-and-syntax/ansi-sql-mode.md b/content/en/altinity-kb-queries-and-syntax/ansi-sql-mode.md
index 61d3973b5d..ab4306ecda 100644
--- a/content/en/altinity-kb-queries-and-syntax/ansi-sql-mode.md
+++ b/content/en/altinity-kb-queries-and-syntax/ansi-sql-mode.md
@@ -4,13 +4,49 @@ linkTitle: "ANSI SQL mode"
description: >
ANSI SQL mode
---
-It's possible to tune some settings which would make ClickHouse more ANSI SQL compatible(and slower):
+To make ClickHouse® more compatible with ANSI SQL standards, you can adjust several settings. These configurations bring ClickHouse closer to ANSI SQL behavior, but may slow down query performance:
```sql
-SET join_use_nulls=1; -- introduced long ago
-SET cast_keep_nullable=1; -- introduced in 20.5
-SET union_default_mode='DISTINCT'; -- introduced in 21.1
-SET allow_experimental_window_functions=1; -- introduced in 21.3
-SET prefer_column_name_to_alias=1; -- introduced in 21.4;
-SET group_by_use_nulls=1; -- introduced in 22.7;
+join_use_nulls=1
```
+Introduced in: early versions
+Ensures that JOIN operations return NULL for non-matching rows, aligning with standard SQL behavior.
+
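+A minimal sketch of the effect (values and aliases are illustrative):
+
+```sql
+SET join_use_nulls = 1;
+
+SELECT t1.x, t2.y
+FROM (SELECT 1 AS x) AS t1
+LEFT JOIN (SELECT 2 AS x, 20 AS y) AS t2 USING (x);
+-- the non-matching row now returns y = NULL instead of the default 0
+```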
+
+```sql
+cast_keep_nullable=1
+```
+Introduced in: v20.5
+Preserves the NULL flag when casting between data types, which is typical in ANSI SQL.
+
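+A small illustrative query:
+
+```sql
+SET cast_keep_nullable = 1;
+
+SELECT toTypeName(CAST(toNullable(1) AS Int32));
+-- returns Nullable(Int32); with the setting disabled the Nullable wrapper is dropped
+```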
+
+```sql
+union_default_mode='DISTINCT'
+```
+Introduced in: v21.1
+Makes the UNION operation default to UNION DISTINCT, which removes duplicate rows, following ANSI SQL behavior.
+
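+A small illustrative query:
+
+```sql
+SET union_default_mode = 'DISTINCT';
+
+SELECT 1 AS x UNION SELECT 1 AS x;
+-- returns a single row, because a bare UNION is now treated as UNION DISTINCT
+```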
+
+```sql
+allow_experimental_window_functions=1
+```
+Introduced in: v21.3
+Enables support for window functions, which are a standard feature in ANSI SQL.
+
+
+```sql
+prefer_column_name_to_alias=1
+```
+Introduced in: v21.4
+This setting resolves ambiguities by preferring column names over aliases, following ANSI SQL conventions.
+
+
+```sql
+group_by_use_nulls=1
+```
+Introduced in: v22.7
+Allows NULL values to appear in the GROUP BY clause, consistent with ANSI SQL behavior.
+
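+For example, with ROLLUP the subtotal rows now use NULL instead of the type's default value (illustrative query):
+
+```sql
+SET group_by_use_nulls = 1;
+
+SELECT number % 2 AS k, count() AS c
+FROM numbers(4)
+GROUP BY k WITH ROLLUP
+ORDER BY k;
+-- the grand-total row has k = NULL instead of k = 0
+```
+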
+By enabling these settings, ClickHouse becomes more ANSI SQL-compliant, although this may come with a trade-off in terms of performance. Each of these options can be enabled as needed, based on the specific SQL compatibility requirements of your application.
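+
+If you want to apply several of them at once, they can be set together in one statement or put into a settings profile (a sketch; keep only the settings you need):
+
+```sql
+SET join_use_nulls = 1,
+    cast_keep_nullable = 1,
+    union_default_mode = 'DISTINCT',
+    allow_experimental_window_functions = 1,
+    prefer_column_name_to_alias = 1,
+    group_by_use_nulls = 1;
+```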
+
+
diff --git a/content/en/altinity-kb-queries-and-syntax/array-functions-as-window.md b/content/en/altinity-kb-queries-and-syntax/array-functions-as-window.md
index 8a66dde750..73a08bdca8 100644
--- a/content/en/altinity-kb-queries-and-syntax/array-functions-as-window.md
+++ b/content/en/altinity-kb-queries-and-syntax/array-functions-as-window.md
@@ -2,15 +2,14 @@
title: "Using array functions to mimic window-functions alike behavior"
linkTitle: "Using array functions to mimic window-functions alike behavior"
weight: 100
-description: >-
- Using array functions to mimic window-functions alike behavior.
---
-# Using array functions to mimic window functions alike behavior
+There are cases where you may need to mimic window functions using arrays in ClickHouse. This could be for optimization purposes, to better manage memory, or to enable on-disk spilling, especially if you’re working with an older version of ClickHouse that doesn't natively support window functions.
-There are some usecases when you may want to mimic window functions using Arrays - as an optimization step, or to contol the memory better / use on-disk spiling, or just if you have old ClickHouse version.
+Here’s an example demonstrating how to mimic a window function like runningDifference() using arrays:
-## Running difference sample
+#### Step 1: Create Sample Data
+We’ll start by creating a test table with some sample data:
```sql
DROP TABLE IS EXISTS test_running_difference
@@ -24,10 +23,8 @@ SELECT
FROM numbers(100)
-SELECT * FROM test_running_difference
-```
+SELECT * FROM test_running_difference;
-```text
┌─id─┬──────────────────ts─┬────val─┐
│ 0 │ 2010-01-01 00:00:00 │ -1209 │
│ 1 │ 2010-01-01 00:00:00 │ 43 │
@@ -134,13 +131,15 @@ SELECT * FROM test_running_difference
100 rows in set. Elapsed: 0.003 sec.
```
+This table contains IDs, timestamps (ts), and values (val), where each id appears multiple times with different timestamps.
+
+#### Step 2: Running Difference Example
+If you try using runningDifference directly, it works block by block, which can be problematic when the data needs to be ordered or when group changes occur.
+
-runningDifference works only in blocks & require ordered data & problematic when group changes
```sql
select id, val, runningDifference(val) from (select * from test_running_difference order by id, ts);
-```
-```
┌─id─┬────val─┬─runningDifference(val)─┐
│ 0 │ -1209 │ 0 │
│ 0 │ 66839 │ 68048 │
@@ -248,13 +247,15 @@ select id, val, runningDifference(val) from (select * from test_running_differen
```
-## Arrays !
+The output may look inconsistent because runningDifference requires ordered data within blocks.
-### 1. Group & Collect the data into array
+#### Step 3: Using Arrays for Grouping and Calculation
+Instead of using runningDifference, we can utilize arrays to group data, sort it, and apply similar logic more efficiently.
+**Grouping Data into Arrays** -
+You can group multiple columns into arrays by using the groupArray function. For example, to collect several columns as arrays of tuples, you can use the following query:
-you can collect several column by builing array of tuples:
-```
+```sql
SELECT
id,
groupArray(tuple(ts, val))
@@ -285,10 +286,9 @@ GROUP BY id
└────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
-### Do needed ordering in each array
-
-For example - by second element of tuple:
-```
+**Sorting Arrays** -
+To sort the arrays by a specific element, for example, by the second element of the tuple, you can use the arraySort function:
+```sql
SELECT
id,
arraySort(x -> (x.2), groupArray((ts, val)))
@@ -321,9 +321,11 @@ GROUP BY id
20 rows in set. Elapsed: 0.004 sec.
```
-That can be rewritten like this:
+This sorts each array by the val (second element of the tuple) for each id.
-```
+**Simplified Sorting Example** - We can rewrite the query in a more concise way using WITH clauses for better readability:
+
+```sql
WITH
groupArray(tuple(ts, val)) as window_rows,
arraySort(x -> x.1, window_rows) as sorted_window_rows
@@ -334,9 +336,10 @@ FROM test_running_difference
GROUP BY id
```
-### Apply needed logic arrayMap / arrayDifference etc
+**Applying Calculations with Arrays** -
+Once the data is sorted, you can apply array functions like arrayMap and arrayDifference to calculate differences between values in the arrays:
-```
+```sql
WITH
groupArray(tuple(ts, val)) as window_rows,
arraySort(x -> x.1, window_rows) as sorted_window_rows,
@@ -347,10 +350,7 @@ SELECT
sorted_window_rows_val_column_diff
FROM test_running_difference
GROUP BY id
-```
-
-```
┌─id─┬─sorted_window_rows_val_column_diff─┐
│ 0 │ [0,68048,68243,72389,67860] │
│ 1 │ [0,19397,17905,16978,18345] │
@@ -380,10 +380,8 @@ GROUP BY id
You can do also a lot of magic with arrayEnumerate and accessing different values by their ids.
-### Now you can return you arrays back to rows
-
-
-use arrayJoin
+**Reverting Arrays Back to Rows** -
+You can convert the arrays back into rows using arrayJoin:
```sql
WITH
@@ -398,9 +396,7 @@ SELECT
FROM test_running_difference
GROUP BY id
```
-
-
- or ARRAY JOIN
+Or use ARRAY JOIN to join the arrays back to the original structure:
```sql
SELECT
@@ -421,8 +417,6 @@ FROM test_running_difference
GROUP BY id
) as t1
ARRAY JOIN sorted_window_rows_val_column_diff as diff, sorted_window_rows_ts_column as ts
-
```
-
-etc.
+This allows you to manipulate and analyze data within arrays effectively, using powerful functions such as arrayMap, arrayDifference, and arrayEnumerate.
diff --git a/content/en/altinity-kb-queries-and-syntax/async-inserts.md b/content/en/altinity-kb-queries-and-syntax/async-inserts.md
new file mode 100644
index 0000000000..2bb0486557
--- /dev/null
+++ b/content/en/altinity-kb-queries-and-syntax/async-inserts.md
@@ -0,0 +1,157 @@
+---
+title: "Async INSERTs"
+linkTitle: "Async INSERTs"
+description: >
+ Comprehensive guide to ClickHouse Async INSERTs - configuration, best practices, and monitoring
+---
+
+## Overview
+
+Async INSERTs is a ClickHouse® feature that enables automatic server-side batching of data. While we generally recommend batching at the application/ingestor level for better control and decoupling, async inserts are valuable when you have hundreds or thousands of clients performing small inserts and client-side batching is not feasible.
+
+**Key Documentation:** [Official Async Inserts Documentation](https://clickhouse.com/docs/en/optimize/asynchronous-inserts)
+
+## How Async Inserts Work
+
+When `async_insert=1` is enabled, ClickHouse buffers incoming inserts and flushes them to disk when one of these conditions is met:
+1. Buffer reaches specified size (`async_insert_max_data_size`)
+2. Time threshold elapses (`async_insert_busy_timeout_ms`)
+3. Maximum number of queries accumulate (`async_insert_max_query_number`)
+
+## Critical Configuration Settings
+
+### Core Settings
+
+```sql
+-- Enable async inserts (0=disabled, 1=enabled)
+SET async_insert = 1;
+
+-- Wait behavior (STRONGLY RECOMMENDED: use 1)
+-- 0 = fire-and-forget mode (risky - no error feedback)
+-- 1 = wait for data to be written to storage
+SET wait_for_async_insert = 1;
+
+-- Buffer flush conditions
+SET async_insert_max_data_size = 1000000; -- 1MB default
+SET async_insert_busy_timeout_ms = 1000; -- 1 second
+SET async_insert_max_query_number = 100; -- max queries before flush
+```
+
+### Adaptive Timeout (Since 24.3)
+
+```sql
+-- Adaptive timeout automatically adjusts flush timing based on server load
+-- Default: 1 (enabled) - OVERRIDES manual timeout settings
+-- Set to 0 for deterministic behavior with manual settings
+SET async_insert_use_adaptive_busy_timeout = 0;
+```
+
+## Important Behavioral Notes
+
+### What Works and What Doesn't
+
+✅ **Works with Async Inserts:**
+- Direct INSERT with VALUES
+- INSERT with FORMAT (JSONEachRow, CSV, etc.)
+- Native protocol inserts (since 22.x)
+
+❌ **Does NOT Work:**
+- `INSERT .. SELECT` statements - Other strategies are needed for managing performance and load. Do not use `async_insert`.
+
+### Data Safety Considerations
+
+**ALWAYS use `wait_for_async_insert = 1` in production!**
+
+Risks with `wait_for_async_insert = 0`:
+- **Silent data loss** on errors (read-only table, disk full, too many parts)
+- Data loss on sudden restart (no fsync by default)
+- Data not immediately queryable after acknowledgment
+- No error feedback to client
+
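+A minimal sketch of a safe per-query setup (the table and values are hypothetical):
+
+```sql
+-- hypothetical table events(id UInt64, payload String)
+INSERT INTO events SETTINGS async_insert = 1, wait_for_async_insert = 1
+VALUES (1, 'payload');
+```
+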
+### Deduplication Behavior
+
+- **Sync inserts:** Automatic deduplication enabled by default
+- **Async inserts:** Deduplication disabled by default
+- Enable with `async_insert_deduplicate = 1` (since 22.x)
+- **Warning:** Don't use with `deduplicate_blocks_in_dependent_materialized_views = 1`
+
+## Features / Improvements
+
+* Async insert dedup: Support block deduplication for asynchronous inserts. Before this change, async inserts did not support deduplication, because multiple small inserts coexisted in one inserted batch:
+ - [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075)
+ - [#43304](https://github.com/ClickHouse/ClickHouse/pull/43304)
+* Added system table `asynchronous_insert_log`. It contains information about asynchronous inserts (including results of queries in fire-and-forget mode, i.e. with wait_for_async_insert=0) for better introspection [#42040](https://github.com/ClickHouse/ClickHouse/pull/42040)
+* Support async inserts in **clickhouse-client** for queries with inlined data **(Native protocol)**:
+ - [#34267](https://github.com/ClickHouse/ClickHouse/pull/34267)
+ - [#54098](https://github.com/ClickHouse/ClickHouse/issues/54098)
+ - [#54381](https://github.com/ClickHouse/ClickHouse/issues/54381)
+* Async insert backpressure [#47623](https://github.com/ClickHouse/ClickHouse/issues/47623)
+* Limit the deduplication overhead when using `async_insert_deduplicate` [#46549](https://github.com/ClickHouse/ClickHouse/pull/46549)
+* `SYSTEM FLUSH ASYNC INSERTS` [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160)
+* Adjustable asynchronous insert timeouts [#58486](https://github.com/ClickHouse/ClickHouse/pull/58486)
+
+
+## Bugfixes
+
+- Fixed bug which could lead to deadlock while using asynchronous inserts [#43233](https://github.com/ClickHouse/ClickHouse/pull/43233).
+- Fix crash when async inserts with deduplication are used for ReplicatedMergeTree tables using a nondefault merging algorithm [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676)
+- Fixed async inserts not working with the `log_comment` setting [#48430](https://github.com/ClickHouse/ClickHouse/issues/48430)
+- Fix misbehaviour with async inserts with deduplication [#50663](https://github.com/ClickHouse/ClickHouse/pull/50663)
+- Reject Insert if `async_insert=1` and `deduplicate_blocks_in_dependent_materialized_views=1`[#60888](https://github.com/ClickHouse/ClickHouse/pull/60888)
+- Disable `async_insert_use_adaptive_busy_timeout` correctly with compatibility settings [#61468](https://github.com/ClickHouse/ClickHouse/pull/61468)
+
+
+## Observability / Introspection
+
+In 22.x versions, it is not possible to relate `part_log/query_id` column with `asynchronous_insert_log/query_id` column. We need to use `query_log/query_id`:
+
+`asynchronous_insert_log` shows the `query_id` and `flush_query_id` of each async insert. The `query_id` from `asynchronous_insert_log` shows up in `system.query_log` as `type = 'QueryStart'`, but the same `query_id` does not show up in the `query_id` column of `system.part_log`. That is because the `query_id` column in `part_log` is the identifier of the INSERT query that created a data part, and it is populated for sync INSERTs but not for async inserts.
+
+So in `asynchronous_inserts` table you can check the current batch that still has not been flushed. In the `asynchronous_insert_log` you can find a log of all the flushed async inserts.
+
+This has been improved in **ClickHouse 23.7**: flush queries for async inserts (the queries that do the final push of data) are now logged in the `system.query_log`, where they appear as `query_kind = 'AsyncInsertFlush'` [#51160](https://github.com/ClickHouse/ClickHouse/pull/51160)
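+
+For example, to inspect flush queries and the per-insert log entries (a sketch; column availability may vary slightly between versions):
+
+```sql
+-- flush queries (23.7+) in query_log
+SELECT query_id, query_duration_ms, written_rows
+FROM system.query_log
+WHERE query_kind = 'AsyncInsertFlush' AND type = 'QueryFinish'
+ORDER BY event_time DESC
+LIMIT 10;
+
+-- individual async inserts and the flush they ended up in
+SELECT query_id, flush_query_id, format, bytes, status
+FROM system.asynchronous_insert_log
+ORDER BY event_time DESC
+LIMIT 10;
+```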
+
+
+## Versions
+
+- **23.8** is a good version to start using async inserts because of the improvements and bugfixes.
+- **24.3** the new adaptive timeout mechanism has been added, so ClickHouse will throttle inserts based on the server load ([#58486](https://github.com/ClickHouse/ClickHouse/pull/58486)). This new feature is enabled by default and will OVERRIDE current async insert settings, so it is better to disable it if your async insert settings already work well. Here's how to do it in a clickhouse-client session: `SET async_insert_use_adaptive_busy_timeout = 0;` You can also add it as a setting on the INSERT or as a profile setting.
+
+
+## Metrics
+
+```sql
+SELECT name
+FROM system.columns
+WHERE (`table` = 'metric_log') AND ((name ILIKE '%asyncinsert%') OR (name ILIKE '%asynchronousinsert%'))
+
+┌─name─────────────────────────────────────────────┐
+│ ProfileEvent_AsyncInsertQuery │
+│ ProfileEvent_AsyncInsertBytes │
+│ ProfileEvent_AsyncInsertRows │
+│ ProfileEvent_AsyncInsertCacheHits │
+│ ProfileEvent_FailedAsyncInsertQuery │
+│ ProfileEvent_DistributedAsyncInsertionFailures │
+│ CurrentMetric_AsynchronousInsertThreads │
+│ CurrentMetric_AsynchronousInsertThreadsActive │
+│ CurrentMetric_AsynchronousInsertThreadsScheduled │
+│ CurrentMetric_AsynchronousInsertQueueSize │
+│ CurrentMetric_AsynchronousInsertQueueBytes │
+│ CurrentMetric_PendingAsyncInsert │
+│ CurrentMetric_AsyncInsertCacheSize │
+└──────────────────────────────────────────────────┘
+
+SELECT *
+FROM system.metrics
+WHERE (metric ILIKE '%asyncinsert%') OR (metric ILIKE '%asynchronousinsert%')
+
+┌─metric─────────────────────────────┬─value─┬─description─────────────────────────────────────────────────────────────┐
+│ AsynchronousInsertThreads │ 1 │ Number of threads in the AsynchronousInsert thread pool. │
+│ AsynchronousInsertThreadsActive │ 0 │ Number of threads in the AsynchronousInsert thread pool running a task. │
+│ AsynchronousInsertThreadsScheduled │ 0 │ Number of queued or active jobs in the AsynchronousInsert thread pool. │
+│ AsynchronousInsertQueueSize │ 1 │ Number of pending tasks in the AsynchronousInsert queue. │
+│ AsynchronousInsertQueueBytes │ 680 │ Number of pending bytes in the AsynchronousInsert queue. │
+│ PendingAsyncInsert │ 7 │ Number of asynchronous inserts that are waiting for flush. │
+│ AsyncInsertCacheSize │ 0 │ Number of async insert hash id in cache │
+└────────────────────────────────────┴───────┴─────────────────────────────────────────────────────────────────────────┘
+```
diff --git a/content/en/altinity-kb-queries-and-syntax/atomic-insert.md b/content/en/altinity-kb-queries-and-syntax/atomic-insert.md
index 91351e2dfa..61a96e7d42 100644
--- a/content/en/altinity-kb-queries-and-syntax/atomic-insert.md
+++ b/content/en/altinity-kb-queries-and-syntax/atomic-insert.md
@@ -10,12 +10,18 @@ An insert will create one part if:
* Data is inserted directly into a MergeTree table
* Data is inserted into a single partition.
+* Smaller blocks are properly squashed up to the configured block size (`min_insert_block_size_rows` and `min_insert_block_size_bytes`)
* For INSERT FORMAT:
* Number of rows is less than `max_insert_block_size` (default is `1048545`)
- * Parallel formatting is disabled (For TSV, TKSV, CSV, and JSONEachRow formats setting `input_format_parallel_parsing=0` is set).
-* For INSERT SELECT:
- * Number of rows is less than `max_block_size`
-* Smaller blocks are properly squashed up to the configured block size (`min_insert_block_size_rows` and `min_insert_block_size_bytes`)
+ * Parallel formatting is disabled (For TSV, TSKV, CSV, and JSONEachRow formats setting `input_format_parallel_parsing=0` is set).
+* For INSERT SELECT (including all variants with table functions), data for insert should be created fully deterministically.
+  * non-deterministic functions like rand() are not used in the SELECT
+ * Number of rows/bytes is less than `min_insert_block_size_rows` and `min_insert_block_size_bytes`
+ * And one of:
+ * setting max_threads to 1
+ * adding ORDER BY to the table's DDL (not ordering by tuple)
+ * There is some ORDER BY inside SELECT
+ * See [example](https://fiddle.clickhouse.com/48d38d3d-668d-4513-ba21-e595276b3136)
* The MergeTree table doesn't have Materialized Views (there is no atomicity Table <> MV)
https://github.com/ClickHouse/ClickHouse/issues/9195#issuecomment-587500824
@@ -25,23 +31,23 @@ https://github.com/ClickHouse/ClickHouse/issues/5148#issuecomment-487757235
### Generate test data in Native and TSV format ( 100 millions rows )
-Text formats and Native format require different set of settings, here I want to find / demonstrate mandatory minumum of settings for any case.
+Text formats and the Native format require different sets of settings; here I want to find / demonstrate the mandatory minimum of settings for each case.
```bash
clickhouse-client -q \
- 'select toInt64(number) A, toString(number) S from numbers(100000000) format Native' > t.native
+ 'SELECT toInt64(number) A, toString(number) S FROM numbers(100000000) FORMAT Native' > t.native
clickhouse-client -q \
- 'select toInt64(number) A, toString(number) S from numbers(100000000) format TSV' > t.tsv
+ 'SELECT toInt64(number) A, toString(number) S FROM numbers(100000000) FORMAT TSV' > t.tsv
```
### Insert with default settings (not atomic)
```bash
-drop table if exists trg;
-create table trg(A Int64, S String) Engine=MergeTree order by A;
+DROP TABLE IF EXISTS trg;
+CREATE TABLE trg(A Int64, S String) Engine=MergeTree ORDER BY A;
-- Load data in Native format
-clickhouse-client -q 'insert into trg format Native' (y <= ts), state_arr, ts_arr)) AS uniq
+ toStartOfDay(ts) AS ts,
+ uniqExactMerge(uniqExactState(user_id)) OVER (ORDER BY ts ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS uniq
+FROM events
+GROUP BY ts
+ORDER BY ts ASC
+
+┌──────────────────ts─┬─uniq─┐
+│ 2021-04-29 00:00:00 │ 2 │
+│ 2021-04-30 00:00:00 │ 3 │
+│ 2021-05-01 00:00:00 │ 4 │
+│ 2021-05-02 00:00:00 │ 5 │
+│ 2021-05-03 00:00:00 │ 7 │
+└─────────────────────┴──────┘
+
+SELECT
+ ts,
+ uniqExactMerge(state) OVER (ORDER BY ts ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS uniq
FROM
(
SELECT
- toStartOfDay(ts) AS _ts,
+ toStartOfDay(ts) AS ts,
uniqExactState(user_id) AS state
FROM events
- GROUP BY _ts
+ GROUP BY ts
)
ORDER BY ts ASC
@@ -46,11 +58,17 @@ ORDER BY ts ASC
│ 2021-05-02 00:00:00 │ 5 │
│ 2021-05-03 00:00:00 │ 7 │
└─────────────────────┴──────┘
+```
-WITH arrayJoin(range(toUInt32(_ts) AS int, least(int + toUInt32((3600 * 24) * 5), toUInt32(toDateTime('2021-05-04 00:00:00'))), 3600 * 24)) AS ts_expanded
+## Using arrays
+
+```sql
+WITH
+ groupArray(_ts) AS ts_arr,
+ groupArray(state) AS state_arr
SELECT
- toDateTime(ts_expanded) AS ts,
- uniqExactMerge(state) AS uniq
+ arrayJoin(ts_arr) AS ts,
+ arrayReduce('uniqExactMerge', arrayFilter((x, y) -> (y <= ts), state_arr, ts_arr)) AS uniq
FROM
(
SELECT
@@ -59,7 +77,6 @@ FROM
FROM events
GROUP BY _ts
)
-GROUP BY ts
ORDER BY ts ASC
┌──────────────────ts─┬─uniq─┐
@@ -69,22 +86,20 @@ ORDER BY ts ASC
│ 2021-05-02 00:00:00 │ 5 │
│ 2021-05-03 00:00:00 │ 7 │
└─────────────────────┴──────┘
-```
-## Using window functions (starting from Clickhouse 21.3)
-
-```sql
+WITH arrayJoin(range(toUInt32(_ts) AS int, least(int + toUInt32((3600 * 24) * 5), toUInt32(toDateTime('2021-05-04 00:00:00'))), 3600 * 24)) AS ts_expanded
SELECT
- ts,
- uniqExactMerge(state) OVER (ORDER BY ts ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS uniq
+ toDateTime(ts_expanded) AS ts,
+ uniqExactMerge(state) AS uniq
FROM
(
SELECT
- toStartOfDay(ts) AS ts,
+ toStartOfDay(ts) AS _ts,
uniqExactState(user_id) AS state
FROM events
- GROUP BY ts
+ GROUP BY _ts
)
+GROUP BY ts
ORDER BY ts ASC
┌──────────────────ts─┬─uniq─┐
diff --git a/content/en/altinity-kb-queries-and-syntax/data-types-on-disk-and-in-ram.md b/content/en/altinity-kb-queries-and-syntax/data-types-on-disk-and-in-ram.md
index 2150d339f9..daa99a3301 100644
--- a/content/en/altinity-kb-queries-and-syntax/data-types-on-disk-and-in-ram.md
+++ b/content/en/altinity-kb-queries-and-syntax/data-types-on-disk-and-in-ram.md
@@ -39,4 +39,4 @@ description: >
-See also [https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup41/data_processing.pdf](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup41/data_processing.pdf) (slide 17-22)
+See also the presentation [Data processing into ClickHouse®](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup41/data_processing.pdf), especially slides 17-22.
diff --git a/content/en/altinity-kb-queries-and-syntax/datetime64.md b/content/en/altinity-kb-queries-and-syntax/datetime64.md
index fae72c9f16..601cf478d6 100644
--- a/content/en/altinity-kb-queries-and-syntax/datetime64.md
+++ b/content/en/altinity-kb-queries-and-syntax/datetime64.md
@@ -2,11 +2,9 @@
title: "DateTime64"
linkTitle: "DateTime64"
weight: 100
-description: >-
- DateTime64 data type
---
-## Substract fractional seconds
+## Subtract fractional seconds
```sql
WITH toDateTime64('2021-09-07 13:41:50.926', 3) AS time
diff --git a/content/en/altinity-kb-queries-and-syntax/delete-via-tombstone-column.md b/content/en/altinity-kb-queries-and-syntax/delete-via-tombstone-column.md
index 32b7292d9f..61ccb70496 100644
--- a/content/en/altinity-kb-queries-and-syntax/delete-via-tombstone-column.md
+++ b/content/en/altinity-kb-queries-and-syntax/delete-via-tombstone-column.md
@@ -4,6 +4,12 @@ linkTitle: "DELETE via tombstone column"
description: >
DELETE via tombstone column
---
+
+This article provides an overview of the different methods to handle row deletion in ClickHouse, using tombstone columns and ALTER UPDATE or DELETE. The goal is to highlight the performance impacts of different techniques and storage settings, including a scenario using S3 for remote storage.
+
+1. Creating a Test Table
+We will start by creating a simple MergeTree table with a tombstone column (is_active) to track active rows:
+
```sql
CREATE TABLE test_delete
(
@@ -16,7 +22,10 @@ CREATE TABLE test_delete
)
ENGINE = MergeTree
ORDER BY key;
-
+```
+2. Inserting Data
+Insert sample data into the table:
+```sql
INSERT INTO test_delete (key, ts, value_a, value_b, value_c) SELECT
number,
1,
@@ -25,8 +34,12 @@ INSERT INTO test_delete (key, ts, value_a, value_b, value_c) SELECT
concat('string', toString(number))
FROM numbers(10000000);
-INSERT INTO test_delete (key, ts, value_a, value_b, value_c) VALUES (400000, 2, 'totally different string', 'another totally different string', 'last string');
+INSERT INTO test_delete (key, ts, value_a, value_b, value_c) VALUES (400000, 2, 'totally different string', 'another totally different string', 'last string');
+```
+3. Querying the Data
+To verify the inserted data:
+```sql
SELECT *
FROM test_delete
WHERE key = 400000;
@@ -37,31 +50,49 @@ WHERE key = 400000;
┌────key─┬─ts─┬─value_a──────────────────┬─value_b────────────────┬─value_c──────┬─is_active─┐
│ 400000 │ 1 │ some_looong_string400000 │ another_long_str400000 │ string400000 │ 1 │
└────────┴────┴──────────────────────────┴────────────────────────┴──────────────┴───────────┘
+```
+This should return two rows with different ts values.
+
+4. Soft Deletion Using ALTER UPDATE
+Instead of deleting a row, you can mark it as inactive by setting is_active to 0:
+```sql
SET mutations_sync = 2;
ALTER TABLE test_delete
UPDATE is_active = 0 WHERE (key = 400000) AND (ts = 1);
-
Ok.
0 rows in set. Elapsed: 0.058 sec.
-
+```
+After updating, you can filter on the tombstone column. For example, to see the row that was just marked as inactive:
+```sql
SELECT *
FROM test_delete
-WHERE (key = 400000) AND is_active;
-
-┌────key─┬─ts─┬─value_a──────────────────┬─value_b──────────────────────────┬─value_c─────┬─is_active─┐
-│ 400000 │ 2 │ totally different string │ another totally different string │ last string │ 1 │
-└────────┴────┴──────────────────────────┴──────────────────────────────────┴─────────────┴───────────┘
+WHERE (key = 400000) AND is_active=0;
+┌────key─┬─ts─┬─value_a──────────────────┬─value_b────────────────┬─value_c──────┬─is_active─┐
+│ 400000 │ 1 │ some_looong_string400000 │ another_long_str400000 │ string400000 │ 0 │
+└────────┴────┴──────────────────────────┴────────────────────────┴──────────────┴───────────┘
+```
+5. Hard Deletion Using ALTER DELETE
+If you need to completely remove a row from the table, you can use ALTER DELETE:
+```sql
ALTER TABLE test_delete
DELETE WHERE (key = 400000) AND (ts = 1);
Ok.
0 rows in set. Elapsed: 1.101 sec. -- 20 times slower!!!
+```
+However, this operation is significantly slower than the ALTER UPDATE approach. For example:
+
+* ALTER DELETE: around 1.1 seconds
+* ALTER UPDATE: only 0.05 seconds
+
+The reason for this difference is that DELETE has to rewrite all columns of the affected parts, while UPDATE only rewrites the tombstone column.
+
+```sql
SELECT *
FROM test_delete
WHERE key = 400000;
@@ -70,7 +101,7 @@ WHERE key = 400000;
│ 400000 │ 2 │ totally different string │ another totally different string │ last string │ 1 │
└────────┴────┴──────────────────────────┴──────────────────────────────────┴─────────────┴───────────┘
--- For ReplacingMergeTree
+-- For ReplacingMergeTree -> https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replacingmergetree
OPTIMIZE TABLE test_delete FINAL;
@@ -86,3 +117,94 @@ WHERE key = 400000
│ 400000 │ 2 │ totally different string │ another totally different string │ last string │ 1 │
└────────┴────┴──────────────────────────┴──────────────────────────────────┴─────────────┴───────────┘
```
+
+To summarize the two approaches:
+
+* Soft Deletion (via ALTER UPDATE): a quicker approach that does not involve physical data deletion but rather updates the tombstone column.
+* Hard Deletion (via ALTER DELETE): can take significantly longer, especially with large datasets stored in remote storage like S3.
+
+6. Deletion Performance with S3 Storage
+When using S3 for storage, the DELETE operation becomes even slower due to the overhead of handling remote data. Here’s an example with a table using S3-backed storage:
+
+```sql
+CREATE TABLE test_delete
+(
+ `key` UInt32,
+ `value_a` String,
+ `value_b` String,
+ `value_c` String,
+ `is_deleted` UInt8 DEFAULT 0
+)
+ENGINE = MergeTree
+ORDER BY key
+SETTINGS storage_policy = 's3tiered';
+
+INSERT INTO test_delete (key, value_a, value_b, value_c) SELECT
+ number,
+ concat('some_looong_string', toString(number)),
+ concat('another_long_str', toString(number)),
+ concat('really long string', toString(arrayMap(i -> cityHash64(i*number), range(50))))
+FROM numbers(10000000);
+
+OPTIMIZE TABLE test_delete FINAL;
+
+ALTER TABLE test_delete MOVE PARTITION tuple() TO DISK 's3disk';
+
+SELECT count() FROM test_delete;
+┌──count()─┐
+│ 10000000 │
+└──────────┘
+1 row in set. Elapsed: 0.002 sec.
+```
+
+7. DELETE Using ALTER UPDATE and Row Policy
+You can also control visibility at the query level using row policies. For example, create a policy that only shows rows where is_active = 1, and then "delete" rows by setting is_active = 0 with ALTER UPDATE:
+
+```sql
+CREATE ROW POLICY pol1 ON test_delete USING is_active=1 TO all;
+
+SELECT count() FROM test_delete; -- select count() became much slower, it reads data now, not metadata
+┌──count()─┐
+│ 10000000 │
+└──────────┘
+1 row in set. Elapsed: 0.314 sec. Processed 10.00 million rows, 10.00 MB (31.84 million rows/s., 31.84 MB/s.)
+
+ALTER TABLE test_delete UPDATE is_active = 0 WHERE (key = 400000) settings mutations_sync = 2;
+0 rows in set. Elapsed: 1.256 sec.
+
+SELECT count() FROM test_delete;
+┌─count()─┐
+│ 9999999 │
+└─────────┘
+```
+This impacts the performance of queries like SELECT count(), as ClickHouse now needs to scan data instead of reading metadata.
+
+8. DELETE Using [ALTER DELETE](https://clickhouse.com/docs/en/sql-reference/statements/alter/delete)
+To delete a row using ALTER DELETE:
+
+```sql
+ALTER TABLE test_delete DELETE WHERE (key = 400001) settings mutations_sync = 2;
+0 rows in set. Elapsed: 955.672 sec.
+
+SELECT count() FROM test_delete;
+┌─count()─┐
+│ 9999998 │
+└─────────┘
+```
+This operation may take significantly longer compared to soft deletion (around 955 seconds in this example for a large dataset).
+
+9. DELETE Using the [DELETE Statement](https://clickhouse.com/docs/en/sql-reference/statements/delete)
+The DELETE statement can also be used to remove data from a table:
+
+```sql
+DELETE FROM test_delete WHERE (key = 400002);
+0 rows in set. Elapsed: 1.281 sec.
+
+SELECT count() FROM test_delete;
+┌─count()─┐
+│ 9999997 │
+└─────────┘
+```
+This operation is faster, with an elapsed time of around 1.28 seconds in this case.
+
+The choice between ALTER UPDATE and ALTER DELETE depends on your use case. For soft deletes, updating a tombstone column is significantly faster and easier to manage. However, if you need to physically remove rows, be mindful of the performance costs, especially with remote storage like S3.
diff --git a/content/en/altinity-kb-queries-and-syntax/distinct-vs-group-by-vs-limit-by.md b/content/en/altinity-kb-queries-and-syntax/distinct-vs-group-by-vs-limit-by.md
index 2ec384e2a9..20779629b2 100644
--- a/content/en/altinity-kb-queries-and-syntax/distinct-vs-group-by-vs-limit-by.md
+++ b/content/en/altinity-kb-queries-and-syntax/distinct-vs-group-by-vs-limit-by.md
@@ -2,8 +2,6 @@
title: "DISTINCT & GROUP BY & LIMIT 1 BY what the difference"
linkTitle: "DISTINCT & GROUP BY & LIMIT 1 BY what the difference"
weight: 100
-description: >-
- Page description for heading and indexes.
---
## DISTINCT
@@ -94,7 +92,7 @@ MemoryTracker: Peak memory usage (for query): 4.05 GiB.
0 rows in set. Elapsed: 4.852 sec. Processed 100.00 million rows, 800.00 MB (20.61 million rows/s., 164.88 MB/s.)
-This query faster than first, because ClickHouse doesn't need to merge states for all keys, only for first 1000 (based on LIMIT)
+This query is faster than the first one, because ClickHouse® doesn't need to merge states for all keys, only for the first 1000 (based on LIMIT)
SELECT number % 1000 AS key
@@ -119,7 +117,7 @@ MemoryTracker: Peak memory usage (for query): 3.77 MiB.
```
* Multi threaded
-* Will return result only after competion of aggregation
+* Will return result only after completion of aggregation
## LIMIT BY
diff --git a/content/en/altinity-kb-queries-and-syntax/explain-query.md b/content/en/altinity-kb-queries-and-syntax/explain-query.md
index 9517af142b..685453e570 100644
--- a/content/en/altinity-kb-queries-and-syntax/explain-query.md
+++ b/content/en/altinity-kb-queries-and-syntax/explain-query.md
@@ -10,10 +10,12 @@ description: >
```sql
EXPLAIN AST
SYNTAX
- PLAN header = 0,
+ PLAN indexes = 0,
+ header = 0,
description = 1,
actions = 0,
optimize = 1
+ json = 0
PIPELINE header = 0,
graph = 0,
compact = 1
@@ -25,7 +27,9 @@ SELECT ...
* `SYNTAX` - query text after AST-level optimizations
* `PLAN` - query execution plan
* `PIPELINE` - query execution pipeline
-* `ESTIMATE` - https://github.com/ClickHouse/ClickHouse/pull/26131 (since 21.9)
+* `ESTIMATE` - See [Estimates for select query](https://github.com/ClickHouse/ClickHouse/pull/26131), available since ClickHouse® 21.9
+* `indexes=1` - supported starting from 21.6 ([#22352](https://github.com/ClickHouse/ClickHouse/pull/22352))
+* `json=1` - supported starting from 21.6 ([#23082](https://github.com/ClickHouse/ClickHouse/pull/23082))
References
diff --git a/content/en/altinity-kb-queries-and-syntax/group-by/_index.md b/content/en/altinity-kb-queries-and-syntax/group-by/_index.md
index e7866bc3a9..5a79605fad 100644
--- a/content/en/altinity-kb-queries-and-syntax/group-by/_index.md
+++ b/content/en/altinity-kb-queries-and-syntax/group-by/_index.md
@@ -6,7 +6,7 @@ keywords:
- clickhouse group by
- clickhouse memory
description: >
- Learn about GROUP BY clause in ClickHouse.
+ Learn about the GROUP BY clause in ClickHouse®
weight: 1
---
@@ -14,7 +14,7 @@ weight: 1
[Code](https://github.com/ClickHouse/ClickHouse/blob/8ab5270ded39c8b044f60f73c1de00c8117ab8f2/src/Interpreters/Aggregator.cpp#L382)
-ClickHouse uses non-blocking? hash tables, so each thread has at least one hash table.
+ClickHouse® uses non-blocking? hash tables, so each thread has at least one hash table.
It makes easier to not care about sync between multiple threads, but has such disadvantages as:
1. Bigger memory usage.
@@ -52,7 +52,7 @@ https://clickhouse.com/docs/en/sql-reference/statements/select/group-by/#select-
## optimize_aggregation_in_order GROUP BY
-Usually it works slower than regular GROUP BY, because ClickHouse need's to read and process data in specific ORDER, which makes it much more complicated to parallelize reading and aggregating.
+Usually it works slower than regular GROUP BY, because ClickHouse needs to read and process data in a specific order, which makes it much more complicated to parallelize reading and aggregating.
But it use much less memory, because ClickHouse can stream resultset and there is no need to keep it in memory.
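+
+A sketch of how to enable it per query (hypothetical table `hits` whose ORDER BY key starts with `user_id`):
+
+```sql
+SELECT user_id, count() AS cnt
+FROM hits
+GROUP BY user_id
+SETTINGS optimize_aggregation_in_order = 1;
+```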
@@ -143,7 +143,7 @@ Size of keys participated in GROUP BY
2. States of aggregation functions:
-Be careful with function, which state can use unrestricted amount of memory and grow indefenetely:
+Be careful with functions whose state can use an unrestricted amount of memory and grow indefinitely (a lower-memory alternative is sketched after this list):
- groupArray (groupArray(1000)())
- uniqExact (uniq,uniqCombined)
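+
+A sketch of a lower-memory alternative (table and column are hypothetical):
+
+```sql
+-- uniqExact keeps every distinct value in memory;
+-- uniq / uniqCombined use bounded sketches at the cost of a small error
+SELECT uniqExact(user_id) AS exact, uniqCombined(user_id) AS approx
+FROM events;
+```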
diff --git a/content/en/altinity-kb-queries-and-syntax/group-by/tricks.md b/content/en/altinity-kb-queries-and-syntax/group-by/tricks.md
index 1483073b7d..4595d6a279 100644
--- a/content/en/altinity-kb-queries-and-syntax/group-by/tricks.md
+++ b/content/en/altinity-kb-queries-and-syntax/group-by/tricks.md
@@ -54,7 +54,7 @@ FROM numbers_mt(1000000000)
-All queries and datasets are unique, so in different situations different hacks could work better or worsen.
+All queries and datasets are unique, so in different situations different hacks could work better or worse.
### PreFilter values before GROUP BY
@@ -90,11 +90,11 @@ FORMAT `Null`
### Use Fixed-width data types instead of String
-EG you have 2 strings which has values in special form like this
+For example, you have 2 strings which have values in a special form like this:
'ABX 1412312312313'
-You can just remove 4 first characters and convert rest of them to UInt64
+You can just remove the first 4 characters and convert the rest to UInt64
toUInt64(substr('ABX 1412312312313',5))
@@ -193,7 +193,7 @@ Elapsed: 6.247 sec. Processed 1.00 billion rows, 27.00 GB (160.09 million rows/s
```
-It can be especially useful when you tries to do GROUP BY lc_column_1, lc_column_2 and ClickHouse falls back to serialized algorytm.
+It can be especially useful when you try to do GROUP BY lc_column_1, lc_column_2 and ClickHouse® falls back to the serialized algorithm.
### Two LowCardinality Columns in GROUP BY
@@ -281,9 +281,9 @@ Elapsed: 2.910 sec. Processed 1.00 billion rows, 27.00 GB (343.64 million rows/s
```
### Shard your data by one of common high cardinal GROUP BY key
+So on each shard you will have 1/N of all unique combinations, and this will result in smaller hash tables.
+So on each shard you will have 1/N of all unique combination and this will result in smaller hash tables.
-Lets create 2 distributed tables with different distribution: rand() and by user_id
+Let's create 2 distributed tables with different distribution: rand() and by user_id
```sql
CREATE TABLE sessions_distributed AS sessions
@@ -728,7 +728,7 @@ MemoryTracker: Peak memory usage (for query): 14.55 GiB.
### Reduce number of threads
-Because each thread use independent hash table, if you lower thread amount it will reduce number of hash tables as well and lower memory usage at the cost of slower query execution.
+Because each thread uses an independent hash table, if you lower thread amount it will reduce number of hash tables as well and lower memory usage at the cost of slower query execution.
```sql
@@ -1093,7 +1093,7 @@ https://github.com/ClickHouse/ClickHouse/pull/33439
### GROUP BY in external memory
-Slow
+Slow!
### Use hash function for GROUP BY keys
diff --git a/content/en/altinity-kb-queries-and-syntax/joins/_index.md b/content/en/altinity-kb-queries-and-syntax/joins/_index.md
index 7868e67f86..6f6267594a 100644
--- a/content/en/altinity-kb-queries-and-syntax/joins/_index.md
+++ b/content/en/altinity-kb-queries-and-syntax/joins/_index.md
@@ -3,9 +3,70 @@ title: "JOINs"
linkTitle: "JOINs"
description: >
JOINs
+aliases:
+ - /altinity-kb-queries-and-syntax/joins/join-table-engine/
---
-See presentation:
+Resources:
-[https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/join.pdf](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/join.pdf)
+* [Overview of JOINs (Russian)](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/join.pdf) - Presentation from Meetup 38 in 2019
+* [Notes on JOIN options](https://excalidraw.com/#json=xX_heZcCu0whsDmC2Mdvo,ppbUVFpPz-flJu5ZDnwIPw)
-https://excalidraw.com/#json=xX_heZcCu0whsDmC2Mdvo,ppbUVFpPz-flJu5ZDnwIPw
+## Join Table Engine
+
+The main purpose of the JOIN table engine is to avoid building the right table for joining on each query execution. It's usually used when you have a large number of fast queries which share the same right table for joining.
+
+### Updates
+
+It's possible to update rows with setting `join_any_take_last_row` enabled.
+
+```sql
+CREATE TABLE id_val_join
+(
+ `id` UInt32,
+ `val` UInt8
+)
+ENGINE = Join(ANY, LEFT, id)
+SETTINGS join_any_take_last_row = 1
+
+Ok.
+
+INSERT INTO id_val_join VALUES (1,21)(1,22)(3,23);
+
+Ok.
+
+SELECT *
+FROM
+(
+ SELECT toUInt32(number) AS id
+ FROM numbers(4)
+) AS n
+ANY LEFT JOIN id_val_join USING (id)
+
+┌─id─┬─val─┐
+│ 0 │ 0 │
+│ 1 │ 22 │
+│ 2 │ 0 │
+│ 3 │ 23 │
+└────┴─────┘
+
+INSERT INTO id_val_join VALUES (1,40)(2,24);
+
+Ok.
+
+SELECT *
+FROM
+(
+ SELECT toUInt32(number) AS id
+ FROM numbers(4)
+) AS n
+ANY LEFT JOIN id_val_join USING (id)
+
+┌─id─┬─val─┐
+│ 0 │ 0 │
+│ 1 │ 40 │
+│ 2 │ 24 │
+│ 3 │ 23 │
+└────┴─────┘
+```
+
+[Join table engine documentation](https://clickhouse.com/docs/en/engines/table-engines/special/join/)
diff --git a/content/en/altinity-kb-queries-and-syntax/joins/join-table-engine.md b/content/en/altinity-kb-queries-and-syntax/joins/join-table-engine.md
index 86a4453fad..1b0a6fb757 100644
--- a/content/en/altinity-kb-queries-and-syntax/joins/join-table-engine.md
+++ b/content/en/altinity-kb-queries-and-syntax/joins/join-table-engine.md
@@ -3,6 +3,7 @@ title: "JOIN table engine"
linkTitle: "JOIN table engine"
description: >
JOIN table engine
+draft: true
---
The main purpose of JOIN table engine is to avoid building the right table for joining on each query execution. So it's usually used when you have a high amount of fast queries which share the same right table for joining.
@@ -60,4 +61,4 @@ ANY LEFT JOIN id_val_join USING (id)
└────┴─────┘
```
-[https://clickhouse.tech/docs/en/engines/table-engines/special/join/](https://clickhouse.tech/docs/en/engines/table-engines/special/join/)
+[Join table engine documentation](https://clickhouse.com/docs/en/engines/table-engines/special/join/)
diff --git a/content/en/altinity-kb-queries-and-syntax/joins/joins-tricks.md b/content/en/altinity-kb-queries-and-syntax/joins/joins-tricks.md
new file mode 100644
index 0000000000..26468cee0b
--- /dev/null
+++ b/content/en/altinity-kb-queries-and-syntax/joins/joins-tricks.md
@@ -0,0 +1,400 @@
+---
+title: "JOIN optimization tricks"
+linkTitle: "JOIN optimization tricks"
+---
+
+All tests below were done with the default `hash` join. ClickHouse® joins are evolving rapidly, and behavior varies with other join types.
+
+# Data
+
+For our exercise, we will use two tables from the well-known TPC-DS benchmark: store_sales and customer. Table sizes are the following:
+
+* store_sales = 2 billion rows
+* customer = 12 million rows
+
+So there are about 200 rows in the store_sales table per customer on average. Also, 90% of customers made 1-10 purchases.
+
+Schema example:
+
+```sql
+CREATE TABLE store_sales
+(
+ `ss_sold_time_sk` DateTime,
+ `ss_sold_date_sk` Date,
+ `ss_ship_date_sk` Date,
+ `ss_item_sk` UInt32,
+ `ss_customer_sk` UInt32,
+ `ss_cdemo_sk` UInt32,
+ `ss_hdemo_sk` UInt32,
+ `ss_addr_sk` UInt32,
+ `ss_store_sk` UInt32,
+ `ss_promo_sk` UInt32,
+ `ss_ticket_number` UInt32,
+ `ss_quantity` UInt32,
+ `ss_wholesale_cost` Float64,
+ `ss_list_price` Float64,
+ `ss_sales_price` Float64,
+ `ss_ext_discount_amt` Float64,
+ `ss_ext_sales_price` Float64,
+ `ss_ext_wholesale_cost` Float64,
+ `ss_ext_list_price` Float64,
+ `ss_ext_tax` Float64,
+ `ss_coupon_amt` Float64,
+ `ss_net_paid` Float64,
+ `ss_net_paid_inc_tax` Float64,
+ `ss_net_profit` Float64
+)
+ENGINE = MergeTree
+ORDER BY ss_ticket_number
+
+CREATE TABLE customer
+(
+ `c_customer_sk` UInt32,
+ `c_current_addr_sk` UInt32,
+ `c_first_shipto_date_sk` Date,
+ `c_first_sales_date_sk` Date,
+ `c_salutation` String,
+    `c_first_name` String,
+ `c_last_name` String,
+ `c_preferred_cust_flag` String,
+ `c_birth_date` Date,
+ `c_birth_country` String,
+ `c_login` String,
+ `c_email_address` String,
+ `c_last_review_date` Date
+)
+ENGINE = MergeTree
+ORDER BY c_customer_sk
+```
+
+# Target query
+
+```sql
+SELECT
+ sumIf(ss_sales_price, customer.c_first_name = 'James') AS sum_James,
+ sumIf(ss_sales_price, customer.c_first_name = 'Lisa') AS sum_Lisa,
+ sum(ss_sales_price) AS sum_total
+FROM store_sales
+INNER JOIN customer ON store_sales.ss_customer_sk = customer.c_customer_sk
+```
+
+## Baseline performance
+
+```sql
+SELECT
+ sumIf(ss_sales_price, customer.c_first_name = 'James') AS sum_James,
+ sumIf(ss_sales_price, customer.c_first_name = 'Lisa') AS sum_Lisa,
+ sum(ss_sales_price) AS sum_total
+FROM store_sales
+INNER JOIN customer ON store_sales.ss_customer_sk = customer.c_customer_sk
+
+0 rows in set. Elapsed: 188.384 sec. Processed 2.89 billion rows, 40.60 GB (15.37 million rows/s., 216.92 MB/s.)
+```
+
+## Manual pushdown of conditions
+
+If we look at our query, we only care whether a sale belongs to a customer named `James` or `Lisa` and don't care about the rest of the cases. We can use that.
+
+Usually, ClickHouse is able to push down conditions, but not in this case, because the condition itself is part of a function expression, so you can help it manually in such cases.
+
+```sql
+SELECT
+ sumIf(ss_sales_price, customer.c_first_name = 'James') as sum_James,
+ sumIf(ss_sales_price, customer.c_first_name = 'Lisa') as sum_Lisa,
+ sum(ss_sales_price) as sum_total
+FROM store_sales LEFT JOIN (SELECT * FROM customer WHERE c_first_name = 'James' OR c_first_name = 'Lisa') as customer ON store_sales.ss_customer_sk = customer.c_customer_sk
+
+1 row in set. Elapsed: 35.370 sec. Processed 2.89 billion rows, 40.60 GB (81.76 million rows/s., 1.15 GB/s.)
+```
+
+
+## Reduce right table row size
+
+### Reduce attribute columns (push expression before JOIN step)
+
+Our row from the right table consists of 2 fields: customer_sk and c_first_name.
+The first one is needed for the JOIN itself, so there is not much we can do about it, but we can transform the second column a bit.
+
+Again, let's look at how we use this column in the main query:
+
+customer.c_first_name = 'James'
+customer.c_first_name = 'Lisa'
+
+We calculate 2 simple conditions (which don't have any dependency on data from the left table) and nothing more.
+This means that we can move this calculation to the right table, which gives us 3 improvements!
+
+1. The right table will be smaller -> smaller RAM usage -> better cache hits
+2. We will calculate our conditions over a smaller data set: the right table has only 10 million rows, while after the join (because of the left table) we have 2 billion rows -> a 200 times improvement!
+3. Our resulting table after the JOIN will not have an expensive String column, only a 1-byte UInt8 instead -> less copying of data in memory.
+
+Let's do it:
+
+There are several ways to rewrite that query; let's not bother with the simple ones and go straight to the most optimized:
+
+Put our 2 conditions into a hand-made bitmask:
+
+In order to do that, we will multiply each condition by its own power of two and sum them up:
+
+```
+(c_first_name = 'James') + (2 * (c_first_name = 'Lisa'))
+
+c_first_name | (c_first_name = 'James') + (2 * (c_first_name = 'Lisa'))
+ James       |   00000001
+ Lisa        |   00000010
+```
+
+As you can see, if you do it that way, your conditions will not interfere with each other!
+But we need to be careful with the width of the resulting numeric type.
+Let's write our calculations in type notation:
+`UInt8 + UInt8*2 -> UInt8 + UInt16 -> UInt32`
+
+But we actually do not use more than the first 2 bits, so we need to cast this expression back to UInt8.
+
+The last thing to do is to use the bitTest function to get the result of our condition by its position.
+
+And the resulting query is:
+
+```sql
+SELECT
+ sumIf(ss_sales_price, bitTest(customer.cond, 0)) AS sum_James,
+ sumIf(ss_sales_price, bitTest(customer.cond, 1)) AS sum_Lisa,
+ sum(ss_sales_price) AS sum_total
+FROM store_sales
+LEFT JOIN
+(
+ SELECT
+ c_customer_sk,
+ ((c_first_name = 'James') + (2 * (c_first_name = 'Lisa')))::UInt8 AS cond FROM customer
+ WHERE (c_first_name = 'James') OR (c_first_name = 'Lisa')
+) AS customer ON store_sales.ss_customer_sk = customer.c_customer_sk
+
+1 row in set. Elapsed: 31.699 sec. Processed 2.89 billion rows, 40.60 GB (91.23 million rows/s., 1.28 GB/s.)
+```
+
+### Reduce key column size
+
+But can we do something about our JOIN key column?
+
+Its type is Nullable(UInt64).
+
+Let's check if we really need the 0…18446744073709551615 range for our customer id; there are surely far fewer people on Earth than that number. The same goes for the Nullable trait: we don't care about NULLs in customer_id.
+
+`SELECT max(c_customer_sk) FROM customer`
+
+For sure, we don't need such a wide type.
+Let's remove the Nullable trait and cast the column to UInt32, which is half the byte size of UInt64.
+
+```sql
+SELECT
+ sumIf(ss_sales_price, bitTest(customer.cond, 0)) AS sum_James,
+ sumIf(ss_sales_price, bitTest(customer.cond, 1)) AS sum_Lisa,
+ sum(ss_sales_price) AS sum_total
+FROM store_sales
+LEFT JOIN
+(
+ SELECT
+ CAST(c_customer_sk, 'UInt32') AS c_customer_sk,
+ (c_first_name = 'James') + (2 * (c_first_name = 'Lisa')) AS cond
+ FROM customer
+ WHERE (c_first_name = 'James') OR (c_first_name = 'Lisa')
+) AS customer ON store_sales.ss_customer_sk_nn = customer.c_customer_sk
+
+1 row in set. Elapsed: 27.093 sec. Processed 2.89 billion rows, 26.20 GB (106.74 million rows/s., 967.16 MB/s.)
+```
+
+Another 10% performance improvement from using a UInt32 key instead of Nullable(UInt64).
+Looks pretty neat, we got almost a 10x improvement over our initial query.
+Can we do better?
+
+Probably, but it does mean that we need to get rid of JOIN.
+
+## Use IN clause instead of JOIN
+
+Although all DBMSs support a roughly similar feature set, the performance of individual features differs between databases:
+
+A small example: for PostgreSQL it is recommended to replace big IN clauses with JOINs, because IN clauses have bad performance.
+But for ClickHouse it's the opposite: IN works faster than JOIN, because it only checks key existence in a HashSet and doesn't need to extract any data from the right table.
+
+Let's test that:
+
+```sql
+SELECT
+ sumIf(ss_sales_price, ss_customer_sk IN (
+ SELECT c_customer_sk
+ FROM customer
+ WHERE c_first_name = 'James'
+ )) AS sum_James,
+ sumIf(ss_sales_price, ss_customer_sk IN (
+ SELECT c_customer_sk
+ FROM customer
+ WHERE c_first_name = 'Lisa'
+ )) AS sum_Lisa,
+ sum(ss_sales_price) AS sum_total
+FROM store_sales
+
+1 row in set. Elapsed: 16.546 sec. Processed 2.90 billion rows, 40.89 GB (175.52 million rows/s., 2.47 GB/s.)
+```
+
+Almost 2 times faster than our previous record with JOIN. What if we apply the same c_customer_sk key hint as in the JOIN version?
+
+```sql
+SELECT
+ sumIf(ss_sales_price, ss_customer_sk_nn IN (
+ SELECT c_customer_sk::UInt32
+ FROM customer
+ WHERE c_first_name = 'James'
+ )) AS sum_James,
+ sumIf(ss_sales_price, ss_customer_sk_nn IN (
+ SELECT c_customer_sk::UInt32
+ FROM customer
+ WHERE c_first_name = 'Lisa'
+ )) AS sum_Lisa,
+ sum(ss_sales_price) AS sum_total
+FROM store_sales
+
+1 row in set. Elapsed: 12.355 sec. Processed 2.90 billion rows, 26.49 GB (235.06 million rows/s., 2.14 GB/s.)
+```
+
+Another 25% performance improvement!
+
+But there is one big limitation with the IN approach: what if we have more than just 2 conditions?
+
+```sql
+SELECT
+ sumIf(ss_sales_price, ss_customer_sk_nn IN (
+ SELECT c_customer_sk::UInt32
+ FROM customer
+ WHERE c_first_name = 'James'
+ )) AS sum_James,
+ sumIf(ss_sales_price, ss_customer_sk_nn IN (
+ SELECT c_customer_sk::UInt32
+ FROM customer
+ WHERE c_first_name = 'Lisa'
+ )) AS sum_Lisa,
+ sumIf(ss_sales_price, ss_customer_sk_nn IN (
+ SELECT c_customer_sk::UInt32
+ FROM customer
+ WHERE c_last_name = 'Smith'
+ )) AS sum_Smith,
+ sumIf(ss_sales_price, ss_customer_sk_nn IN (
+ SELECT c_customer_sk::UInt32
+ FROM customer
+ WHERE c_last_name = 'Williams'
+ )) AS sum_Williams,
+ sum(ss_sales_price) AS sum_total
+FROM store_sales
+
+1 row in set. Elapsed: 23.690 sec. Processed 2.93 billion rows, 27.06 GB (123.60 million rows/s., 1.14 GB/s.)
+```
+
+With 4 conditions the IN variant slows down to ~23.7 seconds, roughly double the 2-condition time, since each additional IN set has to be built and probed separately.
+
+## Adhoc alternative to Dictionary with FLAT layout
+
+But first, a short introduction. What the hell is a Dictionary with a FLAT layout?
+
+Basically, it's just a set of Arrays, one per attribute, where the value's position in the attribute array is the dictionary key.
+That puts heavy limitations on what the dictionary key can be, but it gives really good advantages:
+
+`['Alice','James', 'Robert','John', ...].length = 12mil, Memory usage ~ N*sum(sizeOf(String(N)) + 1)`
+
+It means really small memory usage (good cache hit rate) and really fast key lookups (no complex hash calculation).
+
+So, if it's that great, what are the caveats?
+The first one is that your keys should ideally be auto-incremental (with a small number of gaps).
+For the second, let's look at this simple query and write down all the calculations:
+
+```sql
+SELECT sumIf(ss_sales_price, dictGet(...) = 'James')
+```
+
+1. Dictionary call (2 billion times)
+2. String equality check (2 billion times)
+
+Although it's really efficient in terms of the dictGet call and the memory used by the Dictionary, it still materializes the String column (memcpy), and we pay the penalty of evaluating the condition on that string column for every row.
+
+But what if we could first calculate our required condition and create such a "Dictionary" ad hoc at query time?
+
+And we can actually do that!
+But let's repeat our analysis again:
+
+```sql
+SELECT sumIf(ss_sales_price, here_lives_unicorns(dictGet(...) = 'James'))
+```
+
+`['Alice','James', 'Lisa','James', ...].map(x -> multiIf(x = 'James', 1, x = 'Lisa', 2, 0)) => [0,1,2,1,...].length` = 12mil, Memory usage ~ `N*sizeOf(UInt8)` <- it's even smaller than a FLAT dictionary
+
+And actions:
+
+1. String equality check (12 million times)
+2. Create Array (12 million elements)
+3. Array lookup (2 billion times)
+4. UInt8 equality check (2 billion times)
+
+But what is this `here_lives_unicorns` function? Does it exist in ClickHouse?
+
+No, but we can hack it with some array manipulation:
+
+```sql
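+-- Conceptual sketch (not runnable as is): "arr" is a precomputed per-customer condition-code array indexed by customer id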
+SELECT sumIf(ss_sales_price, arr[customer_id] = 2)
+
+WITH (
+ SELECT groupArray(assumeNotNull((c_first_name = 'James') + (2 * (c_first_name = 'Lisa')))::UInt8)
+ FROM
+ (
+ SELECT *
+ FROM customer
+ ORDER BY c_customer_sk ASC
+ )
+ ) AS cond
+SELECT
+ sumIf(ss_sales_price, bitTest(cond[ss_customer_sk], 0)) AS sum_James,
+ sumIf(ss_sales_price, bitTest(cond[ss_customer_sk], 1)) AS sum_Lisa,
+ sum(ss_sales_price) AS sum_total
+FROM store_sales
+
+1 row in set. Elapsed: 13.006 sec. Processed 2.89 billion rows, 40.60 GB (222.36 million rows/s., 3.12 GB/s.)
+
+WITH (
+ SELECT groupArray(assumeNotNull((c_first_name = 'James') + (2 * (c_first_name = 'Lisa')))::UInt8)
+ FROM
+ (
+ SELECT *
+ FROM customer
+ ORDER BY c_customer_sk ASC
+ )
+ ) AS cond,
+ bitTest(cond[ss_customer_sk_nn], 0) AS cond_james,
+ bitTest(cond[ss_customer_sk_nn], 1) AS cond_lisa
+SELECT
+ sumIf(ss_sales_price, cond_james) AS sum_James,
+ sumIf(ss_sales_price, cond_lisa) AS sum_Lisa,
+ sum(ss_sales_price) AS sum_total
+FROM store_sales
+
+
+1 row in set. Elapsed: 10.054 sec. Processed 2.89 billion rows, 26.20 GB (287.64 million rows/s., 2.61 GB/s.)
+```
+
+20% faster than the IN approach. What if we have not 2 but 4 such conditions?
+
+```sql
+WITH (
+ SELECT groupArray(assumeNotNull((((c_first_name = 'James') + (2 * (c_first_name = 'Lisa'))) + (4 * (c_last_name = 'Smith'))) + (8 * (c_last_name = 'Williams')))::UInt8)
+ FROM
+ (
+ SELECT *
+ FROM customer
+ ORDER BY c_customer_sk ASC
+ )
+ ) AS cond
+SELECT
+ sumIf(ss_sales_price, bitTest(cond[ss_customer_sk_nn], 0)) AS sum_James,
+ sumIf(ss_sales_price, bitTest(cond[ss_customer_sk_nn], 1)) AS sum_Lisa,
+ sumIf(ss_sales_price, bitTest(cond[ss_customer_sk_nn], 2)) AS sum_Smith,
+ sumIf(ss_sales_price, bitTest(cond[ss_customer_sk_nn], 3)) AS sum_Williams,
+ sum(ss_sales_price) AS sum_total
+FROM store_sales
+
+1 row in set. Elapsed: 11.454 sec. Processed 2.89 billion rows, 26.39 GB (252.49 million rows/s., 2.30 GB/s.)
+```
+
+As we can see, the Array approach barely notices that we doubled the number of conditions.
diff --git a/content/en/altinity-kb-queries-and-syntax/jsonextract-to-parse-many-attributes-at-a-time.md b/content/en/altinity-kb-queries-and-syntax/jsonextract-to-parse-many-attributes-at-a-time.md
index db5348511a..38fb071d87 100644
--- a/content/en/altinity-kb-queries-and-syntax/jsonextract-to-parse-many-attributes-at-a-time.md
+++ b/content/en/altinity-kb-queries-and-syntax/jsonextract-to-parse-many-attributes-at-a-time.md
@@ -4,6 +4,9 @@ linkTitle: "JSONExtract to parse many attributes at a time"
description: >
JSONExtract to parse many attributes at a time
---
+
+Don't use several JSONExtract calls to parse a big JSON. It's very inefficient, slow, and consumes a lot of CPU. Try to use one JSONExtract to parse the String into a Tuple and then get the needed elements from it:
+
```sql
WITH JSONExtract(json, 'Tuple(name String, id String, resources Nested(description String, format String, tracking_summary Tuple(total UInt32, recent UInt32)), extras Nested(key String, value String))') AS parsed_json
SELECT
@@ -15,3 +18,83 @@ SELECT
tupleElement(tupleElement(tupleElement(parsed_json, 'resources'), 'tracking_summary'), 'recent') AS `resources.tracking_summary.recent`
FROM url('https://raw.githubusercontent.com/jsonlines/guide/master/datagov100.json', 'JSONAsString', 'json String')
```
+However, such parsing requires a static schema - all keys should be present in every row, or you will get an empty structure. More dynamic parsing requires several JSONExtract invocations, but still - try not to scan the same data several times:
+
+```sql
+WITH
+ '{"timestamp":"2024-06-12T14:30:00.001Z","functionality":"DOCUMENT","flowId":"210abdee-6de5-474a-83da-748def0facc1","step":"BEGIN","env":"dev","successful":true,"data":{"action":"initiate_view","stats":{"total":1,"success":1,"failed":0},"client_ip":"192.168.1.100","client_port":"8080"}}' AS json,
+ JSONExtractKeysAndValues(json, 'String') AS m,
+ mapFromArrays(m.1, m.2) AS p
+SELECT
+ extractKeyValuePairs(p['data'])['action'] AS data,
+ (p['successful']) = 'true' AS successful
+FORMAT Vertical
+
+/*
+Row 1:
+──────
+data: initiate_view
+successful: 1
+*/
+
+```
+
+A good approach to get a proper schema from a JSON message is to let `clickhouse-local` schema inference do the job:
+
+```bash
+$ ls example_message.json
+example_message.json
+
+$ clickhouse-local --query="DESCRIBE file('example_message.json', 'JSONEachRow')" --format="Vertical";
+
+Row 1:
+──────
+name: resourceLogs
+type: Array(Tuple(
+ resource Nullable(String),
+ scopeLogs Array(Tuple(
+ logRecords Array(Tuple(
+ attributes Array(Tuple(
+ key Nullable(String),
+ value Tuple(
+ stringValue Nullable(String)))),
+ body Tuple(
+ stringValue Nullable(String)),
+ observedTimeUnixNano Nullable(String),
+ spanId Nullable(String),
+ traceId Nullable(String))),
+ scope Nullable(String)))))
+```
+
+For deeply nested dynamic JSON, if you don't need all the keys, you can parse sublevels specifically. This still requires several JSONExtract calls, but each call has less data to parse, so each pass gets cheaper:
+
+```sql
+CREATE TABLE better_parsing (json String) ENGINE = Memory;
+INSERT INTO better_parsing FORMAT JSONAsString {"timestamp":"2024-06-12T14:30:00.001Z","functionality":"DOCUMENT","flowId":"210abdee-6de5-474a-83da-748def0facc1","step":"BEGIN","env":"dev","successful":true,"data":{"action":"initiate_view","stats":{"total":1,"success":1,"failed":0},"client_ip":"192.168.1.100","client_port":"8080"}}
+
+WITH parsed_content AS
+ (
+ SELECT
+ JSONExtractKeysAndValues(json, 'String') AS 1st_level_arr,
+ mapFromArrays(1st_level_arr.1, 1st_level_arr.2) AS 1st_level_map,
+ JSONExtractKeysAndValues(1st_level_map['data'], 'String') AS 2nd_level_arr,
+ mapFromArrays(2nd_level_arr.1, 2nd_level_arr.2) AS 2nd_level_map,
+ JSONExtractKeysAndValues(2nd_level_map['stats'], 'String') AS 3rd_level_arr,
+ mapFromArrays(3rd_level_arr.1, 3rd_level_arr.2) AS 3rd_level_map
+    FROM better_parsing
+ )
+SELECT
+ 1st_level_map['timestamp'] AS timestamp,
+ 2nd_level_map['action'] AS action,
+    3rd_level_map['total'] AS total,
+    3rd_level_map['nokey'] AS no_key_empty
+FROM parsed_content
+
+/*
+ ┌─timestamp────────────────┬─action────────┬─total─┬─no_key_empty─┐
+1. │ 2024-06-12T14:30:00.001Z │ initiate_view │ 1 │ │
+ └──────────────────────────┴───────────────┴───────┴──────────────┘
+
+1 row in set. Elapsed: 0.003 sec.
+*/
+```
diff --git a/content/en/altinity-kb-queries-and-syntax/lag-lead.md b/content/en/altinity-kb-queries-and-syntax/lag-lead.md
index 8db5c6f649..9ca10840ea 100644
--- a/content/en/altinity-kb-queries-and-syntax/lag-lead.md
+++ b/content/en/altinity-kb-queries-and-syntax/lag-lead.md
@@ -59,7 +59,7 @@ order by g, a;
└───┴────────────┴────────────┴────────────┘
```
-## Using window functions (starting from Clickhouse 21.3)
+## Using window functions (starting from ClickHouse® 21.3)
```sql
SET allow_experimental_window_functions = 1;
diff --git a/content/en/altinity-kb-queries-and-syntax/literal-decimal-or-float.md b/content/en/altinity-kb-queries-and-syntax/literal-decimal-or-float.md
index b2246cfe47..cb8cad57ec 100644
--- a/content/en/altinity-kb-queries-and-syntax/literal-decimal-or-float.md
+++ b/content/en/altinity-kb-queries-and-syntax/literal-decimal-or-float.md
@@ -20,6 +20,21 @@ SELECT
└─────────────────┴─────────────┴────────────────────┴─────────────────────┘
```
+
+> When we try to cast 64.32 to Decimal128(2), the resulting value is 64.31.
+
+When the parser sees a number with a decimal separator, it interprets it as a `Float64` literal (where `64.32` has no exact representation, so you actually get something like `64.319999999999999999`), and later that Float is cast to Decimal by cutting off the extra precision.
+
+The workaround is very simple - wrap the number in quotes (then the query parser treats it as a string literal and converts it to Decimal directly), or use Postgres-like casting syntax:
+
+```sql
+select cast(64.32,'Decimal128(2)') a, cast('64.32','Decimal128(2)') b, 64.32::Decimal128(2) c;
+
+┌─────a─┬─────b─┬─────c─┐
+│ 64.31 │ 64.32 │ 64.32 │
+└───────┴───────┴───────┘
+```
+
## Float64
```sql
diff --git a/content/en/altinity-kb-queries-and-syntax/machine-learning-in-clickhouse.md b/content/en/altinity-kb-queries-and-syntax/machine-learning-in-clickhouse.md
index 06b52f0340..0fafdb833e 100644
--- a/content/en/altinity-kb-queries-and-syntax/machine-learning-in-clickhouse.md
+++ b/content/en/altinity-kb-queries-and-syntax/machine-learning-in-clickhouse.md
@@ -4,8 +4,9 @@ linkTitle: "Machine learning in ClickHouse"
description: >
Machine learning in ClickHouse
---
-[https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup31/ml.pdf](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup31/ml.pdf)
-[CatBoost / MindsDB / Fast.ai]({{}})
+Resources
-[https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/forecast.pdf](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/forecast.pdf)
+* [Machine Learning in ClickHouse](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup31/ml.pdf) - Presentation from 2019 (Meetup 31)
+* [ML discussion: CatBoost / MindsDB / Fast.ai](../../altinity-kb-integrations/catboost-mindsdb-fast.ai) - Brief article from 2021
+* [Machine Learning Forecast (Russian)](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/forecast.pdf) - Presentation from 2019 (Meetup 38)
diff --git a/content/en/altinity-kb-queries-and-syntax/multiple-date-column-in-partition-key.md b/content/en/altinity-kb-queries-and-syntax/multiple-date-column-in-partition-key.md
index 86ef6e0efd..9719b0a2e0 100644
--- a/content/en/altinity-kb-queries-and-syntax/multiple-date-column-in-partition-key.md
+++ b/content/en/altinity-kb-queries-and-syntax/multiple-date-column-in-partition-key.md
@@ -18,7 +18,7 @@ CREATE TABLE part_key_multiple_dates
`inserted_at` DateTime
)
ENGINE = MergeTree
-PARTITION BY (toYYYYMM(date), ignore(created_at), ignore(inserted_at))
+PARTITION BY (toYYYYMM(date), ignore(created_at, inserted_at))
ORDER BY (key, time);
diff --git a/content/en/altinity-kb-queries-and-syntax/mutations.md b/content/en/altinity-kb-queries-and-syntax/mutations.md
index 9b5093eef1..448e698933 100644
--- a/content/en/altinity-kb-queries-and-syntax/mutations.md
+++ b/content/en/altinity-kb-queries-and-syntax/mutations.md
@@ -4,7 +4,7 @@ linkTitle: "Mutations"
description: >
ALTER UPDATE / DELETE
---
-Q. How to know if `ALTER TABLE … DELETE/UPDATE mutation ON CLUSTER` was finished successfully on all the nodes?
+## How to know if `ALTER TABLE … DELETE/UPDATE mutation ON CLUSTER` was finished successfully on all the nodes?
A. mutation status in system.mutations is local to each replica, so use
@@ -14,3 +14,27 @@ SELECT hostname(), * FROM clusterAllReplicas('your_cluster_name', system.mutatio
```
Look on `is_done` and `latest_fail_reason` columns
+
+## Are mutations run in parallel or sequentially in ClickHouse® (within the scope of one table)?
+
+
+
+ClickHouse runs mutations sequentially, but it can combine several mutations into a single one and apply them all in one merge.
+Sometimes this can lead to problems, when the combined expression ClickHouse needs to execute becomes really big (for example, when ClickHouse has combined thousands of mutations into one).
+
+
+Because ClickHouse stores data in independent parts, it is able to run mutation merges for each part independently and in parallel.
+This can also lead to high resource utilization, especially memory usage, if you use `x IN (SELECT ... FROM big_table)` statements in a mutation, because each merge will build and keep in memory its own HashSet. You can avoid this problem by using the [Dictionary approach](../update-via-dictionary) for such mutations.
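+
+As an illustration, the idea is to replace the subquery with a dictionary lookup, so the set is built once instead of once per part merge (a sketch with hypothetical `events`, `deleted_users`, and `user_id` names; see the linked article for the full approach):
+
+```sql
+-- instead of:
+--   ALTER TABLE events DELETE WHERE user_id IN (SELECT user_id FROM deleted_users);
+
+CREATE DICTIONARY deleted_users_dict (user_id UInt64, deleted UInt8 DEFAULT 1)
+PRIMARY KEY user_id
+SOURCE(CLICKHOUSE(TABLE deleted_users))
+LIFETIME(600)
+LAYOUT(HASHED());
+
+ALTER TABLE events DELETE WHERE dictHas('deleted_users_dict', user_id);
+```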
+
+Parallelism of mutations is controlled by the following settings:
+
+```sql
+SELECT *
+FROM system.merge_tree_settings
+WHERE name LIKE '%mutation%'
+
+┌─name───────────────────────────────────────────────┬─value─┬─changed─┬─description──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┐
+│ max_replicated_mutations_in_queue │ 8 │ 0 │ How many tasks of mutating parts are allowed simultaneously in ReplicatedMergeTree queue. │ UInt64 │
+│ number_of_free_entries_in_pool_to_execute_mutation │ 20 │ 0 │ When there is less than specified number of free entries in pool, do not execute part mutations. This is to leave free threads for regular merges and avoid "Too many parts" │ UInt64 │
+└────────────────────────────────────────────────────┴───────┴─────────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┘
+```
diff --git a/content/en/altinity-kb-queries-and-syntax/pivot-unpivot.md b/content/en/altinity-kb-queries-and-syntax/pivot-unpivot.md
index 52d5996f6e..a2847e045c 100644
--- a/content/en/altinity-kb-queries-and-syntax/pivot-unpivot.md
+++ b/content/en/altinity-kb-queries-and-syntax/pivot-unpivot.md
@@ -3,6 +3,9 @@ title: "PIVOT / UNPIVOT"
linkTitle: "PIVOT / UNPIVOT"
description: >
PIVOT / UNPIVOT
+keywords:
+ - clickhouse pivot
+ - clickhouse unpivot
---
## PIVOT
@@ -12,7 +15,7 @@ CREATE TABLE sales(suppkey UInt8, category String, quantity UInt32) ENGINE=Memor
INSERT INTO sales VALUES (2, 'AA' ,7500),(1, 'AB' , 4000),(1, 'AA' , 6900),(1, 'AB', 8900), (1, 'AC', 8300), (1, 'AA', 7000), (1, 'AC', 9000), (2,'AA', 9800), (2,'AB', 9600), (1,'AC', 8900),(1, 'AD', 400), (2,'AD', 900), (2,'AD', 1200), (1,'AD', 2600), (2, 'AC', 9600),(1, 'AC', 6200);
```
-### Using Map data type (starting from Clickhouse 21.1)
+### Using Map data type (starting from ClickHouse® 21.1)
```sql
WITH CAST(sumMap([category], [quantity]), 'Map(String, UInt32)') AS map
@@ -122,18 +125,14 @@ ORDER BY suppkey ASC
│ 3 │ BRAND_C │ AC │ 6900 │
│ 3 │ BRAND_C │ AD │ 3400 │
└─────────┴─────────┴──────────┴──────────┘
-```
-
-### Using tupleToNameValuePairs (starting from ClickHouse 21.9)
-```sql
SELECT
suppkey,
brand,
tpl.1 AS category,
tpl.2 AS quantity
FROM sales_w
-ARRAY JOIN tupleToNameValuePairs((AA, AB, AC, AD)) AS tpl
+ARRAY JOIN tupleToNameValuePairs(CAST((AA, AB, AC, AD), 'Tuple(AA UInt32, AB UInt32, AC UInt32, AD UInt32)')) AS tpl
ORDER BY suppkey ASC
┌─suppkey─┬─brand───┬─category─┬─quantity─┐
@@ -151,3 +150,4 @@ ORDER BY suppkey ASC
│ 3 │ BRAND_C │ AD │ 3400 │
└─────────┴─────────┴──────────┴──────────┘
```
+
diff --git a/content/en/altinity-kb-queries-and-syntax/projections-examples.md b/content/en/altinity-kb-queries-and-syntax/projections-examples.md
index 9ce1da7a05..76803bdedf 100644
--- a/content/en/altinity-kb-queries-and-syntax/projections-examples.md
+++ b/content/en/altinity-kb-queries-and-syntax/projections-examples.md
@@ -1,10 +1,203 @@
---
-title: "Projections examples"
-linkTitle: "Projections examples"
+title: "ClickHouse® Projections"
+linkTitle: "ClickHouse Projections"
description: >
- Projections examples
+ Using this ClickHouse feature to optimize queries
+keywords:
+ - clickhouse projections
+ - clickhouse projection vs materialized view
---
-## Aggregating projections
+
+Projections in ClickHouse act as inner tables within a main table, functioning as a mechanism to optimize queries by using these inner tables when only specific columns are needed. Essentially, a projection is similar to a [Materialized View](/altinity-kb-schema-design/materialized-views/) with an [AggregatingMergeTree engine](/engines/mergetree-table-engine-family/aggregatingmergetree/), designed to be automatically populated with relevant data.
+
+However, too many projections can lead to excess storage, much like overusing Materialized Views. Projections share the same lifecycle as the main table, meaning they are automatically backfilled and don’t require query rewrites, which is particularly advantageous when integrating with BI tools.
+
+Projection parts are stored within the main table parts, and their merges occur simultaneously as the main table merges, ensuring data consistency without additional maintenance.
+
+Compared to a separate table + MV setup:
+- A separate table gives you more freedom (partitioning, granularity, etc.), while projections give you more consistency (parts are managed as a whole).
+- Projections do not support many features (like indexes and FINAL). This has improved in recent versions, but it is still a drawback.
+
+The design approach for projections is the same as for indexes. Create a table and give it to users. If you encounter a slow query, add a projection for that particular query (or set of similar queries). You can create 10+ projections per table and materialize, drop, etc. them - the very same as indexes. You exchange query speed for the disk space/IO and CPU needed to build and rebuild projections on merges.
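+
+For example, the typical projection lifecycle looks like this (a sketch; `my_table`, `my_projection`, and the columns are placeholders):
+
+```sql
+-- add a projection for a slow query pattern
+ALTER TABLE my_table ADD PROJECTION my_projection
+(
+    SELECT col_a, sum(col_b) GROUP BY col_a
+);
+
+-- backfill it for the parts that existed before the projection was added
+ALTER TABLE my_table MATERIALIZE PROJECTION my_projection;
+
+-- drop it again if the extra disk space and merge cost are not worth it
+ALTER TABLE my_table DROP PROJECTION my_projection;
+```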
+
+## Links
+
+* Amos Bird - kuaishou.com - Projections in ClickHouse. [slides](https://github.com/ClickHouse/clickhouse-presentations/blob/master/percona2021/projections.pdf). [video](https://youtu.be/jJ5VuLr2k5k?list=PLWhC0zeznqkkNYzcvHEfZ8hly3Cu9ojKk)
+* [Documentation](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/#projections)
+* [tinybird blog article](https://blog.tinybird.co/2021/07/09/projections/)
+* [ClickHouse presentation on Projections](https://www.youtube.com/watch?v=QDAJTKZT8y4)
+* [Blog video](https://clickhouse.com/videos/how-to-a-clickhouse-query-using-projections)
+
+
+## Why is a ClickHouse projection not used?
+
+The query analyzer must have a reason to use a projection and must not hit any limitation that prevents it:
+
+- The query should use ONLY the columns defined in the projection.
+- There should be a lot of data to read from the main table (gigabytes).
+- For an ORDER BY projection, the query's WHERE clause should refer to the column the projection is ordered by.
+- FINAL queries do not work with projections.
+- Tables with DELETEd rows do not work with projections, because rows in a projection may be affected by a DELETE operation. There is a MergeTree setting `lightweight_mutation_projection_mode` to change this behavior (since 24.7).
+- A projection is used only if it is cheaper to read from it than from the table (the expected number of rows and GBs read is smaller).
+- The projection should be materialized. Verify that all parts have the needed projection by comparing system.parts and system.projection_parts (see the query below).
+- A bug in a ClickHouse version. Look at the [changelog](https://clickhouse.com/docs/whats-new/changelog) and search for "projection".
+- If there are many projections per table, the analyzer can select any of them. If you think a particular one is better, use the settings `preferred_optimize_projection_name` or `force_optimize_projection_name`.
+- If expressions are used instead of plain column names, the query should use the exact expression as defined in the projection, with the same functions and modifiers. Use column aliases to make the query expression the very same as in the projection definition:
+
+```sql
+CREATE TABLE test
+(
+ a Int64,
+ ts DateTime,
+ week alias toStartOfWeek(ts),
+ PROJECTION weekly_projection
+ (
+ SELECT week, sum(a) group by week
+ )
+)
+ENGINE = MergeTree ORDER BY a;
+
+insert into test
+select number, now()-number*100
+from numbers(1e7);
+
+--explain indexes=1
+select week, sum(a) from test group by week
+settings force_optimize_projection=1;
+```
+
+https://fiddle.clickhouse.com/7f331eb2-9408-4813-9c67-caef4cdd227d
+
+Explain result: ReadFromMergeTree (weekly_projection)
+
+```
+Expression ((Project names + Projection))
+ Aggregating
+ Expression
+ ReadFromMergeTree (weekly_projection)
+ Indexes:
+ PrimaryKey
+ Condition: true
+ Parts: 9/9
+ Granules: 9/1223
+```
+
+## Check parts
+
+Use the query below to check that each active part of the base table:
+
+- has the projection materialized
+- does not have lightweight deletes
+
+```
+SELECT
+ p.database AS base_database,
+ p.table AS base_table,
+ p.name AS base_part_name, -- Name of the part in the base table
+ p.has_lightweight_delete,
+ pp.active
+FROM system.parts AS p -- Alias for the base table's parts
+LEFT JOIN system.projection_parts AS pp -- Alias for the projection's parts
+ON p.database = pp.database AND p.table = pp.table
+ AND p.name = pp.parent_name
+ AND pp.name = 'projection'
+WHERE
+ p.database = 'database'
+ AND p.table = 'table'
+ AND p.active -- Consider only active parts of the base table
+ -- and not pp.active -- see only missed in the list
+ORDER BY p.database, p.table, p.name;
+
+```
+
+## Recalculate on Merge
+
+What happens in the case of non-trivial background merges in ReplacingMergeTree, AggregatingMergeTree and similar, and OPTIMIZE table DEDUPLICATE queries?
+
+* Before version 24.8, projections became out of sync with the main data.
+* Since version 24.8, it is controlled by a new table-level setting: [deduplicate_merge_projection_mode](https://clickhouse.com/docs/en/operations/settings/merge-tree-settings#deduplicate_merge_projection_mode) = `throw`/`drop`/`rebuild`
+* Somewhat later (before 25.3) an `ignore` option was introduced. It can be helpful when SummingMergeTree is used with projections and no DELETE operation in any flavor (Replacing/Collapsing/DELETE/ALTER DELETE) is executed over the table.
+
+However, projection usage is still disabled for FINAL queries, so you have to use OPTIMIZE FINAL or SELECT ... GROUP BY instead of FINAL to fight duplicates between parts.
+
+```
+CREATE TABLE users (uid Int16, name String, version Int16,
+ projection xx (
+ select name,uid,version order by name
+ )
+) ENGINE=ReplacingMergeTree order by uid
+settings deduplicate_merge_projection_mode='rebuild'
+ ;
+
+INSERT INTO users
+SELECT
+ number AS uid,
+ concat('User_', toString(uid)) AS name,
+ 1 AS version
+FROM numbers(100000);
+
+INSERT INTO users
+SELECT
+ number AS uid,
+ concat('User_', toString(uid)) AS name,
+ 2 AS version
+FROM numbers(100000);
+
+SELECT 'duplicate',name,uid,version FROM users
+where name ='User_98304'
+settings force_optimize_projection=1 ;
+
+SELECT 'dedup by group by/limit 1 by',name,uid,version FROM users
+where name ='User_98304'
+order by version DESC
+limit 1 by uid
+settings force_optimize_projection=1
+;
+
+optimize table users final ;
+
+SELECT 'dedup after optimize',name,uid,version FROM users
+where name ='User_98304'
+settings force_optimize_projection=1 ;
+
+```
+https://fiddle.clickhouse.com/e1977a66-09ce-43c4-aabc-508c957d44d7
+
+
+## System tables
+
+- system.projections
+- system.projection_parts
+- system.projection_parts_columns
+
+```
+SELECT
+ database,
+ table,
+ name,
+ formatReadableSize(sum(data_compressed_bytes) AS size) AS compressed,
+ formatReadableSize(sum(data_uncompressed_bytes) AS usize) AS uncompressed,
+ round(usize / size, 2) AS compr_rate,
+ sum(rows) AS rows,
+ count() AS part_count
+FROM system.projection_parts
+WHERE active
+GROUP BY
+ database,
+ table,
+ name
+ORDER BY size DESC;
+```
+
+## How to receive a list of tables with projections?
+
+```
+select database, table from system.tables
+where create_table_query ilike '%projection%'
+ and database <> 'system'
+```
+
+## Examples
+
+### Aggregating ClickHouse projections
```sql
create table z(Browser String, Country UInt8, F Float64)
@@ -61,9 +254,9 @@ group by Browser,Country format Null;
Elapsed: 0.005 sec. Processed 22.43 thousand rows
```
-## Emulation of an inverted index using orderby projection
+### Emulation of an inverted index using orderby projection
-You can create an `orderby projection` and include all columns of a table, but if a table is very wide it will double of stored data. This expample demonstrate a trick, we create an `orderby projection` and include primary key columns and the target column and sort by the target column. This allows using subquery to find primary key values and after that to query the table using the primary key.
+You can create an `orderby projection` and include all columns of a table, but if the table is very wide it will double the amount of stored data. This example demonstrates a trick: we create an `orderby projection` that includes the primary key columns and the target column, sorted by the target column. This allows using a subquery to find [primary key values](../../engines/mergetree-table-engine-family/pick-keys/) and then query the table using the primary key.
```sql
CREATE TABLE test_a
@@ -112,8 +305,4 @@ VS
**Elapsed: 0.013 sec. Processed 32.77 thousand rows** -- optimized
-## See also
-* Amos Bird - kuaishou.com - Projections in ClickHouse. [slides](https://github.com/ClickHouse/clickhouse-presentations/blob/master/percona2021/projections.pdf). [video](https://youtu.be/jJ5VuLr2k5k?list=PLWhC0zeznqkkNYzcvHEfZ8hly3Cu9ojKk)
-* [Documentation](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/#projections)
-* [tinybird blog article](https://blog.tinybird.co/2021/07/09/projections/)
diff --git a/content/en/altinity-kb-queries-and-syntax/roaring-bitmaps-for-calculating-retention.md b/content/en/altinity-kb-queries-and-syntax/roaring-bitmaps-for-calculating-retention.md
index 394bb5e16e..074e25ac4c 100644
--- a/content/en/altinity-kb-queries-and-syntax/roaring-bitmaps-for-calculating-retention.md
+++ b/content/en/altinity-kb-queries-and-syntax/roaring-bitmaps-for-calculating-retention.md
@@ -41,4 +41,4 @@ WHERE h IN (0, 1)
└──────┴───────┘
```
-See also [https://cdmana.com/2021/01/20210109005922716t.html](https://cdmana.com/2021/01/20210109005922716t.html)
+See also [A primer on roaring bitmaps](https://vikramoberoi.com/a-primer-on-roaring-bitmaps-what-they-are-and-how-they-work/)
diff --git a/content/en/altinity-kb-queries-and-syntax/row_policy_using_dictionary.md b/content/en/altinity-kb-queries-and-syntax/row_policy_using_dictionary.md
new file mode 100644
index 0000000000..1c945c79ae
--- /dev/null
+++ b/content/en/altinity-kb-queries-and-syntax/row_policy_using_dictionary.md
@@ -0,0 +1,346 @@
+---
+title: "Row policies overhead (hiding 'removed' tenants)"
+linkTitle: "Row policies overhead"
+weight: 100
+description: >
+ One more approach to hide (delete) rows in ClickHouse®
+---
+
+## No row policy
+
+```sql
+CREATE TABLE test_delete
+(
+ tenant Int64,
+ key Int64,
+ ts DateTime,
+ value_a String
+)
+ENGINE = MergeTree
+PARTITION BY toYYYYMM(ts)
+ORDER BY (tenant, key, ts);
+
+INSERT INTO test_delete
+SELECT
+ number%5,
+ number,
+ toDateTime('2020-01-01')+number/10,
+ concat('some_looong_string', toString(number)),
+FROM numbers(1e8);
+
+INSERT INTO test_delete -- multiple small tenants
+SELECT
+ number%5000,
+ number,
+ toDateTime('2020-01-01')+number/10,
+ concat('some_looong_string', toString(number)),
+FROM numbers(1e8);
+```
+
+```sql
+Q1) SELECT tenant, count() FROM test_delete GROUP BY tenant ORDER BY tenant LIMIT 6;
+┌─tenant─┬──count()─┐
+│ 0 │ 20020000 │
+│ 1 │ 20020000 │
+│ 2 │ 20020000 │
+│ 3 │ 20020000 │
+│ 4 │ 20020000 │
+│ 5 │ 20000 │
+└────────┴──────────┘
+6 rows in set. Elapsed: 0.285 sec. Processed 200.00 million rows, 1.60 GB (702.60 million rows/s., 5.62 GB/s.)
+
+Q2) SELECT uniq(value_a) FROM test_delete where tenant = 4;
+┌─uniq(value_a)─┐
+│ 20016427 │
+└───────────────┘
+1 row in set. Elapsed: 0.265 sec. Processed 20.23 million rows, 863.93 MB (76.33 million rows/s., 3.26 GB/s.)
+
+Q3) SELECT max(ts) FROM test_delete where tenant = 4;
+┌─────────────max(ts)─┐
+│ 2020-04-25 17:46:39 │
+└─────────────────────┘
+1 row in set. Elapsed: 0.062 sec. Processed 20.23 million rows, 242.31 MB (324.83 million rows/s., 3.89 GB/s.)
+
+Q4) SELECT max(ts) FROM test_delete where tenant = 4 and key = 444;
+┌─────────────max(ts)─┐
+│ 2020-01-01 00:00:44 │
+└─────────────────────┘
+1 row in set. Elapsed: 0.009 sec. Processed 212.99 thousand rows, 1.80 MB (24.39 million rows/s., 206.36 MB/s.)
+```
+
+## row policy using expression
+
+```sql
+CREATE ROW POLICY pol1 ON test_delete USING tenant not in (1,2,3) TO all;
+
+Q1) SELECT tenant, count() FROM test_delete GROUP BY tenant ORDER BY tenant LIMIT 6;
+┌─tenant─┬──count()─┐
+│ 0 │ 20020000 │
+│ 4 │ 20020000 │
+│ 5 │ 20000 │
+│ 6 │ 20000 │
+│ 7 │ 20000 │
+│ 8 │ 20000 │
+└────────┴──────────┘
+6 rows in set. Elapsed: 0.333 sec. Processed 140.08 million rows, 1.12 GB (420.59 million rows/s., 3.36 GB/s.)
+
+Q2) SELECT uniq(value_a) FROM test_delete where tenant = 4;
+┌─uniq(value_a)─┐
+│ 20016427 │
+└───────────────┘
+1 row in set. Elapsed: 0.287 sec. Processed 20.23 million rows, 863.93 MB (70.48 million rows/s., 3.01 GB/s.)
+
+Q3) SELECT max(ts) FROM test_delete where tenant = 4;
+┌─────────────max(ts)─┐
+│ 2020-04-25 17:46:39 │
+└─────────────────────┘
+1 row in set. Elapsed: 0.080 sec. Processed 20.23 million rows, 242.31 MB (254.20 million rows/s., 3.05 GB/s.)
+
+Q4) SELECT max(ts) FROM test_delete where tenant = 4 and key = 444;
+┌─────────────max(ts)─┐
+│ 2020-01-01 00:00:44 │
+└─────────────────────┘
+1 row in set. Elapsed: 0.011 sec. Processed 212.99 thousand rows, 3.44 MB (19.53 million rows/s., 315.46 MB/s.)
+
+Q5) SELECT uniq(value_a) FROM test_delete where tenant = 1;
+┌─uniq(value_a)─┐
+│ 0 │
+└───────────────┘
+1 row in set. Elapsed: 0.008 sec. Processed 180.22 thousand rows, 1.44 MB (23.69 million rows/s., 189.54 MB/s.)
+
+DROP ROW POLICY pol1 ON test_delete;
+```
+
+## row policy using table subquery
+
+```sql
+create table deleted_tenants(tenant Int64) ENGINE=MergeTree order by tenant;
+
+CREATE ROW POLICY pol1 ON test_delete USING tenant not in deleted_tenants TO all;
+
+SELECT tenant, count() FROM test_delete GROUP BY tenant ORDER BY tenant LIMIT 6;
+┌─tenant─┬──count()─┐
+│ 0 │ 20020000 │
+│ 1 │ 20020000 │
+│ 2 │ 20020000 │
+│ 3 │ 20020000 │
+│ 4 │ 20020000 │
+│ 5 │ 20000 │
+└────────┴──────────┘
+6 rows in set. Elapsed: 0.455 sec. Processed 200.00 million rows, 1.60 GB (439.11 million rows/s., 3.51 GB/s.)
+
+insert into deleted_tenants values(1),(2),(3);
+
+Q1) SELECT tenant, count() FROM test_delete GROUP BY tenant ORDER BY tenant LIMIT 6;
+┌─tenant─┬──count()─┐
+│ 0 │ 20020000 │
+│ 4 │ 20020000 │
+│ 5 │ 20000 │
+│ 6 │ 20000 │
+│ 7 │ 20000 │
+│ 8 │ 20000 │
+└────────┴──────────┘
+6 rows in set. Elapsed: 0.329 sec. Processed 140.08 million rows, 1.12 GB (426.34 million rows/s., 3.41 GB/s.)
+
+Q2) SELECT uniq(value_a) FROM test_delete where tenant = 4;
+┌─uniq(value_a)─┐
+│ 20016427 │
+└───────────────┘
+1 row in set. Elapsed: 0.287 sec. Processed 20.23 million rows, 863.93 MB (70.56 million rows/s., 3.01 GB/s.)
+
+Q3) SELECT max(ts) FROM test_delete where tenant = 4;
+┌─────────────max(ts)─┐
+│ 2020-04-25 17:46:39 │
+└─────────────────────┘
+1 row in set. Elapsed: 0.080 sec. Processed 20.23 million rows, 242.31 MB (251.39 million rows/s., 3.01 GB/s.)
+
+Q4) SELECT max(ts) FROM test_delete where tenant = 4 and key = 444;
+┌─────────────max(ts)─┐
+│ 2020-01-01 00:00:44 │
+└─────────────────────┘
+1 row in set. Elapsed: 0.010 sec. Processed 213.00 thousand rows, 3.44 MB (20.33 million rows/s., 328.44 MB/s.)
+
+Q5) SELECT uniq(value_a) FROM test_delete where tenant = 1;
+┌─uniq(value_a)─┐
+│ 0 │
+└───────────────┘
+1 row in set. Elapsed: 0.008 sec. Processed 180.23 thousand rows, 1.44 MB (22.11 million rows/s., 176.90 MB/s.)
+
+DROP ROW POLICY pol1 ON test_delete;
+DROP TABLE deleted_tenants;
+```
+
+## row policy using external dictionary (NOT dictHas)
+
+```sql
+create table deleted_tenants(tenant Int64, deleted UInt8 default 1) ENGINE=MergeTree order by tenant;
+
+insert into deleted_tenants(tenant) values(1),(2),(3);
+
+CREATE DICTIONARY deleted_tenants_dict (tenant UInt64, deleted UInt8)
+PRIMARY KEY tenant SOURCE(CLICKHOUSE(TABLE deleted_tenants))
+LIFETIME(600) LAYOUT(FLAT());
+
+CREATE ROW POLICY pol1 ON test_delete USING NOT dictHas('deleted_tenants_dict', tenant) TO all;
+
+Q1) SELECT tenant, count() FROM test_delete GROUP BY tenant ORDER BY tenant LIMIT 6;
+┌─tenant─┬──count()─┐
+│ 0 │ 20020000 │
+│ 4 │ 20020000 │
+│ 5 │ 20000 │
+│ 6 │ 20000 │
+│ 7 │ 20000 │
+│ 8 │ 20000 │
+└────────┴──────────┘
+6 rows in set. Elapsed: 0.388 sec. Processed 200.00 million rows, 1.60 GB (515.79 million rows/s., 4.13 GB/s.)
+
+Q2) SELECT uniq(value_a) FROM test_delete where tenant = 4;
+┌─uniq(value_a)─┐
+│ 20016427 │
+└───────────────┘
+1 row in set. Elapsed: 0.291 sec. Processed 20.23 million rows, 863.93 MB (69.47 million rows/s., 2.97 GB/s.)
+
+Q3) SELECT max(ts) FROM test_delete where tenant = 4;
+┌─────────────max(ts)─┐
+│ 2020-04-25 17:46:39 │
+└─────────────────────┘
+1 row in set. Elapsed: 0.084 sec. Processed 20.23 million rows, 242.31 MB (240.07 million rows/s., 2.88 GB/s.)
+
+Q4) SELECT max(ts) FROM test_delete where tenant = 4 and key = 444;
+┌─────────────max(ts)─┐
+│ 2020-01-01 00:00:44 │
+└─────────────────────┘
+1 row in set. Elapsed: 0.010 sec. Processed 212.99 thousand rows, 3.44 MB (21.45 million rows/s., 346.56 MB/s.)
+
+Q5) SELECT uniq(value_a) FROM test_delete where tenant = 1;
+┌─uniq(value_a)─┐
+│ 0 │
+└───────────────┘
+1 row in set. Elapsed: 0.046 sec. Processed 20.22 million rows, 161.74 MB (440.26 million rows/s., 3.52 GB/s.)
+
+DROP ROW POLICY pol1 ON test_delete;
+DROP DICTIONARY deleted_tenants_dict;
+DROP TABLE deleted_tenants;
+```
+
+## row policy using external dictionary (dictHas)
+
+```sql
+create table deleted_tenants(tenant Int64, deleted UInt8 default 1) ENGINE=MergeTree order by tenant;
+
+insert into deleted_tenants(tenant) select distinct tenant from test_delete where tenant not in (1,2,3);
+
+CREATE DICTIONARY deleted_tenants_dict (tenant UInt64, deleted UInt8)
+PRIMARY KEY tenant SOURCE(CLICKHOUSE(TABLE deleted_tenants))
+LIFETIME(600) LAYOUT(FLAT());
+
+CREATE ROW POLICY pol1 ON test_delete USING dictHas('deleted_tenants_dict', tenant) TO all;
+
+Q1) SELECT tenant, count() FROM test_delete GROUP BY tenant ORDER BY tenant LIMIT 6;
+┌─tenant─┬──count()─┐
+│ 0 │ 20020000 │
+│ 4 │ 20020000 │
+│ 5 │ 20000 │
+│ 6 │ 20000 │
+│ 7 │ 20000 │
+│ 8 │ 20000 │
+└────────┴──────────┘
+6 rows in set. Elapsed: 0.399 sec. Processed 200.00 million rows, 1.60 GB (501.18 million rows/s., 4.01 GB/s.)
+
+Q2) SELECT uniq(value_a) FROM test_delete where tenant = 4;
+┌─uniq(value_a)─┐
+│ 20016427 │
+└───────────────┘
+1 row in set. Elapsed: 0.284 sec. Processed 20.23 million rows, 863.93 MB (71.30 million rows/s., 3.05 GB/s.)
+
+Q3) SELECT max(ts) FROM test_delete where tenant = 4;
+┌─────────────max(ts)─┐
+│ 2020-04-25 17:46:39 │
+└─────────────────────┘
+1 row in set. Elapsed: 0.080 sec. Processed 20.23 million rows, 242.31 MB (251.88 million rows/s., 3.02 GB/s.)
+
+Q4) SELECT max(ts) FROM test_delete where tenant = 4 and key = 444;
+┌─────────────max(ts)─┐
+│ 2020-01-01 00:00:44 │
+└─────────────────────┘
+1 row in set. Elapsed: 0.010 sec. Processed 212.99 thousand rows, 3.44 MB (22.01 million rows/s., 355.50 MB/s.)
+
+Q5) SELECT uniq(value_a) FROM test_delete where tenant = 1;
+┌─uniq(value_a)─┐
+│ 0 │
+└───────────────┘
+1 row in set. Elapsed: 0.034 sec. Processed 20.22 million rows, 161.74 MB (589.90 million rows/s., 4.72 GB/s.)
+
+DROP ROW POLICY pol1 ON test_delete;
+DROP DICTIONARY deleted_tenants_dict;
+DROP TABLE deleted_tenants;
+```
+
+## row policy using engine=Set
+```sql
+create table deleted_tenants(tenant Int64) ENGINE=Set;
+
+insert into deleted_tenants(tenant) values(1),(2),(3);
+
+CREATE ROW POLICY pol1 ON test_delete USING tenant not in deleted_tenants TO all;
+
+Q1) SELECT tenant, count() FROM test_delete GROUP BY tenant ORDER BY tenant LIMIT 6;
+┌─tenant─┬──count()─┐
+│ 0 │ 20020000 │
+│ 4 │ 20020000 │
+│ 5 │ 20000 │
+│ 6 │ 20000 │
+│ 7 │ 20000 │
+│ 8 │ 20000 │
+└────────┴──────────┘
+6 rows in set. Elapsed: 0.322 sec. Processed 200.00 million rows, 1.60 GB (621.38 million rows/s., 4.97 GB/s.)
+
+Q2) SELECT uniq(value_a) FROM test_delete where tenant = 4;
+┌─uniq(value_a)─┐
+│ 20016427 │
+└───────────────┘
+1 row in set. Elapsed: 0.275 sec. Processed 20.23 million rows, 863.93 MB (73.56 million rows/s., 3.14 GB/s.)
+
+Q3) SELECT max(ts) FROM test_delete where tenant = 4;
+┌─────────────max(ts)─┐
+│ 2020-04-25 17:46:39 │
+└─────────────────────┘
+1 row in set. Elapsed: 0.084 sec. Processed 20.23 million rows, 242.31 MB (240.07 million rows/s., 2.88 GB/s.)
+
+Q4) SELECT max(ts) FROM test_delete where tenant = 4 and key = 444;
+┌─────────────max(ts)─┐
+│ 2020-01-01 00:00:44 │
+└─────────────────────┘
+1 row in set. Elapsed: 0.010 sec. Processed 212.99 thousand rows, 3.44 MB (20.69 million rows/s., 334.18 MB/s.)
+
+Q5) SELECT uniq(value_a) FROM test_delete where tenant = 1;
+┌─uniq(value_a)─┐
+│ 0 │
+└───────────────┘
+1 row in set. Elapsed: 0.030 sec. Processed 20.22 million rows, 161.74 MB (667.06 million rows/s., 5.34 GB/s.)
+
+DROP ROW POLICY pol1 ON test_delete;
+DROP TABLE deleted_tenants;
+```
+
+
+
+## results
+
+expression: `CREATE ROW POLICY pol1 ON test_delete USING tenant not in (1,2,3) TO all;`
+
+table subq: `CREATE ROW POLICY pol1 ON test_delete USING tenant not in deleted_tenants TO all;`
+
+ext. dict. NOT dictHas : `CREATE ROW POLICY pol1 ON test_delete USING NOT dictHas('deleted_tenants_dict', tenant) TO all;`
+
+ext. dict. dictHas : `CREATE ROW POLICY pol1 ON test_delete USING dictHas('deleted_tenants_dict', tenant) TO all;`
+
+engine=Set: `CREATE ROW POLICY pol1 ON test_delete USING tenant not in deleted_tenants TO all;` (with the `deleted_tenants` table using `ENGINE=Set`)
+
+Values below are elapsed seconds / rows read (m = million, t = thousand):
+
+| Q | no policy | expression | table subq | ext. dict. NOT | ext. dict. | engine=Set |
+|----|-----------------|-----------------|-----------------|-----------------|-----------------|-----------------|
+| Q1 | 0.285 / 200.00m | 0.333 / 140.08m | 0.329 / 140.08m | 0.388 / 200.00m | 0.399 / 200.00m | 0.322 / 200.00m |
+| Q2 | 0.265 / 20.23m | 0.287 / 20.23m | 0.287 / 20.23m | 0.291 / 20.23m | 0.284 / 20.23m | 0.275 / 20.23m |
+| Q3 | 0.062 / 20.23m | 0.080 / 20.23m | 0.080 / 20.23m | 0.084 / 20.23m | 0.080 / 20.23m | 0.084 / 20.23m |
+| Q4 | 0.009 / 212.99t | 0.011 / 212.99t | 0.010 / 213.00t | 0.010 / 212.99t | 0.010 / 212.99t | 0.010 / 212.99t |
+| Q5 | | 0.008 / 180.22t | 0.008 / 180.23t | 0.046 / 20.22m | 0.034 / 20.22m | 0.030 / 20.22m |
+
+An expression in the row policy seems to be the fastest way (see Q1 and Q5).
diff --git a/content/en/altinity-kb-queries-and-syntax/sampling-example.md b/content/en/altinity-kb-queries-and-syntax/sampling-example.md
index 4c28707bd6..b270199107 100644
--- a/content/en/altinity-kb-queries-and-syntax/sampling-example.md
+++ b/content/en/altinity-kb-queries-and-syntax/sampling-example.md
@@ -1,10 +1,11 @@
---
title: "Sampling Example"
linkTitle: "Sampling Example"
-description: >
- Clickhouse table sampling example
---
-The most important idea about sampling that the primary index must have **low cardinality**. The following example demonstrates how sampling can be setup correctly, and an example if it being set up incorrectly as a comparison.
+
+The most important idea about sampling is that the primary index must have **LowCardinality**. (For more information, see [the Altinity Knowledge Base article on LowCardinality](../../altinity-kb-schema-design/lowcardinality) or [a ClickHouse® user's lessons learned from LowCardinality](https://altinity.com/blog/2020-5-20-reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer)).
+
+The following example demonstrates how sampling can be set up correctly, and includes an example of it being set up incorrectly for comparison.
Sampling requires `sample by expression` . This ensures a range of sampled column types fit within a specified range, which ensures the requirement of low cardinality. In this example, I cannot use `transaction_id` because I can not ensure that the min value of `transaction_id = 0` and `max value = MAX_UINT64`. Instead, I used `cityHash64(transaction_id)`to expand the range within the minimum and maximum values.
diff --git a/content/en/altinity-kb-queries-and-syntax/simplestateif-or-ifstate-for-simple-aggregate-functions.md b/content/en/altinity-kb-queries-and-syntax/simplestateif-or-ifstate-for-simple-aggregate-functions.md
index f7bbb99eb3..06f81fb7b3 100644
--- a/content/en/altinity-kb-queries-and-syntax/simplestateif-or-ifstate-for-simple-aggregate-functions.md
+++ b/content/en/altinity-kb-queries-and-syntax/simplestateif-or-ifstate-for-simple-aggregate-functions.md
@@ -6,7 +6,7 @@ description: >
---
### Q. What is SimpleAggregateFunction? Are there advantages to use it instead of AggregateFunction in AggregatingMergeTree?
-SimpleAggregateFunction can be used for those aggregations when the function state is exactly the same as the resulting function value. Typical example is `max` function: it only requires storing the single value which is already maximum, and no extra steps needed to get the final value. In contrast `avg` need to store two numbers - sum & count, which should be divided to get the final value of aggregation (done by the `-Merge` step at the very end).
+The ClickHouse® SimpleAggregateFunction can be used for those aggregations when the function state is exactly the same as the resulting function value. A typical example is the `max` function: it only requires storing the single value which is already the maximum, and no extra steps are needed to get the final value. In contrast, `avg` needs to store two numbers - sum & count - which should be divided to get the final value of the aggregation (done by the `-Merge` step at the very end).
@@ -47,7 +47,7 @@ SimpleAggregateFunction can be used for those aggregations when the function sta
reading raw value per row
you can access it directly
-
you need to use finalizeAgggregation function
+
you need to use finalizeAggregation function
using aggregated value
@@ -77,9 +77,11 @@ SimpleAggregateFunction can be used for those aggregations when the function sta
-See also
-[https://github.com/ClickHouse/ClickHouse/pull/4629](https://github.com/ClickHouse/ClickHouse/pull/4629)
-[https://github.com/ClickHouse/ClickHouse/issues/3852](https://github.com/ClickHouse/ClickHouse/issues/3852)
+See also:
+
+* [Altinity Knowledge Base article on AggregatingMergeTree](../../engines/mergetree-table-engine-family/aggregatingmergetree/)
+* [https://github.com/ClickHouse/ClickHouse/pull/4629](https://github.com/ClickHouse/ClickHouse/pull/4629)
+* [https://github.com/ClickHouse/ClickHouse/issues/3852](https://github.com/ClickHouse/ClickHouse/issues/3852)
### Q. How maxSimpleState combinator result differs from plain max?
diff --git a/content/en/altinity-kb-queries-and-syntax/skip-indexes/_index.md b/content/en/altinity-kb-queries-and-syntax/skip-indexes/_index.md
index 760784d3b0..962c1355d8 100644
--- a/content/en/altinity-kb-queries-and-syntax/skip-indexes/_index.md
+++ b/content/en/altinity-kb-queries-and-syntax/skip-indexes/_index.md
@@ -4,3 +4,4 @@ linkTitle: "Skip indexes"
description: >
Skip indexes
---
+ClickHouse® provides a type of index that in specific circumstances can significantly improve query speed. These structures are labeled "skip" indexes because they enable ClickHouse to skip reading significant chunks of data that are guaranteed to have no matching values.
\ No newline at end of file
diff --git a/content/en/altinity-kb-queries-and-syntax/skip-indexes/skip-index-bloom_filter-for-array-column.md b/content/en/altinity-kb-queries-and-syntax/skip-indexes/skip-index-bloom_filter-for-array-column.md
index 0e8a52fe89..610b44093f 100644
--- a/content/en/altinity-kb-queries-and-syntax/skip-indexes/skip-index-bloom_filter-for-array-column.md
+++ b/content/en/altinity-kb-queries-and-syntax/skip-indexes/skip-index-bloom_filter-for-array-column.md
@@ -1,12 +1,13 @@
---
title: "Skip index bloom_filter Example"
linkTitle: "Skip index bloom_filter Example"
-description: >
- Example: skip index bloom_filter & array column
+aliases:
+ /altinity-kb-queries-and-syntax/skip-indexes/example-skip-index-bloom_filter-and-array-column
---
-tested with 20.8.17.25
-[https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/\#table_engine-mergetree-data_skipping-indexes](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/#table_engine-mergetree-data_skipping-indexes)
+tested with ClickHouse® 20.8.17.25
+
+[https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree/\#table_engine-mergetree-data_skipping-indexes](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree/#table_engine-mergetree-data_skipping-indexes)
### Let's create test data
@@ -38,7 +39,7 @@ select count() from bftest where has(x, -42);
Processed 110.00 million rows, 9.68 GB (217.69 million rows/s., 19.16 GB/s.)
```
-As you can see Clickhouse read **110.00 million rows** and the query elapsed **Elapsed: 0.505 sec**.
+As you can see ClickHouse read **110.00 million rows** and the query elapsed **Elapsed: 0.505 sec**.
### Let's add an index
@@ -155,4 +156,3 @@ Also no improvement :(
Outcome: I would use TYPE bloom_filter GRANULARITY 3.
- 2021 Altinity Inc. All rights reserved.
diff --git a/content/en/altinity-kb-queries-and-syntax/slow_select_count.md b/content/en/altinity-kb-queries-and-syntax/slow_select_count.md
new file mode 100644
index 0000000000..c8a51412e7
--- /dev/null
+++ b/content/en/altinity-kb-queries-and-syntax/slow_select_count.md
@@ -0,0 +1,40 @@
+---
+title: "Why is simple `SELECT count()` Slow in ClickHouse®?"
+linkTitle: "Slow `SELECT count()`"
+weight: 100
+description: >-
+---
+
+ClickHouse is a columnar database that provides excellent performance for analytical queries. However, in some cases, a simple count query can be slow. In this article, we'll explore the reasons why this can happen and how to optimize the query.
+
+### Three Strategies for Counting Rows in ClickHouse
+
+There are three ways to count rows in a table in ClickHouse:
+
+1. `optimize_trivial_count_query`: This strategy extracts the number of rows from the table metadata. It's the fastest and most efficient way to count rows, but it only works for simple count queries.
+
+2. `allow_experimental_projection_optimization`: This strategy uses a virtual projection called `_minmax_count_projection` to count rows. It's faster than scanning the table but slower than the trivial count query.
+
+3. Scanning the smallest column in the table and reading rows from that. This is the slowest strategy and is only used when the other two strategies can't be used.
+
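+You can see the difference between the metadata-based count and the column-scan fallback by toggling the first strategy per query (a minimal sketch; `my_table` is a placeholder):
+
+```sql
+SELECT count() FROM my_table SETTINGS optimize_trivial_count_query = 1; -- row count taken from part metadata
+SELECT count() FROM my_table SETTINGS optimize_trivial_count_query = 0; -- falls back to scanning the smallest column
+```
+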
+### Why Does ClickHouse Sometimes Choose the Slowest Counting Strategy?
+
+In some cases, ClickHouse may choose the slowest counting strategy even when there are faster options available. Here are some possible reasons why this can happen:
+
+1. Row policies are used on the table: If row policies are used, ClickHouse needs to filter rows to give the proper count. You can check whether row policies are used by selecting from `system.row_policies`.
+
+2. Experimental lightweight delete feature was used on the table: In this case ClickHouse may use the slowest counting strategy. You can check this by looking into `system.parts_columns` for the column named `_row_exists`. To do this, run the following query:
+
+```sql
+SELECT DISTINCT database, table FROM system.parts_columns WHERE column = '_row_exists';
+```
+
+You can also refer to this issue on GitHub for more information: https://github.com/ClickHouse/ClickHouse/issues/47930.
+
+3. `SELECT FINAL` or `final=1` setting is used.
+
+4. `max_parallel_replicas > 1` is used.
+
+5. Sampling is used.
+
+6. Some other features like `allow_experimental_query_deduplication` or `empty_result_for_aggregation_by_empty_set` are used.
diff --git a/content/en/altinity-kb-queries-and-syntax/state-and-merge-combinators.md b/content/en/altinity-kb-queries-and-syntax/state-and-merge-combinators.md
index 44fe42bb5c..9741685144 100644
--- a/content/en/altinity-kb-queries-and-syntax/state-and-merge-combinators.md
+++ b/content/en/altinity-kb-queries-and-syntax/state-and-merge-combinators.md
@@ -4,7 +4,11 @@ linkTitle: "-State & -Merge combinators"
description: >
-State & -Merge combinators
---
--State combinator doesn't actually store information about -If combinator, so aggregate functions with -If and without have the same serialized data.
+
+The -State combinator in ClickHouse® does not store additional information about the -If combinator, which means that aggregate functions with and without -If have the same serialized data structure. This can be verified through various examples, as demonstrated below.
+
+**Example 1**: maxIfState and maxState
+In this example, we use the maxIfState and maxState functions on a dataset of numbers, serialize the result, and merge it using the maxMerge function.
```sql
$ clickhouse-local --query "SELECT maxIfState(number,number % 2) as x, maxState(number) as y FROM numbers(10) FORMAT RowBinary" | clickhouse-local --input-format RowBinary --structure="x AggregateFunction(max,UInt64), y AggregateFunction(max,UInt64)" --query "SELECT maxMerge(x), maxMerge(y) FROM table"
@@ -13,7 +17,11 @@ $ clickhouse-local --query "SELECT maxIfState(number,number % 2) as x, maxState(
9 10
```
--State combinator have the same serialized data footprint regardless of parameters used in definition of aggregate function. That's true for quantile\* and sequenceMatch/sequenceCount functions.
+In both cases, the -State combinator results in identical serialized data footprints, regardless of the conditions in the -If variant. The maxMerge function merges the state without concern for the original -If condition.
+
+**Example 2**: quantilesTDigestIfState
+Here, we use the quantilesTDigestIfState function to demonstrate that functions like quantile-based and sequence matching functions follow the same principle regarding serialized data consistency.
+
```sql
$ clickhouse-local --query "SELECT quantilesTDigestIfState(0.1,0.9)(number,number % 2) FROM numbers(1000000) FORMAT RowBinary" | clickhouse-local --input-format RowBinary --structure="x AggregateFunction(quantileTDigestWeighted(0.5),UInt64,UInt8)" --query "SELECT quantileTDigestWeightedMerge(0.4)(x) FROM table"
@@ -22,6 +30,12 @@ $ clickhouse-local --query "SELECT quantilesTDigestIfState(0.1,0.9)(number,numbe
$ clickhouse-local --query "SELECT quantilesTDigestIfState(0.1,0.9)(number,number % 2) FROM numbers(1000000) FORMAT RowBinary" | clickhouse-local --input-format RowBinary --structure="x AggregateFunction(quantilesTDigestWeighted(0.5),UInt64,UInt8)" --query "SELECT quantilesTDigestWeightedMerge(0.4,0.8)(x) FROM table"
[400000,800000]
+```
+
+**Example 3**: Quantile Functions with -Merge
+This example shows how the quantileState and quantileMerge functions work together to calculate a specific quantile.
+
+```sql
SELECT quantileMerge(0.9)(x)
FROM
(
@@ -34,6 +48,9 @@ FROM
└───────────────────────┘
```
+**Example 4**: sequenceMatch and sequenceCount Functions with -Merge
+Finally, we demonstrate the behavior of sequenceMatchState and sequenceMatchMerge, as well as sequenceCountState and sequenceCountMerge, in ClickHouse.
+
```sql
SELECT
sequenceMatchMerge('(?2)(?3)')(x) AS `2_3`,
@@ -48,6 +65,11 @@ FROM
┌─2_3─┬─1_4─┬─1_2_3─┐
│ 1 │ 1 │ 0 │
└─────┴─────┴───────┘
+```
+
+Similarly, sequenceCountState and sequenceCountMerge functions behave consistently when merging states:
+
+```sql
SELECT
sequenceCountMerge('(?1)(?2)')(x) AS `2_3`,
@@ -64,3 +86,4 @@ FROM
│ 3 │ 0 │ 2 │
└─────┴─────┴───────┘
```
+ClickHouse's -State combinator stores serialized data in a consistent manner, irrespective of conditions used with -If. The same applies to a wide range of functions, including quantile and sequence-based functions. This behavior ensures that functions like maxMerge, quantileMerge, sequenceMatchMerge, and sequenceCountMerge work seamlessly, even across varied inputs.
diff --git a/content/en/altinity-kb-queries-and-syntax/time-zones.md b/content/en/altinity-kb-queries-and-syntax/time-zones.md
index 479b94eb50..911347e7a0 100644
--- a/content/en/altinity-kb-queries-and-syntax/time-zones.md
+++ b/content/en/altinity-kb-queries-and-syntax/time-zones.md
@@ -6,11 +6,11 @@ description: >
---
Important things to know:
-1. DateTime inside clickhouse is actually UNIX timestamp always, i.e. number of seconds since 1970-01-01 00:00:00 GMT.
+1. DateTime inside ClickHouse® is actually UNIX timestamp always, i.e. number of seconds since 1970-01-01 00:00:00 GMT.
2. Conversion from that UNIX timestamp to a human-readable form and reverse can happen on the client (for native clients) and on the server (for HTTP clients, and for some type of queries, like `toString(ts)`)
3. Depending on the place where that conversion happened rules of different timezones may be applied.
4. You can check server timezone using `SELECT timezone()`
-5. clickhouse-client also by default tries to use server timezone (see also `--use_client_time_zone` flag)
+5. [clickhouse-client](https://docs.altinity.com/altinitycloud/altinity-cloud-connections/clickhouseclient/) also by default tries to use server timezone (see also `--use_client_time_zone` flag)
6. If you want you can store the timezone name inside the data type, in that case, timestamp <-> human-readable time rules of that timezone will be applied.
```sql
@@ -34,7 +34,7 @@ toUnixTimestamp(toDateTime(now())): 1626432628
toUnixTimestamp(toDateTime(now(), 'UTC')): 1626432628
```
-Since version 20.4 clickhouse uses embedded tzdata (see [https://github.com/ClickHouse/ClickHouse/pull/10425](https://github.com/ClickHouse/ClickHouse/pull/10425) )
+Since version 20.4 ClickHouse uses embedded tzdata (see [https://github.com/ClickHouse/ClickHouse/pull/10425](https://github.com/ClickHouse/ClickHouse/pull/10425) )
You get used tzdata version
@@ -79,6 +79,16 @@ Query id: 855453d7-eccd-44cb-9631-f63bb02a273c
```
+ClickHouse uses the system timezone info from the tzdata package if it exists, and falls back to its own builtin tzdata if it is missing from the system.
+
+```
+cd /usr/share/zoneinfo/Canada
+ln -s ../America/Halifax A
+
+TZ=Canada/A clickhouse-local -q 'select timezone()'
+Canada/A
+```
+
### When the conversion using different rules happen
```sql
@@ -109,4 +119,4 @@ SELECT * FROM t_with_dt_utc
└─────────────────────────┘
```
-Best practice here: use UTC timezone everywhere, OR use the same default timezone for clickhouse server as used by your data
+Best practice here: use UTC timezone everywhere, OR use the same default timezone for ClickHouse server as used by your data
diff --git a/content/en/altinity-kb-queries-and-syntax/top-n-and-remain.md b/content/en/altinity-kb-queries-and-syntax/top-n-and-remain.md
index 923ebd3214..e2aac006dc 100644
--- a/content/en/altinity-kb-queries-and-syntax/top-n-and-remain.md
+++ b/content/en/altinity-kb-queries-and-syntax/top-n-and-remain.md
@@ -4,6 +4,12 @@ linkTitle: "Top N & Remain"
description: >
Top N & Remain
---
+
+When working with large datasets, you may often need to compute the sum of values for the top N groups and aggregate the remainder separately. This article demonstrates several methods to achieve that in ClickHouse.
+
+## Dataset Setup
+
+We'll start by creating a table `top_with_rest` and inserting data for demonstration purposes:
+
```sql
CREATE TABLE top_with_rest
(
@@ -18,7 +24,10 @@ INSERT INTO top_with_rest SELECT
FROM numbers_mt(10000);
```
-## Using UNION ALL
+This creates a table with 10,000 numbers, grouped by dividing the numbers into tens.
+
+## Method 1: Using UNION ALL
+This approach retrieves the top 10 groups by sum and aggregates the remaining groups as a separate row.
```sql
SELECT *
@@ -63,7 +72,9 @@ ORDER BY res ASC
└──────┴──────────┘
```
-## Using arrays
+
+## Method 2: Using Arrays
+In this method, we push the top 10 groups into an array and add a special row for the remainder.
```sql
WITH toUInt64(sumIf(sum, isNull(k)) - sumIf(sum, isNotNull(k))) AS total
@@ -98,7 +109,8 @@ ORDER BY res ASC
└──────┴──────────┘
```
-## Using window functions (starting from 21.1)
+## Method 3: Using Window Functions
+Window functions, available from ClickHouse version 21.1, provide an efficient way to calculate the sum for the top N rows and the remainder.
```sql
SET allow_experimental_window_functions = 1;
@@ -139,7 +151,10 @@ ORDER BY res ASC
│ null │ 49000050 │
└──────┴──────────┘
```
+Window functions allow efficient summation of the total and the top groups in one query.
+
+## Method 4: Using Row Number and Grouping
+This approach calculates the row number (`rn`) for each group and replaces the remaining groups with NULL.
```sql
SELECT
k,
@@ -183,3 +198,39 @@ ORDER BY res
│ null │ 49000050 │
└──────┴──────────┘
```
+This method uses ROW_NUMBER() to segregate the top N from the rest.
+
+## Method 5: Using WITH TOTALS
+This method includes totals for all groups, and you calculate the remainder on the application side.
+
+```sql
+SELECT
+ k,
+ sum(number) AS res
+FROM top_with_rest
+GROUP BY k
+ WITH TOTALS
+ORDER BY res DESC
+LIMIT 10
+
+┌─k───┬───res─┐
+│ 999 │ 99945 │
+│ 998 │ 99845 │
+│ 997 │ 99745 │
+│ 996 │ 99645 │
+│ 995 │ 99545 │
+│ 994 │ 99445 │
+│ 993 │ 99345 │
+│ 992 │ 99245 │
+│ 991 │ 99145 │
+│ 990 │ 99045 │
+└─────┴───────┘
+
+Totals:
+┌─k─┬──────res─┐
+│ │ 49995000 │
+└───┴──────────┘
+```
+You would subtract the sum of the top rows from the totals in your application.
+
+These methods offer different ways to compute the top N rows and aggregate the remainder in ClickHouse. Whether you prefer UNION ALL, arrays, window functions, or totals, pick the approach that best fits your query patterns and application logic.
diff --git a/content/en/altinity-kb-queries-and-syntax/trace_log.md b/content/en/altinity-kb-queries-and-syntax/trace_log.md
index 64f445e082..5ef49ddc7a 100644
--- a/content/en/altinity-kb-queries-and-syntax/trace_log.md
+++ b/content/en/altinity-kb-queries-and-syntax/trace_log.md
@@ -2,13 +2,9 @@
title: "Collecting query execution flamegraphs using system.trace_log"
linkTitle: "trace_log"
weight: 100
-description: >-
- Collecting query execution flamegraph using trace_log
---
-## Collecting query execution flamegraph using system.trace_log
-
-ClickHouse has embedded functionality to analyze the details of query performance.
+ClickHouse® has embedded functionality to analyze the details of query performance.
It's `system.trace_log` table.
@@ -17,7 +13,7 @@ By default it collects information only about queries when runs longer than 1 se
You can adjust that per query using settings `query_profiler_real_time_period_ns` & `query_profiler_cpu_time_period_ns`.
Both works very similar (with desired interval dump the stacktraces of all the threads which execute the query).
-real timer - allows to 'see' the situtions when cpu was not working much, but time was spend for example on IO.
+real timer - allows you to 'see' the situations when the CPU was not working much, but time was spent, for example, on IO.
cpu timer - allows to see the 'hot' points in calculations more accurately (skip the io time).
Trying to collect stacktraces with a frequency higher than few KHz is usually not possible.
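+
+For example, a sketch of lowering the profiler period for a single heavy query (the 10 ms period here is only an illustrative value, not a recommendation):
+
+```sql
+-- Collect stack traces every 10 ms of real time and of CPU time, for this query only
+SELECT count()
+FROM numbers_mt(1000000000)
+SETTINGS
+    query_profiler_real_time_period_ns = 10000000,
+    query_profiler_cpu_time_period_ns = 10000000;
+```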
diff --git a/content/en/altinity-kb-queries-and-syntax/troubleshooting.md b/content/en/altinity-kb-queries-and-syntax/troubleshooting.md
index d16e7cdd2b..4e7b9bacf3 100644
--- a/content/en/altinity-kb-queries-and-syntax/troubleshooting.md
+++ b/content/en/altinity-kb-queries-and-syntax/troubleshooting.md
@@ -2,16 +2,22 @@
title: "Troubleshooting"
linkTitle: "Troubleshooting"
description: >
- Troubleshooting
+ Tips for ClickHouse® troubleshooting
---
-## Log of query execution
-Controlled by session level setting `send_logs_level`
+## Query Execution Logging
+
+When troubleshooting query execution in ClickHouse®, one of the most useful tools is logging the query execution details. This can be controlled using the session-level setting `send_logs_level`. Here are the different log levels you can use:
Possible values: `'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'`
-Can be used with clickhouse-client in both interactive and non-interactive mode.
+
+This can be used with [clickhouse-client](https://docs.altinity.com/altinitycloud/altinity-cloud-connections/clickhouseclient/) in both interactive and non-interactive mode.
+
+The logs provide detailed information about query execution, making it easier to identify issues or bottlenecks. You can use the following command to run a query with logging enabled:
```bash
$ clickhouse-client -mn --send_logs_level='trace' --query "SELECT sum(number) FROM numbers(1000)"
+
+# output:
[LAPTOP] 2021.04.29 00:05:31.425842 [ 25316 ] {14b0646d-8a6e-4b2f-9b13-52a218cf43ba} executeQuery: (from 127.0.0.1:42590, using production parser) SELECT sum(number) FROM numbers(1000)
[LAPTOP] 2021.04.29 00:05:31.426281 [ 25316 ] {14b0646d-8a6e-4b2f-9b13-52a218cf43ba} ContextAccess (default): Access granted: CREATE TEMPORARY TABLE ON *.*
[LAPTOP] 2021.04.29 00:05:31.426648 [ 25316 ] {14b0646d-8a6e-4b2f-9b13-52a218cf43ba} InterpreterSelectQuery: FetchColumns -> Complete
@@ -22,10 +28,18 @@ $ clickhouse-client -mn --send_logs_level='trace' --query "SELECT sum(number) FR
[LAPTOP] 2021.04.29 00:05:31.427875 [ 25316 ] {14b0646d-8a6e-4b2f-9b13-52a218cf43ba} executeQuery: Read 1000 rows, 7.81 KiB in 0.0019463 sec., 513795 rows/sec., 3.92 MiB/sec.
[LAPTOP] 2021.04.29 00:05:31.427898 [ 25316 ] {14b0646d-8a6e-4b2f-9b13-52a218cf43ba} MemoryTracker: Peak memory usage (for query): 0.00 B.
499500
+```
+You can also redirect the logs to a file for further analysis:
+```bash
$ clickhouse-client -mn --send_logs_level='trace' --query "SELECT sum(number) FROM numbers(1000)" 2> ./query.log
```
+### Enabling Logging in an Interactive Session
+
+You can also set `send_logs_level` inside an interactive clickhouse-client session and see the trace output together with the query result:
+
```sql
LAPTOP.localdomain :) SET send_logs_level='trace';
@@ -60,9 +74,12 @@ Query id: d3db767b-34e9-4252-9f90-348cf958f822
1 rows in set. Elapsed: 0.007 sec. Processed 1.00 thousand rows, 8.00 KB (136.43 thousand rows/s., 1.09 MB/s.)
```
-## system tables
+## Analyzing Logs in System Tables
+
+If you need to analyze a query after it has been executed, you can retrieve the execution details from the system tables.
+
```sql
+-- Query Log: you can fetch query logs from the system.query_log table:
+
SELECT sum(number)
FROM numbers(1000);
@@ -78,13 +95,15 @@ SELECT *
FROM system.query_log
WHERE (event_date = today()) AND (query_id = '34c61093-3303-47d0-860b-0d644fa7264b');
-If query_thread_log enabled (SET log_query_threads = 1)
+-- Query Thread Log: to capture thread-level logs, enable log_query_threads (SET log_query_threads = 1;)
+-- and then retrieve them using:
SELECT *
FROM system.query_thread_log
WHERE (event_date = today()) AND (query_id = '34c61093-3303-47d0-860b-0d644fa7264b');
-If opentelemetry_span_log enabled (SET opentelemetry_start_trace_probability = 1, opentelemetry_trace_processors = 1)
+-- OpenTelemetry Span Log: to enable OpenTelemetry tracing for queries,
+-- set (SET opentelemetry_start_trace_probability = 1, opentelemetry_trace_processors = 1) and then use:
SELECT *
FROM system.opentelemetry_span_log
@@ -97,10 +116,9 @@ WHERE (trace_id, finish_date) IN (
);
```
+## Visualizing Query Performance with Flamegraphs
-
-## Flamegraph
-
+ClickHouse supports exporting query performance data in a format compatible with speedscope.app. This can help you visualize performance bottlenecks within queries. Example query to generate a flamegraph:
[https://www.speedscope.app/](https://www.speedscope.app/)
```sql
@@ -113,7 +131,7 @@ WITH
SELECT
concat('clickhouse-server@', version()) AS exporter,
'https://www.speedscope.app/file-format-schema.json' AS `$schema`,
- concat('Clickhouse query id: ', query) AS name,
+ concat('ClickHouse query id: ', query) AS name,
CAST(samples, 'Array(Tuple(type String, name String, unit String, startValue UInt64, endValue UInt64, samples Array(Array(UInt32)), weights Array(UInt32)))') AS profiles,
CAST(tuple(arrayMap(x -> (demangle(addressToSymbol(x)), addressToLine(x)), uniq_frames)), 'Tuple(frames Array(Tuple(name String, line String)))') AS shared
FROM
@@ -142,5 +160,58 @@ FROM
)
SETTINGS allow_introspection_functions = 1, output_format_json_named_tuples_as_objects = 1
FORMAT JSONEachRow
-SETTINGS output_format_json_named_tuples_as_objects = 1
```
+
+And a query to generate traces per thread:
+
+```sql
+WITH
+ '8e7e0616-cfaf-43af-a139-d938ced7655a' AS query,
+ min(min) AS start_value,
+ max(max) AS end_value,
+ groupUniqArrayArrayArray(trace_arr) AS uniq_frames,
+ arrayMap((x, a, b, c, d) -> ('sampled', concat(b, ' - thread ', c.1, ' - traces ', c.2), 'nanoseconds', d.1 - start_value, d.2 - start_value, arrayMap(s -> reverse(arrayMap(y -> toUInt32(indexOf(uniq_frames, y) - 1), s)), x), a), groupArray(trace_arr), groupArray(weights), groupArray(trace_type), groupArray((thread_id, total)), groupArray((min, max))) AS samples
+SELECT
+ concat('clickhouse-server@', version()) AS exporter,
+ 'https://www.speedscope.app/file-format-schema.json' AS `$schema`,
+ concat('ClickHouse query id: ', query) AS name,
+ CAST(samples, 'Array(Tuple(type String, name String, unit String, startValue UInt64, endValue UInt64, samples Array(Array(UInt32)), weights Array(UInt32)))') AS profiles,
+ CAST(tuple(arrayMap(x -> (demangle(addressToSymbol(x)), addressToLine(x)), uniq_frames)), 'Tuple(frames Array(Tuple(name String, line String)))') AS shared
+FROM
+(
+ SELECT
+ min(min_ns) AS min,
+ trace_type,
+ thread_id,
+ max(max_ns) AS max,
+ groupArray(trace) AS trace_arr,
+ groupArray(cnt) AS weights,
+ sum(cnt) as total
+ FROM
+ (
+ SELECT
+ min(timestamp_ns) AS min_ns,
+ max(timestamp_ns) AS max_ns,
+ trace,
+ trace_type,
+ thread_id,
+ sum(if(trace_type IN ('Memory', 'MemoryPeak', 'MemorySample'), size, 1)) AS cnt
+ FROM system.trace_log
+ WHERE query_id = query
+ GROUP BY
+ trace_type,
+ trace,
+ thread_id
+ )
+ GROUP BY
+ trace_type,
+ thread_id
+ ORDER BY
+ trace_type ASC,
+ total DESC
+)
+SETTINGS allow_introspection_functions = 1, output_format_json_named_tuples_as_objects = 1, output_format_json_quote_64bit_integers=1
+FORMAT JSONEachRow
+```
+
+By enabling detailed logging and tracing, you can effectively diagnose issues and optimize query performance in ClickHouse.
diff --git a/content/en/altinity-kb-queries-and-syntax/ts-interpolation.md b/content/en/altinity-kb-queries-and-syntax/ts-interpolation.md
index e36d222bb0..eeea98c5dd 100644
--- a/content/en/altinity-kb-queries-and-syntax/ts-interpolation.md
+++ b/content/en/altinity-kb-queries-and-syntax/ts-interpolation.md
@@ -5,36 +5,54 @@ description: >
Time-series alignment with interpolation
---
+This article demonstrates how to perform time-series data alignment with interpolation using window functions in ClickHouse. The goal is to align two different time-series (A and B) on the same timestamp axis and fill the missing values using linear interpolation.
+
+### Step-by-Step Implementation
+
+We begin by creating a table with test data that simulates two time-series (A and B) with randomly distributed timestamps and values. Then, we apply interpolation to fill missing values for each time-series based on the surrounding data points.
+
+#### 1. Drop Existing Table (if it exists)
```sql
DROP TABLE test_ts_interpolation;
+```
+This ensures that any previous versions of the table are removed.
---- generate test data
+#### 2. Generate Test Data
+In this step, we generate random time-series data with timestamps and values for series A and B. The values are calculated differently for each series:
+```sql
CREATE TABLE test_ts_interpolation
ENGINE = Log AS
SELECT
- ((number * 100) + 50) - (rand() % 100) AS timestamp,
- transform(rand() % 2, [0, 1], ['A', 'B'], '') AS ts,
- if(ts = 'A', timestamp * 10, timestamp * 100) AS value
+ ((number * 100) + 50) - (rand() % 100) AS timestamp, -- random timestamp generation
+ transform(rand() % 2, [0, 1], ['A', 'B'], '') AS ts, -- randomly assign series 'A' or 'B'
+ if(ts = 'A', timestamp * 10, timestamp * 100) AS value -- different value generation for each series
FROM numbers(1000000);
+```
+Here, a randomized timestamp is generated for each row, and the row is assigned to either series A or B using the transform() function. The value is then calculated based on the series type (A or B), with a different multiplier for each.
-
+#### 3. Preview the Generated Data
+After generating the data, you can inspect it by running a simple SELECT query:
+```sql
SELECT * FROM test_ts_interpolation;
+```
+This will show the randomly generated timestamps, series (A or B), and their corresponding values.
--- interpolation select with window functions
+#### 4. Perform Interpolation with Window Functions
+To align the time-series and interpolate missing values, we use window functions in the following query:
+```sql
SELECT
timestamp,
if(
ts = 'A',
- toFloat64(value),
- prev_a.2 + (timestamp - prev_a.1 ) * (next_a.2 - prev_a.2) / ( next_a.1 - prev_a.1)
+ toFloat64(value), -- If the current series is 'A', keep the original value
+ prev_a.2 + (timestamp - prev_a.1 ) * (next_a.2 - prev_a.2) / ( next_a.1 - prev_a.1) -- Interpolate for 'A'
) as a_value,
if(
ts = 'B',
- toFloat64(value),
- prev_b.2 + (timestamp - prev_b.1 ) * (next_b.2 - prev_b.2) / ( next_b.1 - prev_b.1)
+ toFloat64(value), -- If the current series is 'B', keep the original value
+ prev_b.2 + (timestamp - prev_b.1 ) * (next_b.2 - prev_b.2) / ( next_b.1 - prev_b.1) -- Interpolate for 'B'
) as b_value
FROM
(
@@ -42,11 +60,33 @@ FROM
timestamp,
ts,
value,
+ -- Find the previous and next values for series 'A'
anyLastIf((timestamp,value), ts='A') OVER (ORDER BY timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS prev_a,
anyLastIf((timestamp,value), ts='A') OVER (ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS next_a,
+ -- Find the previous and next values for series 'B'
anyLastIf((timestamp,value), ts='B') OVER (ORDER BY timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS prev_b,
anyLastIf((timestamp,value), ts='B') OVER (ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS next_b
FROM
test_ts_interpolation
)
+
+```
+#### Explanation:
+**Timestamp Alignment:**
+We align the timestamps of both series (A and B) and handle missing data points.
+
+**Interpolation Logic:**
+For each timestamp, if the current row does not belong to series A, we calculate the interpolated A value using the linear interpolation formula:
+
+```
+interpolated_value = prev_a.2 + ((timestamp - prev_a.1) / (next_a.1 - prev_a.1)) * (next_a.2 - prev_a.2)
```
+Similarly, for the B series, interpolation is calculated between the previous (prev_b) and next (next_b) known values.
+
+**Window Functions:**
+anyLastIf() is used to fetch the previous or next values for series A and B based on the timestamps.
+We use window functions to efficiently calculate these values over the ordered sequence of timestamps.
+
+
+By using window functions and interpolation, we can align time-series data with irregular timestamps and fill in missing values based on nearby data points. This technique is useful in scenarios where data is recorded at different times or irregular intervals across multiple series.
+
diff --git a/content/en/altinity-kb-queries-and-syntax/ttl/modify-ttl.md b/content/en/altinity-kb-queries-and-syntax/ttl/modify-ttl.md
new file mode 100644
index 0000000000..49015d8989
--- /dev/null
+++ b/content/en/altinity-kb-queries-and-syntax/ttl/modify-ttl.md
@@ -0,0 +1,179 @@
+---
+title: "MODIFY (ADD) TTL in ClickHouse®"
+linkTitle: "MODIFY (ADD) TTL"
+weight: 100
+description: >-
+ What happens during a MODIFY or ADD TTL query
+keywords:
+ - clickhouse modify ttl
+ - clickhouse alter table ttl
+---
+
+*For a general overview of TTL, see the article [Putting Things Where They Belong Using New TTL Moves](https://altinity.com/blog/2020-3-23-putting-things-where-they-belong-using-new-ttl-moves).*
+
+## ALTER TABLE tbl MODIFY (ADD) TTL:
+
+It's a 2-step process:
+
+1. `ALTER TABLE tbl MODIFY (ADD) TTL ...`
+
+Updates the table metadata: the schema .sql file & the metadata in ZooKeeper.
+It's usually a cheap and fast command, and any new INSERT after the schema change will calculate TTL according to the new rule.
+
+
+2. `ALTER TABLE tbl MATERIALIZE TTL`
+
+Recalculates TTL for already existing parts.
+It can be a heavy operation, because ClickHouse® will read the column data, recalculate the TTL, and apply the TTL expression.
+You can disable this step completely via the `materialize_ttl_after_modify` user session setting (by default it's 1, so materialization is enabled).
+
+
+```sql
+SET materialize_ttl_after_modify=0;
+ALTER TABLE tbl MODIFY TTL ...
+```
+
+If you disable materialization of TTL, all old parts will keep being processed according to the OLD TTL rules.
+MATERIALIZE TTL does two things:
+
+1. Recalculate TTL (fairly cheap, it reads only the columns participating in TTL)
+2. Apply TTL (rewrites the table data for all columns)
+
+You can also disable the apply-TTL substep via the `materialize_ttl_recalculate_only` MergeTree setting (by default it's 0, so ClickHouse will apply the TTL expression)
+
+```sql
+ALTER TABLE tbl MODIFY SETTING materialize_ttl_recalculate_only=1;
+```
+
+This means that the TTL rule will not be applied during the `ALTER TABLE tbl MODIFY (ADD) TTL ...` query.
+
+After this, you can apply (MATERIALIZE) TTL per partition manually:
+
+```sql
+ALTER TABLE tbl MATERIALIZE TTL [IN PARTITION partition | IN PARTITION ID 'partition_id'];
+```
+
+The idea of `materialize_ttl_after_modify = 0` is to use `ALTER TABLE tbl MATERIALIZE TTL IN PARTITION xxx; ALTER TABLE tbl MATERIALIZE TTL IN PARTITION yyy;` and materialize TTL gently, or drop/move partitions manually, until the old data (with the old TTL or without TTL) is processed.
+
+MATERIALIZE TTL is done via a mutation:
+1. ClickHouse creates new parts via hardlinks and writes a new ttl.txt file
+2. ClickHouse removes the old (inactive) parts after the remove time (default is 8 minutes)
+
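+After the mutation finishes, you can check the recalculated TTL boundaries per part via `system.parts`; a quick sketch (assuming the table is named `tbl`):
+
+```sql
+SELECT name, delete_ttl_info_min, delete_ttl_info_max
+FROM system.parts
+WHERE active AND table = 'tbl';
+```
+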
+To stop materialization of TTL:
+
+```sql
+SELECT * FROM system.mutations WHERE is_done=0 AND table = 'tbl';
+KILL MUTATION WHERE command LIKE '%MATERIALIZE TTL%' AND table = 'tbl'
+```
+
+### MODIFY TTL MOVE
+
+Example setup: today is 2022-06-02, and table `tbl` uses daily partitioning by `toYYYYMMDD(timestamp)` (so today's partition is 20220602).
+
+#### Increase of TTL
+
+TTL timestamp + INTERVAL 30 DAY MOVE TO DISK s3 -> TTL timestamp + INTERVAL 60 DAY MOVE TO DISK s3
+
+* Expectation: ClickHouse needs to move data from s3 BACK to the local disk
+* Actual: there is no rule saying that data newer than 60 DAY **should be** on the local disk
+
+Table parts:
+
+```
+20220401 ttl: 20220501 disk: s3
+20220416 ttl: 20220516 disk: s3
+20220501 ttl: 20220531 disk: s3
+20220502 ttl: 20220601 disk: local
+20220516 ttl: 20220616 disk: local
+20220601 ttl: 20220631 disk: local
+```
+
+```sql
+ALTER TABLE tbl MODIFY TTL timestamp + INTERVAL 60 DAY MOVE TO DISK s3;
+```
+
+Table parts:
+
+```
+20220401 ttl: 20220601 disk: s3
+20220416 ttl: 20220616 disk: s3
+20220501 ttl: 20220631 disk: s3 (ClickHouse will not move this part to local disk, because there is no TTL rule for that)
+20220502 ttl: 20220701 disk: local
+20220516 ttl: 20220716 disk: local
+20220601 ttl: 20220731 disk: local
+```
+
+#### Decrease of TTL
+
+TTL timestamp + INTERVAL 30 DAY MOVE TO DISK s3 -> TTL timestamp + INTERVAL 14 DAY MOVE TO DISK s3
+
+Table parts:
+
+```
+20220401 ttl: 20220501 disk: s3
+20220416 ttl: 20220516 disk: s3
+20220501 ttl: 20220531 disk: s3
+20220502 ttl: 20220601 disk: local
+20220516 ttl: 20220616 disk: local
+20220601 ttl: 20220631 disk: local
+```
+
+```sql
+ALTER TABLE tbl MODIFY TTL timestamp + INTERVAL 14 DAY MOVE TO DISK s3;
+```
+
+Table parts:
+
+```
+20220401 ttl: 20220415 disk: s3
+20220416 ttl: 20220501 disk: s3
+20220501 ttl: 20220515 disk: s3
+20220502 ttl: 20220517 disk: local (ClickHouse will move this part to disk s3 in background according to TTL rule)
+20220516 ttl: 20220601 disk: local (ClickHouse will move this part to disk s3 in background according to TTL rule)
+20220601 ttl: 20220616 disk: local
+```
+
+### Possible TTL Rules
+
+TTL:
+```
+DELETE (with `ttl_only_drop_parts` enabled it's a cheap operation: ClickHouse will drop the whole part)
+MOVE
+GROUP BY
+WHERE
+RECOMPRESS
+```
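+
+A sketch that combines several of these rule types in one table definition (the table and column names are hypothetical):
+
+```sql
+CREATE TABLE ttl_rules_example
+(
+    ts DateTime,
+    key UInt32,
+    value Float64
+)
+ENGINE = MergeTree
+PARTITION BY toYYYYMM(ts)
+ORDER BY (key, ts)
+TTL ts + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(3)),   -- RECOMPRESS
+    ts + INTERVAL 6 MONTH DELETE WHERE value = 0,      -- DELETE with a WHERE condition
+    ts + INTERVAL 12 MONTH DELETE;                     -- plain DELETE
+```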
+
+Related settings:
+
+Server settings:
+
+```
+background_move_processing_pool_thread_sleep_seconds | 10 |
+background_move_processing_pool_thread_sleep_seconds_random_part | 1.0 |
+background_move_processing_pool_thread_sleep_seconds_if_nothing_to_do | 0.1 |
+background_move_processing_pool_task_sleep_seconds_when_no_work_min | 10 |
+background_move_processing_pool_task_sleep_seconds_when_no_work_max | 600 |
+background_move_processing_pool_task_sleep_seconds_when_no_work_multiplier | 1.1 |
+background_move_processing_pool_task_sleep_seconds_when_no_work_random_part | 1.0 |
+```
+
+MergeTree settings:
+
+```
+merge_with_ttl_timeout │ 14400 │ 0 │ Minimal time in seconds, when merge with delete TTL can be repeated.
+merge_with_recompression_ttl_timeout │ 14400 │ 0 │ Minimal time in seconds, when merge with recompression TTL can be repeated.
+max_replicated_merges_with_ttl_in_queue │ 1 │ 0 │ How many tasks of merging parts with TTL are allowed simultaneously in ReplicatedMergeTree queue.
+max_number_of_merges_with_ttl_in_pool │ 2 │ 0 │ When there is more than specified number of merges with TTL entries in pool, do not assign new merge with TTL. This is to leave free threads for regular merges and avoid "Too many parts"
+ttl_only_drop_parts │ 0 │ 0 │ Only drop altogether the expired parts and not partially prune them.
+```
+
+Session settings:
+
+```
+materialize_ttl_after_modify │ 1 │ 0 │ Apply TTL for old data, after ALTER MODIFY TTL query
+```
diff --git a/content/en/altinity-kb-queries-and-syntax/ttl/ttl-group-by-examples.md b/content/en/altinity-kb-queries-and-syntax/ttl/ttl-group-by-examples.md
index dd50b71f64..790c5d72a3 100644
--- a/content/en/altinity-kb-queries-and-syntax/ttl/ttl-group-by-examples.md
+++ b/content/en/altinity-kb-queries-and-syntax/ttl/ttl-group-by-examples.md
@@ -3,6 +3,8 @@ title: "TTL GROUP BY Examples"
linkTitle: "TTL GROUP BY Examples"
description: >
TTL GROUP BY Examples
+aliases:
+ /altinity-kb-queries-and-syntax/ttl-group-by-examples
---
### Example with MergeTree table
@@ -26,9 +28,9 @@ TTL ts + interval 30 day
ts = min(toStartOfDay(ts));
```
-During TTL merges Clickhouse re-calculates values of columns in the SET section.
+During TTL merges ClickHouse® re-calculates values of columns in the SET section.
-GROUP BY section should be a prefix of a table's ORDER BY.
+The GROUP BY section should be a prefix of the table's PRIMARY KEY (which is the same as ORDER BY if no separate PRIMARY KEY is defined).
```sql
-- stop merges to demonstrate data before / after
@@ -178,7 +180,106 @@ GROUP BY m;
└────────┴─────────┴────────────┴────────────────┴────────────────┘
```
-During merges Clickhouse re-calculates **ts** columns as **min(toStartOfDay(ts))**. It's possible **only for the last column** of `SummingMergeTree` `ORDER BY` section `ORDER BY (key1, key2, toStartOfDay(ts), ts)` otherwise it will **break** the order of rows in the table.
+During merges ClickHouse re-calculates **ts** columns as **min(toStartOfDay(ts))**. It's possible **only for the last column** of `SummingMergeTree` `ORDER BY` section `ORDER BY (key1, key2, toStartOfDay(ts), ts)` otherwise it will **break** the order of rows in the table.
+
+### Example with AggregatingMergeTree table
+
+```sql
+CREATE TABLE test_ttl_group_by_agg
+(
+ `key1` UInt32,
+ `key2` UInt32,
+ `ts` DateTime,
+ `counter` AggregateFunction(count, UInt32)
+)
+ENGINE = AggregatingMergeTree
+PARTITION BY toYYYYMM(ts)
+PRIMARY KEY (key1, key2, toStartOfDay(ts))
+ORDER BY (key1, key2, toStartOfDay(ts), ts)
+TTL ts + interval 30 day
+ GROUP BY key1, key2, toStartOfDay(ts)
+ SET counter = countMergeState(counter),
+ ts = min(toStartOfDay(ts));
+
+CREATE TABLE test_ttl_group_by_raw
+(
+ `key1` UInt32,
+ `key2` UInt32,
+ `ts` DateTime
+) ENGINE = Null;
+
+CREATE MATERIALIZED VIEW test_ttl_group_by_mv
+ TO test_ttl_group_by_agg
+AS
+SELECT
+ `key1`,
+ `key2`,
+ `ts`,
+ countState() as counter
+FROM test_ttl_group_by_raw
+GROUP BY key1, key2, ts;
+
+-- stop merges to demonstrate data before / after
+-- a rolling up
+SYSTEM STOP TTL MERGES test_ttl_group_by_agg;
+SYSTEM STOP MERGES test_ttl_group_by_agg;
+
+INSERT INTO test_ttl_group_by_raw (key1, key2, ts)
+SELECT
+ 1,
+ 1,
+ toStartOfMinute(now() + number*60)
+FROM numbers(100);
+
+INSERT INTO test_ttl_group_by_raw (key1, key2, ts)
+SELECT
+ 1,
+ 1,
+ toStartOfMinute(now() + number*60)
+FROM numbers(100);
+
+INSERT INTO test_ttl_group_by_raw (key1, key2, ts)
+SELECT
+ 1,
+ 1,
+ toStartOfMinute(now() + number*60 - toIntervalDay(60))
+FROM numbers(100);
+
+INSERT INTO test_ttl_group_by_raw (key1, key2, ts)
+SELECT
+ 1,
+ 1,
+ toStartOfMinute(now() + number*60 - toIntervalDay(60))
+FROM numbers(100);
+
+SELECT
+ toYYYYMM(ts) AS m,
+ count(),
+ countMerge(counter)
+FROM test_ttl_group_by_agg
+GROUP BY m;
+
+┌──────m─┬─count()─┬─countMerge(counter)─┐
+│ 202307 │ 200 │ 200 │
+│ 202309 │ 200 │ 200 │
+└────────┴─────────┴─────────────────────┘
+
+SYSTEM START TTL MERGES test_ttl_group_by_agg;
+SYSTEM START MERGES test_ttl_group_by_agg;
+OPTIMIZE TABLE test_ttl_group_by_agg FINAL;
+
+SELECT
+ toYYYYMM(ts) AS m,
+ count(),
+ countMerge(counter)
+FROM test_ttl_group_by_agg
+GROUP BY m;
+
+┌──────m─┬─count()─┬─countMerge(counter)─┐
+│ 202307 │ 1 │ 200 │
+│ 202309 │ 100 │ 200 │
+└────────┴─────────┴─────────────────────┘
+```
### Multilevel TTL Group by
@@ -356,3 +457,5 @@ OPTIMIZE TABLE test_ttl_group_by FINAL;
└────────┴─────────┴────────────┴────────────────┴────────────────┘
```
+
+Also see the [Altinity Knowledge Base pages on the MergeTree table engine family](../../../engines/mergetree-table-engine-family).
\ No newline at end of file
diff --git a/content/en/altinity-kb-queries-and-syntax/ttl/ttl-recompress-example.md b/content/en/altinity-kb-queries-and-syntax/ttl/ttl-recompress-example.md
index 64539b8fcd..42ed0d8ea4 100644
--- a/content/en/altinity-kb-queries-and-syntax/ttl/ttl-recompress-example.md
+++ b/content/en/altinity-kb-queries-and-syntax/ttl/ttl-recompress-example.md
@@ -5,6 +5,7 @@ description: >
TTL Recompress example
---
+*See also [the Altinity Knowledge Base article on testing different compression codecs](../../../altinity-kb-schema-design/codecs/altinity-kb-how-to-test-different-compression-codecs).*
## Example how to create a table and define recompression rules
@@ -23,7 +24,7 @@ TTL event_time + toIntervalMonth(1) RECOMPRESS CODEC(ZSTD(1)),
event_time + toIntervalMonth(6) RECOMPRESS CODEC(ZSTD(6);
```
-Default comression is LZ4 [https://clickhouse.tech/docs/en/operations/server-configuration-parameters/settings/\#server-settings-compression](https://clickhouse.tech/docs/en/operations/server-configuration-parameters/settings/#server-settings-compression)
+Default compression is LZ4. See [the ClickHouse® documentation](https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings#server-settings-compression) for more information.
These TTL rules recompress data after 1 and 6 months.
@@ -49,4 +50,4 @@ ALTER TABLE hits
event_time + toIntervalMonth(6) RECOMPRESS CODEC(ZSTD(6));
```
-All columns have implicite default compression from server config, except `event_time`, that's why need to change to compression to `Default` for this column otherwise it won't be recompressed.
+All columns have the implicit default compression from the server config, except `event_time`; that's why we need to change the compression to `Default` for this column, otherwise it won't be recompressed.
diff --git a/content/en/altinity-kb-queries-and-syntax/update-via-dictionary.md b/content/en/altinity-kb-queries-and-syntax/update-via-dictionary.md
index 2d81cb6847..6b4f93f79b 100644
--- a/content/en/altinity-kb-queries-and-syntax/update-via-dictionary.md
+++ b/content/en/altinity-kb-queries-and-syntax/update-via-dictionary.md
@@ -101,11 +101,11 @@ FROM test_update
```
{{% alert title="Info" color="info" %}}
-In case of Replicated installation, Dictionary should be created on all nodes and source tables should have ReplicatedMergeTree engine and be replicated across all nodes.
+In case of Replicated installation, Dictionary should be created on all nodes and source tables should use the [ReplicatedMergeTree](../../altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated/) engine and be replicated across all nodes.
{{% /alert %}}
{{% alert title="Info" color="info" %}}
-[Starting](https://github.com/ClickHouse/ClickHouse/pull/10186) from 20.4, ClickHouse forbid by default any potential non-deterministic mutations.
+[Starting](https://github.com/ClickHouse/ClickHouse/pull/10186) from 20.4, ClickHouse® forbids by default any potentially non-deterministic mutations.
This behavior controlled by setting `allow_nondeterministic_mutations`. You can append it to query like this `ALTER TABLE xxx UPDATE ... WHERE ... SETTINGS allow_nondeterministic_mutations = 1;`
For `ON CLUSTER` queries, you would need to put this setting in default profile and restart ClickHouse servers.
{{% /alert %}}
diff --git a/content/en/altinity-kb-queries-and-syntax/variable-partitioning.md b/content/en/altinity-kb-queries-and-syntax/variable-partitioning.md
new file mode 100644
index 0000000000..8fbd8c5c3b
--- /dev/null
+++ b/content/en/altinity-kb-queries-and-syntax/variable-partitioning.md
@@ -0,0 +1,91 @@
+---
+title: "Adjustable table partitioning"
+linkTitle: "Adjustable table partitioning"
+weight: 100
+description: >-
+ An approach that allows you to redefine partitioning without table creation
+---
+
+In the example below, the partition key is calculated via the `MATERIALIZED` column expression `toDate(toStartOfInterval(ts, toIntervalT(...)))`, but the partition id can also be generated on the application side and inserted into ClickHouse® as is (see the sketch at the end of this article).
+
+```sql
+CREATE TABLE tbl
+(
+ `ts` DateTime,
+ `key` UInt32,
+ `partition_key` Date MATERIALIZED toDate(toStartOfInterval(ts, toIntervalYear(1)))
+)
+ENGINE = MergeTree
+PARTITION BY (partition_key, ignore(ts))
+ORDER BY key;
+
+SET send_logs_level = 'trace';
+
+INSERT INTO tbl SELECT toDateTime(toDate('2020-01-01') + number) as ts, number as key FROM numbers(300);
+
+Renaming temporary part tmp_insert_20200101-0_1_1_0 to 20200101-0_1_1_0
+
+INSERT INTO tbl SELECT toDateTime(toDate('2021-01-01') + number) as ts, number as key FROM numbers(300);
+
+Renaming temporary part tmp_insert_20210101-0_2_2_0 to 20210101-0_2_2_0
+
+ALTER TABLE tbl
+ MODIFY COLUMN `partition_key` Date MATERIALIZED toDate(toStartOfInterval(ts, toIntervalMonth(1)));
+
+INSERT INTO tbl SELECT toDateTime(toDate('2022-01-01') + number) as ts, number as key FROM numbers(300);
+
+Renaming temporary part tmp_insert_20220101-0_3_3_0 to 20220101-0_3_3_0
+Renaming temporary part tmp_insert_20220201-0_4_4_0 to 20220201-0_4_4_0
+Renaming temporary part tmp_insert_20220301-0_5_5_0 to 20220301-0_5_5_0
+Renaming temporary part tmp_insert_20220401-0_6_6_0 to 20220401-0_6_6_0
+Renaming temporary part tmp_insert_20220501-0_7_7_0 to 20220501-0_7_7_0
+Renaming temporary part tmp_insert_20220601-0_8_8_0 to 20220601-0_8_8_0
+Renaming temporary part tmp_insert_20220701-0_9_9_0 to 20220701-0_9_9_0
+Renaming temporary part tmp_insert_20220801-0_10_10_0 to 20220801-0_10_10_0
+Renaming temporary part tmp_insert_20220901-0_11_11_0 to 20220901-0_11_11_0
+Renaming temporary part tmp_insert_20221001-0_12_12_0 to 20221001-0_12_12_0
+
+
+ALTER TABLE tbl
+ MODIFY COLUMN `partition_key` Date MATERIALIZED toDate(toStartOfInterval(ts, toIntervalDay(1)));
+
+INSERT INTO tbl SELECT toDateTime(toDate('2023-01-01') + number) as ts, number as key FROM numbers(5);
+
+Renaming temporary part tmp_insert_20230101-0_13_13_0 to 20230101-0_13_13_0
+Renaming temporary part tmp_insert_20230102-0_14_14_0 to 20230102-0_14_14_0
+Renaming temporary part tmp_insert_20230103-0_15_15_0 to 20230103-0_15_15_0
+Renaming temporary part tmp_insert_20230104-0_16_16_0 to 20230104-0_16_16_0
+Renaming temporary part tmp_insert_20230105-0_17_17_0 to 20230105-0_17_17_0
+
+
+SELECT _partition_id, min(ts), max(ts), count() FROM tbl GROUP BY _partition_id ORDER BY _partition_id;
+
+┌─_partition_id─┬─────────────min(ts)─┬─────────────max(ts)─┬─count()─┐
+│ 20200101-0 │ 2020-01-01 00:00:00 │ 2020-10-26 00:00:00 │ 300 │
+│ 20210101-0 │ 2021-01-01 00:00:00 │ 2021-10-27 00:00:00 │ 300 │
+│ 20220101-0 │ 2022-01-01 00:00:00 │ 2022-01-31 00:00:00 │ 31 │
+│ 20220201-0 │ 2022-02-01 00:00:00 │ 2022-02-28 00:00:00 │ 28 │
+│ 20220301-0 │ 2022-03-01 00:00:00 │ 2022-03-31 00:00:00 │ 31 │
+│ 20220401-0 │ 2022-04-01 00:00:00 │ 2022-04-30 00:00:00 │ 30 │
+│ 20220501-0 │ 2022-05-01 00:00:00 │ 2022-05-31 00:00:00 │ 31 │
+│ 20220601-0 │ 2022-06-01 00:00:00 │ 2022-06-30 00:00:00 │ 30 │
+│ 20220701-0 │ 2022-07-01 00:00:00 │ 2022-07-31 00:00:00 │ 31 │
+│ 20220801-0 │ 2022-08-01 00:00:00 │ 2022-08-31 00:00:00 │ 31 │
+│ 20220901-0 │ 2022-09-01 00:00:00 │ 2022-09-30 00:00:00 │ 30 │
+│ 20221001-0 │ 2022-10-01 00:00:00 │ 2022-10-27 00:00:00 │ 27 │
+│ 20230101-0 │ 2023-01-01 00:00:00 │ 2023-01-01 00:00:00 │ 1 │
+│ 20230102-0 │ 2023-01-02 00:00:00 │ 2023-01-02 00:00:00 │ 1 │
+│ 20230103-0 │ 2023-01-03 00:00:00 │ 2023-01-03 00:00:00 │ 1 │
+│ 20230104-0 │ 2023-01-04 00:00:00 │ 2023-01-04 00:00:00 │ 1 │
+│ 20230105-0 │ 2023-01-05 00:00:00 │ 2023-01-05 00:00:00 │ 1 │
+└───────────────┴─────────────────────┴─────────────────────┴─────────┘
+
+
+SELECT count() FROM tbl WHERE ts > '2023-01-04';
+
+Key condition: unknown
+MinMax index condition: (column 0 in [1672758001, +Inf))
+Selected 1/17 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges
+Spreading mark ranges among streams (default reading)
+Reading 1 ranges in order from part 20230105-0_17_17_0, approx. 1 rows starting from 0
+```
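+
+As mentioned above, instead of a `MATERIALIZED` expression the application can supply the partition key itself; a minimal sketch (hypothetical table):
+
+```sql
+CREATE TABLE tbl_app_partitioned
+(
+    `ts` DateTime,
+    `key` UInt32,
+    `partition_key` Date  -- filled by the application, no MATERIALIZED expression
+)
+ENGINE = MergeTree
+PARTITION BY (partition_key, ignore(ts))
+ORDER BY key;
+
+INSERT INTO tbl_app_partitioned VALUES (now(), 1, toDate('2024-01-01'));
+```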
diff --git a/content/en/altinity-kb-queries-and-syntax/window-functions.md b/content/en/altinity-kb-queries-and-syntax/window-functions.md
index 6e55afca69..c4e26b62a1 100644
--- a/content/en/altinity-kb-queries-and-syntax/window-functions.md
+++ b/content/en/altinity-kb-queries-and-syntax/window-functions.md
@@ -4,19 +4,14 @@ linkTitle: "Window functions"
description: >
Window functions
---
-| Link | [blog.tinybird.co/2021/03/16/c…](https://blog.tinybird.co/2021/03/16/coming-soon-on-clickhouse-window-functions/) |
-| :--- | :--- |
-| Date | Mar 26, 2021 |
-
+#### Resources:
-[blog.tinybird.co/2021/03/16/c…](https://blog.tinybird.co/2021/03/16/coming-soon-on-clickhouse-window-functions/)
+* [Tutorial: ClickHouse® Window Functions](https://altinity.com/blog/clickhouse-window-functions-current-state-of-the-art)
+* [Video: Fun with ClickHouse Window Functions](https://www.youtube.com/watch?v=sm_vUdMQz4s)
+* [Blog: Battle of the Views: ClickHouse Window View vs. Live View](https://altinity.com/blog/battle-of-the-views-clickhouse-window-view-vs-live-view)
-> An exploration on what's possible to do with the most recent experimental feature on ClickHouse - window functions, and an overview of other interesting feat...
-
-[Windows Functions Blog Link](https://blog.tinybird.co/2021/03/16/coming-soon-on-clickhouse-window-functions/)
-
-#### How Do I Simulate Window Functions Using Arrays on older versions of clickhouse?
+#### How Do I Simulate Window Functions Using Arrays on older versions of ClickHouse?
1. Group with groupArray.
2. Calculate the needed metrics.
diff --git a/content/en/altinity-kb-schema-design/_index.md b/content/en/altinity-kb-schema-design/_index.md
index 7ffb0165c0..d59eb63d6e 100644
--- a/content/en/altinity-kb-schema-design/_index.md
+++ b/content/en/altinity-kb-schema-design/_index.md
@@ -6,6 +6,6 @@ keywords:
- clickhouse lowcardinality
- clickhouse materialized view
description: >
- All you need to know about ClickHouse schema design, including materialized view, limitations, lowcardinality, codecs.
+ All you need to know about ClickHouse® schema design, including materialized view, limitations, lowcardinality, codecs.
weight: 7
---
diff --git a/content/en/altinity-kb-schema-design/altinity-kb-dictionaries-vs-lowcardinality.md b/content/en/altinity-kb-schema-design/altinity-kb-dictionaries-vs-lowcardinality.md
index 5593b87658..a3b9e7ef26 100644
--- a/content/en/altinity-kb-schema-design/altinity-kb-dictionaries-vs-lowcardinality.md
+++ b/content/en/altinity-kb-schema-design/altinity-kb-dictionaries-vs-lowcardinality.md
@@ -16,4 +16,4 @@ From the other hand: if data can be changed in future, and that change should im
For example if you need to change the used currency rare every day- it would be quite stupid to update all historical records to apply the newest exchange rate. And putting it to dict will allow to do calculations with latest exchange rate at select time.
-For dictionary it's possible to mark some of the attributes as injective. An attribute is called injective if different attribute values correspond to different keys. It would allow ClickHouse to replace dictGet call in GROUP BY with cheap dict key.
+For a dictionary it's possible to mark some of the attributes as injective. An attribute is called injective if different keys always correspond to different attribute values. This allows ClickHouse® to replace a dictGet call in a GROUP BY with the cheap dictionary key.
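+
+A sketch of the kind of query that benefits (the dictionary, table, and column names are hypothetical):
+
+```sql
+-- If 'code' is declared injective in the dictionary definition,
+-- ClickHouse can group by currency_id internally and call dictGet only once per group
+SELECT
+    dictGet('currency_dict', 'code', currency_id) AS code,
+    sum(amount) AS total
+FROM payments
+GROUP BY code;
+```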
diff --git a/content/en/altinity-kb-schema-design/altinity-kb-jsonasstring-and-mat.-view-as-json-parser.md b/content/en/altinity-kb-schema-design/altinity-kb-jsonasstring-and-mat.-view-as-json-parser.md
index b10df86000..feca7d9434 100644
--- a/content/en/altinity-kb-schema-design/altinity-kb-jsonasstring-and-mat.-view-as-json-parser.md
+++ b/content/en/altinity-kb-schema-design/altinity-kb-jsonasstring-and-mat.-view-as-json-parser.md
@@ -35,4 +35,4 @@ SELECT * FROM datastore;
└──────┴────┴─────┘
```
-See also: [JSONExtract to parse many attributes at a time](../altinity-kb-queries-and-syntax/jsonextract-to-parse-many-attributes-at-a-time/)
+See also: [JSONExtract to parse many attributes at a time](/altinity-kb-queries-and-syntax/jsonextract-to-parse-many-attributes-at-a-time/)
diff --git a/content/en/altinity-kb-schema-design/altinity-kb-jsoneachrow-tuples-and-mvs.md b/content/en/altinity-kb-schema-design/altinity-kb-jsoneachrow-tuples-and-mvs.md
new file mode 100644
index 0000000000..b17fa42788
--- /dev/null
+++ b/content/en/altinity-kb-schema-design/altinity-kb-jsoneachrow-tuples-and-mvs.md
@@ -0,0 +1,151 @@
+---
+title: "JSONEachRow, Tuples, Maps and Materialized Views"
+linkTitle: "JSONEachRow, tuple, map and MVs"
+weight: 100
+description: >-
+ How to use Tuple() and Map() with nested JSON messages in MVs
+---
+
+## Using JSONEachRow with Tuple() in Materialized views
+
+Sometimes we can have a nested JSON message with a fixed-size structure like this:
+
+```json
+{"s": "val1", "t": {"i": 42, "d": "2023-09-01 12:23:34.231"}}
+```
+
+Values can be NULL but the structure should be fixed. In this case we can use `Tuple()` to parse the JSON message:
+
+```sql
+CREATE TABLE tests.nest_tuple_source
+(
+ `s` String,
+ `t` Tuple(`i` UInt8, `d` DateTime64(3))
+)
+ENGINE = Null
+```
+
+We can use the above table as a source for a materialized view, as if it were a Kafka table. In case our message has unexpected keys, we can make the Kafka/source table ignore them with the setting (23.3+):
+
+`input_format_json_ignore_unknown_keys_in_named_tuple = 1`
+
+```sql
+CREATE MATERIALIZED VIEW tests.mv_nest_tuple TO tests.nest_tuple_destination
+AS
+SELECT
+ s AS s,
+ t.1 AS i,
+ t.2 AS d
+FROM tests.nest_tuple_source
+```
+
+Also, we need a destination table with a structure adapted to the source table:
+
+```sql
+CREATE TABLE tests.nest_tuple_destination
+(
+ `s` String,
+ `i` UInt8,
+ `d` DateTime64(3)
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+
+INSERT INTO tests.nest_tuple_source FORMAT JSONEachRow {"s": "val1", "t": {"i": 42, "d": "2023-09-01 12:23:34.231"}}
+
+
+SELECT *
+FROM nest_tuple_destination
+
+┌─s────┬──i─┬───────────────────────d─┐
+│ val1 │ 42 │ 2023-09-01 12:23:34.231 │
+└──────┴────┴─────────────────────────┘
+```
+
+Some hints:
+
+- 💡 Beware of column names in ClickHouse®: they are case-sensitive. If a JSON message has the key names in capitals, the Kafka/source table should have the same column names in capitals.
+
+- 💡 Also, this `Tuple()` approach is not for dynamic JSON schemas, as explained above. If the schema is dynamic, use the classic approach with the `JSONExtract` set of functions (see the sketch after this list). If the schema is fixed, you can use `Tuple()` with the `JSONEachRow` format, but you need to use classic tuple notation (index references) inside the MV, because named tuples inside the MV won't work:
+
+- 💡 `tuple.1 AS column1, tuple.2 AS column2` **CORRECT!**
+- 💡 `tuple.column1 AS column1, tuple.column2 AS column2` **WRONG!**
+- 💡 use `AS` (alias) for aggregated columns or columns affected by functions, because MVs do not work by positional arguments like SELECTs do; they work by names
+
+Example:
+
+- `parseDateTime32BestEffort(t_date)` **WRONG!**
+- `parseDateTime32BestEffort(t_date) AS t_date` **CORRECT!**
+
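+A sketch of the "classic approach" mentioned above, for dynamic schemas: keep the raw message as a String and parse it with the `JSONExtract*` functions (the inline message below is just an illustration):
+
+```sql
+SELECT
+    JSONExtractString(raw, 's') AS s,
+    JSONExtractUInt(raw, 't', 'i') AS i
+FROM
+(
+    SELECT '{"s": "val1", "t": {"i": 42, "d": "2023-09-01 12:23:34.231"}}' AS raw
+);
+```
+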
+## Using JSONEachRow with Map() in Materialized views
+
+Sometimes we can have nested JSON messages with a dynamic size like these, where all elements inside the nested JSON must be of the same type:
+
+```json
+{"k": "val1", "st": {"a": 42, "b": 1.877363}}
+
+{"k": "val2", "st": {"a": 43, "b": 2.3343, "c": 34.4434}}
+
+{"k": "val3", "st": {"a": 66743}}
+```
+
+In this case we can use Map() to parse the JSON message:
+
+```sql
+
+CREATE TABLE tests.nest_map_source
+(
+ `k` String,
+ `st` Map(String, Float64)
+)
+Engine = Null
+
+CREATE MATERIALIZED VIEW tests.mv_nest_map TO tests.nest_map_destination
+AS
+SELECT
+ k AS k,
+ st['a'] AS st_a,
+ st['b'] AS st_b,
+ st['c'] AS st_c
+FROM tests.nest_map_source
+
+
+CREATE TABLE tests.nest_map_destination
+(
+ `k` String,
+ `st_a` Float64,
+ `st_b` Float64,
+ `st_c` Float64
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+```
+
+By default, ClickHouse will ignore unknown keys in the Map(), but if you want the insert to fail when there are unknown keys, use the setting:
+
+`input_format_skip_unknown_fields = 0`
+
+```sql
+INSERT INTO tests.nest_map_source FORMAT JSONEachRow {"k": "val1", "st": {"a": 42, "b": 1.877363}}
+INSERT INTO tests.nest_map_source FORMAT JSONEachRow {"k": "val2", "st": {"a": 43, "b": 2.3343, "c": 34.4434}}
+INSERT INTO tests.nest_map_source FORMAT JSONEachRow {"k": "val3", "st": {"a": 66743}}
+
+
+SELECT *
+FROM tests.nest_map_destination
+
+┌─k────┬─st_a─┬─────st_b─┬─st_c─┐
+│ val1 │ 42 │ 1.877363 │ 0 │
+└──────┴──────┴──────────┴──────┘
+┌─k────┬──st_a─┬─st_b─┬─st_c─┐
+│ val3 │ 66743 │ 0 │ 0 │
+└──────┴───────┴──────┴──────┘
+┌─k────┬─st_a─┬───st_b─┬────st_c─┐
+│ val2 │ 43 │ 2.3343 │ 34.4434 │
+└──────┴──────┴────────┴─────────┘
+```
+
+See also:
+
+- [JSONExtract to parse many attributes at a time](/altinity-kb-queries-and-syntax/jsonextract-to-parse-many-attributes-at-a-time/)
+- [JSONAsString and Mat. View as JSON parser](/altinity-kb-schema-design/altinity-kb-jsonasstring-and-mat.-view-as-json-parser/)
\ No newline at end of file
diff --git a/content/en/altinity-kb-schema-design/backfill_column.md b/content/en/altinity-kb-schema-design/backfill_column.md
index 74b6ded2fb..0d1ff01b96 100644
--- a/content/en/altinity-kb-schema-design/backfill_column.md
+++ b/content/en/altinity-kb-schema-design/backfill_column.md
@@ -10,6 +10,15 @@ description: >-
Sometimes you need to add a column into a huge table and backfill it with a data from another source, without reingesting all data.
+
+{{% alert title="Replicated setup" color="info" %}}
+In case of a replicated / sharded setup you need to have the dictionary and source table (dict_table / item_dict) on all nodes and they have to all have EXACTLY the same data. The easiest way to do this is to make dict_table replicated.
+
+In this case, you will need to set the setting `allow_nondeterministic_mutations=1` on the user that runs the `ALTER TABLE`. See the [ClickHouse® docs](https://clickhouse.com/docs/en/operations/settings/settings#allow_nondeterministic_mutations) for more information about this setting.
+{{% /alert %}}
+
+
+
Here is an example.
```sql
@@ -160,7 +169,3 @@ WHERE key1 = 11111
│ 11111 │ 90 │ 9 │ 2021-11-30 │ 11111xxx │
└───────┴──────┴──────┴────────────┴──────────┘
```
-
-In case of a replicated / sharded setup you need to have the dictionary and source table (dict_table / item_dict) on all nodes and they have to all have EXACTLY the same data. The easiest way to do this is to make dict_table replicated.
-
-In this case, you will need to set the setting `allow_nondeterministic_mutations=1` on the user that runs the `ALTER TABLE`. See the [ClickHouse docs](https://clickhouse.com/docs/en/operations/settings/settings#allow_nondeterministic_mutations) for more information about this setting.
diff --git a/content/en/altinity-kb-schema-design/best-schema-for-storing-many-metrics-registered-from-the-single-source.md b/content/en/altinity-kb-schema-design/best-schema-for-storing-many-metrics-registered-from-the-single-source.md
index 80f9d7430a..886d824d84 100644
--- a/content/en/altinity-kb-schema-design/best-schema-for-storing-many-metrics-registered-from-the-single-source.md
+++ b/content/en/altinity-kb-schema-design/best-schema-for-storing-many-metrics-registered-from-the-single-source.md
@@ -72,7 +72,7 @@ Pros and cons:
* easy to extend, you can have very dynamic / huge number of metrics.
* the only option to store hierarchical / complicated data structures, also with arrays etc. inside.
* good for sparse recording (each time point can have only 1% of all the possible metrics)
- * ClickHouse has efficient API to work with JSON
+ * ClickHouse® has efficient API to work with JSON
* nice if your data originally came in JSON (don't need to reformat)
* Cons
* uses storage non efficiently
@@ -88,14 +88,14 @@ Same pros/cons as raw JSON, but usually bit more compact than JSON
Pros and cons:
* Pros
- * clickhouse has efficient API to work with URLs (extractURLParameter etc)
+ * ClickHouse has efficient API to work with URLs (extractURLParameter etc)
* can have sense if you data came in such format (i.e. you can store GET / POST request data directly w/o reprocessing)
* Cons
* slower than arrays
### 2e Several 'baskets' of arrays
-i.e.: timestamp, sourceid, metric_names_basket1, metric_values_basker1, ..., metric_names_basketN, metric_values_basketN
+i.e.: timestamp, sourceid, metric_names_basket1, metric_values_basket1, ..., metric_names_basketN, metric_values_basketN
The same as 2b, but there are several key-value arrays ('basket'), and metric go to one particular basket depending on metric name (and optionally by metric type)
Pros and cons:
@@ -115,18 +115,14 @@ With that approach you can have as many metrics as you need and they can be very
At any time you can decide to move one more metric to a separate column `ALTER TABLE ... ADD COLUMN metricX Float64 MATERIALIZED metrics.value[indexOf(metrics.names,'metricX')];`
-### 2e Subcolumns [future]
+## 3. JSON type
-[https://github.com/ClickHouse/ClickHouse/issues/23516](https://github.com/ClickHouse/ClickHouse/issues/23516)
+[A New Powerful JSON Data Type for ClickHouse](https://clickhouse.com/blog/a-new-powerful-json-data-type-for-clickhouse)
-WIP currently, ETA of first beta = autumn 2021
Related links:
-[There is one article on our blog on this subject with some benchmarks.](https://www.altinity.com/blog/2019/5/23/handling-variable-time-series-efficiently-in-clickhouse")
+[There is one article on our blog on this subject with some benchmarks.](https://www.altinity.com/blog/2019/5/23/handling-variable-time-series-efficiently-in-clickhouse)
[Slides from Percona Live](https://www.percona.com/sites/default/files/ple19-slides/day1-pm/clickhouse-for-timeseries.pdf")
-[Uber article about how they adapted combined approach](https://eng.uber.com/logging/")
-
-[Slides for Uber log storage approach](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup40/uber.pdf")
diff --git a/content/en/altinity-kb-schema-design/change-order-by.md b/content/en/altinity-kb-schema-design/change-order-by.md
index 4cad274ec8..3253c0d1b7 100644
--- a/content/en/altinity-kb-schema-design/change-order-by.md
+++ b/content/en/altinity-kb-schema-design/change-order-by.md
@@ -37,14 +37,24 @@ ORDER BY (column1, column2, column3)
4. Copy data from `example_table_old` into `example_table_temp`
- a. Use this query to generate a list of INSERT statements
+ a. Use this query to generate a list of INSERT statements
```sql
+ -- old ClickHouse versions without support for `where _partition_id`
select concat('insert into example_table_temp select * from example_table_old where toYYYYMM(date)=',partition) as cmd,
database, table, partition, sum(rows), sum(bytes_on_disk), count()
from system.parts
where database='default' and table='example_table_old'
group by database, table, partition
order by partition
+
+ -- newer ClickHouse versions with support for `where _partition_id`
+ select concat('insert into example_table_temp select * from ', table,' where _partition_id = \'',partition_id, '\';') as cmd,
+ database, table, partition, sum(rows), sum(bytes_on_disk), count()
+ from system.parts
+ where database='default' and table='example_table_old'
+ group by database, table, partition_id, partition
+ order by partition_id
+
```
b. Create an intermediate table
@@ -70,7 +80,7 @@ ORDER BY (column1, column2, column3)
order by partition
```
-5. Attach data from the intermediate table to `example_table`
+6. Attach data from the intermediate table to `example_table`
a. Use this query to generate a list of ATTACH statements
```sql
@@ -93,5 +103,5 @@ ORDER BY (column1, column2, column3)
order by partition
```
-6. Drop `example_table_old` and `example_table_temp`
+7. Drop `example_table_old` and `example_table_temp`
diff --git a/content/en/altinity-kb-schema-design/codecs/_index.md b/content/en/altinity-kb-schema-design/codecs/_index.md
index 0321fe3371..e3281b82ff 100644
--- a/content/en/altinity-kb-schema-design/codecs/_index.md
+++ b/content/en/altinity-kb-schema-design/codecs/_index.md
@@ -4,6 +4,25 @@ linkTitle: "Codecs"
description: >
Codecs
---
+
+| Codec Name | Recommended Data Types | Performance Notes |
+|------------------|--------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| LZ4 | Any | Used by default. Extremely fast; good compression; balanced speed and efficiency |
+| ZSTD(level) | Any | Good compression; pretty fast; best for high compression needs. Don't use levels higher than 3. |
+| LZ4HC(level) | Any | LZ4 High Compression algorithm with configurable level; slower but better compression than LZ4, but decompression is still fast. |
+| Delta | Integer Types, Time Series Data, Timestamps | Preprocessor (should be followed by some compression codec). Stores difference between neighboring values; good for monotonically increasing data. |
+| DoubleDelta | Integer Types, Time Series Data | Stores difference between neighboring delta values; suitable for time series data |
+| Gorilla | Floating Point Types | Calculates XOR between current and previous value; suitable for slowly changing numbers |
+| T64 | Integer, Time Series Data, Timestamps | Preprocessor (should be followed by some compression codec). Crops unused high bits; puts them into a 64x64 bit matrix; optimized for 64-bit data types |
+| GCD | Integer Numbers | Preprocessor (should be followed by some compression codec). Greatest common divisor compression; divides values by a common divisor; effective for divisible integer sequences |
+| FPC | Floating Point Numbers | Designed for Float64; Algorithm detailed in [FPC paper](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf), [ClickHouse® PR #37553](https://github.com/ClickHouse/ClickHouse/pull/37553) |
+| ZSTD_QAT | Any | Requires hardware support for QuickAssist Technology (QAT) hardware; provides accelerated compression tasks |
+| DEFLATE_QPL | Any | Requires hardware support for Intel’s QuickAssist Technology for DEFLATE compression; enhanced performance for specific hardware |
+| LowCardinality | String | It's not a codec, but a datatype modifier. Reduces representation size; effective for columns with low cardinality |
+| NONE | Non-compressible data with very high entropy, like some random strings, or some AggregateFunction states | No compression at all. Can be used on columns that cannot be compressed anyway. |
+
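+A sketch of how these codecs are applied in a table definition (the table and column names are hypothetical):
+
+```sql
+CREATE TABLE codec_example
+(
+    ts DateTime CODEC(Delta, ZSTD(1)),           -- monotonically increasing timestamps
+    temperature Float64 CODEC(Gorilla, ZSTD(1)), -- slowly changing floats
+    device_id UInt64 CODEC(T64, LZ4),            -- 64-bit integers with unused high bits
+    raw_payload String CODEC(NONE)               -- high-entropy data, skip compression
+)
+ENGINE = MergeTree
+ORDER BY (device_id, ts);
+```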
+
+
See
[How to test different compression codecs](altinity-kb-how-to-test-different-compression-codecs)
diff --git a/content/en/altinity-kb-schema-design/floats-vs-decimals.md b/content/en/altinity-kb-schema-design/floats-vs-decimals.md
index ddb8683916..bb5ed7eb21 100644
--- a/content/en/altinity-kb-schema-design/floats-vs-decimals.md
+++ b/content/en/altinity-kb-schema-design/floats-vs-decimals.md
@@ -43,7 +43,7 @@ SELECT (toDecimal64(100000000000000000., 1) - toDecimal64(100000000000000000., 1
```
{{% alert title="Warning" color="warning" %}}
-Because clickhouse uses MPP order of execution of a single query can vary on each run, and you can get slightly different results from the float column every time you run the query.
+Because ClickHouse® uses MPP order of execution of a single query can vary on each run, and you can get slightly different results from the float column every time you run the query.
Usually, this deviation is small, but it can be significant when some kind of arithmetic operation is performed on very large and very small numbers at the same time.
{{% /alert %}}
diff --git a/content/en/altinity-kb-schema-design/how-much-is-too-much.md b/content/en/altinity-kb-schema-design/how-much-is-too-much.md
index 88978592f5..193e3c4c82 100644
--- a/content/en/altinity-kb-schema-design/how-much-is-too-much.md
+++ b/content/en/altinity-kb-schema-design/how-much-is-too-much.md
@@ -1,14 +1,15 @@
---
-title: "How much is too much?"
-linkTitle: "How much is too much?"
+title: "ClickHouse® limitations"
+linkTitle: "ClickHouse limitations"
weight: 100
-description: >-
- ClickHouse Limitations.
+description:
+ How much is too much?
+keywords:
+ - clickhouse limitations
+ - clickhouse too many parts
---
-## How much is too much?
-
-In most of the cases clickhouse don't have any hard limits. But obsiously there there are some practical limitation / barriers for different things - often they are caused by some system / network / filesystem limitation.
+In most cases ClickHouse® doesn't have any hard limits. But obviously there are some practical limitations / barriers for different things - often they are caused by some system / network / filesystem limitation.
So after reaching some limits you can get different kind of problems, usually it never a failures / errors, but different kinds of degradations (slower queries / high cpu/memory usage, extra load on the network / zookeeper etc).
@@ -16,14 +17,18 @@ While those numbers can vary a lot depending on your hardware & settings there i
### Number of tables (system-wide, across all databases)
-- non-replicated MergeTree-family tables = few thousands is still acceptable, if you don't do realtime inserts in more that few dozens of them. See [#32259](https://github.com/ClickHouse/ClickHouse/issues/32259)
-- ReplicatedXXXMergeTree = few hundreds is still acceptable, if you don't do realtime inserts in more that few dozens of them. Every Replicated table comes with it's own cost (need to do housekeepeing operations, monitoing replication queues etc). See [#31919](https://github.com/ClickHouse/ClickHouse/issues/31919)
+- non-replicated [MergeTree-family](https://kb.altinity.com/engines/mergetree-table-engine-family/) tables = a few thousand is still acceptable, if you don't do realtime inserts in more than a few dozen of them. See [#32259](https://github.com/ClickHouse/ClickHouse/issues/32259)
+- ReplicatedXXXMergeTree = a few hundred is still acceptable, if you don't do realtime inserts in more than a few dozen of them. Every Replicated table comes with its own cost (need to do housekeeping operations, monitoring replication queues etc). See [#31919](https://github.com/ClickHouse/ClickHouse/issues/31919)
- Log family table = even dozens of thousands is still ok, especially if database engine = Lazy is used.
### Number of databases
Fewer than number of tables (above). Dozens / hundreds is usually still acceptable.
+### Number of inserts per second
+
+For usual (non-async) inserts - a dozen per second is enough. Every insert creates a part; if you create parts too often, ClickHouse will not be able to merge them and you will be getting 'too many parts' errors. A sketch of async inserts, which let the server do the batching, is shown below.
+
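+If the clients cannot batch the data themselves, async inserts are one way to keep the parts count low; a sketch (the table `tbl` and its columns are hypothetical):
+
+```sql
+-- The server buffers many small inserts and flushes them as a single part
+INSERT INTO tbl SETTINGS async_insert = 1, wait_for_async_insert = 1
+VALUES (1, 'a');
+```
+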
### Number of columns in the table
Up to a few hundreds. With thousands of columns the inserts / background merges may become slower / require more of RAM.
@@ -59,17 +64,17 @@ Dozens is still ok. More may require having more complex (non-flat) routing.
2 is minimum for HA. 3 is a 'golden standard'. Up to 6-8 is still ok. If you have more with realtime inserts - it can impact the zookeeper traffic.
-### Number of zookeeper nodes in the ensemble
+### Number of [Zookeeper nodes](https://docs.altinity.com/operationsguide/clickhouse-zookeeper/) in the ensemble
-3 (Three) for most of the cases is enough (you can loose one node). Using more nodes allows to scale up read throughput for zookeeper, but don't improve writes at all.
+3 (Three) is enough for most cases (you can lose one node). Using more nodes allows you to scale up read throughput for zookeeper, but doesn't improve writes at all.
-### Number of materialized view attached to a single table.
+### Number of [materialized views](/altinity-kb-schema-design/materialized-views/) attached to a single table.
-Up to few. The less the better if the table is getting realtime inserts. (no matter if MV are chained or all are feeded from the same source table).
+Up to a few. The fewer the better if the table is getting realtime inserts (no matter if the MVs are chained or all are fed from the same source table).
-The more you have the more costy your inserts are, and the bigger risks to get some inconsitencies between some MV (inserts to MV and main table are not atomic).
+The more you have, the more costly your inserts are, and the bigger the risk of getting some inconsistencies between MVs (inserts to the MV and the main table are not atomic).
-If the table don't have realtime inserts you can have more MV.
+If the table doesn't have realtime inserts you can have more MVs.
### Number of projections inside a single table.
@@ -81,10 +86,10 @@ One to about a dozen. Different types of indexes has different penalty, bloom_fi
At some point your inserts will slow down. Try to create the minimum possible number of indexes.
You can combine many columns into a single index and this index will work for any predicate but create less impact.
-### Number of Kafka tables / consumers inside
+### Number of [Kafka tables / consumers](https://altinity.com/blog/kafka-engine-the-story-continues) inside
A high number of Kafka tables may be quite expensive (every consumer = a very expensive librdkafka object with several threads inside).
-Usually alternative approaches are preferrable (mixing several datastreams in one topic, denormalizing, consuming several topics of identical structure with a single Kafka table, etc).
+Usually alternative approaches are preferable (mixing several datastreams in one topic, denormalizing, consuming several topics of identical structure with a single Kafka table, etc).
If you really need a lot of Kafka tables you may need more ram / CPU on the node and
increase `background_message_broker_schedule_pool_size` (default is 16) to the number of Kafka tables.
diff --git a/content/en/altinity-kb-schema-design/ingestion-aggregate-function.md b/content/en/altinity-kb-schema-design/ingestion-aggregate-function.md
new file mode 100644
index 0000000000..a9158a7422
--- /dev/null
+++ b/content/en/altinity-kb-schema-design/ingestion-aggregate-function.md
@@ -0,0 +1,97 @@
+---
+title: "Ingestion of AggregateFunction"
+linkTitle: "Ingestion of AggregateFunction"
+weight: 100
+description: >-
+ ClickHouse® - How to insert AggregateFunction data
+---
+
+## How to insert AggregateFunction data
+
+### Ephemeral column
+
+```sql
+CREATE TABLE users (
+ uid Int16,
+ updated SimpleAggregateFunction(max, DateTime),
+ name_stub String Ephemeral,
+ name AggregateFunction(argMax, String, DateTime)
+ default arrayReduce('argMaxState', [name_stub], [updated])
+) ENGINE=AggregatingMergeTree order by uid;
+
+INSERT INTO users(uid, updated, name_stub) VALUES (1231, '2020-01-02 00:00:00', 'Jane');
+
+INSERT INTO users(uid, updated, name_stub) VALUES (1231, '2020-01-01 00:00:00', 'John');
+
+SELECT
+ uid,
+ max(updated) AS updated,
+ argMaxMerge(name)
+FROM users
+GROUP BY uid
+┌──uid─┬─────────────updated─┬─argMaxMerge(name)─┐
+│ 1231 │ 2020-01-02 00:00:00 │ Jane │
+└──────┴─────────────────────┴───────────────────┘
+```
+
+### Input function
+
+```sql
+CREATE TABLE users (
+ uid Int16,
+ updated SimpleAggregateFunction(max, DateTime),
+ name AggregateFunction(argMax, String, DateTime)
+) ENGINE=AggregatingMergeTree order by uid;
+
+INSERT INTO users
+SELECT uid, updated, arrayReduce('argMaxState', [name], [updated])
+FROM input('uid Int16, updated DateTime, name String') FORMAT Values (1231, '2020-01-02 00:00:00', 'Jane');
+
+INSERT INTO users
+SELECT uid, updated, arrayReduce('argMaxState', [name], [updated])
+FROM input('uid Int16, updated DateTime, name String') FORMAT Values (1231, '2020-01-01 00:00:00', 'John');
+
+SELECT
+ uid,
+ max(updated) AS updated,
+ argMaxMerge(name)
+FROM users
+GROUP BY uid;
+┌──uid─┬─────────────updated─┬─argMaxMerge(name)─┐
+│ 1231 │ 2020-01-02 00:00:00 │ Jane │
+└──────┴─────────────────────┴───────────────────┘
+```
+
+### Materialized View And Null Engine
+
+```sql
+CREATE TABLE users (
+ uid Int16,
+ updated SimpleAggregateFunction(max, DateTime),
+ name AggregateFunction(argMax, String, DateTime)
+) ENGINE=AggregatingMergeTree order by uid;
+
+CREATE TABLE users_null (
+ uid Int16,
+ updated DateTime,
+ name String
+) ENGINE=Null;
+
+CREATE MATERIALIZED VIEW users_mv TO users AS
+SELECT uid, updated, arrayReduce('argMaxState', [name], [updated]) name
+FROM users_null;
+
+INSERT INTO users_null Values (1231, '2020-01-02 00:00:00', 'Jane');
+
+INSERT INTO users_null Values (1231, '2020-01-01 00:00:00', 'John');
+
+SELECT
+ uid,
+ max(updated) AS updated,
+ argMaxMerge(name)
+FROM users
+GROUP BY uid;
+┌──uid─┬─────────────updated─┬─argMaxMerge(name)─┐
+│ 1231 │ 2020-01-02 00:00:00 │ Jane │
+└──────┴─────────────────────┴───────────────────┘
+```
diff --git a/content/en/altinity-kb-schema-design/insert_deduplication.md b/content/en/altinity-kb-schema-design/insert_deduplication.md
index 229a20a648..841dfef788 100644
--- a/content/en/altinity-kb-schema-design/insert_deduplication.md
+++ b/content/en/altinity-kb-schema-design/insert_deduplication.md
@@ -1,13 +1,14 @@
---
-title: "Insert Deduplication / Insert idempotency"
+title: "Insert Deduplication / Insert Idempotency"
linkTitle: "insert deduplication"
weight: 100
description: >-
- Insert Deduplication / Insert idempotency , insert_deduplicate setting.
+ Using ClickHouse® features to avoid duplicate data
+keywords:
+ - clickhouse insert deduplication
+ - clickhouse insert_deduplicate
---
-# Insert Deduplication
-
Replicated tables have a special feature insert deduplication (enabled by default).
[Documentation:](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication/)
@@ -49,7 +50,7 @@ In `clickhouse-server.log` you may see trace messages `Block with ID ... already
..17:52:45.076738.. Block with ID all_7615936253566048997_747463735222236827 already exists locally as part all_0_0_0; ignoring it.
```
-Deduplication checksums are stored in Zookeeper in `/blocks` table's znode for each partition separately, so when you drop partition, they could be identified and removed for this partition.
+Deduplication checksums are stored in [Zookeeper](https://docs.altinity.com/operationsguide/clickhouse-zookeeper/) in the table's `/blocks` znode for each partition separately, so when you drop a partition, they can be identified and removed for that partition.
(during `alter table delete` it's impossible to match checksums, that's why checksums stay in Zookeeper).
```sql
SELECT name, value
@@ -62,7 +63,7 @@ WHERE path = '/clickhouse/cluster_test/tables/test_insert/blocks'
## insert_deduplicate setting
-Insert deduplication is controled by the [insert_deduplicate](https://clickhouse.com/docs/en/operations/settings/settings/#settings-insert-deduplicate) setting
+Insert deduplication is controlled by the [insert_deduplicate](https://clickhouse.com/docs/en/operations/settings/settings/#settings-insert-deduplicate) setting
Let's disable it:
```sql
@@ -244,7 +245,7 @@ select * from test_insert format PrettyCompactMonoBlock;
## insert_deduplication_token
-Since Clikhouse 22.2 there is a new setting [insert_dedupplication_token](https://clickhouse.com/docs/en/operations/settings/settings/#insert_deduplication_token).
+Since ClickHouse® 22.2 there is a new setting [insert_deduplication_token](https://clickhouse.com/docs/en/operations/settings/settings/#insert_deduplication_token).
This setting allows you to define an explicit token that will be used for deduplication instead of calculating a checksum from the inserted data.
```sql
@@ -254,14 +255,14 @@ ENGINE = MergeTree
ORDER BY A
SETTINGS non_replicated_deduplication_window = 100;
-INSERT INTO test_table Values SETTINGS insert_deduplication_token = 'test' (1);
+INSERT INTO test_table SETTINGS insert_deduplication_token = 'test' VALUES (1);
-- the next insert won't be deduplicated because insert_deduplication_token is different
-INSERT INTO test_table Values SETTINGS insert_deduplication_token = 'test1' (1);
+INSERT INTO test_table SETTINGS insert_deduplication_token = 'test1' VALUES (1);
-- the next insert will be deduplicated because insert_deduplication_token
-- is the same as one of the previous
-INSERT INTO test_table Values SETTINGS insert_deduplication_token = 'test' (2);
+INSERT INTO test_table SETTINGS insert_deduplication_token = 'test' VALUES (2);
SELECT * FROM test_table
┌─A─┐
│ 1 │
diff --git a/content/en/altinity-kb-schema-design/materialized-views/_index.md b/content/en/altinity-kb-schema-design/materialized-views/_index.md
index f4b684dc24..95e3f147e3 100644
--- a/content/en/altinity-kb-schema-design/materialized-views/_index.md
+++ b/content/en/altinity-kb-schema-design/materialized-views/_index.md
@@ -1,18 +1,27 @@
---
-title: "MATERIALIZED VIEWS"
-linkTitle: "MATERIALIZED VIEWS"
+title: "ClickHouse® Materialized Views"
+linkTitle: "Materialized Views"
description: >
- MATERIALIZED VIEWS
+ Making the most of this powerful ClickHouse® feature
+keywords:
+ - clickhouse materialized view
+ - create materialized view clickhouse
---
-{{% alert title="Info" color="info" %}}
-MATERIALIZED VIEWs in ClickHouse behave like AFTER INSERT TRIGGER to the left-most table listed in its SELECT statement.
-{{% /alert %}}
+ClickHouse® MATERIALIZED VIEWs behave like an AFTER INSERT TRIGGER on the left-most table listed in their SELECT statement and never read data from disk. Only rows that are placed into the RAM buffer by the INSERT are read.
-# MATERIALIZED VIEWS
+## Useful links
-* Clickhouse and the magic of materialized views. Basics explained with examples: [webinar recording](https://altinity.com/webinarspage/2019/6/26/clickhouse-and-the-magic-of-materialized-views)
-* Everything you should know about materialized views. Very detailed information about internals: [video](https://youtu.be/ckChUkC3Pns?t=9353), [annotated presentation](https://den-crane.github.io/Everything_you_should_know_about_materialized_views_commented.pdf), [presentation](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup47/materialized_views.pdf)
+* ClickHouse Materialized Views Illuminated, Part 1:
+ * [Blog post](https://altinity.com/blog/clickhouse-materialized-views-illuminated-part-1)
+ * [Webinar recording](https://www.youtube.com/watch?app=desktop&v=j15dvPGzhyE)
+* ClickHouse Materialized Views Illuminated, Part 2:
+ * [Blog post](https://altinity.com/blog/clickhouse-materialized-views-illuminated-part-2)
+ * [Webinar recording](https://www.youtube.com/watch?v=THDk625DGsQ)
+ * [Slides](https://altinity.com/wp-content/uploads/2024/05/ClickHouse-Materialized-Views-The-Magic-Continues-1.pdf)
+* Everything you should know about Materialized Views:
+ * [Video](https://www.youtube.com/watch?v=ckChUkC3Pns&t=9353s)
+ * [Annotated slides](https://den-crane.github.io/Everything_you_should_know_about_materialized_views_commented.pdf)
## Best practices
@@ -29,7 +38,7 @@ MATERIALIZED VIEWs in ClickHouse behave like AFTER INSERT TRIGGER to the left-mo
That way it's a bit simpler to do schema migrations or build more complicated pipelines when one table is filled by several MVs.
- With engine=Atomic it hard to map undelying table with the MV.
+ With engine=Atomic it's hard to map the underlying table to the MV.
2. Avoid using POPULATE when creating MATERIALIZED VIEW on big tables.
@@ -55,15 +64,15 @@ MATERIALIZED VIEWs in ClickHouse behave like AFTER INSERT TRIGGER to the left-mo
Since MATERIALIZED VIEWs are updated on every INSERT to the underlying table and you can not insert anything to the usual VIEW, the materialized view update will never be triggered.
-Normally you should build MATERIALIZED VIEWs on the top of the table with MergeTree engine family.
+Normally, you should build MATERIALIZED VIEWs on top of a table from the MergeTree engine family.
-### Q. I've created materialized error with some error, and since it's it reading from Kafka I don't understand where the error is
+### Q. I've created a materialized view with some error, and since it's reading from Kafka, I don't understand where the error is
-Server logs will help you. Also, see the next question.
+Look into system.query_views_log table or server logs, or system.text_log table. Also, see the next question.
### Q. How to debug misbehaving MATERIALIZED VIEW?
-You can also attach the same MV to some dummy table with engine=Log (or even Null) and do some manual inserts there to debug the behavior. Similar way (as the Materialized view often can contain some pieces of the business logic of the application) you can create tests for your schema.
+You can also attach the same MV to a dummy table with engine=Null and do manual inserts to debug the behavior. In a similar way (as the Materialized view often contains some pieces of the application's business logic), you can create tests for your schema.
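+
+A minimal sketch of that debugging approach (the table names and the MV body here are hypothetical, not from a real schema):
+
+```sql
+CREATE TABLE dummy_source (key UInt32, value String) ENGINE = Null;
+
+CREATE TABLE target (key UInt32, value String) ENGINE = MergeTree ORDER BY key;
+
+CREATE MATERIALIZED VIEW mv_debug TO target AS
+SELECT key, upper(value) AS value
+FROM dummy_source;
+
+-- manual test inserts pass through the MV logic into `target`
+INSERT INTO dummy_source VALUES (1, 'a'), (2, 'b');
+SELECT * FROM target;
+```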
{{% alert title="Warning" color="warning" %}}
Always test MATERIALIZED VIEWs first on staging or testing environments
@@ -85,6 +94,12 @@ So it will most probably work not as you expect and will hit insert performance
The MV will be attached (as AFTER INSERT TRIGGER) to the left-most table in the MV SELECT statement, and it will 'see' only freshly inserted rows there. It will 'see' the whole set of rows of other tables, and the query will be executed EVERY TIME you do the insert to the left-most table. That will impact the performance speed there significantly.
If you really need to update the MV with the left-most table, not impacting the performance so much you can consider using dictionary / engine=Join / engine=Set for right-hand table / subqueries (that way it will be always in memory, ready to use).
+### Q. How are MVs executed: sequentially or in parallel?
+
+By default, the execution is sequential and alphabetical. It can be changed with the [parallel_view_processing](https://clickhouse.com/docs/en/operations/settings/settings#parallel_view_processing) setting.
+
+Parallel processing could be helpful if you have a lot of spare CPU power (cores) and want to utilize it. Add the setting to the insert statement or to the user profile. New blocks created by MVs will also follow the squashing logic similar to the one used in the insert, but they will use the min_insert_block_size_rows_for_materialized_views and min_insert_block_size_bytes_for_materialized_views settings.
+
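+A minimal sketch of enabling it (`src` is a placeholder table name):
+
+```sql
+INSERT INTO src SETTINGS parallel_view_processing = 1 VALUES (1, 'a');
+-- or for the whole session
+SET parallel_view_processing = 1;
+```
+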
### Q. How to alter MV implicit storage (w/o TO syntax)
1) take the existing MV definition
diff --git a/content/en/altinity-kb-schema-design/materialized-views/backfill-populate-mv-in-a-controlled-manner.md b/content/en/altinity-kb-schema-design/materialized-views/backfill-populate-mv-in-a-controlled-manner.md
index 43213bd5f3..92dfa67e9b 100644
--- a/content/en/altinity-kb-schema-design/materialized-views/backfill-populate-mv-in-a-controlled-manner.md
+++ b/content/en/altinity-kb-schema-design/materialized-views/backfill-populate-mv-in-a-controlled-manner.md
@@ -24,4 +24,52 @@ INSERT INTO mv_import SELECT * FROM huge_table WHERE toYYYYMM(ts) = 202105;
ALTER TABLE mv ATTACH PARTITION ID '202105' FROM mv_import;
```
-See also [https://clickhouse.tech/docs/en/sql-reference/statements/alter/partition/\#alter_attach-partition-from](https://clickhouse.tech/docs/en/sql-reference/statements/alter/partition/\#alter_attach-partition-from).
+See also [the ClickHouse® documentation on Manipulating Partitions and Parts](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition).
+
+Q. I still do not have enough RAM to GROUP BY the whole partition.
+
+A. Push the aggregation to the background, during merges
+
+There is a modified version of the MergeTree engine, called [AggregatingMergeTree](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree). That engine has additional logic that is applied to rows with the same set of values in the columns specified in the table's ORDER BY expression. All such rows are aggregated into a single row using the aggregating functions defined in the column definitions. There are two "special" column types designed specifically for that purpose:
+
+- [AggregateFunction](https://clickhouse.com/docs/en/sql-reference/data-types/aggregatefunction)
+- [SimpleAggregateFunction](https://clickhouse.com/docs/en/sql-reference/data-types/simpleaggregatefunction)
+
+An INSERT ... SELECT operating over a very large partition will create data parts of about 1M rows (min_insert_block_size_rows). Those parts will be aggregated during the merge process the same way GROUP BY does it, but the number of rows processed at once (and the RAM usage) will be much smaller than the total number of rows in the partition. A merge combined with GROUP BY will create a new part with far fewer rows. That data part will possibly be merged again with other data, but the number of rows will not be too big.
+
+```sql
+CREATE TABLE mv_import (
+ id UInt64,
+  ts SimpleAggregateFunction(max,DateTime),   -- most recent timestamp
+  v1 SimpleAggregateFunction(sum,UInt64),     -- just a sum
+  v2 SimpleAggregateFunction(max,String),     -- some non-empty string
+  v3 AggregateFunction(argMax,String,DateTime) -- last value (by ts)
+) ENGINE = AggregatingMergeTree()
+ORDER BY id;
+
+INSERT INTO mv_import
+SELECT id, -- ORDER BY column
+       ts,v1,v2, -- for SimpleAggregateFunction the state is the same as the value
+       initializeAggregation('argMaxState',v3,ts) -- convert values to states for columns with the AggregateFunction type
+FROM huge_table
+WHERE toYYYYMM(ts) = 202105;
+```
+
+Actually, the first GROUP BY run happens just before each block of ~1M rows is stored on disk as a data part. You may disable that behavior by switching off the [optimize_on_insert](https://clickhouse.com/docs/en/operations/settings/settings#optimize-on-insert) setting if you have heavy calculations during aggregation.
+
+You may attach such a table (with AggregateFunction columns) to the main table as in the example above, but if you don't like having states in the materialized table, the data should be finalized and converted back to normal values. In that case, you have to move the data by INSERT ... SELECT again:
+
+```sql
+INSERT INTO MV
+SELECT id,ts,v1,v2, -- nothing special for SimpleAggregateFunction columns
+       finalizeAggregation(v3)
+FROM mv_import FINAL;
+```
+
+The last run of GROUP BY happens during the FINAL execution, and the AggregateFunction types are converted back to normal values. To simplify retries after failures, an additional temporary table and the same trick with ATTACH can be applied.
+
diff --git a/content/en/altinity-kb-schema-design/materialized-views/idempotent_inserts_mv.md b/content/en/altinity-kb-schema-design/materialized-views/idempotent_inserts_mv.md
index 19ebd72039..8a8e901672 100644
--- a/content/en/altinity-kb-schema-design/materialized-views/idempotent_inserts_mv.md
+++ b/content/en/altinity-kb-schema-design/materialized-views/idempotent_inserts_mv.md
@@ -8,17 +8,17 @@ description: >-
## Why inserts into materialized views are not idempotent?
-ClickHouse still does not have transactions. They will be implemented around 2022Q2.
+ClickHouse® still does not have transactions. They were expected to be implemented around 2022Q2, but they are still not on the roadmap.
-Because of Clickhouse materialized view is a trigger. And an insert into a table and an insert into a subordinate materialized view it's two different inserts so they are not atomic alltogether.
+That's because a ClickHouse materialized view is a trigger, and an insert into a table and an insert into a subordinate materialized view are two different inserts, so they are not atomic together.
-And insert into a materialized view may fail after the succesful insert into the table. In case of any failure a client gets the error about failed insertion.
-You may enable insert_deduplication (it's enabled by default for Replciated engines) and repeate the insert with an idea to achive idempotate insertion,
-and insertion will be skipped into the source table becase of deduplication but it will be skipped for materialized view as well because
-by default materialized view inherites deduplication from the source table.
+An insert into a materialized view may fail after the successful insert into the table. In case of any failure, the client gets an error about the failed insertion.
+You may enable insert_deduplication (it's enabled by default for Replicated engines) and repeat the insert hoping to achieve an idempotent insertion,
+and the insertion will be skipped for the source table because of deduplication, but it will be skipped for the materialized view as well, because
+by default the materialized view inherits deduplication from the source table.
It's controlled by a parameter `deduplicate_blocks_in_dependent_materialized_views` https://clickhouse.com/docs/en/operations/settings/settings/#settings-deduplicate-blocks-in-dependent-materialized-views
-If your materialized view is wide enought and always have enought data for constistent deduplication then you can enable `deduplicate_blocks_in_dependent_materialized_views`.
+If your materialized view is wide enough and always has enough data for consistent deduplication then you can enable `deduplicate_blocks_in_dependent_materialized_views`.
Or you may add information for deduplication (some unique information / insert identifier).
### Example 1. Inconsistency with deduplicate_blocks_in_dependent_materialized_views 0
@@ -59,6 +59,10 @@ select sum(CNT) from test_mv;
0 rows in set. Elapsed: 0.001 sec. -- Inconsistency! Unfortunatly insert into MV was deduplicated as well
```
+Here is another example: https://github.com/ClickHouse/ClickHouse/issues/56642
+
### Example 2. Inconsistency with deduplicate_blocks_in_dependent_materialized_views 1
```sql
@@ -148,7 +152,7 @@ select sum(CNT) from test_mv;
└──────────┘
```
-Idea how to fix it in Clickhouse source code https://github.com/ClickHouse/ClickHouse/issues/30240
+Idea how to fix it in ClickHouse source code https://github.com/ClickHouse/ClickHouse/issues/30240
### Fake (unused) metric to add uniqueness.
diff --git a/content/en/altinity-kb-schema-design/preaggregations.md b/content/en/altinity-kb-schema-design/preaggregations.md
index 3c1e96ac56..d650ff6eaa 100644
--- a/content/en/altinity-kb-schema-design/preaggregations.md
+++ b/content/en/altinity-kb-schema-design/preaggregations.md
@@ -3,22 +3,22 @@ title: "Pre-Aggregation approaches"
linkTitle: "Pre-Aggregation approaches"
weight: 100
description: >-
- ETL vs Materialized Views vs Projections in ClickHouse.
+ ETL vs Materialized Views vs Projections in ClickHouse®
---
## Pre-Aggregation approaches: ETL vs Materialized Views vs Projections
-| | ETL | MV | Projections |
-|:-|:-|:-|:-|
-| Realtime | no | yes | yes |
-| How complex queries can be used to build the preaggregaton | any | complex | very simple |
-| Impacts the insert speed | no | yes | yes |
-| Are inconsistancies possible | Depends on ETL. If it process the errors properly - no. | yes (no transactions / atomicity) | no |
-| Lifetime of aggregation | any | any | Same as the raw data |
-| Requirements | need external tools/scripting | is a part of database schema | is a part of table schema |
+| | ETL | MV | Projections |
+|:-|:-----------------------------------------------------------------|:-|:-|
+| Realtime | no | yes | yes |
+| How complex queries can be used to build the preaggregation | any | complex | very simple |
+| Impacts the insert speed | no | yes | yes |
+| Are inconsistencies possible | Depends on the ETL. If it processes errors properly - no. | yes (no transactions / atomicity) | no |
+| Lifetime of aggregation | any | any | Same as the raw data |
+| Requirements | need external tools/scripting | is a part of database schema | is a part of table schema |
| How complex to use in queries | Depends on aggregation, usually simple, querying a separate table | Depends on aggregation, sometimes quite complex, querying a separate table | Very simple, querying the main table |
-| Can work correctly with ReplacingMergeTree as a source | Yes | No | No |
-| Can work correctly with CollapsingMergeTree as a source | Yes | For simple aggregations | For simple aggregations |
-| Can be chained | Yes (Usually with DAGs / special scripts) | Yes (but may be not straightforward, and often is a bad idea) | No |
-| Resources needed to calculate the increment | May be signigicant | Usually tiny | Usually tiny |
+| Can work correctly with ReplacingMergeTree as a source | Yes | No | No |
+| Can work correctly with CollapsingMergeTree as a source | Yes | For simple aggregations | For simple aggregations |
+| Can be chained | Yes (usually with DAGs / special scripts) | Yes (but may not be straightforward, and is often a bad idea) | No |
+| Resources needed to calculate the increment | May be significant | Usually tiny | Usually tiny |
diff --git a/content/en/altinity-kb-schema-design/row-level-deduplication.md b/content/en/altinity-kb-schema-design/row-level-deduplication.md
index 2f26edac6b..cfa5cbad58 100644
--- a/content/en/altinity-kb-schema-design/row-level-deduplication.md
+++ b/content/en/altinity-kb-schema-design/row-level-deduplication.md
@@ -1,12 +1,12 @@
---
-title: "ClickHouse row-level deduplication"
-linkTitle: "ClickHouse row-level deduplication"
+title: "ClickHouse® row-level deduplication"
+linkTitle: "ClickHouse® row-level deduplication"
weight: 100
description: >-
- ClickHouse row-level deduplication.
+ ClickHouse® row-level deduplication.
---
-## ClickHouse row-level deduplication.
+## ClickHouse® row-level deduplication.
(Block level deduplication exists in Replicated tables, and is not the subject of that article).
@@ -26,7 +26,7 @@ Approach 0. Make deduplication before ingesting data to ClickHouse
+ you have full control
- extra coding and 'moving parts', storing some ids somewhere
+ clean and simple schema and selects in ClickHouse
-! check if row exists in clickhouse before insert can give non-satisfing results if you use ClickHouse cluster (i.e. Replicated / Distributed tables) - due to eventual consistency.
+! checking if a row exists in ClickHouse before insert can give unsatisfactory results if you use a ClickHouse cluster (i.e. Replicated / Distributed tables) - due to eventual consistency.
Approach 1. Allow duplicates during ingestion. Remove them on SELECT level (by things like GROUP BY)
+ simple inserts
@@ -44,7 +44,7 @@ Approach 2. Eventual deduplication using Replacing
Approach 3. Eventual deduplication using Collapsing
- complicated
- can force you to use suboptimal primary key (which will guarantee record uniqueness)
- - you need to store previous state of the record somewhere, or extract it before ingestion from clickhouse
+ - you need to store previous state of the record somewhere, or extract it before ingestion from ClickHouse
- deduplication is eventual (same as with Replacing)
+ you can make the proper aggregations of last state w/o FINAL (bookkeeping-alike sums, counts etc)
diff --git a/content/en/altinity-kb-schema-design/snowflakeid.md b/content/en/altinity-kb-schema-design/snowflakeid.md
new file mode 100644
index 0000000000..320800a42d
--- /dev/null
+++ b/content/en/altinity-kb-schema-design/snowflakeid.md
@@ -0,0 +1,123 @@
+---
+title: "SnowflakeID for Efficient Primary Keys "
+linkTitle: "SnowflakeID"
+weight: 100
+description: >-
+ SnowflakeID for Efficient Primary Keys
+---
+
+In data warehousing (DWH) environments, the choice of primary key (PK) can significantly impact performance, particularly in terms of RAM usage and query speed. This is where [SnowflakeID](https://en.wikipedia.org/wiki/Snowflake_ID) comes into play, providing a robust solution for PK management. Here’s a deep dive into why and how Snowflake IDs are beneficial and practical implementation examples.
+
+### Why Snowflake ID?
+
+- **Natural keys are problematic**: Natural keys derived from business data can lead to various issues like complexity and instability. Surrogate keys, on the other hand, are system-generated and stable.
+- Surrogate keys simplify joins and indexing, which is crucial for performance in large-scale data warehousing.
+- Monotonic or sequential IDs help maintain the order of entries, which is essential for performance tuning and efficient range queries.
+- Having both a timestamp and a unique ID in the same column allows for fast filtering of rows during SELECT operations. This is particularly useful for time-series data.
+
+### **Building Snowflake IDs**
+
+There are two primary methods to construct the lower bits of a Snowflake ID:
+
+1. **Hash of Important Columns**:
+
+ Using a hash function on significant columns ensures uniqueness and distribution.
+
+2. **Row Number in insert batch**
+
+ Utilizing the row number within data blocks provides a straightforward approach to generating unique identifiers.
+
+
+### **Implementation as UDF**
+
+Here’s how to implement Snowflake IDs using standard SQL functions while utilizing second and millisecond timestamps.
+
+Pack the hash into the lower 22 bits for DateTime64 and the lower 32 bits for DateTime:
+
+```sql
+create function toSnowflake64 as (dt,ch) ->
+ bitOr(dateTime64ToSnowflakeID(dt),
+ bitAnd(bitAnd(ch,0x3FFFFF)+
+ bitAnd(bitShiftRight(ch, 20),0x3FFFFF)+
+ bitAnd(bitShiftRight(ch, 40),0x3FFFFF),
+ 0x3FFFFF)
+ );
+
+create function toSnowflake as (dt,ch) ->
+ bitOr(dateTimeToSnowflakeID(dt),
+ bitAnd(bitAnd(ch,0xFFFFFFFF)+
+ bitAnd(bitShiftRight(ch, 32),0xFFFFFFFF),
+ 0xFFFFFFFF)
+ );
+
+with cityHash64('asdfsdnfs;n') as ch,
+ now64() as dt
+select dt,
+ hex(toSnowflake64(dt,ch) as sn) ,
+ snowflakeIDToDateTime64(sn);
+
+with cityHash64('asdfsdnfs;n') as ch,
+ now() as dt
+select dt,
+ hex(toSnowflake(dt,ch) as sn) ,
+ snowflakeIDToDateTime(sn);
+```
+
+### **Creating Tables with Snowflake ID**
+
+**Using Materialized Columns and hash**
+
+```sql
+create table XX
+(
+ id Int64 materialized toSnowflake(now(),cityHash64(oldID)),
+ oldID String,
+ data String
+) engine=MergeTree order by id;
+
+```
+
+Note: Using User-Defined Functions (UDFs) in CREATE TABLE statements is not always a good idea, as they are expanded into the CREATE TABLE DDL, and changing them later is inconvenient.
+
+**Using a Null Table, Materialized View, and** rowNumberInAllBlocks
+
+A more efficient approach involves using a Null table and materialized views.
+
+```sql
+create table XX
+(
+ id Int64,
+ data String
+) engine=MergeTree order by id;
+
+create table Null (data String) engine=Null;
+create materialized view _XX to XX as
+select toSnowflake(now(),rowNumberInAllBlocks()) as id, data
+from Null;
+```
+
+### Converting from UUID to SnowFlakeID for subsequent events
+
+Consider that your event stream only has a UUID column identifying a particular user. The registration time that can be used as a base for the SnowflakeID is present only in the first 'register' event, but not in subsequent events. It's easy to generate the SnowflakeID for the register event, but then we need to get it from some other table without disturbing the ingestion process too much. Using hash JOINs in Materialized Views is not recommended, so we need some "nested loop join" to get the data fast. In ClickHouse, the "nested loop join" is still not supported, but a Direct Dictionary can work around it.
+
+```sql
+CREATE TABLE UUID2ID_store (user_id UUID, id UInt64)
+ENGINE = MergeTree() -- EmbeddedRocksDB can be used instead
+ORDER BY user_id
+settings index_granularity=256;
+
+CREATE DICTIONARY UUID2ID_dict (user_id UUID, id UInt64)
+PRIMARY KEY user_id
+LAYOUT ( DIRECT ())
+SOURCE(CLICKHOUSE(TABLE 'UUID2ID_store'));
+
+CREATE OR REPLACE FUNCTION UUID2ID AS (uuid) -> dictGet('UUID2ID_dict','id',uuid);
+
+CREATE MATERIALIZED VIEW _toUUID_store TO UUID2ID_store AS
+select user_id, toSnowflake64(event_time, cityHash64(user_id)) as id
+from Actions;
+```
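+
+Subsequent (non-register) events can then resolve the surrogate id through the dictionary-backed UDF. The sketch below is only illustrative: the `events_store` target table and the `event_type` column are assumptions, not part of the schema above.
+
+```sql
+CREATE MATERIALIZED VIEW _toEvents_store TO events_store AS
+SELECT UUID2ID(user_id) AS id, event_time, event_type
+FROM Actions
+WHERE event_type <> 'register';
+```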
+
+**Conclusion**
+
+Snowflake IDs provide an efficient mechanism for generating unique, monotonic primary keys, which are essential for optimizing query performance in data warehousing environments. By combining timestamps and unique identifiers, snowflake IDs facilitate faster row filtering and ensure stable, surrogate key generation. Implementing these IDs using SQL functions and materialized views ensures that your data warehouse remains performant and scalable.
diff --git a/content/en/altinity-kb-schema-design/two-columns-indexing.md b/content/en/altinity-kb-schema-design/two-columns-indexing.md
new file mode 100644
index 0000000000..9fa87c0538
--- /dev/null
+++ b/content/en/altinity-kb-schema-design/two-columns-indexing.md
@@ -0,0 +1,122 @@
+---
+title: "Two columns indexing"
+linkTitle: "Two columns indexing"
+weight: 100
+description: >-
+ How to create ORDER BY suitable for filtering over two different columns in two different queries
+---
+
+Suppose we have telecom CDR data in which an A party calls a B party. Each data row consists of the event_timestamp, A party details (A MSISDN, A IMEI, A IMSI, A start location, A end location), B party details (B MSISDN, B IMEI, B IMSI, B start location, B end location), and some other metadata.
+
+Searches will use one of the A or B fields, for example, A IMSI, within the start and end time window.
+
+The A MSISDN, A IMSI, and A IMEI values are tightly coupled, as users rarely change their phones.
+
+
+The queries will be:
+
+```sql
+select * from X where A = '0123456789' and ts between ...;
+select * from X where B = '0123456789' and ts between ...;
+```
+
+and both A & B are high-cardinality values
+
+The ClickHouse® primary index (ORDER BY/PRIMARY KEY) works great when you always include the leading ORDER BY columns in the WHERE filter. There are exceptions for low-cardinality columns and highly correlated values, but here is another case: A & B both have high cardinality, and it seems that their correlation is at a medium level.
+
+Various solutions exist, and their effectiveness largely depends on the correlation of different column data. Testing all solutions on actual data is necessary to select the best one.
+
+
+### ORDER BY + additional Skip Index
+
+```sql
+create table X (
+ A UInt32,
+ B UInt32,
+ ts DateTime,
+ ....
+ INDEX ix_B (B) type minmax GRANULARITY 3
+) engine = MergeTree
+partition by toYYYYMM(ts)
+order by (toStartOfDay(ts),A,B);
+```
+
+A bloom_filter index type instead of minmax could work fine in some situations.
+
+### Inverted index as a projection
+
+```sql
+create table X (
+ A UInt32,
+ B UInt32,
+ ts DateTime,
+ ....
+ PROJECTION ix_B (
+ select A, B,ts ORDER BY B, ts
+ )
+) engine = MergeTree
+partition by toYYYYMM(ts)
+order by (toStartOfDay(ts),A,B);
+
+select * from X
+where A in (select A from X where B='....' and ts between ...)
+ and B='...' and ts between ... ;
+```
+
+- The number of rows the subquery returns should not be very high. 1M rows seems to be a suitable limit.
+- A separate table with a Materialized View can also be used similarly.
+- The access pattern for the main table will be "point" lookups, so it's better to lower index_granularity to 256. That will increase the RAM usage by the primary key.
+
+
+### mortonEncode
+(available from 23.10)
+
+Do not prioritize either A or B, but distribute indexing efficiency between them.
+
+ * https://github.com/ClickHouse/ClickHouse/issues/41195
+ * https://www.youtube.com/watch?v=5GR1J4T4_d8
+ * https://clickhouse.com/docs/en/operations/settings/settings#analyze_index_with_space_filling_curves
+
+```sql
+create table X (
+ A UInt32,
+ B UInt32,
+ ts DateTime,
+ ....
+) engine = MergeTree
+partition by toYYYYMM(ts)
+order by (toStartOfDay(ts),mortonEncode(A,B));
+select * from X where A = '0123456789' and ts between ...;
+select * from X where B = '0123456789' and ts between ...;
+```
+
+### mortonEncode with non-UInt columns
+
+[mortonEncode](https://clickhouse.com/docs/en/sql-reference/functions/encoding-functions#mortonencode) function requires UInt columns, but sometimes different column types are needed (like String or ipv6). In such a case, the cityHash64() function can be used both for inserting and querying:
+
+```sql
+create table X (
+ A IPv6,
+ B IPv6,
+ AA alias cityHash64(A),
+ BB alias cityHash64(B),
+ ts DateTime materialized now()
+) engine = MergeTree
+partition by toYYYYMM(ts)
+order by
+(toStartOfDay(ts),mortonEncode(cityHash64(A),cityHash64(B)))
+;
+
+insert into X values ('fd7a:115c:a1e0:ab12:4843:cd96:624c:9a17','fd7a:115c:a1e0:ab12:4843:cd96:624c:9a17');
+
+select * from X where cityHash64(toIPv6('fd7a:115c:a1e0:ab12:4843:cd96:624c:9a17')) = AA;
+```
+
+### hilbertEncode as alternative
+(available from 24.6)
+
+[hilbertEncode](https://clickhouse.com/docs/en/sql-reference/functions/encoding-functions#hilbertencode) can be used instead of mortonEncode. On some data it gives better results than mortonEncode, as shown in the sketch below.
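+
+A minimal sketch of the same table layout using hilbertEncode (illustrative only; all the considerations from the mortonEncode example above still apply):
+
+```sql
+create table X_hilbert (
+    A UInt32,
+    B UInt32,
+    ts DateTime
+) engine = MergeTree
+partition by toYYYYMM(ts)
+order by (toStartOfDay(ts), hilbertEncode(A, B));
+```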
diff --git a/content/en/altinity-kb-schema-design/uniq-functions.md b/content/en/altinity-kb-schema-design/uniq-functions.md
index 96b294845f..469467040e 100644
--- a/content/en/altinity-kb-schema-design/uniq-functions.md
+++ b/content/en/altinity-kb-schema-design/uniq-functions.md
@@ -46,3 +46,38 @@ do
printf "|%s|%s,%s,%s,%s\n" "$f1" "$f2" "$size" "$result" "$time"
done
```
+
+
+## groupBitmap
+
+Uses [Roaring Bitmaps](https://roaringbitmap.org/) underneath.
+Returns the number of unique values.
+
+Can be used with Int* types.
+Works really well when your values are quite similar (low memory usage / state size).
+
+Example with blockchain data, where block_number is a monotonically increasing number.
+
+```sql
+SELECT groupBitmap(block_number) FROM blockchain;
+
+┌─groupBitmap(block_number)─┐
+│ 48478157 │
+└───────────────────────────┘
+
+MemoryTracker: Peak memory usage (for query): 64.44 MiB.
+1 row in set. Elapsed: 32.044 sec. Processed 4.77 billion rows, 38.14 GB (148.77 million rows/s., 1.19 GB/s.)
+
+SELECT uniqExact(block_number) FROM blockchain;
+
+┌─uniqExact(block_number)─┐
+│ 48478157 │
+└─────────────────────────┘
+
+MemoryTracker: Peak memory usage (for query): 4.27 GiB.
+1 row in set. Elapsed: 70.058 sec. Processed 4.77 billion rows, 38.14 GB (68.05 million rows/s., 544.38 MB/s.)
+```
diff --git a/content/en/altinity-kb-setup-and-maintenance/_index.md b/content/en/altinity-kb-setup-and-maintenance/_index.md
index 1c3df79b50..5184ed2241 100644
--- a/content/en/altinity-kb-setup-and-maintenance/_index.md
+++ b/content/en/altinity-kb-setup-and-maintenance/_index.md
@@ -7,6 +7,6 @@ keywords:
- monitor clickhouse
- data migration
description: >
- Learn how to set up, deploy, monitor, and backup ClickHouse with step-by-step guides.
+ Learn how to set up, deploy, monitor, and backup ClickHouse® with step-by-step guides.
weight: 5
---
diff --git a/content/en/altinity-kb-setup-and-maintenance/alters.md b/content/en/altinity-kb-setup-and-maintenance/alters.md
new file mode 100644
index 0000000000..85f8b90627
--- /dev/null
+++ b/content/en/altinity-kb-setup-and-maintenance/alters.md
@@ -0,0 +1,146 @@
+---
+title: "How ALTERs work in ClickHouse®"
+linkTitle: "How ALTERs work in ClickHouse®"
+weight: 100
+description: >-
+  How ALTERs work in ClickHouse®
+---
+
+### How ALTERs work in ClickHouse®
+
+#### ADD (COLUMN/INDEX/PROJECTION)
+
+Lightweight, will only change table metadata.
+So the new entity will only appear in new parts created during INSERTs or during merges of old parts.
+
+In the case of COLUMN, ClickHouse will calculate the column value on the fly in the query context.
+
+{{% alert title="Warning" color="warning" %}}
+
+```sql
+CREATE TABLE test_materialization
+(
+ `key` UInt32,
+ `value` UInt32
+)
+ENGINE = MergeTree
+ORDER BY key;
+
+INSERT INTO test_materialization(key, value) SELECT 1, 1;
+INSERT INTO test_materialization(key, value) SELECT 2, 2;
+
+ALTER TABLE test_materialization ADD COLUMN inserted_at DateTime DEFAULT now();
+
+SELECT key, inserted_at FROM test_materialization;
+
+┌─key─┬─────────inserted_at─┐
+│ 1 │ 2022-09-01 03:28:58 │
+└─────┴─────────────────────┘
+┌─key─┬─────────inserted_at─┐
+│ 2 │ 2022-09-01 03:28:58 │
+└─────┴─────────────────────┘
+
+SELECT key, inserted_at FROM test_materialization;
+
+┌─key─┬─────────inserted_at─┐
+│ 1 │ 2022-09-01 03:29:11 │
+└─────┴─────────────────────┘
+┌─key─┬─────────inserted_at─┐
+│ 2 │ 2022-09-01 03:29:11 │
+└─────┴─────────────────────┘
+
+Each query will return a different inserted_at value, because the now() function is evaluated on each execution.
+
+
+INSERT INTO test_materialization(key, value) SELECT 3, 3;
+
+SELECT key, inserted_at FROM test_materialization;
+
+┌─key─┬─────────inserted_at─┐
+│ 3 │ 2022-09-01 03:29:36 │ -- < This value was materialized during ingestion, that's why it's smaller than value for keys 1 & 2
+└─────┴─────────────────────┘
+┌─key─┬─────────inserted_at─┐
+│ 1 │ 2022-09-01 03:29:53 │
+└─────┴─────────────────────┘
+┌─key─┬─────────inserted_at─┐
+│ 2 │ 2022-09-01 03:29:53 │
+└─────┴─────────────────────┘
+
+OPTIMIZE TABLE test_materialization FINAL;
+
+SELECT key, inserted_at FROM test_materialization;
+
+┌─key─┬─────────inserted_at─┐
+│ 1 │ 2022-09-01 03:30:52 │
+│ 2 │ 2022-09-01 03:30:52 │
+│ 3 │ 2022-09-01 03:29:36 │
+└─────┴─────────────────────┘
+
+SELECT key, inserted_at FROM test_materialization;
+
+┌─key─┬─────────inserted_at─┐
+│ 1 │ 2022-09-01 03:30:52 │
+│ 2 │ 2022-09-01 03:30:52 │
+│ 3 │ 2022-09-01 03:29:36 │
+└─────┴─────────────────────┘
+
+So, data inserted after the addition of the column can have a lower inserted_at value than old data without materialization.
+
+```
+
+{{% /alert %}}
+
+If you want to backpopulate data for old parts, you have multiple options:
+
+#### MATERIALIZE (COLUMN/INDEX/PROJECTION) (PART[ITION ID] '')
+
+Will materialize this entity.
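+
+A minimal sketch, reusing the `test_materialization` example above (the `IN PARTITION ID 'all'` form assumes the single default partition of a non-partitioned MergeTree table):
+
+```sql
+-- materialize the DEFAULT value of inserted_at into the existing parts
+ALTER TABLE test_materialization MATERIALIZE COLUMN inserted_at;
+-- or limit the work to a single partition
+ALTER TABLE test_materialization MATERIALIZE COLUMN inserted_at IN PARTITION ID 'all';
+```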
+
+#### OPTIMIZE TABLE xxxx (PART[ITION ID] '') (FINAL)
+
+Will trigger merge, which will lead to materialization of all entities in affected parts.
+
+#### ALTER TABLE xxxx UPDATE column_name = column_name WHERE 1;
+
+Will trigger mutation, which will materialize this column.
+
+#### DROP (COLUMN/INDEX/PROJECTION)
+
+Lightweight, it only changes table metadata and removes the corresponding files from the filesystem.
+For Compact parts it will trigger merge, which can be heavy. [issue](https://github.com/ClickHouse/ClickHouse/issues/27502)
+
+
+#### MODIFY COLUMN (DATA TYPE)
+
+1. Change column type in table schema.
+2. Schedule mutation to change type for old parts.
+
+
+### Mutations
+
+Affected parts - parts with rows matching condition.
+
+#### ALTER TABLE xxxxx DELETE WHERE column_1 = 1;
+
+1. Will overwrite all column data in affected parts.
+2. For all affected part(ition)s, it will create new directories on disk and write new data to them, or create hardlinks if they are untouched.
+3. Register the new part names in ZooKeeper.
+
+#### ALTER TABLE xxxxx DELETE IN PARTITION ID '' WHERE column_1 = 1;
+
+Will do the same but only for specific partition.
+
+#### ALTER TABLE xxxxx UPDATE column_2 = column_2, column_3 = column_3 WHERE column_1 = 1;
+
+1. Will overwrite column_2, column_3 data in affected parts.
+2. For all affected part(ition)s, it will create new directories on disk and write new data to them, or create hardlinks if they are untouched.
+3. Register the new part names in ZooKeeper.
+
+#### DELETE FROM xxxxx WHERE column_1 = 1;
+
+1. Will create & populate hidden boolean column in affected parts. (_row_exists column)
+2. For all part(ition)s will create new directories on disk and write new data to them or create hardlinks if they untouched.
+3. Register new parts names in ZooKeeper.
+
+Even though LWD (lightweight delete) mutations do not rewrite all columns, steps 2 & 3 can still take significant time for big tables.
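+
+To watch how far such mutations have progressed, an illustrative query over the standard `system.mutations` table:
+
+```sql
+SELECT database, table, mutation_id, command, parts_to_do, is_done, latest_fail_reason
+FROM system.mutations
+WHERE NOT is_done
+ORDER BY create_time;
+```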
+
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-aggressive_merges.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-aggressive_merges.md
new file mode 100644
index 0000000000..ead7d010a0
--- /dev/null
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-aggressive_merges.md
@@ -0,0 +1,69 @@
+---
+title: "Aggressive merges"
+linkTitle: "Aggressive merges"
+description: >
+ Aggressive merges
+---
+
+
+
+Q: Is there any way I can dedicate more resources to the merging process when running ClickHouse® on pretty beefy machines (like 36 cores, 1TB of RAM, and large NVMe disks)?
+
+
+Mostly such things are done by changing the level of parallelism:
+
+ 1. `background_pool_size` - how many threads will be actually doing the merge (if you can push all the server resources to do the merges, i.e. no selects will be running - you can give all the cores to that, so try increasing to 36). If you use replicated table - use the same value for `max_replicated_merges_in_queue`.
+
+ 2. `background_merges_mutations_concurrency_ratio` - how many merges will be assigned (a multiplier of background_pool_size). Sometimes the default (2) may work against you, since it will assign smaller merges, which is nice if you need to deal with real-time inserts, but is not important if you do bulk inserts and later start a lot of merges. So I would try 1.
+
+ 3. `number_of_free_entries_in_pool_to_lower_max_size_of_merge` (merge_tree setting) should be changed together with background_pool_size (50-90% of that). "When there is less than a specified number of free entries in the pool (or replicated queue), start to lower the maximum size of the merge to process (or to put in the queue). This is to allow small merges to process - not filling the pool with long-running merges." To make it really aggressive try 90-95% of background_pool_size, for ex. 34 (so you will have 34 huge merges and 2 small ones).
+
+Additionally, you can:
+
+ - control how big target parts will be created by the merges (max_bytes_to_merge_at_max_space_in_pool)
+ - disable direct io for big merges (min_merge_bytes_to_use_direct_io) - direct io is often slower (it bypasses the page cache, and it is used there to prevent pushing out the often used data from the cache by the running merge).
+ - on a replicated system with slow merges and a fast network you can use execute_merges_on_single_replica_time_threshold
+ - analyze if the Vertical or Horizontal merge is better / faster for your case/schema. (Vertical first merges the columns from the table ORDER BY and then other columns one by another - that normally requires less ram, and keep fewer files opened, but requires more complex computations compared to horizontal when all columns are merged simultaneously).
+ - if you have a lot of tables - you can also give more resources to the scheduler (the component which assigns the merges and does some housekeeping) - background_schedule_pool_size & background_common_pool_size
+ - review the schema, especially the codecs/compression used (they allow to reduce the size, but often can impact the merge speed significantly).
+ - try to form bigger parts when doing inserts (min_insert_block_size_bytes / min_insert_block_size_rows / max_insert_block_size)
+ - check if wide (every column in a separate file) or compact (columns are mixed in one file) parts are used (system.parts). By default min_bytes_for_wide_part=10 MB (so if the part is bigger than that, the wide format will be used, compact otherwise). Sometimes it can be beneficial to use a compact format even for bigger parts (a lot of relatively small columns) or, oppositely, use a wide format even for small parts (a few fat columns in the table).
+ - consider using recent ClickHouse releases - they use compressed marks by default, which can be beneficial for reducing the i/o
+
+All the adjustments / performance optimizations should be controlled by some reproducible 'benchmark' so you can control/prove that the change gives the expected result (sometimes it's quite hard to predict the impact of some change on the real system). Please also monitor how system resources (especially CPU, IO and, for replicated tables, network & zookeeper) are used/saturated during the test. Also monitor/plot the usage of the pools:
+```
+select * from system.metrics where metric like '%PoolTask'
+```
+
+Those recommendations are NOT generic. For systems with real-time insert & select pressure happening together with merges - those adjustments can be 'too aggressive'. So if you have different setups with different usage patterns - avoid using the same 'aggressive' settings template for all of them.
+
+TL/DR version:
+
+```xml
+cat /etc/clickhouse-server/config.d/aggressive_merges.xml
+<clickhouse>
+    <background_pool_size>36</background_pool_size>
+    <background_schedule_pool_size>128</background_schedule_pool_size>
+    <background_common_pool_size>8</background_common_pool_size>
+    <background_merges_mutations_concurrency_ratio>1</background_merges_mutations_concurrency_ratio>
+    <merge_tree>
+        <number_of_free_entries_in_pool_to_lower_max_size_of_merge>32</number_of_free_entries_in_pool_to_lower_max_size_of_merge>
+        <max_replicated_merges_in_queue>36</max_replicated_merges_in_queue>
+        <max_bytes_to_merge_at_max_space_in_pool>161061273600</max_bytes_to_merge_at_max_space_in_pool>
+        <min_merge_bytes_to_use_direct_io>10737418240</min_merge_bytes_to_use_direct_io>
+    </merge_tree>
+</clickhouse>
+
+cat /etc/clickhouse-server/users.d/aggressive_merges.xml
+<clickhouse>
+    <profiles>
+        <default>
+            <background_pool_size>36</background_pool_size>
+            <background_schedule_pool_size>128</background_schedule_pool_size>
+            <background_common_pool_size>8</background_common_pool_size>
+            <background_merges_mutations_concurrency_ratio>1</background_merges_mutations_concurrency_ratio>
+        </default>
+    </profiles>
+</clickhouse>
+```
+
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-check-replication-ddl-queue.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-check-replication-ddl-queue.md
new file mode 100644
index 0000000000..4c505878ef
--- /dev/null
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-check-replication-ddl-queue.md
@@ -0,0 +1,407 @@
+---
+title: "ClickHouse® Replication problems"
+linkTitle: "Replication problems"
+description: >
+ Finding and troubleshooting problems in the `replication_queue`
+keywords:
+ - clickhouse replication
+ - clickhouse check replication status
+---
+
+# Common problems & solutions
+
+- If the replication queue does not have any exceptions (only postponed reasons), just let ClickHouse® do its merges/mutations and it will eventually catch up and reduce the number of tasks in `replication_queue`. The number of concurrent merges and fetches can be tuned, but if that is done without an analysis of your workload you may end up in a worse situation. If the delay in the queue keeps growing, action may be needed:
+
+- First, the simplest approach:
+  try `SYSTEM RESTART REPLICA db.table` (this will DETACH/ATTACH the table internally)
+
+
+
+# How to check for replication problems
+
+1. Check `system.replicas` first, cluster-wide (see the example query after this list). It allows you to check whether the problem is local to one replica or global, and to see the exception.
+   It helps answer the following questions:
+   - Are there any ReadOnly replicas?
+   - Is the connection to zookeeper active?
+   - Is there an exception during table init? (`Code: 999. Coordination::Exception: Transaction failed (No node): Op #1`)
+
+2. Check `system.replication_queue`.
+   - How many tasks are there / are they moving / are there some very old tasks? (check the `created_time` column; if tasks are 24h old, it is a sign of a problem)
+   - You can use the query from this KB article: https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-replication-queue/
+   - Check if there are tasks with a high number of `num_tries` or `num_postponed` and a `postponed_reason`; this is a sign of stuck tasks.
+   - Check the problematic parts affecting the stuck tasks. You can use the `new_part_name` or `parts_to_merge` columns.
+   - Check the type of the task. If it is `MUTATE_PART` then it is a mutation task; if it is `MERGE_PARTS` then it is a merge task. These tasks can be deleted from the replication queue, but `GET_PARTS` tasks should not be deleted.
+
+3. Check `system.errors`
+
+4. Check `system.mutations`:
+   - You can check whether the stuck tasks of type `MUTATE_PART` in the replication queue correspond to mutations that are still executing in `system.mutations`, using the `is_done` column.
+
+5. Find the moment when the problem started and collect/analyze / preserve logs from that moment. It is usually during the first steps of a restart/crash
+
+6. Use `part_log` and `system.parts` to gather information about the parts related to the stuck tasks in the replication queue:
+ - Check if those parts exist and are active from `system.parts` (use partition_id, name as part and active columns to filter)
+ - Extract the part history from `system.part_log`
+ - Example query from `part_log`:
+
+```sql
+SELECT hostName(), * FROM
+cluster('all-sharded',system.part_log)
+WHERE
+ hostName() IN ('chi-prod-live-2-0-0','chi-prod-live-2-2-0','chi-prod-live-2-1-0')
+ AND table = 'sessions_local'
+ AND database = 'analytics'
+ AND part_name in ('20230411_33631_33654_3')
+```
+
+7. If there are no errors and everything just gets slower - check the load (usual system metrics).
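+
+An illustrative cluster-wide starting point for step 1 (the `'all-sharded'` cluster name mirrors the `part_log` example above; adjust it to your cluster, and the thresholds are arbitrary):
+
+```sql
+SELECT hostName() AS host, database, table,
+       is_readonly, is_session_expired, absolute_delay, queue_size, zookeeper_exception
+FROM clusterAllReplicas('all-sharded', system.replicas)
+WHERE is_readonly OR is_session_expired OR queue_size > 100 OR absolute_delay > 300
+ORDER BY absolute_delay DESC;
+```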
+
+
+
+## Some stuck replication task for a partition that was already removed or has no data
+
+- This can be easily detected because there will be exceptions in the replication queue that reference a part from a partition that does not exist. Here the most probable scenario is that the partition was dropped and some tasks were left in the queue.
+
+- drop the partition manually once again (it should remove the task)
+
+- If the partition exists but the part is missing (maybe because it is superseded by a newer merged part) then you can try to DETACH/ATTACH the partition.
+- The query below generates the ALTER commands to do this:
+
+```sql
+WITH
+ extract(new_part_name, '^[^_]+') as partition_id
+SELECT
+ '/* count: ' || count() || ' */\n' ||
+ 'ALTER TABLE ' || database || '.' || table || ' DETACH PARTITION ID \''|| partition_id || '\';\n' ||
+ 'ALTER TABLE ' || database || '.' || table || ' ATTACH PARTITION ID \''|| partition_id || '\';\n'
+FROM
+ system.replication_queue as rq
+GROUP BY
+ database, table, partition_id
+HAVING sum(num_tries) > 1000 OR count() > 100
+ORDER BY count() DESC, sum(num_tries) DESC
+FORMAT TSVRaw;
+```
+
+## Problem with mutation stuck in the queue
+
+- This can happen if the mutation is finished and, for some reason, the task is not removed from the queue. This can be detected by checking the `system.mutations` table and seeing that the mutation is done but the task is still in the queue.
+
+- kill the mutation (again)
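+
+For example (the database, table, and mutation_id below are placeholders; take the real values from `system.mutations`):
+
+```sql
+KILL MUTATION WHERE database = 'db' AND table = 'table' AND mutation_id = '0000000001';
+```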
+
+## Replica is not starting because local set of files differs too much
+
+- First try to increase the thresholds, or set the `force_restore_data` flag and restart clickhouse/pod: https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication#recovery-after-complete-data-loss
+
+## Replica is in Read-Only MODE
+
+Sometimes, due to crashes, zookeeper unavailability, slowness, or other reasons, some of the tables can be in Read-Only mode. This allows SELECTS but not INSERTS. So we need to do DROP / RESTORE replica procedure.
+
+Just to be clear, this procedure **will not delete any data**, it will just re-create the metadata in zookeeper with the current state of the [ClickHouse replica](/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/add_remove_replica/).
+
+How it works:
+
+```sql
+ALTER TABLE table_name DROP DETACHED PARTITION ALL -- clean detached folder before operation. PARTITION ALL works only for the fresh clickhouse versions
+DETACH TABLE table_name; -- Required for DROP REPLICA
+-- Use the zookeeper_path and replica_name from system.replicas.
+SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/table_path_in_zk'; -- It will remove everything from the /table_path_in_zk/replicas/replica_name
+ATTACH TABLE table_name; -- Table will be in readonly mode, because there is no metadata in ZK and after that execute
+SYSTEM RESTORE REPLICA table_name; -- It will detach all partitions, re-create metadata in ZK (like it's new empty table), and then attach all partitions back
+SYSTEM SYNC REPLICA table_name; -- Not mandatory. It will Wait for replicas to synchronize parts. Also it's recommended to check `system.detached_parts` on all replicas after recovery is finished.
+SELECT name FROM system.detached_parts WHERE table = 'table_name'; -- check for leftovers. See the potential problems here https://altinity.com/blog/understanding-detached-parts-in-clickhouse
+```
+
+Starting from version 23, it's possible to use syntax [SYSTEM DROP REPLICA \'replica_name\' FROM TABLE db.table](https://clickhouse.com/docs/en/sql-reference/statements/system#drop-replica) instead of the `ZKPATH` variant, but you need to execute the above command from a different replica than the one you want to drop, which is not convenient sometimes. We recommend using the above method because it works with any version and is more reliable.
+
+## Procedure to restore multiple tables in Read-Only mode per replica
+
+It is better to take a per-replica approach, because restoring replicas using ON CLUSTER could lead to race conditions that cause errors and put a lot of stress on zookeeper/keeper.
+
+
+```sql
+SELECT
+ '-- Table ' || toString(row_num) || '\n' ||
+ 'DETACH TABLE `' || database || '`.`' || table || '`;\n' ||
+ 'SYSTEM DROP REPLICA ''' || replica_name || ''' FROM ZKPATH ''' || zookeeper_path || ''';\n' ||
+ 'ATTACH TABLE `' || database || '`.`' || table || '`;\n' ||
+ 'SYSTEM RESTORE REPLICA `' || database || '`.`' || table || '`;\n'
+FROM (
+ SELECT
+ *,
+ rowNumberInAllBlocks() + 1 as row_num
+ FROM (
+ SELECT
+ database,
+ table,
+ any(replica_name) as replica_name,
+ any(zookeeper_path) as zookeeper_path
+ FROM system.replicas
+ WHERE is_readonly
+ GROUP BY database, table
+ ORDER BY database, table
+ )
+ ORDER BY database, table
+)
+FORMAT TSVRaw;
+```
+
+This will generate the DDL statements to be executed per replica, producing an output that can be saved as an SQL file. It is important to execute the commands per replica in the sequence generated above:
+
+- DETACH the table
+- DROP REPLICA
+- ATTACH the table
+- RESTORE REPLICA
+
+If we do this in parallel, a table could still be attaching while another query is dropping/restoring the replica in ZooKeeper, causing errors.
+
+The following bash script will read the generated SQL file and execute the commands sequentially, asking for user input in case of errors. Simply save the generated SQL to a file (e.g. `recovery_commands.sql`) and run the script below (which you can save as `clickhouse_replica_recovery.sh`):
+
+```bash
+$ clickhouse_replica_recovery.sh recovery_commands.sql
+```
+
+
+Here is the script:
+
+```bash
+#!/bin/bash
+
+# ClickHouse Replica Recovery Script
+# This script executes DETACH, DROP REPLICA, ATTACH, and RESTORE REPLICA commands sequentially
+
+# Configuration
+CLICKHOUSE_HOST="${CLICKHOUSE_HOST:-localhost}"
+CLICKHOUSE_PORT="${CLICKHOUSE_PORT:-9000}"
+CLICKHOUSE_USER="${CLICKHOUSE_USER:-clickhouse_operator}"
+CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-xxxxxxxxx}"
+COMMANDS_FILE="${1:-recovery_commands.sql}"
+LOG_FILE="recovery_$(date +%Y%m%d_%H%M%S).log"
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+MAGENTA='\033[0;35m'
+NC='\033[0m' # No Color
+
+# Function to log messages
+log() {
+ echo -e "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE"
+}
+
+# Function to execute a SQL statement with retry logic
+execute_sql() {
+ local sql="$1"
+ local table_num="$2"
+ local step_name="$3"
+
+ while true; do
+ log "${YELLOW}Executing command for Table $table_num - $step_name:${NC}"
+ log "$sql"
+
+ # Build clickhouse-client command
+ local ch_cmd="clickhouse-client --host=$CLICKHOUSE_HOST --port=$CLICKHOUSE_PORT --user=$CLICKHOUSE_USER"
+
+ if [ -n "$CLICKHOUSE_PASSWORD" ]; then
+ ch_cmd="$ch_cmd --password=$CLICKHOUSE_PASSWORD"
+ fi
+
+ # Execute the command and capture output and exit code
+ local output
+ local exit_code
+ output=$(echo "$sql" | $ch_cmd 2>&1)
+ exit_code=$?
+
+ # Log the output
+ echo "$output" | tee -a "$LOG_FILE"
+
+ if [ $exit_code -eq 0 ]; then
+ log "${GREEN}✓ Successfully executed${NC}"
+ return 0
+ else
+ log "${RED}✗ Failed to execute (Exit code: $exit_code)${NC}"
+ log "${RED}Error output: $output${NC}"
+
+ # Ask user what to do
+ while true; do
+ echo ""
+ log "${MAGENTA}========================================${NC}"
+ log "${MAGENTA}Error occurred! Choose an option:${NC}"
+ log "${MAGENTA}========================================${NC}"
+ echo -e "${YELLOW}[R]${NC} - Retry this command"
+ echo -e "${YELLOW}[I]${NC} - Ignore this error and continue to next command in this table"
+ echo -e "${YELLOW}[S]${NC} - Skip this entire table and move to next table"
+ echo -e "${YELLOW}[A]${NC} - Abort script execution"
+ echo ""
+ echo -n "Enter your choice (R/I/S/A): "
+
+ # Read from /dev/tty to get user input from terminal
+ read -r response < /dev/tty
+
+ case "${response^^}" in
+ R|RETRY)
+ log "${BLUE}Retrying command...${NC}"
+ break # Break inner loop to retry
+ ;;
+ I|IGNORE)
+ log "${YELLOW}Ignoring error and continuing to next command...${NC}"
+ return 1 # Return error but continue
+ ;;
+ S|SKIP)
+ log "${YELLOW}Skipping entire table $table_num...${NC}"
+ return 2 # Return special code to skip table
+ ;;
+ A|ABORT)
+ log "${RED}Aborting script execution...${NC}"
+ exit 1
+ ;;
+ *)
+ echo -e "${RED}Invalid option '$response'. Please enter R, I, S, or A.${NC}"
+ ;;
+ esac
+ done
+ fi
+ done
+}
+
+# Main execution function
+main() {
+ log "${BLUE}========================================${NC}"
+ log "${BLUE}ClickHouse Replica Recovery Script${NC}"
+ log "${BLUE}========================================${NC}"
+ log "Host: $CLICKHOUSE_HOST:$CLICKHOUSE_PORT"
+ log "User: $CLICKHOUSE_USER"
+ log "Commands file: $COMMANDS_FILE"
+ log "Log file: $LOG_FILE"
+ echo ""
+
+ # Check if commands file exists
+ if [ ! -f "$COMMANDS_FILE" ]; then
+ log "${RED}Error: Commands file '$COMMANDS_FILE' not found!${NC}"
+ echo ""
+ echo "Usage: $0 [commands_file]"
+ echo " commands_file: Path to SQL commands file (default: recovery_commands.sql)"
+ echo ""
+ echo "Example: $0 my_commands.sql"
+ exit 1
+ fi
+
+ # Process SQL commands from file
+ local current_sql=""
+ local table_counter=0
+ local step_in_table=0
+ local failed_count=0
+ local success_count=0
+ local ignored_count=0
+ local skipped_tables=()
+ local skip_current_table=false
+
+ while IFS= read -r line || [ -n "$line" ]; do
+ # Skip empty lines
+ if [[ -z "$line" ]] || [[ "$line" =~ ^[[:space:]]*$ ]]; then
+ continue
+ fi
+
+ # Check if this is a comment line indicating a new table
+ if [[ "$line" =~ ^[[:space:]]*--[[:space:]]*Table[[:space:]]+([0-9]+) ]]; then
+ table_counter="${BASH_REMATCH[1]}"
+ step_in_table=0
+ skip_current_table=false
+ log ""
+ log "${BLUE}========================================${NC}"
+ log "${BLUE}Processing Table $table_counter${NC}"
+ log "${BLUE}========================================${NC}"
+ continue
+ elif [[ "$line" =~ ^[[:space:]]*-- ]]; then
+ # Skip other comment lines
+ continue
+ fi
+
+ # Skip if we're skipping this table
+ if [ "$skip_current_table" = true ]; then
+ # Check if line ends with semicolon to count statements
+ if [[ "$line" =~ \;[[:space:]]*$ ]]; then
+ step_in_table=$((step_in_table + 1))
+ fi
+ continue
+ fi
+
+ # Accumulate the SQL statement
+ current_sql+="$line "
+
+ # Check if we have a complete statement (ends with semicolon)
+ if [[ "$line" =~ \;[[:space:]]*$ ]]; then
+ step_in_table=$((step_in_table + 1))
+
+ # Determine the step name
+ local step_name=""
+ if [[ "$current_sql" =~ ^[[:space:]]*DETACH ]]; then
+ step_name="DETACH"
+ elif [[ "$current_sql" =~ ^[[:space:]]*SYSTEM[[:space:]]+DROP[[:space:]]+REPLICA ]]; then
+ step_name="DROP REPLICA"
+ elif [[ "$current_sql" =~ ^[[:space:]]*ATTACH ]]; then
+ step_name="ATTACH"
+ elif [[ "$current_sql" =~ ^[[:space:]]*SYSTEM[[:space:]]+RESTORE[[:space:]]+REPLICA ]]; then
+ step_name="RESTORE REPLICA"
+ fi
+
+ log ""
+ log "Step $step_in_table/4: $step_name"
+
+ # Execute the statement
+ local result
+ execute_sql "$current_sql" "$table_counter" "$step_name"
+ result=$?
+
+ if [ $result -eq 0 ]; then
+ success_count=$((success_count + 1))
+ sleep 1 # Small delay between commands
+ elif [ $result -eq 1 ]; then
+ # User chose to ignore this error
+ failed_count=$((failed_count + 1))
+ ignored_count=$((ignored_count + 1))
+ sleep 1
+ elif [ $result -eq 2 ]; then
+ # User chose to skip this table
+ skip_current_table=true
+ skipped_tables+=("$table_counter")
+ log "${YELLOW}Skipping remaining commands for Table $table_counter${NC}"
+ fi
+
+ # Reset current_sql for next statement
+ current_sql=""
+ fi
+ done < "$COMMANDS_FILE"
+
+ # Summary
+ log ""
+ log "${BLUE}========================================${NC}"
+ log "${BLUE}Execution Summary${NC}"
+ log "${BLUE}========================================${NC}"
+ log "Total successful commands: ${GREEN}$success_count${NC}"
+ log "Total failed commands: ${RED}$failed_count${NC}"
+ log "Total ignored errors: ${YELLOW}$ignored_count${NC}"
+ log "Total tables processed: $table_counter"
+
+ if [ ${#skipped_tables[@]} -gt 0 ]; then
+ log "Skipped tables: ${YELLOW}${skipped_tables[*]}${NC}"
+ fi
+
+ log "Log file: $LOG_FILE"
+
+ if [ $failed_count -eq 0 ]; then
+ log "${GREEN}All commands executed successfully!${NC}"
+ exit 0
+ else
+ log "${YELLOW}Some commands failed or were ignored. Please check the log file.${NC}"
+ exit 1
+ fi
+}
+
+# Run the main function
+main
+
+```
+
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-clickhouse-in-docker.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-clickhouse-in-docker.md
index cf6bc5bbaf..6bd19aa4d9 100644
--- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-clickhouse-in-docker.md
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-clickhouse-in-docker.md
@@ -1,14 +1,14 @@
---
-title: "ClickHouse in Docker"
-linkTitle: "ClickHouse in Docker"
+title: "ClickHouse® in Docker"
+linkTitle: "ClickHouse® in Docker"
description: >
- ClickHouse in Docker
+ ClickHouse® in Docker
---
## Do you have documentation on Docker deployments?
Check
-* [https://hub.docker.com/r/yandex/clickhouse-server/](https://hub.docker.com/r/yandex/clickhouse-server/)
+* [https://hub.docker.com/r/clickhouse/clickhouse-server](https://hub.docker.com/r/clickhouse/clickhouse-server)
* [https://docs.altinity.com/clickhouseonkubernetes/](https://docs.altinity.com/clickhouseonkubernetes/)
* sources of entry point - [https://github.com/ClickHouse/ClickHouse/blob/master/docker/server/entrypoint.sh](https://github.com/ClickHouse/ClickHouse/blob/master/docker/server/entrypoint.sh)
@@ -21,7 +21,7 @@ Important things:
* Also, you may mount in some files or folders in the configuration folder:
* `/etc/clickhouse-server/config.d/listen_ports.xml`
* `--ulimit nofile=262144:262144`
-* You can also set on some linux capabilities to enable some of extra features of ClickHouse (not obligatory): `SYS_PTRACE NET_ADMIN IPC_LOCK SYS_NICE`
+* You can also set on some linux capabilities to enable some of extra features of ClickHouse® (not obligatory): `SYS_PTRACE NET_ADMIN IPC_LOCK SYS_NICE`
* you may also mount in the folder `/docker-entrypoint-initdb.d/` - all SQL or bash scripts there will be executed during container startup.
* if you use cgroup limits - it may misbehave https://github.com/ClickHouse/ClickHouse/issues/2261 (set up `` manually)
* there are several ENV switches, see: [https://github.com/ClickHouse/ClickHouse/blob/master/docker/server/entrypoint.sh](https://github.com/ClickHouse/ClickHouse/blob/master/docker/server/entrypoint.sh)
@@ -40,7 +40,7 @@ docker run -d \
--cap-add=IPC_LOCK \
--cap-add=SYS_PTRACE \
--network=host \
- yandex/clickhouse-server:21.1.7
+ clickhouse/clickhouse-server:latest
docker exec -it some-clickhouse-server clickhouse-client
docker exec -it some-clickhouse-server bash
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated.md
index 8e445d9b2f..ff72e62af3 100644
--- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated.md
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated.md
@@ -2,19 +2,22 @@
title: "Converting MergeTree to Replicated"
linkTitle: "Converting MergeTree to Replicated"
description: >
- Converting MergeTree to Replicated
+ Adding replication to a table
+keywords:
+ - clickhouse replicatedmergetree
+ - clickhouse replicated
---
-Options here are:
+To enable replication in a table that uses the `MergeTree` engine, you need to convert the engine to `ReplicatedMergeTree`. Options here are:
-1. Use`INSERT INTO foo_replicated SELECT * FROM foo` .
-2. Create table aside and attach all partition from the existing table then drop original table (uses hard links don't require extra disk space). `ALTER TABLE foo_replicated ATTACH PARTITION ID 'bar' FROM 'foo'` You can easily auto generate those commands using a query like: `SELECT DISTINCT 'ALTER TABLE foo_replicated ATTACH PARTITION ID \'' || partition_id || '\' FROM foo;' from system.parts WHERE table = 'foo';`
+1. Use `INSERT INTO foo_replicated SELECT * FROM foo` (suitable for small tables).
+2. Create a table aside and attach all partitions from the existing table, then drop the original table (uses hard links, doesn't require extra disk space). `ALTER TABLE foo_replicated ATTACH PARTITION ID 'bar' FROM 'foo'` You can easily auto-generate those commands using a query like: `SELECT DISTINCT 'ALTER TABLE foo_replicated ATTACH PARTITION ID \'' || partition_id || '\' FROM foo;' from system.parts WHERE table = 'foo';` See [the example below](#example-for-option-2-above) for details.
3. Do it 'in place' using some file manipulation. see the procedure described here: [https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/replication/\#converting-from-mergetree-to-replicatedmergetree](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/replication/#converting-from-mergetree-to-replicatedmergetree)
-4. Do a backup of MergeTree and recover as ReplicatedMergeTree. [https://github.com/AlexAkulov/clickhouse-backup/blob/master/Examples.md\#how-to-convert-mergetree-to-replicatedmegretree](https://github.com/AlexAkulov/clickhouse-backup/blob/master/Examples.md#how-to-convert-mergetree-to-replicatedmegretree)
-5. Embedded command for that should be added in future.
+4. Do a backup of MergeTree and recover as ReplicatedMergeTree. [https://github.com/Altinity/clickhouse-backup/blob/master/Examples.md\#how-to-convert-mergetree-to-replicatedmegretree](https://github.com/Altinity/clickhouse-backup/blob/master/Examples.md#how-to-convert-mergetree-to-replicatedmegretree)
+5. Embedded command for recent ClickHouse versions - https://clickhouse.com/docs/en/sql-reference/statements/attach#attach-mergetree-table-as-replicatedmergetree
-## example for option 2
+## Example for option 2 above
-Note: ATTACH PARTITION ID 'bar' FROM 'foo'` is practically free from compute and disk space perspective. This feature utilizes filesystem hard-links and the fact that files are immutable in Clickhouse ( it's the core of the Clickhouse design, filesystem hard-links and such file manipulations are widely used ).
+Note: `ATTACH PARTITION ID 'bar' FROM 'foo'` is practically free from a compute and disk space perspective. This feature utilizes filesystem hard-links and the fact that files are immutable in ClickHouse® (it's the core of the ClickHouse design, filesystem hard-links and such file manipulations are widely used).
```sql
create table foo( A Int64, D Date, S String )
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/_index.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/_index.md
index edc9521312..cdcfd679a9 100644
--- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/_index.md
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/_index.md
@@ -11,7 +11,7 @@ Pros:
Cons:
* Decoding & encoding of common data formats may be slower / require more CPU
-* The data size is usually bigger than ClickHouse formats.
+* The data size is usually bigger than ClickHouse® formats.
* Some of the common data formats have limitations.
{{% alert title="Info" color="info" %}}
@@ -31,35 +31,12 @@ Pros:
Cons:
* Uses CPU / RAM (mostly on the receiver side)
-See details in:
+See details of both approaches in:
[remote-table-function.md]({{}})
-## clickhouse-copier
-
-Pros:
-* Possible to do **some** changes in schema.
-* Needs only access to ClickHouse TCP port.
-* It’s possible to change the distribution of data between shards.
-* Suitable for large clusters: many clickhouse-copier can execute the same task together.
-
-Cons:
-* May create an inconsistent result if source cluster data is changing during the process.
-* Hard to setup.
-* Requires zookeeper.
-* Uses CPU / RAM (mostly on the clickhouse-copier and receiver side)
+[distributed-table-cluster.md]({{}})
-{{% alert title="Info" color="info" %}}
-Internally it works like smart `INSERT INTO cluster(…) SELECT * FROM ...` with some consistency checks.
-{{% /alert %}}
-
-{{% alert title="Info" color="info" %}}
-Run clickhouse copier on the same nodes as receiver clickhouse, to avoid doubling the network load.
-{{% /alert %}}
-
-See details in:
-
-[altinity-kb-clickhouse-copier]({{}})
## Manual parts moving: freeze / rsync / attach
@@ -89,9 +66,9 @@ Cons:
Just create the backup on server 1, upload it to server 2, and restore the backup.
-See [https://github.com/AlexAkulov/clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup)
+See [https://github.com/Altinity/clickhouse-backup](https://github.com/Altinity/clickhouse-backup)
-[https://altinity.com/blog/introduction-to-clickhouse-backups-and-clickhouse-backup]("https://altinity.com/blog/introduction-to-clickhouse-backups-and-clickhouse-backup")
+[https://altinity.com/blog/introduction-to-clickhouse-backups-and-clickhouse-backup](https://altinity.com/blog/introduction-to-clickhouse-backups-and-clickhouse-backup)
## Fetch from zookeeper path
@@ -100,7 +77,7 @@ Pros:
Cons:
* Table schema should be the same.
-* Works only when the source and the destination clickhouse servers share the same zookeeper (without chroot)
+* Works only when the source and the destination ClickHouse servers share the same zookeeper (without chroot)
* Needs to access zookeeper and ClickHouse replication ports: (`interserver_http_port` or `interserver_https_port`)
```sql
@@ -108,21 +85,23 @@ ALTER TABLE table_name FETCH PARTITION partition_expr FROM 'path-in-zookeeper'
```
[alter table fetch detail]({{}})
-## Replication protocol
+## Using the replication protocol by adding a new replica
Just make one more replica in another place.
Pros:
* Simple to setup
* Data is consistent all the time automatically.
-* Low CPU and network usage.
+* Low CPU usage; network usage should be tuned.
Cons:
* Needs to reach both zookeeper client (2181) and ClickHouse replication ports: (`interserver_http_port` or `interserver_https_port`)
* In case of cluster migration, zookeeper need’s to be migrated too.
-* Replication works both ways.
+* Replication works both ways, so the new replica should be outside the main cluster.
+
+Check the details in:
-[../altinity-kb-zookeeper/altinity-kb-zookeeper-cluster-migration.md](../altinity-kb-zookeeper/altinity-kb-zookeeper-cluster-migration.md)
+[Add a replica to a Cluster]({{}})
## See also
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/add_remove_replica.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/add_remove_replica.md
new file mode 100644
index 0000000000..5d47f3efcd
--- /dev/null
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/add_remove_replica.md
@@ -0,0 +1,277 @@
+---
+title: "Add/Remove a new replica to a ClickHouse® cluster"
+linkTitle: "add_remove_replica"
+description: >
+ How to add/remove a new ClickHouse replica manually and using `clickhouse-backup`
+keywords:
+ - clickhouse replica
+ - clickhouse add replica
+ - clickhouse remove replica
+---
+
+## ADD nodes/replicas to a ClickHouse® cluster
+
+To add ClickHouse® replicas to an existing cluster with a moderate data volume (roughly 30TB or less), it is better to use replication:
+
+- don’t add the new replica to `remote_servers.xml` until the replication is done.
+- Add these config files and restart to limit the bandwidth and avoid saturation (around 70% of the total bandwidth):
+
+[Core Settings | ClickHouse Docs](https://clickhouse.com/docs/en/operations/settings/settings/#max_replicated_fetches_network_bandwidth_for_server)
+
+💡 Do the **Gbps to Bps** math correctly. For 10G —> 1250MB/s —> 1250000000 B/s and change `max_replicated_*` settings accordingly:
+
+- Nodes replicating from:
+
+```xml
+<clickhouse>
+    <max_replicated_sends_network_bandwidth_for_server>50000</max_replicated_sends_network_bandwidth_for_server>
+</clickhouse>
+```
+
+- Nodes replicating to:
+
+```xml
+<clickhouse>
+    <max_replicated_fetches_network_bandwidth_for_server>50000</max_replicated_fetches_network_bandwidth_for_server>
+</clickhouse>
+```
+
+### Manual method (DDL)
+
+- Create the tables manually and be sure the macros on all replicas are aligned with the ZooKeeper path. If the ZK path uses `{cluster}`, then this method won’t work; the ZK path should only use `{shard}` and `{replica}`, or `{uuid}` (if the databases are Atomic).
+
+```sql
+-- DDL for Databases
+SELECT concat('CREATE DATABASE "', name, '" ENGINE = ', engine_full, ';')
+FROM system.databases WHERE name NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA')
+INTO OUTFILE '/tmp/databases.sql'
+FORMAT TSVRaw;
+-- DDL for tables and views
+SELECT
+ replaceRegexpOne(replaceOne(concat(create_table_query, ';'), '(', 'ON CLUSTER \'{cluster}\' ('), 'CREATE (TABLE|DICTIONARY|VIEW|LIVE VIEW|WINDOW VIEW)', 'CREATE \\1 IF NOT EXISTS')
+FROM
+ system.tables
+WHERE engine != 'MaterializedView' and
+ database NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA') AND
+ create_table_query != '' AND
+ name NOT LIKE '.inner.%%' AND
+ name NOT LIKE '.inner_id.%%'
+INTO OUTFILE '/tmp/schema.sql' AND STDOUT
+FORMAT TSVRaw
+SETTINGS show_table_uuid_in_table_create_query_if_not_nil=1;
+--- DDL only for materialized views
+SELECT
+ replaceRegexpOne(replaceOne(concat(create_table_query, ';'), 'TO', 'ON CLUSTER \'{cluster}\' TO'), '(CREATE MATERIALIZED VIEW)', '\\1 IF NOT EXISTS')
+FROM
+ system.tables
+WHERE engine = 'MaterializedView' and
+ database NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA') AND
+ create_table_query != '' AND
+ name NOT LIKE '.inner.%%' AND
+ name NOT LIKE '.inner_id.%%' AND
+ as_select != ''
+INTO OUTFILE '/tmp/schema.sql' APPEND AND STDOUT
+FORMAT TSVRaw
+SETTINGS show_table_uuid_in_table_create_query_if_not_nil=1;
+```
+
+This will generate the UUIDs in the CREATE TABLE definition, something like this:
+
+```sql
+CREATE TABLE IF NOT EXISTS default.insert_test UUID '51b41170-5192-4947-b13b-d4094c511f06' ON CLUSTER '{cluster}' (`id_order` UInt16, `id_plat` UInt32, `id_warehouse` UInt64, `id_product` UInt16, `order_type` UInt16, `order_status` String, `datetime_order` DateTime, `units` Int16, `total` Float32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}') PARTITION BY tuple() ORDER BY (id_order, id_plat, id_warehouse) SETTINGS index_granularity = 8192;
+```
+
+- Copy both SQL files to the destination replica and execute them:
+
+```bash
+clickhouse-client --host localhost --port 9000 -mn < databases.sql
+clickhouse-client --host localhost --port 9000 -mn < schema.sql
+```
+
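+Before running those files on the new replica, it can also help to double-check that the macros defined there match what the ZooKeeper paths expect, for example:
+
+```sql
+-- All macros defined on this replica
+SELECT * FROM system.macros;
+
+-- Or just the ones typically used in ReplicatedMergeTree paths
+SELECT getMacro('shard') AS shard, getMacro('replica') AS replica;
+```
+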
+### Using `clickhouse-backup`
+
+- Using `clickhouse-backup` to copy the schema from one replica to another is also convenient, especially if [using an Atomic database](/engines/altinity-kb-atomic-database-engine/) with `{uuid}` macros in [ReplicatedMergeTree engines](https://www.youtube.com/watch?v=oHwhXc0re6k):
+
+```bash
+sudo -u clickhouse clickhouse-backup create --schema --rbac --named-collections rbac_and_schema
+# From the destination replica do this in 2 steps:
+sudo -u clickhouse clickhouse-backup restore --rbac-only rbac_and_schema
+sudo -u clickhouse clickhouse-backup restore --schema --named-collections rbac_and_schema
+
+```
+
+### Using `altinity operator`
+
+If there is at least one alive replica in the shard, you can remove the PVCs and StatefulSets (STS) for the affected nodes and trigger a reconciliation. The operator will try to copy the schema from the other replicas.
+
+### Check that schema migration was successful and node is replicating
+
+- To check that the schema migration has been **successful**, query `system.replicas`:
+
+```sql
+SELECT DISTINCT database,table,replica_is_active FROM system.replicas FORMAT Vertical
+```
+
+- Check how the replication process is performing using https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-replication-queue/
+
+  - If there are many postponed tasks with a message like:
+
+ ```sql
+ Not executing fetch of part 7_22719661_22719661_0 because 16 fetches already executing, max 16. │ 2023-09-25 17:03:06 │ │
+ ```
+
+  then it is OK: the maximum number of replication slots is being used. Exceptions, however, are not OK and should be investigated.
+
+- If the migration was successful and replication is working, wait until the replication is finished; it may take some days depending on how much data is being replicated (see the query below for one way to track progress). After this, edit the cluster configuration XML file for all replicas (`remote_servers.xml`) and add the new replica to the cluster.
+
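+For example, one rough way to track the remaining replication work per table:
+
+```sql
+SELECT database, table, queue_size, inserts_in_queue, merges_in_queue, absolute_delay
+FROM system.replicas
+WHERE queue_size > 0
+ORDER BY queue_size DESC;
+```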
+
+### Possible problems
+
+#### **Exception** `REPLICA_ALREADY_EXISTS`
+
+```sql
+Code: 253. DB::Exception: Received from localhost:9000.
+DB::Exception: There was an error on [dl-ny2-vm-09.internal.io:9000]:
+Code: 253. DB::Exception: Replica /clickhouse/tables/3c3503c3-ed3c-443b-9cb3-ef41b3aed0a8/1/replicas/dl-ny2-vm-09.internal.io
+already exists. (REPLICA_ALREADY_EXISTS) (version 23.5.3.24 (official build)). (REPLICA_ALREADY_EXISTS)
+(query: CREATE TABLE IF NOT EXISTS xxxx.yyyy UUID '3c3503c3-ed3c-443b-9cb3-ef41b3aed0a8'
+```
+
+[The DDLs](/altinity-kb-setup-and-maintenance/altinity-kb-check-replication-ddl-queue/) have been executed and some tables were created and later dropped, but some leftovers remain in ZooKeeper:
+- If the databases can be dropped, use `DROP DATABASE xxxxx SYNC`
+- If the databases cannot be dropped, use `SYSTEM DROP REPLICA 'replica_name' FROM TABLE db.table` (see the example below)
+
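+For example (the replica and object names are placeholders):
+
+```sql
+-- Remove leftover replica metadata for a single table
+SYSTEM DROP REPLICA 'replica_name' FROM TABLE db.table_name;
+
+-- Or for all tables of a database at once
+SYSTEM DROP REPLICA 'replica_name' FROM DATABASE db;
+```
+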
+#### **Exception** `TABLE_ALREADY_EXISTS`
+
+```sql
+Code: 57. DB::Exception: Received from localhost:9000.
+DB::Exception: There was an error on [dl-ny2-vm-09.internal.io:9000]:
+Code: 57. DB::Exception: Directory for table data store/3c3/3c3503c3-ed3c-443b-9cb3-ef41b3aed0a8/ already exists.
+(TABLE_ALREADY_EXISTS) (version 23.5.3.24 (official build)). (TABLE_ALREADY_EXISTS)
+(query: CREATE TABLE IF NOT EXISTS xxxx.yyyy UUID '3c3503c3-ed3c-443b-9cb3-ef41b3aed0a8' ON CLUSTER '{cluster}'
+```
+
+Tables have not been dropped correctly:
+ - If databases can be dropped then use `DROP DATABASE xxxxx SYNC`
+ - If databases cannot be dropped use:
+
+```sql
+SELECT concat('DROP TABLE ', database, '.', name, ' SYNC;')
+FROM system.tables
+WHERE database NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA')
+INTO OUTFILE '/tmp/drop_tables.sql'
+FORMAT TSVRaw;
+```
+
+### Tuning
+
+- Sometimes replication goes very fast, and if you have tiered hot/cold storage you could run out of space, so it can be useful to:
+ - reduce fetches from 8 to 4
+ - increase moves from 8 to 16
+
+```xml
+<clickhouse>
+    <max_replicated_fetches_network_bandwidth_for_server>625000000</max_replicated_fetches_network_bandwidth_for_server>
+    <background_fetches_pool_size>4</background_fetches_pool_size>
+    <background_move_pool_size>16</background_move_pool_size>
+</clickhouse>
+```
+
+- You can also monitor this with:
+
+```sql
+SELECT *
+FROM system.metrics
+WHERE metric LIKE '%Move%'
+
+Query id: 5050155b-af4a-474f-a07a-f2f7e95fb395
+
+┌─metric─────────────────┬─value─┬─description──────────────────────────────────────────────────┐
+│ BackgroundMovePoolTask │ 0 │ Number of active tasks in BackgroundProcessingPool for moves │
+└────────────────────────┴───────┴──────────────────────────────────────────────────────────────┘
+
+1 row in set. Elapsed: 0.164 sec.
+
+dnieto-test :) SELECT * FROM system.metrics WHERE metric LIKE '%Fetch%';
+
+SELECT *
+FROM system.metrics
+WHERE metric LIKE '%Fetch%'
+
+Query id: 992cae2a-fb58-4150-a088-83273805d0c4
+
+┌─metric────────────────────┬─value─┬─description───────────────────────────────────────────────┐
+│ ReplicatedFetch │ 0 │ Number of data parts being fetched from replica │
+│ BackgroundFetchesPoolTask │ 0 │ Number of active fetches in an associated background pool │
+└───────────────────────────┴───────┴───────────────────────────────────────────────────────────┘
+
+2 rows in set. Elapsed: 0.163 sec.
+```
+
+- There are new tables in v23, `system.replicated_fetches` and `system.moves`; check them for more info (see the example below).
+- If needed, just stop replication using `SYSTEM STOP FETCHES` on the replicating nodes.
+
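+A minimal sketch of both checks (the table name in the `SYSTEM` commands is a placeholder):
+
+```sql
+-- Parts currently being fetched / moved
+SELECT database, table, elapsed, progress, result_part_name FROM system.replicated_fetches;
+SELECT database, table, elapsed, part_name, target_disk_name FROM system.moves;
+
+-- Temporarily pause / resume fetches for one table (or omit the table to affect all tables)
+SYSTEM STOP FETCHES db.table_name;
+SYSTEM START FETCHES db.table_name;
+```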
+
+## REMOVE nodes/Replicas from a Cluster
+
+- It is important to know which replica/node you want to remove to avoid problems. To check it, you need to connect to a different replica/node than the one you want to remove. For instance, we want to remove `arg_t04`, so we connect to replica `arg_t01`:
+
+```sql
+SELECT DISTINCT arrayJoin(mapKeys(replica_is_active)) AS replica_name
+FROM system.replicas
+
+┌─replica_name─┐
+│ arg_t01 │
+│ arg_t02 │
+│ arg_t03 │
+│ arg_t04 │
+└──────────────┘
+```
+
+- After that, making sure you're connected to a replica different from the one you want to remove (here `arg_t01`), execute:
+
+```sql
+SYSTEM DROP REPLICA 'arg_t04'
+```
+
+- If by any chance you're connected to the same replica you want to remove then **`SYSTEM DROP REPLICA`** will not work.
+- Note that `SYSTEM DROP REPLICA` does not drop any tables and does not remove any data or metadata from disk; it only removes the replica's metadata from ZooKeeper/Keeper.
+
+```sql
+-- What happens if executing system drop replica in the local replica to remove.
+SYSTEM DROP REPLICA 'arg_t04'
+
+Elapsed: 0.017 sec.
+
+Received exception from server (version 23.8.6):
+Code: 305. DB::Exception: Received from dnieto-zenbook.lan:9440. DB::Exception: We can't drop local replica, please use `DROP TABLE` if you want to clean the data and drop this replica. (TABLE_WAS_NOT_DROPPED)
+```
+
+- After DROP REPLICA, we need to check that the replica is gone from the list of replicas:
+
+```sql
+SELECT DISTINCT arrayJoin(mapKeys(replica_is_active)) AS replica_name
+FROM system.replicas
+
+┌─replica_name─┐
+│ arg_t01 │
+│ arg_t02 │
+│ arg_t03 │
+└──────────────┘
+
+-- We should see there is no replica arg_t04
+```
+
+- Delete the replica from the cluster configuration (`remote_servers.xml`) and shut down the removed node/replica.
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/_index.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/_index.md
index 7544fbab80..b4a57d4b1a 100644
--- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/_index.md
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/_index.md
@@ -4,27 +4,23 @@ linkTitle: "clickhouse-copier"
description: >
clickhouse-copier
---
-The description of the utility and its parameters, as well as examples of the config files that you need to create for the copier are in the doc [https://clickhouse.tech/docs/en/operations/utilities/clickhouse-copier/](https://clickhouse.tech/docs/en/operations/utilities/clickhouse-copier/)
+The description of the utility and its parameters, as well as examples of the config files that you need to create for the copier are in the official repo for the [ClickHouse® copier utility](https://github.com/clickhouse/copier/)
The steps to run a task:
-1. Create a config file for clickhouse-copier (zookeeper.xml)
-
- [https://clickhouse.tech/docs/en/operations/utilities/clickhouse-copier/\#format-of-zookeeper-xml](https://clickhouse.tech/docs/en/operations/utilities/clickhouse-copier/#format-of-zookeeper-xml)
-
+1. Create a config file for `clickhouse-copier` (zookeeper.xml)
2. Create a config file for the task (task1.xml)
+3. Create the task in ZooKeeper and start an instance of `clickhouse-copier`
- [https://clickhouse.tech/docs/en/operations/utilities/clickhouse-copier/\#configuration-of-copying-tasks](https://clickhouse.tech/docs/en/operations/utilities/clickhouse-copier/#configuration-of-copying-tasks)
-
-3. Create the task in ZooKeeper and start an instance of clickhouse-copier`clickhouse-copier --daemon --base-dir=/opt/clickhouse-copier --config /opt/clickhouse-copier/zookeeper.xml --task-path /clickhouse/copier/task1 --task-file /opt/clickhouse-copier/task1.xml`
+ `clickhouse-copier --daemon --base-dir=/opt/clickhouse-copier --config=/opt/clickhouse-copier/zookeeper.xml --task-path=/clickhouse/copier/task1 --task-file=/opt/clickhouse-copier/task1.xml`
-If the node in ZooKeeper already exists and you want to change it, you need to add the `task-upload-force` parameter:
+ If the node in ZooKeeper already exists and you want to change it, you need to add the `task-upload-force` parameter:
-`clickhouse-copier --daemon --base-dir=/opt/clickhouse-copier --config /opt/clickhouse-copier/zookeeper.xml --task-path /clickhouse/copier/task1 --task-file /opt/clickhouse-copier/task1.xml --task-upload-force 1`
+ `clickhouse-copier --daemon --base-dir=/opt/clickhouse-copier --config=/opt/clickhouse-copier/zookeeper.xml --task-path=/clickhouse/copier/task1 --task-file=/opt/clickhouse-copier/task1.xml --task-upload-force=1`
-If you want to run another instance of clickhouse-copier for the same task, you need to copy the config file (zookeeper.xml) to another server, and run this command:
+ If you want to run another instance of `clickhouse-copier` for the same task, you need to copy the config file (zookeeper.xml) to another server, and run this command:
-`clickhouse-copier --daemon --base-dir=/opt/clickhouse-copier --config /opt/clickhouse-copier/zookeeper.xml --task-path /clickhouse/copier/task1`
+ `clickhouse-copier --daemon --base-dir=/opt/clickhouse-copier --config=/opt/clickhouse-copier/zookeeper.xml --task-path=/clickhouse/copier/task1`
The number of simultaneously running instances is controlled be the `max_workers` parameter in your task configuration file. If you run more workers superfluous workers will sleep and log messages like this:
@@ -32,11 +28,10 @@ The number of simultaneously running instances is controlled be the `max_workers
### See also
-* https://clickhouse.tech/docs/en/operations/utilities/clickhouse-copier/
+* https://github.com/clickhouse/copier/
* Никита Михайлов. Кластер ClickHouse ctrl-с ctrl-v. HighLoad++ Весна 2021 [slides]( https://raw.githubusercontent.com/ClickHouse/clickhouse-presentations/master/highload2021/copier.pdf)
* 21.7 have a huge bulk of fixes / improvements. https://github.com/ClickHouse/ClickHouse/pull/23518
* https://altinity.com/blog/2018/8/22/clickhouse-copier-in-practice
-* http://www.clickhouse.com.cn/topic/601fb322b06e5e0f21ba79e1
* https://github.com/getsentry/snuba/blob/master/docs/clickhouse-copier.md
* https://hughsite.com/post/clickhouse-copier-usage.html
* https://www.jianshu.com/p/c058edd664a6
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.3-and-earlier.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.3-and-earlier.md
index ece660b90a..d2d46103e4 100644
--- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.3-and-earlier.md
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.3-and-earlier.md
@@ -4,10 +4,10 @@ linkTitle: "clickhouse-copier 20.3 and earlier"
description: >
clickhouse-copier 20.3 and earlier
---
-Clickhouse-copier was created to move data between clusters.
+`clickhouse-copier` was created to move data between clusters.
It runs simple INSERT…SELECT queries and can copy data between tables with different engine parameters and between clusters with different number of shards.
In the task configuration file you need to describe the layout of the source and the target cluster, and list the tables that you need to copy. You can copy whole tables or specific partitions.
-Clickhouse-copier uses temporary distributed tables to select from the source cluster and insert into the target cluster.
+`clickhouse-copier` uses temporary distributed tables to select from the source cluster and insert into the target cluster.
## The process is as follows
@@ -27,17 +27,17 @@ If a worker was interrupted, another worker can be started to continue the task.
## Configuring the engine of the target table
-Clickhouse-copier uses the engine from the task configuration file for these purposes:
+`clickhouse-copier` uses the engine from the task configuration file for these purposes:
* to create target tables if they don’t exist.
* PARTITION BY: to SELECT a partition of data from the source table, to DROP existing partitions from target tables.
-Clickhouse-copier does not support the old MergeTree format.
-However, you can create the target tables manually and specify the engine in the task configuration file in the new format so that clickhouse-copier can parse it for its SELECT queries.
+`clickhouse-copier` does not support the old MergeTree format.
+However, you can create the target tables manually and specify the engine in the task configuration file in the new format so that `clickhouse-copier` can parse it for its SELECT queries.
## How to monitor the status of running tasks
-Clickhouse-copier uses ZooKeeper to keep track of the progress and to communicate between workers.
+`clickhouse-copier` uses ZooKeeper to keep track of the progress and to communicate between workers.
Here is a list of queries that you can use to see what’s happening.
```sql
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.4+.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.4_21.6.md
similarity index 58%
rename from content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.4+.md
rename to content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.4_21.6.md
index 42a7ca6fe7..c3bf83b6a5 100644
--- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.4+.md
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.4_21.6.md
@@ -3,23 +3,25 @@ title: "clickhouse-copier 20.4 - 21.6"
linkTitle: "clickhouse-copier 20.4 - 21.6"
description: >
clickhouse-copier 20.4 - 21.6
+aliases:
+ /altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.4+/
---
-Clickhouse-copier was created to move data between clusters.
-It runs simple INSERT…SELECT queries and can copy data between tables with different engine parameters and between clusters with different number of shards.
+`clickhouse-copier` was created to move data between clusters.
+It runs simple `INSERT…SELECT` queries and can copy data between tables with different engine parameters and between clusters with different number of shards.
In the task configuration file you need to describe the layout of the source and the target cluster, and list the tables that you need to copy. You can copy whole tables or specific partitions.
-Clickhouse-copier uses temporary distributed tables to select from the source cluster and insert into the target cluster.
+`clickhouse-copier` uses temporary distributed tables to select from the source cluster and insert into the target cluster.
-The behavior of clickhouse-copier was changed in 20.4:
+The behavior of `clickhouse-copier` was changed in 20.4:
-* Now clickhouse-copier inserts data into intermediate tables, and after the insert finishes successfully clickhouse-copier attaches the completed partition into the target table. This allows for incremental data copying, because the data in the target table is intact during the process. **Important note:** ATTACH PARTITION respects the `max_partition_size_to_drop` limit. Make sure the `max_partition_size_to_drop` limit is big enough (or set to zero) in the destination cluster. If clickhouse-copier is unable to attach a partition because of the limit, it will proceed to the next partition, and it will drop the intermediate table when the task is finished (if the intermediate table is less than the `max_table_size_to_drop` limit). **Another important note:** ATTACH PARTITION is replicated. The attached partition will need to be downloaded by the other replicas. This can create significant network traffic between ClickHouse nodes. If an attach takes a long time, clickhouse-copier will log a timeout and will proceed to the next step.
-* Now clickhouse-copier splits the source data into chunks and copies them one by one. This is useful for big source tables, when inserting one partition of data can take hours. If there is an error during the insert clickhouse-copier has to drop the whole partition and start again. The `number_of_splits` parameter lets you split your data into chunks so that in case of an exception clickhouse-copier has to re-insert only one chunk of the data.
-* Now clickhouse-copier runs `OPTIMIZE target_table PARTITION ... DEDUPLICATE` for non-Replicated MergeTree tables. **Important note:** This is a very strange feature that can do more harm than good. We recommend to disable it by configuring the engine of the target table as Replicated in the task configuration file, and create the target tables manually if they are not supposed to be replicated. Intermediate tables are always created as plain MergeTree.
+* Now `clickhouse-copier` inserts data into intermediate tables, and after the insert finishes successfully `clickhouse-copier` attaches the completed partition into the target table. This allows for incremental data copying, because the data in the target table is intact during the process. **Important note:** ATTACH PARTITION respects the `max_partition_size_to_drop` limit. Make sure the `max_partition_size_to_drop` limit is big enough (or set to zero) in the destination cluster. If `clickhouse-copier` is unable to attach a partition because of the limit, it will proceed to the next partition, and it will drop the intermediate table when the task is finished (if the intermediate table is less than the `max_table_size_to_drop` limit). **Another important note:** ATTACH PARTITION is replicated. The attached partition will need to be downloaded by the other replicas. This can create significant network traffic between ClickHouse nodes. If an attach takes a long time, `clickhouse-copier` will log a timeout and will proceed to the next step.
+* Now `clickhouse-copier` splits the source data into chunks and copies them one by one. This is useful for big source tables, when inserting one partition of data can take hours. If there is an error during the insert `clickhouse-copier` has to drop the whole partition and start again. The `number_of_splits` parameter lets you split your data into chunks so that in case of an exception `clickhouse-copier` has to re-insert only one chunk of the data.
+* Now `clickhouse-copier` runs `OPTIMIZE target_table PARTITION ... DEDUPLICATE` for non-Replicated MergeTree tables. **Important note:** This is a very strange feature that can do more harm than good. We recommend to disable it by configuring the engine of the target table as Replicated in the task configuration file, and create the target tables manually if they are not supposed to be replicated. Intermediate tables are always created as plain MergeTree.
## The process is as follows
1. Process the configuration files.
2. Discover the list of partitions if not provided in the config.
-3. Copy partitions one by one _**I’m not sure of the order since I was copying from 1 shard to 4 shards.**_ _**The metadata in ZooKeeper suggests the order described here.**_
+3. Copy partitions one by one. **The metadata in ZooKeeper suggests the order described here.**
1. Copy chunks of data one by one.
1. Copy data from source shards one by one.
1. Create intermediate tables on all shards of the target cluster.
@@ -37,23 +39,23 @@ If a worker was interrupted, another worker can be started to continue the task.
## Configuring the engine of the target table
-Clickhouse-copier uses the engine from the task configuration file for these purposes:
+`clickhouse-copier` uses the engine from the task configuration file for these purposes:
* to create target and intermediate tables if they don’t exist.
* PARTITION BY: to SELECT a partition of data from the source table, to ATTACH partitions into target tables, to DROP incomplete partitions from intermediate tables, to OPTIMIZE partitions after they are attached to the target.
* ORDER BY: to SELECT a chunk of data from the source table.
-Here is an example of SELECT that clickhouse-copier runs to get the sixth of ten chunks of data:
+Here is an example of SELECT that `clickhouse-copier` runs to get the sixth of ten chunks of data:
```sql
WHERE ( = ( AS partition_key))
AND (cityHash64() % 10 = 6 )
```
-Clickhouse-copier does not support the old MergeTree format.
-However, you can create the intermediate tables manually with the same engine as the target tables (otherwise ATTACH will not work), and specify the engine in the task configuration file in the new format so that clickhouse-copier can parse it for SELECT, ATTACH PARTITION and DROP PARTITION queries.
+`clickhouse-copier` does not support the old MergeTree format.
+However, you can create the intermediate tables manually with the same engine as the target tables (otherwise ATTACH will not work), and specify the engine in the task configuration file in the new format so that `clickhouse-copier` can parse it for SELECT, ATTACH PARTITION and DROP PARTITION queries.
-**Important note**: always configure engine as Replicated to disable OPTIMIZE … DEDUPLICATE (unless you know why you need clickhouse-copier to run OPTIMIZE … DEDUPLICATE).
+**Important note**: always configure engine as Replicated to disable OPTIMIZE … DEDUPLICATE (unless you know why you need `clickhouse-copier` to run OPTIMIZE … DEDUPLICATE).
## How to configure the number of chunks
@@ -70,11 +72,11 @@ You can change this parameter in the `table` section of the task configuration f
## How to monitor the status of running tasks
-Clickhouse-copier uses ZooKeeper to keep track of the progress and to communicate between workers.
+`clickhouse-copier` uses ZooKeeper to keep track of the progress and to communicate between workers.
Here is a list of queries that you can use to see what’s happening.
```sql
---task-path /clickhouse/copier/task1
+--task-path=/clickhouse/copier/task1
-- The task config
select * from system.zookeeper
@@ -82,11 +84,17 @@ where path=''
name | ctime | mtime
----------------------------+---------------------+--------------------
description | 2021-03-22 13:15:48 | 2021-03-22 13:25:28
+status | 2021-03-22 13:15:48 | 2021-03-22 13:25:28
task_active_workers_version | 2021-03-22 13:15:48 | 2021-03-22 20:32:09
tables | 2021-03-22 13:16:47 | 2021-03-22 13:16:47
task_active_workers | 2021-03-22 13:15:48 | 2021-03-22 13:15:48
+-- Status
+select * from system.zookeeper
+where path='/status'
+
+
-- Running workers
select * from system.zookeeper
where path='/task_active_workers'
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-kubernetes-job.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-kubernetes-job.md
new file mode 100644
index 0000000000..849449ba7f
--- /dev/null
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-kubernetes-job.md
@@ -0,0 +1,255 @@
+---
+title: "Kubernetes job for clickhouse-copier"
+linkTitle: "Kubernetes job for clickhouse-copier"
+description: >
+ Kubernetes job for `clickhouse-copier`
+---
+# `clickhouse-copier` deployment in Kubernetes
+
+`clickhouse-copier` can be deployed in a Kubernetes environment to automate some simple backups or to copy fresh data between clusters.
+
+Some documentation to read:
+* https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/
+* https://github.com/clickhouse/copier/
+
+## Deployment
+
+Using a Kubernetes job is recommended, but a simple pod can be used if you only want to execute the copy once.
+
+Just edit all the ```yaml``` files to suit your needs.
+
+### 1) Create the PVC:
+
+First, create a namespace in which all the pods and resources are going to be deployed:
+
+```bash
+kubectl create namespace clickhouse-copier
+```
+
+Then create the PVC using the ```gp2-encrypted``` ```storageClass```, or use any other storageClass from other providers:
+
+```yaml
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: copier-logs
+  namespace: clickhouse-copier
+spec:
+  storageClassName: gp2-encrypted
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 100Mi
+```
+
+and deploy:
+
+```bash
+kubectl -n clickhouse-copier create -f ./kubernetes/copier-pvc.yaml
+```
+
+### 2) Create the configmap:
+
+The configmap has both files ```zookeeper.xml``` and ```task01.xml``` with the zookeeper node listing and the parameters for the task respectively.
+
+```yaml
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: copier-config
+  namespace: clickhouse-copier
+data:
+  task01.xml: |
+    <clickhouse>
+        <logger>
+            <console>true</console>
+            <level>trace</level>
+        </logger>
+        <remote_servers>
+            <all-replicated>
+                <shard>
+                    <replica>
+                        <host>clickhouse01.svc.cluster.local</host>
+                        <port>9000</port>
+                        <user>chcopier</user>
+                        <password>pass</password>
+                    </replica>
+                    <replica>
+                        <host>clickhouse02.svc.cluster.local</host>
+                        <port>9000</port>
+                        <user>chcopier</user>
+                        <password>pass</password>
+                    </replica>
+                </shard>
+            </all-replicated>
+            <all-sharded>
+                <shard>
+                    <replica>
+                        <host>clickhouse03.svc.cluster.local</host>
+                        <port>9000</port>
+                        <user>chcopier</user>
+                        <password>pass</password>
+                    </replica>
+                </shard>
+                <shard>
+                    <replica>
+                        <host>clickhouse03.svc.cluster.local</host>
+                        <port>9000</port>
+                        <user>chcopier</user>
+                        <password>pass</password>
+                    </replica>
+                </shard>
+            </all-sharded>
+        </remote_servers>
+        <max_workers>1</max_workers>
+        <settings_pull>
+            <readonly>1</readonly>
+        </settings_pull>
+        <settings_push>
+            <readonly>0</readonly>
+        </settings_push>
+        <settings>
+            <connect_timeout>3</connect_timeout>
+            <insert_distributed_sync>1</insert_distributed_sync>
+        </settings>
+        <tables>
+            <table_fact_sales_event>
+                <cluster_pull>all-replicated</cluster_pull>
+                <database_pull>default</database_pull>
+                <table_pull>fact_sales_event</table_pull>
+                <cluster_push>all-sharded</cluster_push>
+                <database_push>default</database_push>
+                <table_push>fact_sales_event</table_push>
+                <engine>
+                    Engine=ReplicatedMergeTree('/clickhouse/{cluster}/tables/{shard}/fact_sales_event', '{replica}')
+                    PARTITION BY toYYYYMM(timestamp)
+                    ORDER BY (channel_id, product_id)
+                    SETTINGS index_granularity = 8192
+                </engine>
+                <sharding_key>rand()</sharding_key>
+            </table_fact_sales_event>
+        </tables>
+    </clickhouse>
+  zookeeper.xml: |
+    <clickhouse>
+        <logger>
+            <level>trace</level>
+            <size>100M</size>
+            <count>3</count>
+        </logger>
+        <zookeeper>
+            <node index="1">
+                <host>zookeeper1.svc.cluster.local</host>
+                <port>2181</port>
+            </node>
+            <node index="2">
+                <host>zookeeper2.svc.cluster.local</host>
+                <port>2181</port>
+            </node>
+            <node index="3">
+                <host>zookeeper3.svc.cluster.local</host>
+                <port>2181</port>
+            </node>
+        </zookeeper>
+    </clickhouse>
+```
+
+and deploy:
+
+```bash
+kubectl -n clickhouse-copier create -f ./kubernetes/copier-configmap.yaml
+```
+
+The ```task01.xml``` file has many parameters to take into account, explained in the repo for [clickhouse-copier](https://github.com/clickhouse/copier/). It is important to note that the FQDNs used for the ZooKeeper nodes and ClickHouse® servers must be valid within the cluster. As the deployment creates a new namespace, it is recommended to use an FQDN linked to a service, for example ```zookeeper01.svc.cluster.local```. This file should be adapted to both cluster topologies and to the needs of the user.
+
+The ```zookeeper.xml``` file is pretty straightforward, with a simple 3-node ensemble configuration.
+
+### 3) Create the job:
+
+Basically, the job will download the official ClickHouse image and create a pod with 2 containers:
+
+  - clickhouse-copier: This container runs the `clickhouse-copier` utility.
+
+  - sidecar-logger: This container is used to read the logs of the clickhouse-copier container across runs (this part can be improved):
+
+```yaml
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: clickhouse-copier-test
+  namespace: clickhouse-copier
+spec:
+  # only for kubernetes 1.23
+  # ttlSecondsAfterFinished: 86400
+  template:
+    spec:
+      containers:
+        - name: clickhouse-copier
+          image: clickhouse/clickhouse-server:21.8
+          command:
+            - clickhouse-copier
+            - --task-upload-force=1
+            - --config-file=$(CH_COPIER_CONFIG)
+            - --task-path=$(CH_COPIER_TASKPATH)
+            - --task-file=$(CH_COPIER_TASKFILE)
+            - --base-dir=$(CH_COPIER_BASEDIR)
+          env:
+            - name: CH_COPIER_CONFIG
+              value: "/var/lib/clickhouse/tmp/zookeeper.xml"
+            - name: CH_COPIER_TASKPATH
+              value: "/clickhouse/copier/tasks/task01"
+            - name: CH_COPIER_TASKFILE
+              value: "/var/lib/clickhouse/tmp/task01.xml"
+            - name: CH_COPIER_BASEDIR
+              value: "/var/lib/clickhouse/tmp"
+          resources:
+            limits:
+              cpu: "1"
+              memory: 2048Mi
+          volumeMounts:
+            - name: copier-config
+              mountPath: /var/lib/clickhouse/tmp/zookeeper.xml
+              subPath: zookeeper.xml
+            - name: copier-config
+              mountPath: /var/lib/clickhouse/tmp/task01.xml
+              subPath: task01.xml
+            - name: copier-logs
+              mountPath: /var/lib/clickhouse/tmp
+        - name: sidecar-logger
+          image: busybox:1.35
+          command: ['/bin/sh', '-c', 'tail -n 1000 -f /tmp/copier-logs/clickhouse-copier*/*.log']
+          resources:
+            limits:
+              cpu: "1"
+              memory: 512Mi
+          volumeMounts:
+            - name: copier-logs
+              mountPath: /tmp/copier-logs
+      volumes:
+        - name: copier-config
+          configMap:
+            name: copier-config
+            items:
+              - key: zookeeper.xml
+                path: zookeeper.xml
+              - key: task01.xml
+                path: task01.xml
+        - name: copier-logs
+          persistentVolumeClaim:
+            claimName: copier-logs
+      restartPolicy: Never
+  backoffLimit: 3
+```
+
+Deploy it and watch the progress by checking the logs:
+
+```bash
+kubectl -n clickhouse-copier logs -f job/clickhouse-copier-test -c sidecar-logger
+```
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/distributed-table-cluster.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/distributed-table-cluster.md
new file mode 100644
index 0000000000..214240a014
--- /dev/null
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/distributed-table-cluster.md
@@ -0,0 +1,139 @@
+---
+title: "Distributed table to ClickHouse® Cluster"
+linkTitle: "Distributed table to cluster"
+description: >
+ Shifting INSERTs to a standby cluster
+keywords:
+ - clickhouse distributed table
+ - clickhouse distributed
+---
+
+In order to shift INSERTS to a standby cluster (for example increase zone availability or [disaster recovery](https://docs.altinity.com/operationsguide/availability-and-recovery/recovery-architecture/)) some ClickHouse® features can be used.
+
+Basically, we need to create a distributed table and a materialized view (MV), rewrite the `remote_servers.xml` config file, and tune some parameters.
+
+Distributed engine information and parameters:
+https://clickhouse.com/docs/en/engines/table-engines/special/distributed/
+
+## Steps
+
+### Create a Distributed table in the source cluster
+
+For example, suppose we have a `ReplicatedMergeTree` table into which all INSERTs go. This table is the first step in our pipeline:
+
+```sql
+CREATE TABLE db.inserts_source ON CLUSTER 'source'
+(
+    column1 String,
+    column2 DateTime,
+    .....
+)
+ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/inserts_source', '{replica}')
+PARTITION BY toYYYYMM(column2)
+ORDER BY (column1, column2)
+```
+
+This table lives in the source cluster and all INSERTs go there. In order to shift all INSERTs from the source cluster to the destination cluster, we can create a `Distributed` table that points to another `ReplicatedMergeTree` table in the destination cluster:
+
+```sql
+CREATE TABLE db.inserts_source_dist ON CLUSTER 'source'
+(
+    column1 String,
+    column2 DateTime,
+    .....
+)
+ENGINE = Distributed('destination', db, inserts_destination)
+```
+
+### Create a Materialized View to shift INSERTs to the destination cluster:
+
+```sql
+CREATE MATERIALIZED VIEW shift_inserts ON CLUSTER 'source'
+TO db.inserts_source_dist AS
+SELECT * FROM db.inserts_source
+```
+
+### Create a ReplicatedMergeTree table in the destination cluster:
+
+This is the table in the destination cluster that the distributed table in the source cluster points to:
+
+```sql
+CREATE TABLE db.inserts_destination ON CLUSTER 'destination'
+(
+    column1 String,
+    column2 DateTime,
+    .....
+)
+ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/inserts_destination', '{replica}')
+PARTITION BY toYYYYMM(column2)
+ORDER BY (column1, column2)
+```
+
+### Rewrite remote_servers.xml:
+
+All the hostnames/FQDNs of each replica/node must be accessible from both clusters. Also, the `remote_servers.xml` of the source cluster should read like this:
+
+```xml
+<clickhouse>
+    <remote_servers>
+        <source>
+            <shard>
+                <replica>
+                    <host>host03</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>host04</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            <shard>
+                <replica>
+                    <host>host01</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>host02</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </source>
+        <destination>
+            <shard>
+                <replica>
+                    <host>load_balancer.xxxx.com</host>
+                    <port>9440</port>
+                    <secure>1</secure>
+                    <user>user</user>
+                    <password>pass</password>
+                </replica>
+            </shard>
+        </destination>
+    </remote_servers>
+</clickhouse>
+```
+
+### Configuration settings
+
+Depending on your use case, you can set the distributed INSERTs to sync or [async mode](/altinity-kb-queries-and-syntax/async-inserts/). This example is for async mode. Put these config settings in the default profile, and check the link below for more info about the possible modes:
+
+https://clickhouse.com/docs/en/operations/settings/settings#insert_distributed_sync
+
+```xml
+
+ ....
+
+
+
+ 1
+
+ 1
+
+ .....
+```
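+
+Once the tables, the materialized view and the configuration above are in place, a simple way to smoke-test the pipeline is to insert a marker row on the source cluster and look for it on the destination cluster (the column names follow the examples above; with async mode the row may take a moment to arrive):
+
+```sql
+-- On the source cluster
+INSERT INTO db.inserts_source (column1, column2) VALUES ('smoke-test', now());
+
+-- On the destination cluster
+SELECT count() FROM db.inserts_destination WHERE column1 = 'smoke-test';
+```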
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/fetch_alter_table.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/fetch_alter_table.md
index 2a86c623a1..3b428cab6e 100644
--- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/fetch_alter_table.md
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/fetch_alter_table.md
@@ -50,9 +50,11 @@ If needed, after moving the data and checking that everything is sound, you can
```sql
-- Required for DROP REPLICA
DETACH TABLE ;
--- It will remove everything from /table_path_in_z
+
+-- This will remove everything from /table_path_in_z/replicas/replica_name
-- but not the data. You could reattach the table again and
--- restore the replica if needed
+-- restore the replica if needed. Get the zookeeper_path and replica_name from system.replicas
+
SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/table_path_in_zk/';
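+
+-- Illustrative example: before detaching, the values above can be found with
+-- SELECT zookeeper_path, replica_name FROM system.replicas WHERE database = 'db' AND table = 'table_name';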
```
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/mssql-clickhouse.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/mssql-clickhouse.md
new file mode 100644
index 0000000000..4e4d37cf74
--- /dev/null
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/mssql-clickhouse.md
@@ -0,0 +1,101 @@
+---
+title: "MSSQL bcp pipe to clickhouse-client"
+linkTitle: "Export from MSSQL to ClickHouse®"
+weight: 100
+description: >-
+ Export from MSSQL to ClickHouse®
+---
+
+## How to pipe data to ClickHouse® from the MSSQL bcp export tool
+
+### Prepare tables
+
+```bash
+LAPTOP.localdomain :) CREATE TABLE tbl(key UInt32) ENGINE=MergeTree ORDER BY key;
+
+root@LAPTOP:/home/user# sqlcmd -U sa -P Password78
+1> WITH t0(i) AS (SELECT 0 UNION ALL SELECT 0), t1(i) AS (SELECT 0 FROM t0 a, t0 b), t2(i) AS (SELECT 0 FROM t1 a, t1 b), t3(i) AS (SELECT 0 FROM t2 a, t2 b), t4(i) AS (SELECT 0 FROM t3 a, t3 b), t5(i) AS (SELECT 0 FROM t4 a, t3 b),n(i) AS (SELECT ROW_NUMBER() OVER(ORDER BY (SELECT 0)) FROM t5) SELECT i INTO tbl FROM n WHERE i BETWEEN 1 AND 16777216
+2> GO
+
+(16777216 rows affected)
+
+root@LAPTOP:/home/user# sqlcmd -U sa -P Password78 -Q "SELECT count(*) FROM tbl"
+
+-----------
+ 16777216
+
+(1 rows affected)
+```
+
+### Piping
+
+```bash
+root@LAPTOP:/home/user# mkfifo import_pipe
+root@LAPTOP:/home/user# bcp "SELECT * FROM tbl" queryout import_pipe -t, -c -b 200000 -U sa -P Password78 -S localhost &
+[1] 6038
+root@LAPTOP:/home/user#
+Starting copy...
+1000 rows successfully bulk-copied to host-file. Total received: 1000
+1000 rows successfully bulk-copied to host-file. Total received: 2000
+1000 rows successfully bulk-copied to host-file. Total received: 3000
+1000 rows successfully bulk-copied to host-file. Total received: 4000
+1000 rows successfully bulk-copied to host-file. Total received: 5000
+1000 rows successfully bulk-copied to host-file. Total received: 6000
+1000 rows successfully bulk-copied to host-file. Total received: 7000
+1000 rows successfully bulk-copied to host-file. Total received: 8000
+1000 rows successfully bulk-copied to host-file. Total received: 9000
+1000 rows successfully bulk-copied to host-file. Total received: 10000
+1000 rows successfully bulk-copied to host-file. Total received: 11000
+1000 rows successfully bulk-copied to host-file. Total received: 12000
+1000 rows successfully bulk-copied to host-file. Total received: 13000
+1000 rows successfully bulk-copied to host-file. Total received: 14000
+1000 rows successfully bulk-copied to host-file. Total received: 15000
+1000 rows successfully bulk-copied to host-file. Total received: 16000
+1000 rows successfully bulk-copied to host-file. Total received: 17000
+1000 rows successfully bulk-copied to host-file. Total received: 18000
+1000 rows successfully bulk-copied to host-file. Total received: 19000
+1000 rows successfully bulk-copied to host-file. Total received: 20000
+1000 rows successfully bulk-copied to host-file. Total received: 21000
+1000 rows successfully bulk-copied to host-file. Total received: 22000
+1000 rows successfully bulk-copied to host-file. Total received: 23000
+-- Enter
+root@LAPTOP:/home/user# cat import_pipe | clickhouse-client --query "INSERT INTO tbl FORMAT CSV" &
+...
+1000 rows successfully bulk-copied to host-file. Total received: 16769000
+1000 rows successfully bulk-copied to host-file. Total received: 16770000
+1000 rows successfully bulk-copied to host-file. Total received: 16771000
+1000 rows successfully bulk-copied to host-file. Total received: 16772000
+1000 rows successfully bulk-copied to host-file. Total received: 16773000
+1000 rows successfully bulk-copied to host-file. Total received: 16774000
+1000 rows successfully bulk-copied to host-file. Total received: 16775000
+1000 rows successfully bulk-copied to host-file. Total received: 16776000
+1000 rows successfully bulk-copied to host-file. Total received: 16777000
+16777216 rows copied.
+Network packet size (bytes): 4096
+Clock Time (ms.) Total : 11540 Average : (1453831.5 rows per sec.)
+
+[1]- Done bcp "SELECT * FROM tbl" queryout import_pipe -t, -c -b 200000 -U sa -P Password78 -S localhost
+[2]+ Done cat import_pipe | clickhouse-client --query "INSERT INTO tbl FORMAT CSV"
+```
+
+### Another shell
+
+```bash
+root@LAPTOP:/home/user# for i in `seq 1 600`; do clickhouse-client -q "select count() from tbl";sleep 1; done
+0
+0
+0
+0
+0
+0
+1048545
+4194180
+6291270
+9436905
+11533995
+13631085
+16777216
+16777216
+16777216
+16777216
+```
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/remote-table-function.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/remote-table-function.md
index f131a47ee6..3c4b39c300 100644
--- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/remote-table-function.md
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/remote-table-function.md
@@ -33,7 +33,7 @@ While of course it should be checked, every case is different.
Yes, by the cost of extra memory usage (on the receiver side).
-Clickhouse tries to form blocks of data in memory and while one of limit: `min_insert_block_size_rows` or `min_insert_block_size_bytes` being hit, clickhouse dump this block on disk. If clickhouse tries to execute insert in parallel (`max_insert_threads > 1`), it would form multiple blocks at one time.
+ClickHouse® tries to form blocks of data in memory and while one of limit: `min_insert_block_size_rows` or `min_insert_block_size_bytes` being hit, ClickHouse dump this block on disk. If ClickHouse tries to execute insert in parallel (`max_insert_threads > 1`), it would form multiple blocks at one time.
So maximum memory usage can be calculated like this: `max_insert_threads * first(min_insert_block_size_rows OR min_insert_block_size_bytes)`
Default values:
@@ -72,3 +72,44 @@ Default values:
│ connect_timeout_with_failover_secure_ms │ 100 │
└─────────────────────────────────────────┴───────┘
```
+
+### Example
+
+```bash
+#!/bin/bash
+
+table='...'
+database='bvt'
+local='...'
+remote='...'
+CH="clickhouse-client" # you may add auth here
+settings=" max_insert_threads=20,
+ max_threads=20,
+ min_insert_block_size_bytes = 536870912,
+ min_insert_block_size_rows = 16777216,
+ max_insert_block_size = 16777216,
+ optimize_on_insert=0";
+
+# need it to create temp table with same structure (suitable for attach)
+params=$($CH -h $remote -q "select partition_key,sorting_key,primary_key from system.tables where table='$table' and database = '$database' " -f TSV)
+IFS=$'\t' read -r partition_key sorting_key primary_key <<< $params
+
+# get the list of source partition ids and feed them into the copy loop
+$CH -h $local -q "select distinct partition_id from system.parts where table='$table' and database = '$database' " |
+while read -r partition; do
+# skip partitions that have already been copied to the destination
+    if [ `$CH -h $remote -q "select count() from system.parts where table='$table' and database = '$database' and partition_id='$partition'"` -eq 0 ] ; then
+ $CH -n -h $remote -q "
+ create temporary table temp as $database.$table engine=MergeTree -- 23.3 required for temporary table
+ partition by ($partition_key) primary key ($primary_key) order by ($sorting_key);
+ -- SYSTEM STOP MERGES temp; -- maybe....
+ set $settings;
+ insert into temp select * from remote($local,$database.$table) where _partition_id='$partition'
+ -- order by ($sorting_key) -- maybe....
+ ;
+ alter table $database.$table attach partition id '$partition' from temp
+ "
+ fi
+done
+```
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/rsync.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/rsync.md
index bd4060c53d..b3a496b3fe 100644
--- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/rsync.md
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/rsync.md
@@ -1,19 +1,66 @@
---
-title: "rsync"
+title: "Moving ClickHouse to Another Server"
linkTitle: "rsync"
description: >
- rsync
+ Copying Multi-Terabyte Live ClickHouse to Another Server
---
-### Short Instruction
-1. Do [FREEZE TABLE](https://clickhouse.tech/docs/en/sql-reference/statements/alter/partition/#alter_freeze-partition) on needed table, partition. It would produce consistent snapshot of table data.
-2. Run rsync command.
+When migrating a large, live ClickHouse cluster (multi-terabyte scale) to a new server or cluster, the goal is to minimize downtime while ensuring data consistency. A practical method is to use **incremental `rsync`** in multiple passes, combined with ClickHouse’s replication features.
- ```bash
- rsync -ravlW --bwlimit=100000 /var/lib/clickhouse/data/shadow/N/database/table
- root@remote_host:/var/lib/clickhouse/data/database/table/detached
- ```
+1. **Prepare the new cluster**
+ - Ensure the new cluster is set up with its own ZooKeeper (or Keeper).
+ - Configure ClickHouse but keep it stopped initially.
+2. **Initial data sync**
+
+ Run a full recursive sync of the data directory from the old server to the new one:
+
+ ```bash
+ rsync -ravlW --delete /var/lib/clickhouse/ user@new_host:/var/lib/clickhouse/
+ ```
+
+ Explanation of flags:
+
+ - `r`: recursive, includes all subdirectories.
+ - `a`: archive mode (preserves symlinks, permissions, timestamps, ownership, devices).
+ - `v`: verbose, shows progress.
+ - `l`: copy symlinks as symlinks.
+ - `W`: copy whole files instead of using rsync’s delta algorithm (faster for large DB files).
+ - `--delete`: remove files from the destination that don’t exist on the source.
- `--bwlimit` is transfer limit in KBytes per second.
+ If you plan to run several replicas on the new cluster, rsync the data to all of them. To avoid extra load on the production servers, you can copy the data to one new replica first and then use it as the source for the others. Alternatively, you can start with a single replica and add more after switching.
-3. Run [ATTACH PARTITION](https://clickhouse.tech/docs/en/sql-reference/statements/alter/partition/#alter_attach-partition) for each partition from `./detached` directory.
+ Add `--bwlimit=100000` (KBytes per second) to preserve the performance of the production cluster while copying a lot of data.
+
+ Consider shards as independent clusters.
+
+3. **Incremental re-syncs**
+ - Repeat the `rsync` step multiple times while the old cluster is live.
+ - Each subsequent run will copy only changes and reduce the final sync time.
+4. **Restore replication metadata**
+ - Start the new ClickHouse node(s).
+ - Run `SYSTEM RESTORE REPLICA` to rebuild replication metadata in ZooKeeper (see the sketch after this list).
+5. **Test the application**
+ - Point your test environment to the new cluster.
+ - Validate queries, schema consistency, and application behavior.
+6. **Final sync and switchover**
+ - Stop ClickHouse on the old cluster.
+ - Immediately run a final incremental `rsync` to catch last-minute changes.
+ - Reinitialize the ZooKeeper/Keeper database on the new cluster (stop it, clear the snapshots, start it).
+ - Run `SYSTEM RESTORE REPLICA` again to rebuild the replication metadata in ZooKeeper.
+ - Start ClickHouse on the new cluster and switch production traffic.
+ - Add more replicas as needed.
+
+
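+A minimal sketch of the restore step (step 4 above); the table name is illustrative:
+
+```sql
+-- After the copied data is in place and ClickHouse is started, replicated tables
+-- come up read-only because their metadata is missing in the new ZooKeeper/Keeper.
+SYSTEM RESTORE REPLICA db.events;
+
+-- Generate the statements for every Replicated*MergeTree table at once:
+SELECT concat('SYSTEM RESTORE REPLICA ', database, '.', name, ';')
+FROM system.tables
+WHERE engine LIKE 'Replicated%';
+```
+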
+NOTES:
+
+1. If you are using a data directory that differs from /var/lib/clickhouse/, adjust the rsync command accordingly to point to the correct location. For example, suppose you reconfigure the storage path as follows in /etc/clickhouse-server/config.d/config.xml:
+```xml
+<clickhouse>
+    <path>/data1/clickhouse/</path>
+    ...
+</clickhouse>
+```
+You'll need to use `/data1/clickhouse` instead of `/var/lib/clickhouse` in the rsync paths.
+
+2. The ClickHouse Docker container image does not have rsync installed. Add it using apt-get, or run a sidecar container in Kubernetes.
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/_index.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/_index.md
index 052701a190..0722b0021c 100644
--- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/_index.md
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/_index.md
@@ -1,20 +1,25 @@
---
-title: "DDLWorker"
-linkTitle: "DDLWorker"
-description: >
- DDLWorker
+title: "DDLWorker and DDL queue problems"
+linkTitle: "DDLWorker and DDL queue problems"
+description: >
+ Finding and troubleshooting problems in the `distributed_ddl_queue`
+keywords:
+ - clickhouse ddl
+ - clickhouse replication queue
---
-DDLWorker is a subprocess (thread) of clickhouse-server that executes `ON CLUSTER` tasks at the node.
+DDLWorker is a subprocess (thread) of `clickhouse-server` that executes `ON CLUSTER` tasks at the node.
-When you execute a DDL query with `ON CLUSTER mycluster` section the query executor at the current node reads the cluster `mycluster` definition (remote_servers / system.clusters) and places tasks into Zookeeper znode `task_queue/ddl/...` for members of the cluster `mycluster`.
+When you execute a DDL query with an `ON CLUSTER mycluster` section, the query executor at the current node reads the cluster `mycluster` definition (remote_servers / system.clusters) and places tasks into the Zookeeper znode `task_queue/ddl/...` for the members of the cluster `mycluster`.
-DDLWorker at all ClickHouse nodes constantly check this `task_queue` for their tasks and executes them locally and reports about a result back into `task_queue`.
+The DDLWorker on each ClickHouse® node constantly checks this `task_queue` for its tasks, executes them locally, and reports the results back into the `task_queue`.
The common issue is the different hostnames/IPAddresses in the cluster definition and locally.
-So a node initiator puts tasks for a host named Host1. But the Host1 thinks about own name as localhost or **xdgt634678d** (internal docker hostname) and never sees tasks for the Host1 because is looking tasks for **xdgt634678d.** The same with internal VS external IP addresses.
+So the initiator node puts tasks for a host named Host1, but Host1 thinks of its own name as localhost or **xdgt634678d** (an internal docker hostname) and never sees the tasks for Host1, because it is looking for tasks addressed to **xdgt634678d**. The same happens with internal vs. external IP addresses.
-Another issue that sometimes DDLWorker thread can crash then ClickHouse node stops to execute `ON CLUSTER` tasks.
+## DDLWorker thread crashed
+
+That causes ClickHouse to stop executing `ON CLUSTER` tasks.
Check that DDLWorker is alive:
@@ -36,6 +41,7 @@ config.xml
 <distributed_ddl>
     <path>/clickhouse/task_queue/ddl</path>
+    <pool_size>1</pool_size>
+    <max_tasks_in_queue>1000</max_tasks_in_queue>
+    <task_max_lifetime>604800</task_max_lifetime>
+    <cleanup_delay_period>60</cleanup_delay_period>
 </distributed_ddl>
@@ -50,3 +56,25 @@ Default values:
**task_max_lifetime** = 7 \* 24 \* 60 \* 60 (in seconds = week) – Delete task if its age is greater than that.
**max_tasks_in_queue** = 1000 – How many tasks could be in the queue.
+
+**pool_size** = 1 - How many ON CLUSTER queries can be run simultaneously.
+
+## Too intensive stream of ON CLUSTER commands
+
+Generally, it's a bad design, but you can increase the `pool_size` setting.
+
+## Stuck DDL tasks in the distributed_ddl_queue
+
+Sometimes [DDL tasks](/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/) (the ones that use ON CLUSTER) can get stuck in the `distributed_ddl_queue` because the replicas can get overloaded when multiple DDLs (thousands of CREATE/DROP/ALTER) are executed at the same time. This is very common in heavy ETL jobs. It can be detected by checking the `distributed_ddl_queue` table and seeing whether there are tasks that are not moving or have been stuck for a long time.
+
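+To spot them, a sketch like this can help (the column list is illustrative):
+
+```sql
+-- DDL tasks that are still unfinished on this node, oldest first
+SELECT entry, initiator_host, host, status, query_create_time, query
+FROM system.distributed_ddl_queue
+WHERE status != 'Finished'
+ORDER BY query_create_time ASC
+LIMIT 20;
+```
+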
+If these DDLs completed on some replicas but failed on others, the simplest way to solve this is to execute the failed command on the replicas that missed it, without ON CLUSTER. If most of the DDLs failed, check the number of unfinished records in `distributed_ddl_queue` on the other nodes, because it will most probably be in the thousands.
+
+First, back up the `distributed_ddl_queue` table so you have a snapshot of the task states. You can do this with the following command:
+
+```sql
+CREATE TABLE default.system_distributed_ddl_queue AS SELECT * FROM system.distributed_ddl_queue;
+```
+
+After this, check in the backup table which tasks are not finished, execute them manually on the replicas that missed them, and review the pipelines that issue `ON CLUSTER` commands so that they do not abuse them. In some cases `CREATE TEMPORARY TABLE` can be used to avoid `ON CLUSTER` entirely: when you need an intermediate table for some operations, you can `INSERT INTO` the final table from it or run `ALTER TABLE final ATTACH PARTITION ... FROM temp`, and the temporary table is dropped automatically when the session is closed.
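+
+A sketch of that pattern (table, column, and partition names are illustrative; MergeTree temporary tables need ClickHouse 23.3+):
+
+```sql
+-- Instead of creating a real staging table ON CLUSTER, use a temporary table
+CREATE TEMPORARY TABLE staging AS db.final_table
+ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (id, event_date);
+
+INSERT INTO staging SELECT * FROM db.raw_events WHERE event_date = today();
+
+ALTER TABLE db.final_table ATTACH PARTITION ID '202501' FROM staging;
+-- the temporary table is dropped automatically when the session closes
+```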
+
+
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/there-are-n-unfinished-hosts-0-of-them-are-currently-active.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/there-are-n-unfinished-hosts-0-of-them-are-currently-active.md
index 6850e2955b..ca02a38cf7 100644
--- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/there-are-n-unfinished-hosts-0-of-them-are-currently-active.md
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/there-are-n-unfinished-hosts-0-of-them-are-currently-active.md
@@ -2,13 +2,13 @@
title: "There are N unfinished hosts (0 of them are currently active)."
linkTitle: "There are N unfinished hosts (0 of them are currently active)."
description: >
- "There are N unfinished hosts (0 of them are currently active)."
+ There are N unfinished hosts (0 of them are currently active).
---
Sometimes your Distributed DDL queries are being stuck, and not executing on all or subset of nodes, there are a lot of possible reasons for that kind of behavior, so it would take some time and effort to investigate.
## Possible reasons
-### Clickhouse node can't recognize itself
+### ClickHouse® node can't recognize itself
```sql
SELECT * FROM system.clusters; -- check is_local column, it should have 1 for itself
@@ -24,7 +24,7 @@ cat /etc/hostname
### Debian / Ubuntu
-There is an issue in Debian based images, when hostname being mapped to 127.0.1.1 address which doesn't literally match network interface and clickhouse fails to detect this address as local.
+There is an issue in Debian-based images where the hostname is mapped to the 127.0.1.1 address, which doesn't literally match a network interface, so ClickHouse fails to detect this address as local.
[https://github.com/ClickHouse/ClickHouse/issues/23504](https://github.com/ClickHouse/ClickHouse/issues/23504)
@@ -99,7 +99,10 @@ WHERE metric LIKE '%MaxDDLEntryID%'
grep -C 40 "ddl_entry" /var/log/clickhouse-server/clickhouse-server*.log
```
-#### Issues that can prevent the task execution
+
+### Issues that can prevent task execution
+
+#### Obsolete Replicas
Obsolete replicas left in zookeeper.
@@ -116,6 +119,8 @@ SYSTEM START REPLICATION QUEUES;
[https://clickhouse.tech/docs/en/sql-reference/statements/system/\#query_language-system-drop-replica](https://clickhouse.tech/docs/en/sql-reference/statements/system/\#query_language-system-drop-replica)
+#### Tasks manually removed from DDL queue
+
Task were removed from DDL queue, but left in Replicated\*MergeTree table queue.
```bash
@@ -148,3 +153,29 @@ Context of this problem is:
Solution:
* Reload/Restore this replica from scratch.
+
+#### DDL path was changed in Zookeeper without restarting ClickHouse
+
+Changing the DDL queue path in Zookeeper without restarting ClickHouse will confuse ClickHouse. If you need to do this, ensure that you restart ClickHouse before submitting additional distributed DDL commands. Here's an example.
+
+```sql
+-- Path before change:
+SELECT *
+FROM system.zookeeper
+WHERE path = '/clickhouse/clickhouse101/task_queue'
+
+┌─name─┬─value─┬─path─────────────────────────────────┐
+│ ddl │ │ /clickhouse/clickhouse101/task_queue │
+└──────┴───────┴──────────────────────────────────────┘
+
+-- Path after change
+SELECT *
+FROM system.zookeeper
+WHERE path = '/clickhouse/clickhouse101/task_queue'
+
+┌─name─┬─value─┬─path─────────────────────────────────┐
+│ ddl2 │ │ /clickhouse/clickhouse101/task_queue │
+└──────┴───────┴──────────────────────────────────────┘
+```
+
+The reason is that ClickHouse will not "see" this change and will continue to look for tasks in the old path. Altering paths in Zookeeper should be avoided if at all possible. If necessary, it must be done *very carefully*.
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-memory-configuration-settings.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-memory-configuration-settings.md
index 01cce387aa..ab493f097a 100644
--- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-memory-configuration-settings.md
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-memory-configuration-settings.md
@@ -22,9 +22,18 @@ By default it 90% of the physical RAM of the server.
You can decrease that in some scenarios (like you need to leave more free RAM for page cache or to some other software).
+### How to check the limits?
+
+```sql
+select metric, formatReadableSize(value) from system.asynchronous_metrics where metric ilike '%MemoryTotal%'
+union all
+select name, formatReadableSize(toUInt64(value)) from system.server_settings where name='max_server_memory_usage'
+FORMAT PrettyCompactMonoBlock
+```
+
### How to check what is using my RAM?
-[altinity-kb-who-ate-my-memory.md" ]({{}})
+[altinity-kb-who-ate-my-memory.md]({{< ref "altinity-kb-who-ate-my-memory.md" >}})
### Mark cache
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-memory-overcommit.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-memory-overcommit.md
new file mode 100644
index 0000000000..a6a718718d
--- /dev/null
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-memory-overcommit.md
@@ -0,0 +1,49 @@
+---
+title: "Memory Overcommiter"
+linkTitle: "Memory Overcommiter"
+description: >
+ Enable Memory overcommiter instead of using `max_memory_usage` per query
+---
+
+## Memory Overcommiter
+
+From version 22.2+, [ClickHouse® was updated with enhanced memory overcommit capabilities](https://github.com/ClickHouse/ClickHouse/pull/31182). In the past, queries were constrained by the `max_memory_usage` setting, imposing a rigid limitation. Users had the option to increase this limit, but at the risk of impacting other users with a single query.
+
+With memory overcommit, more memory-intensive queries can run, provided there are enough resources available. When the [server reaches its memory limit](https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings#max_server_memory_usage), ClickHouse identifies the most overcommitted queries and tries to terminate them. Note that the terminated query might not be the one that caused the condition; if it's not, the query waits for a while to allow the high-memory query to be terminated, and then resumes execution. This setup ensures that low-memory queries always have the opportunity to run, while more resource-intensive queries can execute when the server is idle and resources are abundant. Users can fine-tune this behavior at both the server and user levels.
+
+If the memory overcommitter is not being used you'll get something like this:
+
+```bash
+Received exception from server (version 22.8.20):
+Code: 241. DB::Exception: Received from altinity.cloud:9440. DB::Exception: Received from chi-replica1-2-0:9000. DB::Exception: Memory limit (for query) exceeded: would use 5.00 GiB (attempt to allocate chunk of 4196736 bytes), maximum: 5.00 GiB. OvercommitTracker decision: Memory overcommit isn't used. OvercommitTracker isn't set.: (avg_value_size_hint = 0, avg_chars_size = 1, limit = 8192): while receiving packet from chi-replica1-1-0:9000: While executing Remote. (MEMORY_LIMIT_EXCEEDED)
+```
+
+So to enable Memory Overcommit you need to get rid of `max_memory_usage` and `max_memory_usage_for_user` (set them to 0) and configure the overcommit-specific settings (**usually the defaults are OK, so read the documentation carefully**):
+
+- `memory_overcommit_ratio_denominator`: the soft memory limit on the user level. This value is used to compute the query overcommit ratio.
+- `memory_overcommit_ratio_denominator_for_user`: the soft memory limit on the global level. This value is used to compute the query overcommit ratio.
+- `memory_usage_overcommit_max_wait_microseconds`: the maximum time a thread will wait for memory to be freed in the case of memory overcommit. If the timeout is reached and memory is not freed, an exception is thrown.
+
+Please check https://clickhouse.com/docs/en/operations/settings/memory-overcommit
+
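+In practice that means disabling the hard per-query/per-user limits and, optionally, tuning how long a query waits for memory to be freed. A sketch at the session/profile level (the wait value is just an example):
+
+```sql
+SET max_memory_usage = 0;
+SET max_memory_usage_for_user = 0;
+-- how long a thread waits for memory to be freed before MEMORY_LIMIT_EXCEEDED is thrown
+SET memory_usage_overcommit_max_wait_microseconds = 5000000;
+```
+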
+You will also need to check/configure the global memory server settings. These are the defaults:
+
+```xml
+<clickhouse>
+    <max_server_memory_usage>0</max_server_memory_usage>
+    <max_server_memory_usage_to_ram_ratio>0.8</max_server_memory_usage_to_ram_ratio>
+</clickhouse>
+```
+
+With these set, if you now execute queries that need more memory than your `max_server_memory_usage`, you'll get something like this:
+
+```bash
+Received exception from server (version 22.8.20):
+Code: 241. DB::Exception: Received from altinity.cloud:9440. DB::Exception: Received from chi-test1-2-0:9000. DB::Exception: Memory limit (total) exceeded: would use 12.60 GiB (attempt to allocate chunk of 4280448 bytes), maximum: 12.60 GiB. OvercommitTracker decision: Query was selected to stop by OvercommitTracker.: while receiving packet from chi-replica1-2-0:9000: While executing Remote. (MEMORY_LIMIT_EXCEEDED)
+```
+
+This lets you know that the overcommit memory tracker is set and working.
+
+Also note that you may not need the Memory Overcommit system at all: `max_memory_usage` per query may be enough for you.
+
+The good thing about memory overcommit is that you let ClickHouse handle the memory limitations instead of doing it manually, but there may be scenarios where you don't want to use it and `max_memory_usage` or `max_memory_usage_for_user` is a better fit. For example, if your workload has a lot of small/medium queries that are not memory intensive and you need to run a few memory-intensive queries for some users with a fixed memory limit. This is a common scenario for `dbt` or other ETL tools that usually run big, memory-intensive queries.
+
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-monitoring.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-monitoring.md
index b38984b580..039af22ea6 100644
--- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-monitoring.md
+++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-monitoring.md
@@ -1,17 +1,16 @@
---
-title: "ClickHouse Monitoring"
-linkTitle: "ClickHouse Monitoring"
+title: "ClickHouse® Monitoring"
+linkTitle: "ClickHouse® Monitoring"
description: >
- ClickHouse Monitoring
+ Tracking potential issues in your cluster before they cause a critical error
+keywords:
+ - clickhouse monitoring
+ - clickhouse metrics
---
-## ClickHouse Monitoring
-
-Monitoring helps to track potential issues in your cluster before they cause a critical error.
-
-What to read / watch on subject:
-* Altinity webinar "ClickHouse Monitoring 101: What to monitor and how". [recording](https://www.youtube.com/watch?v=W9KlehhgwLw), [slides](https://www.slideshare.net/Altinity/clickhouse-monitoring-101-what-to-monitor-and-how)
-* docs https://clickhouse.com/docs/en/operations/monitoring/
+What to read / watch on the subject:
+* Altinity webinar "ClickHouse Monitoring 101: What to monitor and how". [Watch the video](https://www.youtube.com/watch?v=W9KlehhgwLw) or [download the slides](https://www.slideshare.net/Altinity/clickhouse-monitoring-101-what-to-monitor-and-how).
+* [The ClickHouse docs](https://clickhouse.com/docs/en/operations/monitoring/)
## What should be monitored
@@ -38,34 +37,39 @@ The following metrics should be collected / monitored
* [See separate article](../altinity-kb-zookeeper/zookeeper-monitoring/)
-## Monitoring tools
+## ClickHouse monitoring tools
### Prometheus (embedded exporter) + Grafana
* Enable [embedded exporter](https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings/#server_configuration_parameters-prometheus)
* Grafana dashboards [https://grafana.com/grafana/dashboards/14192](https://grafana.com/grafana/dashboards/14192) or [https://grafana.com/grafana/dashboards/13500](https://grafana.com/grafana/dashboards/13500)
-### clickhouse-operator embedded exporter
+### Prometheus (embedded http handler with Altinity Kubernetes Operator for ClickHouse style metrics) + Grafana
+
+* Enable [http handler](../monitoring-operator-exporter-compatibility/)
+* Useful if you want to use the dashboard from the Altinity Kubernetes Operator for ClickHouse but do not run ClickHouse in k8s.
+
+### Prometheus (embedded exporter in the Altinity Kubernetes Operator for ClickHouse) + Grafana
-* exporter is included in clickhouse-operator, and enabled automatically
+* exporter is included in the Altinity Kubernetes Operator for ClickHouse, and enabled automatically
* see instructions of [Prometheus](https://github.com/Altinity/clickhouse-operator/blob/eb3fc4e28514d0d6ea25a40698205b02949bcf9d/docs/prometheus_setup.md) and [Grafana](https://github.com/Altinity/clickhouse-operator/blob/eb3fc4e28514d0d6ea25a40698205b02949bcf9d/docs/grafana_setup.md) installation (if you don't have one)
* Grafana dashboard [https://github.com/Altinity/clickhouse-operator/tree/master/grafana-dashboard](https://github.com/Altinity/clickhouse-operator/tree/master/grafana-dashboard)
* Prometheus alerts [https://github.com/Altinity/clickhouse-operator/blob/master/deploy/prometheus/prometheus-alert-rules-clickhouse.yaml](https://github.com/Altinity/clickhouse-operator/blob/master/deploy/prometheus/prometheus-alert-rules-clickhouse.yaml)
-### Prometheus exporter (external) + Grafana
+### Prometheus (ClickHouse external exporter) + Grafana
* [clickhouse-exporter](https://github.com/ClickHouse/clickhouse_exporter)
* Dashboard: https://grafana.com/grafana/dashboards/882
(unmaintained)
-### Dashboards quering clickhouse directly via vertamedia / Altinity plugin
+### Dashboards querying ClickHouse directly via vertamedia / Altinity plugin
* Overview: [https://grafana.com/grafana/dashboards/13606](https://grafana.com/grafana/dashboards/13606)
* Queries dashboard (analyzing system.query_log) https://grafana.com/grafana/dashboards/2515
-## Dashboard quering clickhouse directly via Grafana plugin
+## Dashboard querying ClickHouse directly via Grafana plugin
* https://grafana.com/blog/2022/05/05/introducing-the-official-clickhouse-plugin-for-grafana/
@@ -94,14 +98,14 @@ The following metrics should be collected / monitored
* site24x7 https://www.site24x7.com/plugins/clickhouse-monitoring.html
* Acceldata Pulse https://www.acceldata.io/blog/acceldata-pulse-for-clickhouse-monitoring
-### "Build your own" monitoring
+### "Build your own" ClickHouse monitoring
-ClickHouse allow to access lot of internals using system tables. The main tables to access monitoring data are:
+ClickHouse allows you to access lots of internals using system tables. The main tables to access monitoring data are:
* system.metrics
* system.asynchronous_metrics
* system.events
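+
+A sketch of the kind of query you can build on top of these tables (the metric names are examples):
+
+```sql
+SELECT 'async' AS source, metric AS name, value
+FROM system.asynchronous_metrics
+WHERE metric IN ('Uptime', 'ReplicasMaxAbsoluteDelay')
+UNION ALL
+SELECT 'current', metric, toFloat64(value)
+FROM system.metrics
+WHERE metric IN ('ReadonlyReplica', 'DelayedInserts')
+UNION ALL
+SELECT 'counter', event, toFloat64(value)
+FROM system.events
+WHERE event IN ('RejectedInserts', 'FailedQuery');
+```
+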
-Minimum neccessary set of checks
+Minimum necessary set of checks