apache
diff --git a/iotdb-core/ainode/ainode/TimerXL/__init__.py b/iotdb-core/ainode/ainode/TimerXL/__init__.py
Lines changed: 17 additions & 0 deletions
diff --git a/iotdb-core/ainode/ainode/TimerXL/layers/Attn_Bias.py b/iotdb-core/ainode/ainode/TimerXL/layers/Attn_Bias.py
Lines changed: 95 additions & 0 deletions
diff --git a/iotdb-core/ainode/ainode/TimerXL/layers/Attn_Projection.py b/iotdb-core/ainode/ainode/TimerXL/layers/Attn_Projection.py
Lines changed: 123 additions & 0 deletions
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
@@ -0,0 +1,95 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import abc
+import math
+import torch
+from einops import rearrange
+from torch import nn
+
+
+class AttentionBias(nn.Module, abc.ABC):
+    """Abstract base class for attention-bias modules.
+
+    The constructor only validates its arguments and records
+    ``num_heads`` together with the per-head width
+    ``head_dim = dim // num_heads``. Concrete subclasses supply
+    ``forward``.
+    """
+
+    def __init__(self, dim: int, num_heads: int):
+        super().__init__()
+        # dim must divide evenly over a strictly positive number of heads.
+        assert num_heads > 0 and dim % num_heads == 0
+
+        self.num_heads, self.head_dim = num_heads, dim // num_heads
+
+    @abc.abstractmethod
+    def forward(self, query_id, kv_id):
+        """Compute the bias from query / key-value ids (subclass hook)."""
+        ...
+
+
+class BinaryAttentionBias(AttentionBias):
+    """Bias that takes one of two learned per-head values.
+
+    For every (query, key) pair the bias is row 1 of ``emb`` when the two
+    ids are equal and row 0 when they differ.
+    """
+
+    def __init__(self, dim: int, num_heads: int):
+        super().__init__(dim, num_heads)
+        # Two rows (ids differ / ids match), one column per head.
+        self.emb = nn.Embedding(num_embeddings=2, embedding_dim=self.num_heads)
+
+    def forward(self, query_id, kv_id):
+        """Return the bias selected by pairwise equality of the ids.
+
+        ``query_id`` gains a trailing axis and ``kv_id`` a second-to-last
+        axis so that the comparison broadcasts over all pairs.
+        """
+        same_id = torch.eq(query_id.unsqueeze(-1), kv_id.unsqueeze(-2))
+        # Append two singleton axes so the table broadcasts against same_id.
+        table = rearrange(
+            self.emb.weight, "two num_heads -> two num_heads 1 1")
+        differ_bias, match_bias = table[:1], table[1:]
+        return ~same_id * differ_bias + same_id * match_bias
+
+
+def _relative_position_bucket(relative_position, bidirectional=True, num_buckets=32, max_distance=128):
+    """Map signed relative positions to integer bucket indices.
+
+    When ``bidirectional`` is true the buckets are split in half: positive
+    offsets are shifted into the upper half and the absolute distance is
+    bucketed. Otherwise positive offsets are clamped to distance 0 and only
+    the magnitude of non-positive offsets is bucketed.
+
+    Distances below ``max_exact = num_buckets // 2`` map to themselves.
+    Larger distances are placed on a logarithmic scale relative to
+    ``max_distance`` and clipped to ``num_buckets - 1``.
+
+    Args:
+        relative_position: integer tensor of position differences.
+        bidirectional: whether positive and negative offsets get separate
+            bucket ranges.
+        num_buckets: total number of buckets.
+        max_distance: distance scale used by the logarithmic part.
+
+    Returns:
+        Tensor of bucket indices with the shape of ``relative_position``.
+    """
+    if bidirectional:
+        num_buckets //= 2
+        # Positive offsets live in the upper half of the bucket range.
+        offset = (relative_position > 0).to(torch.long) * num_buckets
+        distance = torch.abs(relative_position)
+    else:
+        offset = 0
+        # Keep only the magnitude of non-positive offsets.
+        distance = -torch.minimum(
+            relative_position, torch.zeros_like(relative_position))
+
+    max_exact = num_buckets // 2
+
+    # Logarithmic bucket for large distances. Distance 0 yields log(0), but
+    # those entries are discarded by the torch.where below.
+    log_scaled = (
+        torch.log(distance.float() / max_exact)
+        / math.log(max_distance / max_exact)
+        * (num_buckets - max_exact)
+    ).to(torch.long)
+    coarse_bucket = max_exact + log_scaled
+    coarse_bucket = torch.minimum(
+        coarse_bucket, torch.full_like(coarse_bucket, num_buckets - 1))
+
+    return offset + torch.where(distance < max_exact, distance, coarse_bucket)
+
+
+class T5AttentionBias(AttentionBias):
+    """Bucketed relative-position bias tiled across variables.
+
+    A single learned scalar per bucket (``nn.Embedding(num_buckets, 1)``) is
+    looked up for every pair of token positions, so the resulting bias is
+    shared by all heads.
+    """
+
+    def __init__(self, dim: int, num_heads: int):
+        super().__init__(dim, num_heads)
+        self.num_buckets = 32
+        self.max_distance = 32
+        self.relative_attention_bias = nn.Embedding(self.num_buckets, 1)
+
+    def forward(self, n_vars, n_tokens):
+        """Build the bias for ``n_vars`` variables of ``n_tokens`` tokens.
+
+        Note that this takes sizes, not the id tensors named in the base
+        class signature.
+
+        Returns:
+            Tensor of shape ``(1, 1, n_vars * n_tokens, n_vars * n_tokens)``
+            in which the ``(n_tokens, n_tokens)`` bias block is repeated for
+            every pair of variables.
+        """
+        # Allocate index tensors directly on the parameter device instead of
+        # building them on the CPU and copying them over on every call.
+        device = self.relative_attention_bias.weight.device
+        positions = torch.arange(n_tokens, dtype=torch.long, device=device)
+        context_position = positions[:, None]
+        memory_position = positions[None, :]
+        relative_position = memory_position - context_position
+        bucket = _relative_position_bucket(relative_position=relative_position, bidirectional=False,
+                                           num_buckets=self.num_buckets, max_distance=self.max_distance)
+        # Embedding width is 1, so drop it and add two leading singleton axes.
+        bias = self.relative_attention_bias(bucket).squeeze(-1)
+        bias = bias.reshape(1, 1, bias.shape[0], bias.shape[1])
+        # Kronecker product with an all-ones matrix tiles the block
+        # n_vars x n_vars times along the last two axes.
+        mask1 = torch.ones((n_vars, n_vars), dtype=torch.bool, device=bias.device)
+        final_bias = torch.kron(mask1, bias)
+        return final_bias
@@ -0,0 +1,123 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import abc
+import torch
+from functools import cached_property
+from einops import einsum, rearrange, repeat
+from torch import nn
+
+
+class Projection(nn.Module, abc.ABC):
+    """Abstract base class for query/key projection modules.
+
+    Stores ``proj_width`` and ``num_heads``. Any additional keyword
+    arguments are accepted and ignored by this base class.
+    """
+
+    def __init__(self, proj_width: int, num_heads: int, **kwargs):
+        super().__init__()
+        self.proj_width, self.num_heads = proj_width, num_heads
+
+    @abc.abstractmethod
+    def forward(self, x, seq_id):
+        """Project ``x`` using the ids in ``seq_id`` (subclass hook)."""
+        ...
+
+
+class RotaryProjection(Projection):
+    """Rotary projection driven by cached cosine / sine tables.
+
+    ``theta`` holds ``1 / base ** (2i / proj_width)`` for every even index
+    ``2i`` below ``proj_width``. ``cos`` and ``sin`` are tables of
+    ``position * theta`` with each column duplicated, indexed by position.
+    All three are non-persistent buffers.
+    """
+
+    def __init__(self, *, proj_width: int, num_heads: int, max_len: int = 512, base: int = 10000):
+        super().__init__(proj_width, num_heads)
+        assert (
+            self.proj_width % 2 == 0
+        ), f"proj_width must be even, got {self.proj_width}"
+        exponents = (
+            torch.arange(0, self.proj_width, 2, dtype=torch.float)
+            / self.proj_width
+        )
+        self.register_buffer(
+            "theta", 1.0 / torch.pow(base, exponents), persistent=False)
+        # Placeholders; the real tables are built by _init_freq below.
+        for table_name in ("cos", "sin"):
+            self.register_buffer(table_name, None, persistent=False)
+        self._init_freq(max_len=max_len)
+
+    def _init_freq(self, max_len: int):
+        """(Re)build the tables when they are missing or shorter than ``max_len``."""
+        if self.cos is not None and self.cos.size(-2) >= max_len:
+            return
+        positions = torch.arange(
+            max_len, device=self.theta.device, dtype=self.theta.dtype
+        )
+        # Outer product position x theta, then duplicate every column so the
+        # table width equals proj_width.
+        angles = einsum(positions, self.theta,
+                        "length, width -> length width")
+        angles = repeat(angles, "length width -> length (width 2)")
+        self.register_buffer("cos", torch.cos(angles), persistent=False)
+        self.register_buffer("sin", torch.sin(angles), persistent=False)
+
+    @staticmethod
+    def _rotate(x):
+        """Turn each adjacent pair ``(a, b)`` on the last axis into ``(-b, a)``."""
+        even_part, odd_part = rearrange(x, "... (dim r) -> r ... dim", r=2)
+        return rearrange([-odd_part, even_part], "r ... dim -> ... (dim r)", r=2)  # noqa
+
+    def forward(self, x, seq_id):
+        """Return ``cos[seq_id] * x + sin[seq_id] * rotate(x)``.
+
+        The tables are grown first if ``seq_id`` holds a position beyond
+        their current length.
+        """
+        self._init_freq(max_len=seq_id.max() + 1)
+        cos_term = self.cos[seq_id] * x
+        sin_term = self.sin[seq_id] * self._rotate(x)
+        return cos_term + sin_term
+
+
+class QueryKeyProjection(nn.Module):
+    """Apply one shared projection layer to queries and keys.
+
+    With ``partial_factor=(start, stop)`` only the slice of the last axis
+    between those fractions of ``head_dim`` is projected, and the leading
+    and trailing remainders pass through unchanged. With
+    ``partial_factor=None`` the whole last axis is projected.
+
+    Args:
+        dim: model width; must be divisible by ``num_heads``.
+        num_heads: number of attention heads (> 0).
+        proj_layer: callable building the projection module; it is called
+            with ``proj_width=...``, ``num_heads=...`` and ``**kwargs``.
+        kwargs: optional dict of extra keyword arguments for ``proj_layer``.
+        partial_factor: optional ``(start, stop)`` pair with
+            ``0.0 <= start < stop <= 1.0``.
+    """
+
+    def __init__(self, dim: int, num_heads: int, proj_layer, kwargs=None, partial_factor=None):
+        super().__init__()
+        if partial_factor is not None:
+            assert (
+                0.0 <= partial_factor[0] < partial_factor[1] <= 1.0
+            ), f"got {partial_factor[0]}, {partial_factor[1]}"
+        assert num_heads > 0 and dim % num_heads == 0
+
+        self.head_dim = dim // num_heads
+        self.partial_factor = partial_factor
+        self.query_proj = proj_layer(
+            proj_width=self.proj_width,
+            num_heads=num_heads,
+            **(kwargs or {}),
+        )
+        # Queries and keys deliberately share the same module instance.
+        self.key_proj = self.query_proj
+
+    @cached_property
+    def proj_width(self) -> int:
+        """Width of the projected slice of each head."""
+        if self.partial_factor is None:
+            return self.head_dim
+        return int(self.head_dim * (self.partial_factor[1] - self.partial_factor[0]))
+
+    @cached_property
+    def split_sizes(self):
+        """Sizes ``(leading, projected, trailing)`` along the last axis.
+
+        The trailing size is the remainder, so the three sizes always sum to
+        ``head_dim``. Truncating each float product independently can lose a
+        unit (e.g. ``head_dim=64`` with ``(0.1, 0.7)`` gives 6 + 38 + 19),
+        which makes ``Tensor.split`` raise.
+        """
+        if self.partial_factor is None:
+            return 0, self.head_dim, 0
+        leading = int(self.partial_factor[0] * self.head_dim)
+        trailing = self.head_dim - leading - self.proj_width
+        return leading, self.proj_width, trailing
+
+    def forward(self, query, key, query_id, kv_id):
+        """Project ``query`` with ``query_id`` and ``key`` with ``kv_id``.
+
+        Returns:
+            Tuple ``(query, key)`` after projection.
+        """
+        if self.partial_factor is None:
+            query = self.query_proj(query, seq_id=query_id)
+            key = self.key_proj(key, seq_id=kv_id)
+            return query, key
+
+        # Project only the middle chunk and stitch the pieces back together.
+        queries = list(query.split(self.split_sizes, dim=-1))
+        keys = list(key.split(self.split_sizes, dim=-1))
+        queries[1] = self.query_proj(queries[1], seq_id=query_id)
+        keys[1] = self.key_proj(keys[1], seq_id=kv_id)
+        return torch.cat(queries, dim=-1), torch.cat(keys, dim=-1)