We're having a weird one-day warm spike tomorrow. It might get to the mid-60s. It was in the 50s today and will be in the 50s on Sunday, but Saturday should be warmer.
Printable View
We're having a weird one-day warm spike tomorrow. It might get to the mid-60s. It was in the 50s today and will be in the 50s on Sunday, but Saturday should be warmer.
Gotta blow out the sprinklers tomorrow.
I don't even know how much it matters, but it does seem like a good idea.
I’m taking my boy on the pirogue today. He wants to fish, but I’m not sure if I even have my license.
Isn't that a Polish stuffed dumpling?
I think a boat might be more stable, if not quite as tasty.
I got the sprinklers blown out. Took about two hours, but got it done.
Ready for winter, now.
Today I worked on my AI a bit. I started months ago with a VB version, but started over today with a whole new design, coded it in C++, and it works pretty well on my 15-year-old PC.
I trained it today with a long document on its design that I wrote last week, and it tries to respond to my prompts with relevant information.
It gets a few things right, but it makes a lot of mistakes.
As soon as I get it to respond better after a few updates under its hood, I'll post some YouTube videos here.
Moti is gonna freak out! :bigyello:
It's coded from scratch in C++, with no third-party libraries or known AI engines.
My AI uses a transformer.
Below was the first version it used for testing:
Code:
#ifndef TRANSFORMER_ENCODER_H
#define TRANSFORMER_ENCODER_H
#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <numeric>
#include <random>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
using std::vector;
using std::string;
using std::unordered_map;
using std::min;
using std::max;
using namespace std;
// Small RNG
// Returns a pseudo-random float drawn uniformly between a and b.
//
// The engine is a function-local static seeded with a fixed constant, so the
// sequence of values is reproducible from run to run. It is shared by every
// caller and is not synchronized.
//
// The distribution is built on each call. A static distribution would be
// constructed only once, with the bounds of the very first call, and would
// silently ignore the a/b arguments of every later call.
//
// If a > b the bounds are swapped so the distribution's a <= b precondition
// always holds.
static inline float rndf(float a=-0.1f, float b=0.1f) {
    static std::mt19937_64 rng(1234567u);
    const float lo = (a < b) ? a : b;
    const float hi = (a < b) ? b : a;
    std::uniform_real_distribution<float> dist(lo, hi);
    return dist(rng);
}
// Linear algebra helpers (naive)
// In-place scaled add: a[i] += alpha * b[i] over the common prefix of the two
// vectors. Elements of `a` past b.size() are left untouched.
static void vec_add_inplace(vector<float>& a, const vector<float>& b, float alpha=1.0f) {
    const size_t count = (b.size() < a.size()) ? b.size() : a.size();
    size_t idx = 0;
    while (idx < count) {
        a[idx] += alpha * b[idx];
        ++idx;
    }
}
// Dot product over the common prefix of a and b, accumulated left to right in
// single precision. Returns 0 when either vector is empty.
static float vec_dot(const vector<float>& a, const vector<float>& b) {
    const size_t count = (a.size() < b.size()) ? a.size() : b.size();
    return std::inner_product(a.begin(), a.begin() + count, b.begin(), 0.0f);
}
// Scales v in place to unit Euclidean length. The squared norm is accumulated
// in double precision; a vector whose squared norm is at most 1e-12 is left
// unchanged so a (near-)zero vector is never divided by a vanishing norm.
static void l2_normalize(vector<float>& v) {
    double sq_sum = 0;
    for (size_t i = 0; i < v.size(); ++i) {
        const double e = v[i];
        sq_sum += e * e;
    }
    if (sq_sum <= 1e-12) {
        return;
    }
    const double length = std::sqrt(sq_sum);
    for (size_t i = 0; i < v.size(); ++i) {
        v[i] = static_cast<float>(v[i] / length);
    }
}
// Numerically stable softmax: returns a vector of the same length as x whose
// entries are exp(x[i] - max(x)) divided by their sum.
//
// Subtracting the maximum keeps every exponent <= 0, so exp() cannot overflow,
// and the maximum element contributes exp(0) == 1, so for finite input the
// normalising sum is at least 1.
//
// An empty input yields an empty result. Without that guard the maximum would
// be read through the end iterator of an empty range, which is undefined
// behaviour.
static vector<float> softmax_vec(const vector<float>& x) {
    vector<float> y(x.size());
    if (x.empty()) return y;

    const float maxv = *std::max_element(x.begin(), x.end());
    double sum = 0;
    for (size_t i = 0; i < x.size(); ++i) {
        y[i] = static_cast<float>(std::exp(static_cast<double>(x[i]) - static_cast<double>(maxv)));
        sum += y[i];
    }
    for (size_t i = 0; i < x.size(); ++i) y[i] = static_cast<float>(y[i] / sum);
    return y;
}
// Simple whitespace tokenizer + vocabulary builder
//
// Ids 0, 1 and 2 are reserved for "<unk>", "<cls>" and "<sep>". Corpus words
// receive ids from 3 upward in order of decreasing frequency.
struct Tokenizer {
    unordered_map<string,int> stoi;  // word -> id
    vector<string> itos;             // id -> word
    int unk_id = 0;                  // not read by the methods below, which use the reserved ids directly

    // Reserved ids. An enum (rather than static data members) so the values
    // can be passed by reference without needing an out-of-class definition.
    enum : int { kUnkToken = 0, kClsToken = 1, kSepToken = 2, kFirstWordId = 3 };

    // Rebuilds stoi/itos from scratch.
    //   texts     - corpus; every string is split on whitespace.
    //   max_vocab - upper bound on the total number of ids, including the
    //               three reserved ones (which are always present).
    // Words are ranked by descending count. Ties are broken alphabetically so
    // the resulting ids do not depend on hash-map iteration order.
    // A corpus word that is spelled exactly like a reserved token keeps the
    // reserved id instead of getting a second entry.
    void build_from_corpus(const vector<string>& texts, int max_vocab=20000) {
        unordered_map<string,int> freq;
        for (const auto& t : texts) {
            std::istringstream iss(t);
            string w;
            while (iss >> w) ++freq[w];
        }

        vector<std::pair<string,int>> items(freq.begin(), freq.end());
        std::sort(items.begin(), items.end(),
                  [](const std::pair<string,int>& a, const std::pair<string,int>& b) {
                      if (a.second != b.second) return a.second > b.second;
                      return a.first < b.first;
                  });

        itos.clear(); stoi.clear();
        // reserve special tokens: 0 -> <unk>, 1 -> <cls>, 2 -> <sep>
        itos.push_back("<unk>"); stoi["<unk>"] = kUnkToken;
        itos.push_back("<cls>"); stoi["<cls>"] = kClsToken;
        itos.push_back("<sep>"); stoi["<sep>"] = kSepToken;

        int added = kFirstWordId;
        for (const auto& p : items) {
            if (added >= max_vocab) break;
            if (stoi.find(p.first) != stoi.end()) continue;  // literal "<unk>"/"<cls>"/"<sep>" in the corpus
            itos.push_back(p.first);
            stoi[p.first] = added++;
        }
    }

    // Converts text to ids: <cls>, one id per whitespace-separated word
    // (0 for words missing from stoi), then <sep>.
    // The result holds at most maxlen ids as long as maxlen >= 2; words that
    // do not fit are dropped. <cls> and <sep> are always emitted, so the
    // result never has fewer than 2 ids.
    vector<int> encode(const string &text, int maxlen=128) const {
        vector<int> out;
        out.push_back(kClsToken);
        std::istringstream iss(text);
        string w;
        // Test the budget before consuming a word so one slot always stays
        // free for the closing <sep>.
        while ((int)out.size() < maxlen - 1 && iss >> w) {
            const auto it = stoi.find(w);
            if (it == stoi.end()) out.push_back(kUnkToken);
            else out.push_back(it->second);
        }
        out.push_back(kSepToken);
        return out;
    }
};
// Minimal LayerNorm
// Per-vector normalisation to zero mean / unit variance followed by an
// element-wise scale (gamma) and shift (beta).
struct LayerNorm {
    int dim;
    vector<float> gamma, beta;

    LayerNorm() : dim(0) {}
    LayerNorm(int d) { init(d); }

    // Sets the width and resets gamma to all ones and beta to all zeros.
    void init(int d) {
        dim = d;
        gamma.assign(d, 1.0f);
        beta.assign(d, 0.0f);
    }

    // Normalises the first `dim` entries of x in place. Statistics are
    // accumulated in double precision; 1e-5 is added to the (population)
    // variance before taking the square root.
    void apply(vector<float>& x) {
        double total = 0;
        for (int k = 0; k < dim; ++k) {
            total += x[k];
        }
        const double mean = total / dim;

        double sq_total = 0;
        for (int k = 0; k < dim; ++k) {
            const double centered = x[k] - mean;
            sq_total += centered * centered;
        }
        const double variance = sq_total / dim;

        const double inv_std = 1.0 / std::sqrt(variance + 1e-5);
        for (int k = 0; k < dim; ++k) {
            x[k] = static_cast<float>(gamma[k] * (x[k] - mean) * inv_std + beta[k]);
        }
    }
};
// Small feedforward
// Per-token two-layer network: dim -> hidden, tanh-approximated GELU,
// hidden -> dim. Both weight matrices are flat and row-major.
struct FeedForward {
    int dim, hidden;
    vector<float> w1; // hidden x dim
    vector<float> b1;
    vector<float> w2; // dim x hidden
    vector<float> b2;

    FeedForward() : dim(0), hidden(0) {}

    // Sizes all parameters for input width d and hidden width h. Biases start
    // at zero; weights are drawn from rndf(-0.02f, 0.02f), all of w1 first and
    // then all of w2.
    void init(int d, int h) {
        dim = d;
        hidden = h;
        const size_t cells = (size_t)hidden * dim;
        b1.assign(hidden, 0.0f);
        b2.assign(dim, 0.0f);
        w1.assign(cells, 0.0f);
        w2.assign(cells, 0.0f);
        // tiny random init
        for (size_t k = 0; k < w1.size(); ++k) w1[k] = rndf(-0.02f, 0.02f);
        for (size_t k = 0; k < w2.size(); ++k) w2[k] = rndf(-0.02f, 0.02f);
    }

    // x is dim vector -> returns dim vector
    vector<float> forward(const vector<float>& x) {
        // Hidden activations: gelu(w1 * x + b1)
        vector<float> act(hidden, 0.0f);
        for (int r = 0; r < hidden; ++r) {
            const float* row = w1.data() + (size_t)r * dim;
            float acc = 0;
            for (int c = 0; c < dim; ++c) {
                acc += row[c] * x[c];
            }
            const float pre = acc + b1[r];
            // GELU approx: x * 0.5 * (1 + tanh(sqrt(2/pi)*(x + 0.044715 x^3)))
            act[r] = 0.5f * pre * (1.0f + std::tanh(0.79788456f*(pre + 0.044715f*pre*pre*pre)));
        }

        // Output: w2 * act + b2
        vector<float> result(dim, 0.0f);
        for (int r = 0; r < dim; ++r) {
            const float* row = w2.data() + (size_t)r * hidden;
            float acc = 0;
            for (int c = 0; c < hidden; ++c) {
                acc += row[c] * act[c];
            }
            result[r] = acc + b2[r];
        }
        return result;
    }
};
// Multi-head attention (naive)
//
// Scaled dot-product self-attention over a flattened (seq_len x dim) buffer.
// All four weight matrices are dim x dim, stored row-major in flat vectors.
// Head h owns the contiguous slice [h*head_dim, (h+1)*head_dim) of every
// projected token.
//
// head_dim is the integer quotient dim / heads. When dim is not a multiple of
// heads, the trailing dim % heads components receive no attention output and
// enter the final projection as zeros.
struct MHAttention {
    int dim, heads, head_dim;
    vector<float> wq, wk, wv, wo; // combined weights
    MHAttention() : dim(0), heads(0), head_dim(0) {}

    // Sizes the layer for model width d and h heads and fills every weight
    // with rndf(-0.02f, 0.02f), in the order wq, wk, wv, wo.
    // Throws std::invalid_argument when d is negative or h is not positive;
    // h == 0 would otherwise be an integer division by zero.
    void init(int d, int h) {
        if (d < 0) throw std::invalid_argument("MHAttention::init: dim must be non-negative");
        if (h <= 0) throw std::invalid_argument("MHAttention::init: heads must be positive");
        dim = d; heads = h; head_dim = d / h;
        const size_t weight_count = (size_t)d * d;
        wq.assign(weight_count, 0.0f);
        wk.assign(weight_count, 0.0f);
        wv.assign(weight_count, 0.0f);
        wo.assign(weight_count, 0.0f);
        // small random init
        for (auto &x: wq) x = rndf(-0.02f, 0.02f);
        for (auto &x: wk) x = rndf(-0.02f, 0.02f);
        for (auto &x: wv) x = rndf(-0.02f, 0.02f);
        for (auto &x: wo) x = rndf(-0.02f, 0.02f);
    }

    // simple matmul helper: out = W * in (W: dxd, in: d -> out: d)
    // `out` is resized to d before the product is written.
    static void matvec(const vector<float>& W, const vector<float>& in, vector<float>& out, int d) {
        out.assign(d, 0.0f);
        matvec_into(W.data(), in.data(), out.data(), d);
    }

    // sequence form: inputs is seq x d, returns seq x d
    //
    // inputs must hold at least seq_len * dim floats (token-major);
    // std::invalid_argument is thrown otherwise. A non-positive seq_len or
    // dim yields an empty result.
    vector<float> forward(const vector<float>& inputs, int seq_len) {
        if (seq_len <= 0 || dim <= 0) return vector<float>();
        const size_t width = (size_t)dim;
        const size_t total = (size_t)seq_len * width;
        if (inputs.size() < total)
            throw std::invalid_argument("MHAttention::forward: inputs shorter than seq_len * dim");

        // Precompute all Q,K,V. Each projection is written straight into its
        // destination row, so no per-token temporaries are allocated.
        vector<float> Q(total), K(total), V(total);
        for (int t = 0; t < seq_len; ++t) {
            const size_t base = (size_t)t * width;
            const float* token = inputs.data() + base;
            matvec_into(wq.data(), token, Q.data() + base, dim);
            matvec_into(wk.data(), token, K.data() + base, dim);
            matvec_into(wv.data(), token, V.data() + base, dim);
        }

        // For each token and head: softmax(q . k * scale) weighted sum of V.
        const float scale = 1.0f / std::sqrt((float)head_dim);
        vector<float> context(total, 0.0f);      // concatenated head outputs
        vector<float> scores((size_t)seq_len);   // reused for every (token, head)
        for (int t = 0; t < seq_len; ++t) {
            const float* q_row = Q.data() + (size_t)t * width;
            float* ctx_row = context.data() + (size_t)t * width;
            for (int h = 0; h < heads; ++h) {
                const size_t off = (size_t)h * head_dim;
                for (int sidx = 0; sidx < seq_len; ++sidx) {
                    const float* k_row = K.data() + (size_t)sidx * width + off;
                    float dot = 0;
                    for (int k = 0; k < head_dim; ++k) dot += q_row[off + k] * k_row[k];
                    scores[sidx] = dot * scale;
                }
                const vector<float> probs = softmax_vec(scores);
                // weighted sum over V
                for (int k = 0; k < head_dim; ++k) {
                    float val = 0;
                    for (int sidx = 0; sidx < seq_len; ++sidx)
                        val += probs[sidx] * V[(size_t)sidx * width + off + k];
                    ctx_row[off + k] += val;
                }
            }
        }

        // final linear projection (wo): out = wo * concat_heads
        vector<float> projected(total, 0.0f);
        for (int t = 0; t < seq_len; ++t) {
            const size_t base = (size_t)t * width;
            matvec_into(wo.data(), context.data() + base, projected.data() + base, dim);
        }
        return projected;
    }

private:
    // out[i] = sum_j W[i*d + j] * in[j] for a row-major d x d matrix W.
    // `in` and `out` must each address d floats and must not overlap.
    static void matvec_into(const float* W, const float* in, float* out, int d) {
        for (int i = 0; i < d; ++i) {
            const float* row = W + (size_t)i * d;
            float s = 0;
            for (int j = 0; j < d; ++j) s += row[j] * in[j];
            out[i] = s;
        }
    }
};
// Transformer layer
//
// Pre-norm block: out = x + attn(ln1(x)), then out += ffn(ln2(out)), applied
// to a flattened (seq_len x dim) buffer.
struct TransformerLayer {
    int dim;
    LayerNorm ln1, ln2;
    MHAttention attn;
    FeedForward ffn;
    TransformerLayer() : dim(0) {}

    // Sizes both layer norms, the attention block (d, heads) and the
    // feed-forward block (d, ffn_hidden), in that order.
    void init(int d, int heads, int ffn_hidden) {
        dim = d;
        ln1.init(dim); ln2.init(dim);
        attn.init(dim, heads);
        ffn.init(dim, ffn_hidden);
    }

    // inputs: seq_len * dim flattened
    //
    // Returns a copy of inputs with both residual branches added to its first
    // seq_len * dim elements. inputs must hold at least that many floats.
    // A non-positive seq_len or dim returns the copy unchanged.
    //
    // Offsets are computed in size_t, matching the rest of the file, so
    // seq_len * dim cannot overflow int.
    vector<float> forward(const vector<float>& inputs, int seq_len) {
        vector<float> out(inputs); // copy; residual stream
        if (seq_len <= 0 || dim <= 0) return out;

        const size_t width = (size_t)dim;
        const size_t total = (size_t)seq_len * width;
        vector<float> tok;   // one token's worth of scratch, reused throughout

        // apply ln1 + attn + residual
        vector<float> normed(total);
        for (int t = 0; t < seq_len; ++t) {
            const size_t base = (size_t)t * width;
            tok.assign(inputs.data() + base, inputs.data() + base + width);
            ln1.apply(tok);
            for (size_t i = 0; i < width; ++i) normed[base + i] = tok[i];
        }
        const vector<float> attn_out = attn.forward(normed, seq_len); // seq_len * dim
        // Bounded by what attention returned, so a longer `inputs` cannot
        // cause a read past the end of attn_out.
        const size_t limit = attn_out.size() < total ? attn_out.size() : total;
        for (size_t i = 0; i < limit; ++i) out[i] += attn_out[i];

        // ln2 + ffn + residual. Each token only depends on its own slice of
        // `out`, so normalising and updating token by token is equivalent to
        // normalising the whole sequence first.
        for (int t = 0; t < seq_len; ++t) {
            const size_t base = (size_t)t * width;
            tok.assign(out.data() + base, out.data() + base + width);
            ln2.apply(tok);
            const vector<float> f = ffn.forward(tok);
            for (size_t i = 0; i < width; ++i) out[base + i] += f[i];
        }
        return out;
    }
};
// Main TransformerEncoder
struct TransformerEncoder {
int dim;
int n_layers;
int n_heads;
int ffn_hidden;
int max_seq;
Tokenizer tokenizer;
vector<float> token_embeddings; // vocab x dim flatten
vector<float> pos_embeddings; // max_seq x dim
vector<TransformerLayer> layers;
TransformerEncoder(int dim_=128, int n_layers_=2, int n_heads_=4, int ffn_mult=4, int max_seq_=128)
: dim(dim_), n_layers(n_layers_), n_heads(n_heads_), max_seq(max_seq_) {
ffn_hidden = dim * ffn_mult;
}
void init_with_vocab(int vocab_size) {
// allocate token embeddings and pos embeddings
token_embeddings.assign((size_t)vocab_size * dim, 0.0f);
pos_embeddings.assign((size_t)max_seq * dim, 0.0f);
for (auto &x: token_embeddings) x = rndf(-0.02f, 0.02f);
for (auto &x: pos_embeddings) x = rndf(-0.02f, 0.02f);
// layers
layers.resize(n_layers);
for (int i=0;i<n_layers;++i) layers[i].init(dim, n_heads, ffn_hidden);
}
// quick builder: build vocab from corpus then init embeddings
void build_vocab_from_texts(const vector<string>& corpus, int vocab_size=10000) {
tokenizer.build_from_corpus(corpus, vocab_size);
init_with_vocab((int)tokenizer.itos.size());
}
// encode text -> L2-normalized vector (dim)
vector<float> encode(const string& text) {
auto toks = tokenizer.encode(text, max_seq);
int seq = (int)toks.size();
// build input tokens embeddings seq x dim
vector<float> input((size_t)seq*dim, 0.0f);
for (int t=0;t<seq;++t) {
int id = (toks[t] < (int)tokenizer.itos.size()) ? toks[t] : 0;
for (int i=0;i<dim;++i) {
input[(size_t)t*dim + i] = token_embeddings[(size_t)id*dim + i] + pos_embeddings[(size_t)t*dim + i];
}
}
// forward through layers
vector<float> cur = input;
for (int l=0;l<n_layers;++l) cur = layers[l].forward(cur, seq);
// pooling: mean over token vectors (excluding <cls> optionally)
vector<float> pooled(dim, 0.0f);
int count = 0;
for (int t=0;t<seq;++t) {
// skip special tokens? we keep them — but you can skip tokens 0..2 if desired
for (int i=0;i<dim;++i) pooled[i] += cur[(size_t)t*dim + i];
++count;
}
if (count > 0) for (int i=0;i<dim;++i) pooled[i] /= (float)count;
l2_normalize(pooled);
return pooled;
}
// Simple utility: save and load token embeddings and pos embeddings - text format
void save_weights(const string& path) const {
ofstream ofs(path);
if (!ofs) {
cerr << "Error: Cannot open file for writing: " << path << endl;
return;
}
ofs << dim << " " << n_layers << " " << n_heads << "\n";
int vocab = static_cast<int>(token_embeddings.size() / dim);
ofs << vocab << " " << max_seq << "\n";
// tokens
for (size_t i = 0; i < token_embeddings.size(); ++i)
ofs << token_embeddings[i] << " ";
ofs << "\n";
// positional embeddings
for (size_t i = 0; i < pos_embeddings.size(); ++i)
ofs << pos_embeddings[i] << " ";
ofs << "\n";
ofs.close();
}
};
#endif // TRANSFORMER_ENCODER_H
That's a robot in disguise?
Well, it only took 48 years, but this month Voyager 1 will be one light-day from Earth. :wave:
That is impressive.