喜欢将平衡比作钟摆的这个形容

links

- https://www.loot-drop.io/
- https://github.com/ssoccean/PPT_Pilot

reinforcement learning 需要考虑的核心问题之一

掌握好 explore 和 exploit 的接受平衡非常重要

idea-实践 循环

核心就两件事

- 用工程进度文件而不是上下文压缩，长时工作更好
- 规划、执行、评估分开更好，能解决模型自信问题

lc1220

```cpp
class Solution {
public:
    vector<long long> state;
    // a e i o u
    // 0 1 2 3 4
    // 01 10 12 20 21 23 24 32 34 40
    long long mod = 1e9 + 7;

    vector<long long> run(vector<long long> s) {
        vector<long long> s_(5);
        s_[0] = (s[1] + s[2] + s[4]) % mod;
        s_[1] = (s[0] + s[2]) % mod;
        s_[2] = (s[1] + s[3]) % mod;
        s_[3] = s[2] % mod;
        s_[4] = (s[2] + s[3]) % mod;
        return s_;
    }

    int countVowelPermutation(int n) {
        state = vector<long long>(5, 1);
        for (int i = 1; i < n; i++) {
            state = run(state);
        }
        long long sum = 0;
        for (int i = 0; i < 5; i++) {
            sum += state[i];
            sum %= mod;
        }
        return sum;
    }
};
```

lc1269

归来仍是第一反应写最朴素的 dp，释怀地笑了

```cpp
const int MO = 1e9 + 7;

class Solution {
public:
    int numWays(int steps, int arrLen) {
        vector<vector<long long>> f(steps + 1, vector<long long>(steps + 1));
        long long maxLen = min(steps - 1, arrLen - 1);
        f[0][0] = 1;
        for (int i = 1; i <= steps; i++) {
            for (int j = 0; j <= maxLen; j++) {
                f[i][j] = f[i - 1][j];
                if (j - 1 >= 0)
                    f[i][j] = (f[i][j] + f[i - 1][j - 1]) % MO;
                if (j + 1 <= maxLen)
                    f[i][j] = (f[i][j] + f[i - 1][j + 1]) % MO;
            }
        }
        return f[steps][0] % MO;
    }
};
```