Categorygithub.com/aliyun/aliyun-pai-featurestore-go-sdk/v2

# README

aliyun-pai-featurestore-go-sdk

阿里云 PAI 平台 PAI-FeatureStore Go SDK

安装

go get github.com/aliyun/aliyun-pai-featurestore-go-sdk/v2

使用方式

  • 初始化 client
import (
	"os"

	"github.com/aliyun/aliyun-pai-featurestore-go-sdk/v2/featurestore"
)

accessId := os.Getenv("AccessId")
accessKey := os.Getenv("AccessKey")
regionId := "cn-hangzhou"
projectName := "fs_test_ots"

client, err := featurestore.NewFeatureStoreClient(regionId, accessId, accessKey, projectName)

如果需要从 FeatureStore 在线存储 FeatureDB 中读取数据,初始化 client 需要填写开通 FeatureDB 时设置的用户名和密码(工作空间管理员可在控制台 数据源 页面进行修改):

username := os.Getenv("FeatureDBUsername")
password := os.Getenv("FeatureDBPassword")
client, err := featurestore.NewFeatureStoreClient(regionId, accessId, accessKey, projectName, featurestore.WithFeatureDBLogin(username, password))

由于 SDK 直连 onlinestore,client 需要在 VPC 环境中运行。例如 hologres/graphcompute 只有在指定的 VPC 内才能连接。

如果需要在本地非 VPC 环境下进行测试,初始化 client 需要加上 WithTestMode(),此时连接 FeatureStore 和 onlinestore 均会使用公网地址。

client, err := featurestore.NewFeatureStoreClient(regionId, accessId, accessKey, projectName, featurestore.WithTestMode())

注意:当使用 WithTestMode() 初始化 client 时,需要确认在线数据源已开启公网访问。通过公网访问 onlinestore 会产生对应数据源的流量开销,可能会产生下行流量费用。因此建议仅在测试时使用此模式,生产环境请勿添加 WithTestMode()。

  • 获取 (离线/实时) FeatureView 的特征数据
// get project by name
project, err := client.GetProject("fs_test_ots")
if err != nil {
    // t.Fatal(err)
}

// get featureview by name
user_feature_view := project.GetFeatureView("user_fea")
if user_feature_view == nil {
    // t.Fatal("feature view not exist")
}

// get online features
features, err := user_feature_view.GetOnlineFeatures([]interface{}{"100043186", "100060369"}, []string{"*"}, nil)

[]string{"*"} 代表获取 featureview 下的所有特征, 也可以指定部分特征名称。

返回的数据示例如下

[
    {
        "city":"合肥市",
        "follow_cnt":1,
        "gender":"male",
        "user_id":"100043186"
    },
    {
        "city":"",
        "follow_cnt":5,
        "gender":"male",
        "user_id":"100060369"
    }
]
  • 获取 序列特征 FeatureView 的特征数据
// get project by name
project, err := client.GetProject("holo_p1")
if err != nil {
    // t.Fatal(err)
}

// get featureview by name
seq_feature_view := project.GetFeatureView("seq_fea")
if seq_feature_view == nil {
    // t.Fatal("feature view not exist")
}

// get online features
features, err := seq_feature_view.GetOnlineFeatures([]interface{}{"186569075", "186569870"}, []string{"*"}, nil)

[]string{"*"} 代表获取 featureview 中配置的所有在线序列特征名称,也可以指定部分在线序列特征名称。

GetOnlineFeatures 的第三个参数(别名 alias)对序列特征 FeatureView 不生效。

假设序列特征读取配置为

| 离线序列特征字段 | 事件名称 | 序列长度 | 在线序列特征名称 |
| --- | --- | --- | --- |
| click_seq_50_seq | click | 50 | click_seq_50_seq |
| expr_seq_100_seq | expr | 100 | expr_seq_100 |

返回的数据示例如下

[
  {
    "click_seq_50_seq": "216751275;228787053;220852269;242884721",
    "click_seq_50_seq__event": "click;click;click;click",
    "click_seq_50_seq__event_time": "1699128398;1699128398;1699118623;1699118623",
    "click_seq_50_seq__item_id": "216751275;228787053;220852269;242884721",
    "click_seq_50_seq__playtime": "65.40;72.06;104.69;62.74",
    "click_seq_50_seq__ts": "389018;389018;398793;398793",
    "expr_seq_100": "207474427;216751275;228787053;247136848;270584471;299485479;220852269;242884721;245999124;265863707",
    "expr_seq_100__event": "expr;expr;expr;expr;expr;expr;expr;expr;expr;expr",
    "expr_seq_100__event_time": "1699128398;1699128398;1699128398;1699128398;1699128398;1699128398;1699118623;1699118623;1699118623;1699118623",
    "expr_seq_100__item_id": "207474427;216751275;228787053;247136848;270584471;299485479;220852269;242884721;245999124;265863707",
    "expr_seq_100__playtime": "0.00;0.00;0.00;0.00;0.00;0.00;0.00;0.00;0.00;0.00",
    "expr_seq_100__ts": "389018;389018;389018;389018;389018;389018;398793;398793;398793;398793",
    "user_id": "186569075"
  },
  {
    "click_seq_50_seq": "201741544;236327912;293320498",
    "click_seq_50_seq__event": "click;click;click",
    "click_seq_50_seq__event_time": "1699178245;1699178245;1699178245",
    "click_seq_50_seq__item_id": "201741544;236327912;293320498",
    "click_seq_50_seq__playtime": "97.41;70.32;135.21",
    "click_seq_50_seq__ts": "339171;339171;339171",
    "expr_seq_100": "201741544;224940066;236327912;240253906;247562151;293320498",
    "expr_seq_100__event": "expr;expr;expr;expr;expr;expr",
    "expr_seq_100__event_time": "1699178245;1699178245;1699178245;1699178245;1699178245;1699178245",
    "expr_seq_100__item_id": "201741544;224940066;236327912;240253906;247562151;293320498",
    "expr_seq_100__playtime": "0.00;0.00;0.00;0.00;0.00;0.00",
    "expr_seq_100__ts": "339171;339171;339171;339171;339171;339171",
    "user_id": "186569870"
  }
]
  • 获取 ModelFeature 里的特征数据
// get project by name
project, err := client.GetProject("fs_test_ots")
if err != nil {
    // t.Fatal(err)
}

// get ModelFeature by name
model_feature := project.GetModelFeature("rank")
if model_feature == nil {
    // t.Fatal("model feature not exist")
}

// get online features
features, err := model_feature.GetOnlineFeatures(map[string][]interface{}{"user_id": {"100000676", "100004208"}, "item_id":{"238038872", "264025480"}} )

ModelFeature 可以关联多个 FeatureEntity, 可以设置多个 join_id, 然后特征统一返回。

示例中有两个 join_id:user_id 和 item_id。获取特征时,每个 join_id 需要传入相同数量的 id。

返回数据示例

[
    {
        "age":26,
        "author":100015828,
        "category":"14",
        "city":"沈阳市",
        "duration":63,
        "gender":"male",
        "item_id":"238038872",
        "user_id":"100000676"
    },
    {
        "age":23,
        "author":100015828,
        "category":"15",
        "city":"西安市",
        "duration":22,
        "gender":"male",
        "item_id":"264025480",
        "user_id":"100004208"
    }
]

也可以指定某个 FeatureEntity, 把 FeatureEntity 对应的特征一块返回。

// user 是 FeatureEntity 名称
features, err := model_feature.GetOnlineFeaturesWithEntity(map[string][]interface{}{"user_id": {"100000676", "100004208"}}, "user" )

上面的含义是把 ModelFeature 下的 user(FeatureEntity) 对应的特征全部获取到。

返回数据示例

[
    {
        "age":26,
        "city":"沈阳市",
        "gender":"male",
        "user_id":"100000676"
    },
    {
        "age":23,
        "city":"西安市",
        "gender":"male",
        "user_id":"100004208"
    }
]
  • 获取 ModelFeature 里的特征数据(含序列特征)

注册 ModelFeature 时,可以选择序列特征 FeatureView 里注册的离线序列特征字段,之后在 FeatureStore Go SDK 中便可以获取到对应的在线序列特征名称。

// get project by name
project, err := client.GetProject("holo_p1")
if err != nil {
    // t.Fatal(err)
}

// get ModelFeature by name
model_feature := project.GetModelFeature("rank_v1")
if model_feature == nil {
    // t.Fatal("model feature not exist")
}

// get online features
features, err := model_feature.GetOnlineFeatures(map[string][]interface{}{"user_id": {"186569075", "186569870"}, "item_id":{"299485479", "207474427"}} )

序列特征对应的 FeatureEntity 一般为 user。示例中有两个 join_id:user_id 和 item_id。获取特征时,每个 join_id 需要传入相同数量的 id。

假设 ModelFeature 注册时选择的序列特征字段及其序列特征读取配置如下:

| 离线序列特征字段 | 事件名称 | 序列长度 | 在线序列特征名称 |
| --- | --- | --- | --- |
| click_seq_50_seq | click | 50 | click_seq_50_seq |

返回数据示例

[
  {
    "age": 51,
    "author": 147848300,
    "category": "7",
    "city": "",
    "click_count": 0,
    "click_seq_50_seq": "216751275;228787053;220852269;242884721",
    "click_seq_50_seq__event": "click;click;click;click",
    "click_seq_50_seq__event_time": "1699128398;1699128398;1699118623;1699118623",
    "click_seq_50_seq__item_id": "216751275;228787053;220852269;242884721",
    "click_seq_50_seq__playtime": "65.40;72.06;104.69;62.74",
    "click_seq_50_seq__ts": "391447;391447;401222;401222",
    "duration": 48,
    "follow_cnt": 2,
    "follower_cnt": 0,
    "gender": "female",
    "item_cnt": 0,
    "item_id": 299485479,
    "praise_count": 2,
    "pub_time": 1697885713,
    "register_time": 1696582012,
    "tags": "0",
    "title": "#健身打卡",
    "user_id": "186569075"
  },
  {
    "age": 28,
    "author": 119734983,
    "category": "18",
    "city": "",
    "click_count": 0,
    "click_seq_50_seq": "201741544;236327912;293320498",
    "click_seq_50_seq__event": "click;click;click",
    "click_seq_50_seq__event_time": "1699178245;1699178245;1699178245",
    "click_seq_50_seq__item_id": "201741544;236327912;293320498",
    "click_seq_50_seq__playtime": "97.41;70.32;135.21",
    "click_seq_50_seq__ts": "341600;341600;341600",
    "duration": 15,
    "follow_cnt": 0,
    "follower_cnt": 2,
    "gender": "male",
    "item_cnt": 0,
    "item_id": 207474427,
    "praise_count": 79,
    "pub_time": 1697731285,
    "register_time": 1699135393,
    "tags": "1",
    "title": "#成语故事",
    "user_id": "186569870"
  }
]

也可以指定某个 FeatureEntity,比如指定 FeatureEntity 为 user,则会把 ModelFeature 下的 user(FeatureEntity) 对应的特征(包括序列特征)全部获取到。

features, err := model_feature.GetOnlineFeaturesWithEntity(map[string][]interface{}{"user_id": {"186569075", "186569870"}}, "user")

返回数据示例

[
  {
    "age": 51,
    "city": "",
    "click_seq_50_seq": "216751275;228787053;220852269;242884721",
    "click_seq_50_seq__event": "click;click;click;click",
    "click_seq_50_seq__event_time": "1699128398;1699128398;1699118623;1699118623",
    "click_seq_50_seq__item_id": "216751275;228787053;220852269;242884721",
    "click_seq_50_seq__playtime": "65.40;72.06;104.69;62.74",
    "click_seq_50_seq__ts": "392212;392212;401987;401987",
    "follow_cnt": 2,
    "follower_cnt": 0,
    "gender": "female",
    "item_cnt": 0,
    "register_time": 1696582012,
    "tags": "0",
    "user_id": "186569075"
  },
  {
    "age": 28,
    "city": "",
    "click_seq_50_seq": "201741544;236327912;293320498",
    "click_seq_50_seq__event": "click;click;click",
    "click_seq_50_seq__event_time": "1699178245;1699178245;1699178245",
    "click_seq_50_seq__item_id": "201741544;236327912;293320498",
    "click_seq_50_seq__playtime": "97.41;70.32;135.21",
    "click_seq_50_seq__ts": "342365;342365;342365",
    "follow_cnt": 0,
    "follower_cnt": 2,
    "gender": "male",
    "item_cnt": 0,
    "register_time": 1699135393,
    "tags": "1",
    "user_id": "186569870"
  }
]

# Packages

No description provided by the author
No description provided by the author
No description provided by the author
No description provided by the author
No description provided by the author
No description provided by the author
No description provided by the author