Một trong những khái niệm trừu tượng chính trong Apache Spark là SparkSQL
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
1, tương tự như cấu trúc # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
1 được tìm thấy trong R và Pandas. Một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
1 tương tự như một bảng và hỗ trợ các thao tác kiểu hàm (map/reduce/filter/v.v.) và các thao tác SQL (select, project, aggregate). # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
4 mạnh mẽ và được sử dụng rộng rãi, nhưng chúng có những hạn chế đối với các hoạt động trích xuất, chuyển đổi và tải (ETL). Đáng kể nhất, chúng yêu cầu một lược đồ được chỉ định trước khi tải bất kỳ dữ liệu nào. SparkSQL giải quyết vấn đề này bằng cách thực hiện hai lượt duyệt qua dữ liệu — lượt đầu tiên để suy luận lược đồ và lượt thứ hai để tải dữ liệu. Tuy nhiên, suy luận này bị hạn chế và không giải quyết được thực tế của dữ liệu lộn xộn. Ví dụ: cùng một trường có thể thuộc kiểu khác nhau trong các bản ghi khác nhau. Apache Spark thường bỏ cuộc và báo cáo kiểu là # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
5 bằng cách sử dụng văn bản gốc của trường. Điều này có thể không chính xác và bạn có thể muốn kiểm soát tốt hơn cách giải quyết sự khác biệt trong lược đồ. Và đối với các tập dữ liệu lớn, một lượt duyệt bổ sung qua dữ liệu nguồn có thể rất tốn kém.

Để giải quyết những hạn chế này, AWS Glue giới thiệu
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6. Một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 tương tự như một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
1, ngoại trừ mỗi bản ghi là tự mô tả, do đó ban đầu không cần lược đồ. Thay vào đó, AWS Glue tính toán lược đồ ngay tại thời điểm cần đến và mã hóa rõ ràng các điểm không nhất quán của lược đồ bằng cách sử dụng kiểu lựa chọn (choice, hay còn gọi là union). Bạn có thể giải quyết những điểm không nhất quán này để làm cho bộ dữ liệu của mình tương thích với các kho lưu trữ dữ liệu yêu cầu lược đồ cố định.

Tương tự, một
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
9 đại diện cho một bản ghi logic trong một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6. Nó giống như một hàng trong Spark # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
1, ngoại trừ việc nó tự mô tả và có thể được sử dụng cho dữ liệu không tuân theo một lược đồ cố định.

Bạn có thể chuyển đổi
Schema for friends DynamicFrame before calling drop_fields:
root
|-- name: string
|-- age: int
|-- location: struct
| |-- state: string
| |-- county: string
|-- friends: array
| |-- element: struct
| | |-- name: string
| | |-- age: int
Schema for friends DynamicFrame after removing age, county, and friend age:
root
|-- name: string
|-- location: struct
| |-- state: string
|-- friends: array
| |-- element: struct
| | |-- name: string
2 thành và từ # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
4 sau khi bạn giải quyết bất kỳ sự không nhất quán nào trong lược đồ.

— Khởi tạo —

__init__
Schema for friends DynamicFrame before calling drop_fields:
root
|-- name: string
|-- age: int
|-- location: struct
| |-- state: string
| |-- county: string
|-- friends: array
| |-- element: struct
| | |-- name: string
| | |-- age: int
Schema for friends DynamicFrame after removing age, county, and friend age:
root
|-- name: string
|-- location: struct
| |-- state: string
|-- friends: array
| |-- element: struct
| | |-- name: string
4
5 – Tham chiếu đến khung dữ liệu trong Máy ảo Java (JVM). Schema for friends DynamicFrame before calling drop_fields: root |-- name: string |-- age: int |-- location: struct | |-- state: string | |-- county: string |-- friends: array | |-- element: struct | | |-- name: string | | |-- age: int Schema for friends DynamicFrame after removing age, county, and friend age: root |-- name: string |-- location: struct | |-- state: string |-- friends: array | |-- element: struct | | |-- name: string
6 – Một đối tượng lớp GlueContext. Schema for friends DynamicFrame before calling drop_fields: root |-- name: string |-- age: int |-- location: struct | |-- state: string | |-- county: string |-- friends: array | |-- element: struct | | |-- name: string | | |-- age: int Schema for friends DynamicFrame after removing age, county, and friend age: root |-- name: string |-- location: struct | |-- state: string |-- friends: array | |-- element: struct | | |-- name: string
7 – Chuỗi tên tùy chọn, trống theo mặc định. Schema for friends DynamicFrame before calling drop_fields: root |-- name: string |-- age: int |-- location: struct | |-- state: string | |-- county: string |-- friends: array | |-- element: struct | | |-- name: string | | |-- age: int Schema for friends DynamicFrame after removing age, county, and friend age: root |-- name: string |-- location: struct | |-- state: string |-- friends: array | |-- element: struct | | |-- name: string
fromDF
Schema for friends DynamicFrame before calling drop_fields:
root
|-- name: string
|-- age: int
|-- location: struct
| |-- state: string
| |-- county: string
|-- friends: array
| |-- element: struct
| | |-- name: string
| | |-- age: int
Schema for friends DynamicFrame after removing age, county, and friend age:
root
|-- name: string
|-- location: struct
| |-- state: string
|-- friends: array
| |-- element: struct
| | |-- name: string
8Chuyển đổi một
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
1 thành một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 bằng cách chuyển đổi các trường # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
1 thành trường # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
9. Trả về # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 mới.

Một
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
9 đại diện cho một bản ghi logic trong một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6. Nó tương tự như một hàng trong Spark # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
1, ngoại trừ việc nó tự mô tả và có thể được sử dụng cho dữ liệu không tuân theo một lược đồ cố định.
7 – Apache Spark SQL# Example: Use filter to create a new DynamicFrame # with a filtered selection of records from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create DynamicFrame from Glue Data Catalog medicare = glueContext.create_dynamic_frame.from_options[ "s3", { "paths": [ "s3://awsglue-datasets/examples/medicare/Medicare_Hospital_Provider.csv" ] }, "csv", {"withHeader": True}, ] # Create filtered DynamicFrame with custom lambda # to filter records by Provider State and Provider City sac_or_mon = medicare.filter[ f=lambda x: x["Provider State"] in ["CA", "AL"] and x["Provider City"] in ["SACRAMENTO", "MONTGOMERY"] ] # Compare record counts print["Unfiltered record count: ", medicare.count[]] print["Filtered record count: ", sac_or_mon.count[]]
1 để chuyển đổi [bắt buộc]# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame. # Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name. # Replace EXAMPLE-FRIENDS-DATA with your table name. from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame from Glue Data Catalog glue_source_database = "
MY-EXAMPLE-DATABASE
" glue_source_table = "EXAMPLE-FRIENDS-DATA
" friends = glueContext.create_dynamic_frame.from_catalog[ database=glue_source_database, table_name=glue_source_table ] print["Schema for friends DynamicFrame before calling drop_fields:"] friends.printSchema[] # Remove location.county, remove friends.age, remove age friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]] print["Schema for friends DynamicFrame after removing age, county, and friend age:"] friends.printSchema[]
6 – Đối tượng lớp GlueContext chỉ định ngữ cảnh cho biến đổi này (bắt buộc). Schema for friends DynamicFrame before calling drop_fields: root |-- name: string |-- age: int |-- location: struct | |-- state: string | |-- county: string |-- friends: array | |-- element: struct | | |-- name: string | | |-- age: int Schema for friends DynamicFrame after removing age, county, and friend age: root |-- name: string |-- location: struct | |-- state: string |-- friends: array | |-- element: struct | | |-- name: string
7 – Tên của kết quảSchema for friends DynamicFrame before calling drop_fields: root |-- name: string |-- age: int |-- location: struct | |-- state: string | |-- county: string |-- friends: array | |-- element: struct | | |-- name: string | | |-- age: int Schema for friends DynamicFrame after removing age, county, and friend age: root |-- name: string |-- location: struct | |-- state: string |-- friends: array | |-- element: struct | | |-- name: string
6 [bắt buộc]# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame. # Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name. # Replace EXAMPLE-FRIENDS-DATA with your table name. from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame from Glue Data Catalog glue_source_database = "
MY-EXAMPLE-DATABASE
" glue_source_table = "EXAMPLE-FRIENDS-DATA
" friends = glueContext.create_dynamic_frame.from_catalog[ database=glue_source_database, table_name=glue_source_table ] print["Schema for friends DynamicFrame before calling drop_fields:"] friends.printSchema[] # Remove location.county, remove friends.age, remove age friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]] print["Schema for friends DynamicFrame after removing age, county, and friend age:"] friends.printSchema[]
toDF
Unfiltered record count: 163065
Filtered record count: 564
2Chuyển đổi một
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 thành Apache Spark # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
1 bằng cách chuyển đổi Unfiltered record count: 163065
Filtered record count: 564
5 thành các trường # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
1. Trả về # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
1 mới.

Một
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
9 đại diện cho một bản ghi logic trong một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6. Nó tương tự như một hàng trong Spark # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
1, ngoại trừ việc nó tự mô tả và có thể được sử dụng cho dữ liệu không tuân theo một lược đồ cố định
`options` – Danh sách các tùy chọn. Chỉ định loại đích nếu bạn chọn loại hành động `Project` và `Cast`. Ví dụ bao gồm những điều sau đây:
>>>toDF([ResolveOption("a.b.c", "KeepAsStruct")])
>>>toDF([ResolveOption("a.b.c", "Project", DoubleType())])
— thông tin —
count
`count( )` – Trả về số hàng trong `DataFrame` cơ sở.
schema
`schema( )` – Trả về lược đồ của `DynamicFrame` này, hoặc nếu không có sẵn, trả về lược đồ của `DataFrame` cơ sở.
printSchema
`printSchema( )` – In lược đồ của `DataFrame` cơ sở.
show
`show(num_rows)` – In một số hàng được chỉ định từ `DataFrame` cơ sở.
repartition
`repartition(numPartitions)` – Trả về một `DynamicFrame` mới với `numPartitions` phân vùng.
coalesce
`coalesce(numPartitions)` – Trả về một `DynamicFrame` mới với `numPartitions` phân vùng.
— biến đổi —
apply_mapping
`apply_mapping(mappings, transformation_ctx="", info="", stageThreshold=0, totalThreshold=0)`
Áp dụng một ánh xạ khai báo cho một `DynamicFrame` và trả về một `DynamicFrame` mới với những ánh xạ đó được áp dụng cho các trường mà bạn chỉ định. Các trường không được chỉ định sẽ bị loại khỏi `DynamicFrame` mới.
`mappings` – Danh sách các bộ ánh xạ (bắt buộc). Mỗi bộ bao gồm: (cột nguồn, loại nguồn, cột đích, loại đích).
Nếu tên cột nguồn có dấu chấm "`.`", bạn phải đặt dấu backtick "``" xung quanh tên cột đó. Ví dụ, để ánh xạ `this.old.name` (chuỗi) tới `thisNewName`, bạn sẽ sử dụng bộ sau:
("`this.old.name`", "string", "thisNewName", "string")
`transformation_ctx` – Một chuỗi duy nhất được sử dụng để xác định thông tin trạng thái (tùy chọn).
`info` – Một chuỗi được liên kết với báo cáo lỗi cho phép biến đổi này (tùy chọn).
`stageThreshold` – Số lượng lỗi gặp phải trong phép biến đổi này mà tại đó quy trình sẽ dừng và báo lỗi (tùy chọn). Giá trị mặc định là 0, nghĩa là quy trình sẽ không dừng vì lỗi.
`totalThreshold` – Số lượng lỗi gặp phải tính đến và bao gồm cả phép biến đổi này mà tại đó quy trình sẽ dừng và báo lỗi (tùy chọn). Giá trị mặc định là 0, nghĩa là quy trình sẽ không dừng vì lỗi.
Ví dụ: Sử dụng apply_mapping để đổi tên trường và thay đổi loại trường
Ví dụ mã sau đây cho biết cách sử dụng phương thức `apply_mapping` để đổi tên các trường đã chọn và thay đổi loại trường.
# Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
drop_fields
`drop_fields(paths, transformation_ctx="", info="", stageThreshold=0, totalThreshold=0)`
Gọi phép biến đổi lớp FlatMap để xóa các trường khỏi một `DynamicFrame`. Trả về một `DynamicFrame` mới với các trường được chỉ định bị loại bỏ.
`paths` – Danh sách các chuỗi. Mỗi chuỗi chứa đường dẫn đầy đủ đến một nút trường mà bạn muốn loại bỏ. Bạn có thể sử dụng ký hiệu dấu chấm để chỉ định các trường lồng nhau. Ví dụ: nếu trường `first` là con của trường `name` trong cây, bạn chỉ định `"name.first"` cho đường dẫn.
`transformation_ctx` – Một chuỗi duy nhất được sử dụng để xác định thông tin trạng thái (tùy chọn).
`info` – Một chuỗi được liên kết với báo cáo lỗi cho phép biến đổi này (tùy chọn).
`stageThreshold` – Số lượng lỗi gặp phải trong phép biến đổi này mà tại đó quy trình sẽ dừng và báo lỗi (tùy chọn). Giá trị mặc định là 0, nghĩa là quy trình sẽ không dừng vì lỗi.
`totalThreshold` – Số lượng lỗi gặp phải tính đến và bao gồm cả phép biến đổi này mà tại đó quy trình sẽ dừng và báo lỗi (tùy chọn). Giá trị mặc định là 0, nghĩa là quy trình sẽ không dừng vì lỗi.
Ví dụ: Sử dụng drop_fields để xóa các trường khỏi `DynamicFrame`
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
Ví dụ mã này sử dụng phương thức `drop_fields` để xóa các trường cấp cao nhất và lồng nhau đã chọn khỏi một `DynamicFrame`.
Tập dữ liệu mẫu
Ví dụ sử dụng tập dữ liệu sau, được biểu thị bằng bảng `EXAMPLE-FRIENDS-DATA` trong mã:
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
mã ví dụ
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext

# Create GlueContext
sc = SparkContext.getOrCreate()
glueContext = GlueContext(sc)

# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE"
glue_source_table = "EXAMPLE-FRIENDS-DATA"
friends = glueContext.create_dynamic_frame.from_catalog(
    database=glue_source_database, table_name=glue_source_table
)
print("Schema for friends DynamicFrame before calling drop_fields:")
friends.printSchema()

# Remove location.county, remove friends.age, remove age.
# Nested fields are addressed with dotted paths.
friends = friends.drop_fields(paths=["age", "location.county", "friends.age"])
print("Schema for friends DynamicFrame after removing age, county, and friend age:")
friends.printSchema()
Schema for friends DynamicFrame before calling drop_fields:
root
|-- name: string
|-- age: int
|-- location: struct
| |-- state: string
| |-- county: string
|-- friends: array
| |-- element: struct
| | |-- name: string
| | |-- age: int
Schema for friends DynamicFrame after removing age, county, and friend age:
root
|-- name: string
|-- location: struct
| |-- state: string
|-- friends: array
| |-- element: struct
| | |-- name: string
filter
`filter(f, transformation_ctx="", info="", stageThreshold=0, totalThreshold=0)`
Trả về một `DynamicFrame` mới chứa tất cả các `DynamicRecord` trong `DynamicFrame` đầu vào đáp ứng hàm vị từ `f` đã chỉ định.
`f` – Hàm vị từ áp dụng cho `DynamicFrame`. Hàm phải nhận một `DynamicRecord` làm đối số và trả về True nếu `DynamicRecord` đáp ứng các yêu cầu của bộ lọc, hoặc False nếu không (bắt buộc).
Một `DynamicRecord` đại diện cho một bản ghi logic trong một `DynamicFrame`. Nó tương tự như một hàng trong `DataFrame` của Spark, ngoại trừ việc nó tự mô tả và có thể được sử dụng cho dữ liệu không tuân theo một lược đồ cố định.
`transformation_ctx` – Một chuỗi duy nhất được sử dụng để xác định thông tin trạng thái (tùy chọn).
`info` – Một chuỗi được liên kết với báo cáo lỗi cho phép chuyển đổi này (tùy chọn).
`stageThreshold` – Số lượng lỗi gặp phải trong phép chuyển đổi này mà tại đó quy trình sẽ báo lỗi và dừng (tùy chọn). Giá trị mặc định là 0, nghĩa là quy trình sẽ không báo lỗi và dừng.
`totalThreshold` – Số lượng lỗi gặp phải tính đến và bao gồm cả phép chuyển đổi này mà tại đó quy trình sẽ báo lỗi và dừng (tùy chọn). Giá trị mặc định là 0, nghĩa là quy trình sẽ không báo lỗi và dừng.
Ví dụ: Sử dụng filter để lấy một lựa chọn các trường đã lọc
Ví dụ này sử dụng phương thức `filter` để tạo một `DynamicFrame` mới bao gồm một lựa chọn đã lọc từ các trường của một `DynamicFrame` khác.
Giống như phương thức `map`, `filter` nhận một hàm làm đối số, hàm này được áp dụng cho từng bản ghi trong `DynamicFrame` gốc. Hàm nhận một bản ghi làm đầu vào và trả về giá trị Boolean. Nếu giá trị trả về là true, bản ghi sẽ được đưa vào `DynamicFrame` kết quả. Nếu là false, bản ghi sẽ bị bỏ qua.
# Example: Use filter to create a new DynamicFrame
# with a filtered selection of records
from pyspark.context import SparkContext
from awsglue.context import GlueContext

# Create GlueContext
sc = SparkContext.getOrCreate()
glueContext = GlueContext(sc)

# Create DynamicFrame from a CSV file in Amazon S3
# (from_options reads directly from the given paths, not from the Data Catalog)
medicare = glueContext.create_dynamic_frame.from_options(
    "s3",
    {
        "paths": [
            "s3://awsglue-datasets/examples/medicare/Medicare_Hospital_Provider.csv"
        ]
    },
    "csv",
    {"withHeader": True},
)

# Create filtered DynamicFrame with custom lambda
# to filter records by Provider State and Provider City.
# The lambda receives one record and returns a Boolean; only records
# for which it returns True are kept.
sac_or_mon = medicare.filter(
    f=lambda x: x["Provider State"] in ["CA", "AL"]
    and x["Provider City"] in ["SACRAMENTO", "MONTGOMERY"]
)

# Compare record counts
print("Unfiltered record count: ", medicare.count())
print("Filtered record count: ", sac_or_mon.count())
Unfiltered record count: 163065
Filtered record count: 564
join
`join(paths1, paths2, frame2, transformation_ctx="", info="", stageThreshold=0, totalThreshold=0)`
Thực hiện phép nối đẳng thức (equality join) với một `DynamicFrame` khác và trả về `DynamicFrame` kết quả.
`paths1` – Danh sách các khóa trong khung này dùng để nối.
`paths2` – Danh sách các khóa trong khung kia dùng để nối.
`frame2` – `DynamicFrame` còn lại dùng để nối.
`transformation_ctx` – Một chuỗi duy nhất được sử dụng để xác định thông tin trạng thái (tùy chọn).
`info` – Một chuỗi được liên kết với báo cáo lỗi cho phép chuyển đổi này (tùy chọn).
`stageThreshold` – Số lượng lỗi gặp phải trong phép chuyển đổi này mà tại đó quy trình sẽ báo lỗi và dừng (tùy chọn). Giá trị mặc định là 0, nghĩa là quy trình sẽ không báo lỗi và dừng.
`totalThreshold` – Số lượng lỗi gặp phải tính đến và bao gồm cả phép chuyển đổi này mà tại đó quy trình sẽ báo lỗi và dừng (tùy chọn). Giá trị mặc định là 0, nghĩa là quy trình sẽ không báo lỗi và dừng.
Ví dụ: Sử dụng join để kết hợp các `DynamicFrame`
Schema for friends DynamicFrame before calling drop_fields:
root
|-- name: string
|-- age: int
|-- location: struct
| |-- state: string
| |-- county: string
|-- friends: array
| |-- element: struct
| | |-- name: string
| | |-- age: int
Schema for friends DynamicFrame after removing age, county, and friend age:
root
|-- name: string
|-- location: struct
| |-- state: string
|-- friends: array
| |-- element: struct
| | |-- name: string
Ví dụ này sử dụng phương thức `join` để thực hiện phép nối trên ba `DynamicFrame`. AWS Glue thực hiện phép nối dựa trên các khóa trường mà bạn cung cấp. `DynamicFrame` kết quả chứa các hàng từ hai khung ban đầu nơi các khóa được chỉ định khớp với nhau.
Lưu ý rằng phép biến đổi `join` giữ nguyên tất cả các trường. Điều này có nghĩa là các trường mà bạn chỉ định để so khớp sẽ xuất hiện trong DynamicFrame kết quả, ngay cả khi chúng dư thừa và chứa các khóa giống nhau. Trong ví dụ này, chúng tôi sử dụng `drop_fields` để xóa các khóa thừa này sau khi nối.
# Example: Use join to combine data from three DynamicFrames
from pyspark.context import SparkContext
from awsglue.context import GlueContext

# Create GlueContext
sc = SparkContext.getOrCreate()
glueContext = GlueContext(sc)

# Load DynamicFrames from Glue Data Catalog
persons = glueContext.create_dynamic_frame.from_catalog(
    database="legislators", table_name="persons_json"
)
memberships = glueContext.create_dynamic_frame.from_catalog(
    database="legislators", table_name="memberships_json"
)
orgs = glueContext.create_dynamic_frame.from_catalog(
    database="legislators", table_name="organizations_json"
)
print("Schema for the persons DynamicFrame:")
persons.printSchema()
print("Schema for the memberships DynamicFrame:")
memberships.printSchema()
print("Schema for the orgs DynamicFrame:")
orgs.printSchema()

# Join persons and memberships by ID
persons_memberships = persons.join(
    paths1=["id"], paths2=["person_id"], frame2=memberships
)

# Rename and drop fields from orgs
# to prevent field name collisions with persons_memberships
orgs = (
    orgs.drop_fields(["other_names", "identifiers"])
    .rename_field("id", "org_id")
    .rename_field("name", "org_name")
)

# Create final join of all three DynamicFrames.
# join keeps both key columns, so the redundant keys are dropped afterwards.
legislators_combined = orgs.join(
    paths1=["org_id"], paths2=["organization_id"], frame2=persons_memberships
).drop_fields(["person_id", "org_id"])

# Inspect the schema for the joined data
print("Schema for the new legislators_combined DynamicFrame:")
legislators_combined.printSchema()
["`this.old.name`", "string", "thisNewName", "string"]
0
map
`map(f, transformation_ctx="", info="", stageThreshold=0, totalThreshold=0)`
Trả về một `DynamicFrame` mới là kết quả của việc áp dụng hàm ánh xạ đã chỉ định cho tất cả các bản ghi trong `DynamicFrame` gốc.
`f` – Hàm ánh xạ để áp dụng cho tất cả các bản ghi trong `DynamicFrame`. Hàm phải nhận một `DynamicRecord` làm đối số và trả về một `DynamicRecord` mới (bắt buộc).
Một `DynamicRecord` đại diện cho một bản ghi logic trong một `DynamicFrame`. Nó tương tự như một hàng trong `DataFrame` của Apache Spark, ngoại trừ việc nó tự mô tả và có thể được sử dụng cho dữ liệu không tuân theo một lược đồ cố định.
`transformation_ctx` – Một chuỗi duy nhất được sử dụng để xác định thông tin trạng thái (tùy chọn).
`info` – Một chuỗi có liên quan đến lỗi trong phép chuyển đổi (tùy chọn).
`stageThreshold` – Số lỗi tối đa có thể xảy ra trong phép chuyển đổi trước khi nó báo lỗi và dừng (tùy chọn). Mặc định là 0.
`totalThreshold` – Tổng số lỗi tối đa có thể xảy ra trước khi quy trình báo lỗi và dừng (tùy chọn). Mặc định là 0.
Ví dụ: Sử dụng map để áp dụng một hàm cho mọi bản ghi trong một `DynamicFrame`
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
Ví dụ này cho thấy cách sử dụng phương thức `map` để áp dụng một hàm cho mọi bản ghi của một `DynamicFrame`. Cụ thể, ví dụ này áp dụng một hàm có tên là `MergeAddress` cho mỗi bản ghi để hợp nhất một số trường địa chỉ thành một kiểu `struct` duy nhất.
1["`this.old.name`", "string", "thisNewName", "string"]
2hợp nhấtKhung động
["`this.old.name`", "string", "thisNewName", "string"]
97Hợp nhất
`DynamicFrame` này với một `DynamicFrame` dàn dựng (staging) dựa trên các khóa chính được chỉ định để xác định bản ghi. Các bản ghi trùng lặp (các bản ghi có cùng khóa chính) không được loại bỏ trùng lặp. Nếu không có bản ghi phù hợp trong khung dàn dựng, thì tất cả các bản ghi (bao gồm cả bản trùng lặp) sẽ được giữ lại từ nguồn. Nếu khung dàn dựng có các bản ghi phù hợp, thì các bản ghi từ khung dàn dựng sẽ ghi đè lên các bản ghi trong nguồn trong AWS Glue.
`stage_dynamic_frame` – `DynamicFrame` dàn dựng để hợp nhất.
`primary_keys` – Danh sách các trường khóa chính để khớp các bản ghi từ khung động nguồn và khung động dàn dựng.
`transformation_ctx` – Một chuỗi duy nhất được sử dụng để truy xuất siêu dữ liệu về chuyển đổi hiện tại (tùy chọn).
`options` – Một chuỗi các cặp tên-giá trị JSON cung cấp thông tin bổ sung cho quá trình chuyển đổi này. Đối số này hiện không được sử dụng.
`info` – Một `String`. Bất kỳ chuỗi nào có liên quan đến lỗi trong quá trình chuyển đổi này.
`stageThreshold` – Một `Long`. Số lượng lỗi trong chuyển đổi này mà khi đạt tới thì quá trình xử lý phải dừng và báo lỗi.
`totalThreshold` – Một `Long`. Tổng số lỗi tính đến và bao gồm cả chuyển đổi này mà khi đạt tới thì quá trình xử lý phải dừng và báo lỗi.
Phương thức này trả về một `DynamicFrame` mới thu được bằng cách hợp nhất `DynamicFrame` này với `DynamicFrame` dàn dựng.
`DynamicFrame` được trả về chứa bản ghi A trong những trường hợp sau:
- Nếu `A` tồn tại trong cả khung nguồn và khung dàn dựng, thì `A` trong khung dàn dựng được trả về.
- Nếu `A` có trong bảng nguồn và `A.primaryKeys` không có trong `stagingDynamicFrame`, thì `A` không được cập nhật trong bảng dàn dựng.
Khung nguồn và khung dàn dựng không cần phải có cùng một lược đồ
Ví dụ: Sử dụng `mergeDynamicFrames` để hợp nhất hai `DynamicFrames` dựa trên khóa chính
Schema for friends DynamicFrame before calling drop_fields:
root
|-- name: string
|-- age: int
|-- location: struct
| |-- state: string
| |-- county: string
|-- friends: array
| |-- element: struct
| | |-- name: string
| | |-- age: int
Schema for friends DynamicFrame after removing age, county, and friend age:
root
|-- name: string
|-- location: struct
| |-- state: string
|-- friends: array
| |-- element: struct
| | |-- name: string
Ví dụ mã sau đây cho thấy cách sử dụng phương thức `mergeDynamicFrames` để hợp nhất một `DynamicFrame` với một `DynamicFrame` "dàn dựng", dựa trên khóa chính `id`.
Tập dữ liệu mẫu
Ví dụ sử dụng hai `DynamicFrames` từ một `DynamicFrameCollection` được gọi là `split_rows_collection`. Sau đây là danh sách các khóa trong `split_rows_collection`.
3mã ví dụ
["`this.old.name`", "string", "thisNewName", "string"]
4["`this.old.name`", "string", "thisNewName", "string"]
5quan hệ hóa
# Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
30Chuyển đổi một
`DynamicFrame` thành một dạng phù hợp với cơ sở dữ liệu quan hệ. Việc quan hệ hóa một `DynamicFrame` đặc biệt hữu ích khi bạn muốn di chuyển dữ liệu từ môi trường NoSQL như DynamoDB sang cơ sở dữ liệu quan hệ như MySQL.
Phép biến đổi tạo ra một danh sách các khung bằng cách bỏ lồng các cột lồng nhau và xoay (pivot) các cột mảng. Bạn có thể nối các cột mảng đã xoay vào bảng gốc bằng cách sử dụng khóa nối được tạo trong giai đoạn bỏ lồng.
`root_table_name` – Tên của bảng gốc.
`staging_path` – Đường dẫn mà phương thức có thể lưu trữ các phân vùng của các bảng đã xoay (pivoted tables) ở định dạng CSV (tùy chọn). Các bảng đã xoay được đọc lại từ đường dẫn này.
`options` – Từ điển các tham số tùy chọn.
`transformation_ctx` – Một chuỗi duy nhất được sử dụng để xác định thông tin trạng thái (tùy chọn).
`info` – Một chuỗi được liên kết với báo cáo lỗi cho phép biến đổi này (tùy chọn).
`stageThreshold` – Số lượng lỗi gặp phải trong phép biến đổi này mà tại đó quy trình sẽ dừng và báo lỗi (tùy chọn). Giá trị mặc định là 0, cho biết quy trình sẽ không dừng vì lỗi.
`totalThreshold` – Số lượng lỗi gặp phải tính đến và bao gồm cả phép biến đổi này mà tại đó quy trình sẽ dừng và báo lỗi (tùy chọn). Giá trị mặc định là 0, cho biết quy trình sẽ không dừng vì lỗi.
Ví dụ: Sử dụng relationalize để làm phẳng lược đồ lồng nhau trong `DynamicFrame`
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
Ví dụ mã này sử dụng phương thức
`relationalize` để làm phẳng một lược đồ lồng nhau thành một biểu mẫu phù hợp với cơ sở dữ liệu quan hệ.

Tập dữ liệu mẫu
Ví dụ sử dụng một
`DynamicFrame` được gọi là `legislators_combined` với lược đồ sau. `legislators_combined` có nhiều trường lồng nhau, chẳng hạn như `links`, `images` và `contact_details`, sẽ được làm phẳng bởi phép biến đổi `relationalize`.
["`this.old.name`", "string", "thisNewName", "string"]
6mã ví dụ
["`this.old.name`", "string", "thisNewName", "string"]
7Đầu ra sau đây cho phép bạn so sánh lược đồ của trường lồng nhau có tên là
`contact_details` với bảng mà phép biến đổi `relationalize` đã tạo. Lưu ý rằng các bản ghi của bảng liên kết trở lại bảng chính bằng khóa ngoại có tên là `id` và cột `index` đại diện cho các vị trí của mảng.
["`this.old.name`", "string", "thisNewName", "string"]
8
rename_field
# Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
53Đổi tên một trường trong
`DynamicFrame` này và trả về một `DynamicFrame` mới với trường được đổi tên.
`oldName` – Đường dẫn đầy đủ đến nút bạn muốn đổi tên.
Nếu tên cũ có dấu chấm trong đó, thì `RenameField` sẽ không hoạt động trừ khi bạn đặt dấu backtick (`) xung quanh nó. Ví dụ: để thay thế `this.old.name` bằng `thisNewName`, bạn sẽ gọi rename_field như sau:
newDyF = oldDyF.rename_field("`this.old.name`", "thisNewName")
`newName` – Tên mới, dưới dạng đường dẫn đầy đủ.
`transformation_ctx` – Một chuỗi duy nhất được sử dụng để xác định thông tin trạng thái (tùy chọn).
`info` – Một chuỗi được liên kết với báo cáo lỗi cho phép biến đổi này (tùy chọn).
`stageThreshold` – Số lượng lỗi gặp phải trong phép biến đổi này mà tại đó quy trình sẽ dừng và báo lỗi (tùy chọn). Giá trị mặc định là 0, cho biết quy trình sẽ không dừng vì lỗi.
`totalThreshold` – Số lượng lỗi gặp phải tính đến và bao gồm cả phép biến đổi này mà tại đó quy trình sẽ dừng và báo lỗi (tùy chọn). Giá trị mặc định là 0, cho biết quy trình sẽ không dừng vì lỗi.
Ví dụ: Sử dụng rename_field để đổi tên các trường trong `DynamicFrame`
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
Ví dụ mã này sử dụng phương pháp
`rename_field` để đổi tên các trường trong một `DynamicFrame`. Lưu ý rằng ví dụ sử dụng chuỗi phương thức (method chaining) để đổi tên nhiều trường cùng một lúc.

Mã ví dụ
# Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
0# Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
1
resolveChoice
# Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
69Giải quyết một loại lựa chọn trong
`DynamicFrame` này và trả về `DynamicFrame` mới.
`specs` – Danh sách các điểm mơ hồ cụ thể cần giải quyết, mỗi điểm ở dạng một bộ: `(field_path, action)`.
Có hai cách để sử dụng `resolveChoice`. Cách thứ nhất là sử dụng đối số `specs` để chỉ định một chuỗi các trường cụ thể và cách giải quyết chúng. Chế độ còn lại của `resolveChoice` là sử dụng đối số `choice` để chỉ định một cách giải quyết duy nhất cho tất cả `ChoiceTypes`.
Các giá trị cho `specs` được chỉ định dưới dạng các bộ được tạo thành từ các cặp `(field_path, action)`. Giá trị `field_path` xác định một phần tử mơ hồ cụ thể và giá trị `action` xác định cách giải quyết tương ứng. Sau đây là các hành động có thể dùng:
`cast:type` – Cố gắng chuyển tất cả các giá trị sang loại đã chỉ định. Ví dụ: `cast:int`.
`make_cols` – Chuyển đổi từng loại riêng biệt thành một cột có tên `columnName_type`. Nó giải quyết sự mơ hồ tiềm ẩn bằng cách làm phẳng dữ liệu. Ví dụ: nếu `columnA` có thể là một `int` hoặc một `string`, thì giải pháp sẽ là tạo ra hai cột có tên là `columnA_int` và `columnA_string` trong `DynamicFrame` kết quả.
`make_struct` – Giải quyết sự mơ hồ tiềm ẩn bằng cách sử dụng `struct` để biểu thị dữ liệu. Ví dụ: nếu dữ liệu trong một cột có thể là một `int` hoặc một `string`, thì hành động `make_struct` sẽ tạo ra một cột gồm các cấu trúc trong `DynamicFrame` kết quả. Mỗi cấu trúc chứa cả một `int` và một `string`.
`project:type`
– Giải quyết sự mơ hồ tiềm ẩn bằng cách chiếu tất cả dữ liệu vào một trong các loại dữ liệu có thể. Ví dụ: nếu dữ liệu trong một cột có thể là một `int` hoặc một `string`, thì việc sử dụng hành động `project:string` sẽ tạo ra một cột trong `DynamicFrame` kết quả, trong đó tất cả các giá trị `int` đã được chuyển đổi thành chuỗi.
Nếu `field_path` xác định một mảng, hãy đặt dấu ngoặc vuông trống sau tên của mảng để tránh nhầm lẫn. Ví dụ: giả sử bạn đang làm việc với dữ liệu có cấu trúc như sau:
2# Example: Use apply_mapping to reshape source data into # the desired column names and types as a new DynamicFrame from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame and view its schema persons = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="persons_json" ] print["Schema for the persons DynamicFrame:"] persons.printSchema[] # Select and rename fields, change field type print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"] persons_mapped = persons.apply_mapping[ [ ["family_name", "String", "last_name", "String"], ["name", "String", "first_name", "String"], ["birth_date", "String", "date_of_birth", "Date"], ] ] persons_mapped.printSchema[]
Bạn có thể chọn phiên bản số thay vì chuỗi của giá bằng cách đặt
81 thành# Example: Use apply_mapping to reshape source data into # the desired column names and types as a new DynamicFrame from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame and view its schema persons = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="persons_json" ] print["Schema for the persons DynamicFrame:"] persons.printSchema[] # Select and rename fields, change field type print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"] persons_mapped = persons.apply_mapping[ [ ["family_name", "String", "last_name", "String"], ["name", "String", "first_name", "String"], ["birth_date", "String", "date_of_birth", "Date"], ] ] persons_mapped.printSchema[]
10 và đặtSchema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
82 thành# Example: Use apply_mapping to reshape source data into # the desired column names and types as a new DynamicFrame from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame and view its schema persons = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="persons_json" ] print["Schema for the persons DynamicFrame:"] persons.printSchema[] # Select and rename fields, change field type print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"] persons_mapped = persons.apply_mapping[ [ ["family_name", "String", "last_name", "String"], ["name", "String", "first_name", "String"], ["birth_date", "String", "date_of_birth", "Date"], ] ] persons_mapped.printSchema[]
12Schema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
Bạn chỉ có thể sử dụng một trong các tham số
72 và# Example: Use apply_mapping to reshape source data into # the desired column names and types as a new DynamicFrame from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame and view its schema persons = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="persons_json" ] print["Schema for the persons DynamicFrame:"] persons.printSchema[] # Select and rename fields, change field type print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"] persons_mapped = persons.apply_mapping[ [ ["family_name", "String", "last_name", "String"], ["name", "String", "first_name", "String"], ["birth_date", "String", "date_of_birth", "Date"], ] ] persons_mapped.printSchema[]
77. Nếu tham số# Example: Use apply_mapping to reshape source data into # the desired column names and types as a new DynamicFrame from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame and view its schema persons = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="persons_json" ] print["Schema for the persons DynamicFrame:"] persons.printSchema[] # Select and rename fields, change field type print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"] persons_mapped = persons.apply_mapping[ [ ["family_name", "String", "last_name", "String"], ["name", "String", "first_name", "String"], ["birth_date", "String", "date_of_birth", "Date"], ] ] persons_mapped.printSchema[]
72 không phải là# Example: Use apply_mapping to reshape source data into # the desired column names and types as a new DynamicFrame from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame and view its schema persons = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="persons_json" ] print["Schema for the persons DynamicFrame:"] persons.printSchema[] # Select and rename fields, change field type print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"] persons_mapped = persons.apply_mapping[ [ ["family_name", "String", "last_name", "String"], ["name", "String", "first_name", "String"], ["birth_date", "String", "date_of_birth", "Date"], ] ] persons_mapped.printSchema[]
16, thì tham sốSchema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
77 phải là một chuỗi rỗng. Ngược lại, nếu# Example: Use apply_mapping to reshape source data into # the desired column names and types as a new DynamicFrame from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame and view its schema persons = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="persons_json" ] print["Schema for the persons DynamicFrame:"] persons.printSchema[] # Select and rename fields, change field type print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"] persons_mapped = persons.apply_mapping[ [ ["family_name", "String", "last_name", "String"], ["name", "String", "first_name", "String"], ["birth_date", "String", "date_of_birth", "Date"], ] ] persons_mapped.printSchema[]
77 không phải là một chuỗi rỗng, thì tham số# Example: Use apply_mapping to reshape source data into # the desired column names and types as a new DynamicFrame from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame and view its schema persons = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="persons_json" ] print["Schema for the persons DynamicFrame:"] persons.printSchema[] # Select and rename fields, change field type print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"] persons_mapped = persons.apply_mapping[ [ ["family_name", "String", "last_name", "String"], ["name", "String", "first_name", "String"], ["birth_date", "String", "date_of_birth", "Date"], ] ] persons_mapped.printSchema[]
72 phải là# Example: Use apply_mapping to reshape source data into # the desired column names and types as a new DynamicFrame from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame and view its schema persons = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="persons_json" ] print["Schema for the persons DynamicFrame:"] persons.printSchema[] # Select and rename fields, change field type print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"] persons_mapped = persons.apply_mapping[ [ ["family_name", "String", "last_name", "String"], ["name", "String", "first_name", "String"], ["birth_date", "String", "date_of_birth", "Date"], ] ] persons_mapped.printSchema[]
16Schema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
77 – Chỉ định một giải pháp duy nhất cho tất cả# Example: Use apply_mapping to reshape source data into # the desired column names and types as a new DynamicFrame from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame and view its schema persons = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="persons_json" ] print["Schema for the persons DynamicFrame:"] persons.printSchema[] # Select and rename fields, change field type print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"] persons_mapped = persons.apply_mapping[ [ ["family_name", "String", "last_name", "String"], ["name", "String", "first_name", "String"], ["birth_date", "String", "date_of_birth", "Date"], ] ] persons_mapped.printSchema[]
78. Bạn có thể sử dụng điều này trong trường hợp không biết danh sách đầy đủ của# Example: Use apply_mapping to reshape source data into # the desired column names and types as a new DynamicFrame from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame and view its schema persons = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="persons_json" ] print["Schema for the persons DynamicFrame:"] persons.printSchema[] # Select and rename fields, change field type print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"] persons_mapped = persons.apply_mapping[ [ ["family_name", "String", "last_name", "String"], ["name", "String", "first_name", "String"], ["birth_date", "String", "date_of_birth", "Date"], ] ] persons_mapped.printSchema[]
78 trước thời gian chạy. Ngoài các hành động được liệt kê trước đó cho# Example: Use apply_mapping to reshape source data into # the desired column names and types as a new DynamicFrame from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame and view its schema persons = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="persons_json" ] print["Schema for the persons DynamicFrame:"] persons.printSchema[] # Select and rename fields, change field type print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"] persons_mapped = persons.apply_mapping[ [ ["family_name", "String", "last_name", "String"], ["name", "String", "first_name", "String"], ["birth_date", "String", "date_of_birth", "Date"], ] ] persons_mapped.printSchema[]
72, lập luận này cũng hỗ trợ cho hành động sau# Example: Use apply_mapping to reshape source data into # the desired column names and types as a new DynamicFrame from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame and view its schema persons = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="persons_json" ] print["Schema for the persons DynamicFrame:"] persons.printSchema[] # Select and rename fields, change field type print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"] persons_mapped = persons.apply_mapping[ [ ["family_name", "String", "last_name", "String"], ["name", "String", "first_name", "String"], ["birth_date", "String", "date_of_birth", "Date"], ] ] persons_mapped.printSchema[]
25 – Cố gắng chuyển từngSchema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
26 sang loại tương ứng trong bảng Danh mục dữ liệu đã chỉ địnhSchema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
27 – Cơ sở dữ liệu Danh mục dữ liệu để sử dụng với hành độngSchema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
25Schema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
29 – Bảng Danh mục dữ liệu để sử dụng với hành độngSchema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
25Schema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
`transformation_ctx` – Một chuỗi duy nhất được sử dụng để xác định thông tin trạng thái (tùy chọn).
`info` – Một chuỗi được liên kết với báo cáo lỗi cho phép chuyển đổi này (tùy chọn).
`stageThreshold` – Số lượng lỗi gặp phải trong phép chuyển đổi này mà khi đạt tới, quy trình sẽ dừng và báo lỗi (tùy chọn). Giá trị mặc định là 0, nghĩa là quy trình sẽ không dừng vì lỗi.
`totalThreshold` – Số lượng lỗi gặp phải tính đến và bao gồm cả phép chuyển đổi này mà khi đạt tới, quy trình sẽ dừng và báo lỗi (tùy chọn). Giá trị mặc định là 0, nghĩa là quy trình sẽ không dừng vì lỗi.
35 – ID danh mục của Danh mục dữ liệu đang được truy cập [ID tài khoản của Danh mục dữ liệu]. Khi được đặt thànhSchema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
16 [giá trị mặc định], nó sẽ sử dụng ID danh mục của tài khoản gọi điệnSchema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
Ví dụ: Sử dụng resolveChoice để xử lý một cột chứa nhiều kiểu dữ liệu
Ví dụ mã này sử dụng phương pháp
# Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
74 để chỉ định cách xử lý cột # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 chứa các giá trị thuộc nhiều kiểu dữ liệu. Ví dụ minh họa hai cách phổ biến để xử lý một cột có nhiều kiểu khác nhau:
- Ép kiểu cột về một kiểu dữ liệu duy nhất.
- Giữ lại tất cả các kiểu trong các cột riêng biệt.
Tập dữ liệu mẫu
Ví dụ sử dụng một
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 được gọi là Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
40 với lược đồ sau# Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
3mã ví dụ
# Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
4# Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
5select_fields
Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
41Trả về một
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 mới chứa các trường đã chọn
`paths` – Danh sách các chuỗi. Mỗi chuỗi là một đường dẫn đến nút cấp cao nhất mà bạn muốn chọn.
`transformation_ctx` – Một chuỗi duy nhất được sử dụng để xác định thông tin trạng thái (tùy chọn).
`info` – Một chuỗi được liên kết với báo cáo lỗi cho phép chuyển đổi này (tùy chọn).
`stageThreshold` – Số lượng lỗi gặp phải trong phép chuyển đổi này mà khi đạt tới, quy trình sẽ dừng và báo lỗi (tùy chọn). Giá trị mặc định là 0, nghĩa là quy trình sẽ không dừng vì lỗi.
`totalThreshold` – Số lượng lỗi gặp phải tính đến và bao gồm cả phép chuyển đổi này mà khi đạt tới, quy trình sẽ dừng và báo lỗi (tùy chọn). Giá trị mặc định là 0, nghĩa là quy trình sẽ không dừng vì lỗi.
Ví dụ. Sử dụng select_fields để tạo một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
`DynamicFrame` mới với các trường đã chọn
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
Ví dụ mã sau đây cho thấy cách sử dụng phương pháp
Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
49 để tạo một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 mới với một danh sách các trường đã chọn từ một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 hiện có# Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
6# Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
spigot
Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
Ghi các bản ghi mẫu vào một đích được chỉ định để giúp bạn xác minh các phép chuyển đổi do tác vụ (job) của bạn thực hiện.
53 – Đường dẫn của điểm đến để viết thư [bắt buộc]Schema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
1 – Các cặp khóa-giá trị chỉ định các tùy chọn [tùy chọn]. Tùy chọn# Example: Use join to combine data from three DynamicFrames from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Load DynamicFrames from Glue Data Catalog persons = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="persons_json" ] memberships = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="memberships_json" ] orgs = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="organizations_json" ] print["Schema for the persons DynamicFrame:"] persons.printSchema[] print["Schema for the memberships DynamicFrame:"] memberships.printSchema[] print["Schema for the orgs DynamicFrame:"] orgs.printSchema[] # Join persons and memberships by ID persons_memberships = persons.join[ paths1=["id"], paths2=["person_id"], frame2=memberships ] # Rename and drop fields from orgs # to prevent field name collisions with persons_memberships orgs = [ orgs.drop_fields[["other_names", "identifiers"]] .rename_field["id", "org_id"] .rename_field["name", "org_name"] ] # Create final join of all three DynamicFrames legislators_combined = orgs.join[ paths1=["org_id"], paths2=["organization_id"], frame2=persons_memberships ].drop_fields[["person_id", "org_id"]] # Inspect the schema for the joined data print["Schema for the new legislators_combined DynamicFrame:"] legislators_combined.printSchema[]
55 xác định rằng bản ghiSchema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
56 đầu tiên phải được ghi. Tùy chọnSchema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
57 chỉ định xác suất [dưới dạng số thập phân] của việc chọn bất kỳ bản ghi đã cho nào. Bạn có thể sử dụng nó trong việc chọn bản ghi để viếtSchema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
`transformation_ctx` – Một chuỗi duy nhất được sử dụng để xác định thông tin trạng thái (tùy chọn).
Ví dụ. Sử dụng đầu nối để ghi các trường mẫu từ # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
`DynamicFrame` lên Amazon S3
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
Ví dụ về mã này sử dụng phương thức
Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
60 để ghi các bản ghi mẫu vào bộ chứa Amazon S3 sau khi áp dụng biến đổi Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
49tập dữ liệu mẫu
Ví dụ sử dụng một
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 được gọi là Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
63 với lược đồ sau# Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
8mã ví dụ
# Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
9Sau đây là ví dụ về dữ liệu mà
Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
60 ghi vào Amazon S3. Vì mã ví dụ đã chỉ định Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
65, dữ liệu mẫu chứa 10 bản ghi đầu tiênSchema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
split_fields
Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
66Trả về một
# Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
27 mới chứa hai Schema for friends DynamicFrame before calling drop_fields:
root
|-- name: string
|-- age: int
|-- location: struct
| |-- state: string
| |-- county: string
|-- friends: array
| |-- element: struct
| | |-- name: string
| | |-- age: int
Schema for friends DynamicFrame after removing age, county, and friend age:
root
|-- name: string
|-- location: struct
| |-- state: string
|-- friends: array
| |-- element: struct
| | |-- name: string
2. # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
`DynamicFrame` đầu tiên chứa tất cả các nút đã được tách ra, và `DynamicFrame` thứ hai chứa các nút còn lại.
26 – Một danh sách các chuỗi, mỗi chuỗi là một đường dẫn đầy đủ đến một nút mà bạn muốn tách thành một["`this.old.name`", "string", "thisNewName", "string"]
6 mới# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame. # Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name. # Replace EXAMPLE-FRIENDS-DATA with your table name. from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame from Glue Data Catalog glue_source_database = "
MY-EXAMPLE-DATABASE
" glue_source_table = "EXAMPLE-FRIENDS-DATA
" friends = glueContext.create_dynamic_frame.from_catalog[ database=glue_source_database, table_name=glue_source_table ] print["Schema for friends DynamicFrame before calling drop_fields:"] friends.printSchema[] # Remove location.county, remove friends.age, remove age friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]] print["Schema for friends DynamicFrame after removing age, county, and friend age:"] friends.printSchema[]
72 – Một chuỗi tên choSchema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
6 được tách ra# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame. # Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name. # Replace EXAMPLE-FRIENDS-DATA with your table name. from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame from Glue Data Catalog glue_source_database = "
MY-EXAMPLE-DATABASE
" glue_source_table = "EXAMPLE-FRIENDS-DATA
" friends = glueContext.create_dynamic_frame.from_catalog[ database=glue_source_database, table_name=glue_source_table ] print["Schema for friends DynamicFrame before calling drop_fields:"] friends.printSchema[] # Remove location.county, remove friends.age, remove age friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]] print["Schema for friends DynamicFrame after removing age, county, and friend age:"] friends.printSchema[]
74 – Một chuỗi tên choSchema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
6 còn lại sau khi các nút được chỉ định đã được tách ra# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame. # Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name. # Replace EXAMPLE-FRIENDS-DATA with your table name. from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Create a DynamicFrame from Glue Data Catalog glue_source_database = "
MY-EXAMPLE-DATABASE
" glue_source_table = "EXAMPLE-FRIENDS-DATA
" friends = glueContext.create_dynamic_frame.from_catalog[ database=glue_source_database, table_name=glue_source_table ] print["Schema for friends DynamicFrame before calling drop_fields:"] friends.printSchema[] # Remove location.county, remove friends.age, remove age friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]] print["Schema for friends DynamicFrame after removing age, county, and friend age:"] friends.printSchema[]
`transformation_ctx` – Một chuỗi duy nhất được sử dụng để xác định thông tin trạng thái (tùy chọn).
`info` – Một chuỗi được liên kết với báo cáo lỗi cho phép chuyển đổi này (tùy chọn).
`stageThreshold` – Số lượng lỗi gặp phải trong phép chuyển đổi này mà khi đạt tới thì quy trình sẽ dừng và báo lỗi (tùy chọn). Giá trị mặc định là 0, nghĩa là quy trình sẽ không dừng vì lỗi.
`totalThreshold` – Số lượng lỗi gặp phải tính đến và bao gồm cả phép chuyển đổi này mà khi đạt tới thì quy trình sẽ dừng và báo lỗi (tùy chọn). Giá trị mặc định là 0, nghĩa là quy trình sẽ không dừng vì lỗi.
Ví dụ. Sử dụng split_fields để chia các trường đã chọn thành một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
`DynamicFrame` riêng biệt
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
Ví dụ về mã này sử dụng phương pháp
Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
81 để tách danh sách các trường được chỉ định thành một ___________6 riêng biệttập dữ liệu mẫu
Ví dụ sử dụng một
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 được gọi là Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
84 từ một bộ sưu tập có tên là Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
85Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
84 có lược đồ và các mục sauSchema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
1mã ví dụ
Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
2Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
3split_rows
Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
87Tách một hoặc nhiều hàng trong một
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 thành một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 mớiPhương thức trả về một
# Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
27 mới chứa hai Schema for friends DynamicFrame before calling drop_fields:
root
|-- name: string
|-- age: int
|-- location: struct
| |-- state: string
| |-- county: string
|-- friends: array
| |-- element: struct
| | |-- name: string
| | |-- age: int
Schema for friends DynamicFrame after removing age, county, and friend age:
root
|-- name: string
|-- location: struct
| |-- state: string
|-- friends: array
| |-- element: struct
| | |-- name: string
2. # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 đầu tiên chứa tất cả các hàng đã được tách ra và cái thứ hai chứa các hàng còn lại
`comparison_dict` – Một từ điển trong đó khóa là đường dẫn đến một cột và giá trị là một từ điển khác ánh xạ các toán tử so sánh tới những giá trị dùng để so sánh với giá trị của cột. Ví dụ: `{"age": {">": 10, "<": 20}}` tách tất cả các hàng có giá trị trong cột tuổi lớn hơn 10 và nhỏ hơn 20.
`name1` – Một chuỗi tên cho `DynamicFrame` được tách ra.
`name2` – Một chuỗi tên cho `DynamicFrame` còn lại sau khi các nút được chỉ định đã được tách ra.
`transformation_ctx` – Một chuỗi duy nhất được sử dụng để xác định thông tin trạng thái (tùy chọn).
`info` – Một chuỗi được liên kết với báo cáo lỗi cho phép chuyển đổi này (tùy chọn).
`stageThreshold` – Số lượng lỗi gặp phải trong phép chuyển đổi này mà khi đạt tới, quy trình sẽ dừng và báo lỗi (tùy chọn). Giá trị mặc định là 0, nghĩa là quy trình sẽ không dừng vì lỗi.
`totalThreshold` – Số lượng lỗi gặp phải tính đến và bao gồm cả phép chuyển đổi này mà khi đạt tới, quy trình sẽ dừng và báo lỗi (tùy chọn). Giá trị mặc định là 0, nghĩa là quy trình sẽ không dừng vì lỗi.
Ví dụ: Sử dụng `split_rows` để chia các hàng trong một `DynamicFrame`
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
Ví dụ về mã này sử dụng phương pháp
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
04 để phân chia các hàng trong một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 dựa trên giá trị trường # Example: Use apply_mapping to reshape source data into
# the desired column names and types as a new DynamicFrame
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame and view its schema
persons = glueContext.create_dynamic_frame.from_catalog[
database="legislators", table_name="persons_json"
]
print["Schema for the persons DynamicFrame:"]
persons.printSchema[]
# Select and rename fields, change field type
print["Schema for the persons_mapped DynamicFrame, created with apply_mapping:"]
persons_mapped = persons.apply_mapping[
[
["family_name", "String", "last_name", "String"],
["name", "String", "first_name", "String"],
["birth_date", "String", "date_of_birth", "Date"],
]
]
persons_mapped.printSchema[]
25tập dữ liệu mẫu
Ví dụ sử dụng một
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 có tên là Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
84 được chọn từ một bộ sưu tập có tên là Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
85Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
84 có lược đồ và các mục sauSchema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
4mã ví dụ
Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
5Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
6mở hộp
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
11Bỏ hộp [định dạng lại] một trường chuỗi trong một
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 và trả về một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 mới có chứa Unfiltered record count: 163065
Filtered record count: 564
5 đã được mở hộpMột
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
9 đại diện cho một bản ghi logic trong một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6. Nó tương tự như một hàng trong Apache Spark # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
1, ngoại trừ việc nó tự mô tả và có thể được sử dụng cho dữ liệu không tuân theo một lược đồ cố định
53 – Đường dẫn đầy đủ đến nút chuỗi mà bạn muốn mở hộpSchema for the persons DynamicFrame: root |-- family_name: string |-- name: string |-- links: array | |-- element: struct | | |-- note: string | | |-- url: string |-- gender: string |-- image: string |-- identifiers: array | |-- element: struct | | |-- scheme: string | | |-- identifier: string |-- other_names: array | |-- element: struct | | |-- lang: string | | |-- note: string | | |-- name: string |-- sort_name: string |-- images: array | |-- element: struct | | |-- url: string |-- given_name: string |-- birth_date: string |-- id: string |-- contact_details: array | |-- element: struct | | |-- type: string | | |-- value: string |-- death_date: string Schema for the persons_mapped DynamicFrame, created with apply_mapping: root |-- last_name: string |-- first_name: string |-- date_of_birth: date
19 – Đặc tả định dạng [tùy chọn]. Bạn sử dụng điều này cho kết nối Amazon S3 hoặc AWS Glue hỗ trợ nhiều định dạng. Để biết các định dạng được hỗ trợ, hãy xem Tùy chọn định dạng dữ liệu cho đầu vào và đầu ra trong AWS Glue{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
18 – Một chuỗi duy nhất được sử dụng để xác định thông tin trạng thái [tùy chọn]["`this.old.name`", "string", "thisNewName", "string"]
19 – Một chuỗi được liên kết với báo cáo lỗi cho chuyển đổi này [tùy chọn]["`this.old.name`", "string", "thisNewName", "string"]
20 – Số lượng lỗi gặp phải trong quá trình chuyển đổi này mà quy trình sẽ loại bỏ lỗi [tùy chọn]. Giá trị mặc định là 0, điều này cho biết rằng quá trình sẽ không xảy ra lỗi["`this.old.name`", "string", "thisNewName", "string"]
21 – Số lượng lỗi gặp phải cho đến và bao gồm cả quá trình chuyển đổi này mà tại đó quy trình sẽ xảy ra lỗi [tùy chọn]. Giá trị mặc định là 0, điều này cho biết rằng quá trình sẽ không xảy ra lỗi["`this.old.name`", "string", "thisNewName", "string"]
1 – Một hoặc nhiều điều sau đây# Example: Use join to combine data from three DynamicFrames from pyspark.context import SparkContext from awsglue.context import GlueContext # Create GlueContext sc = SparkContext.getOrCreate[] glueContext = GlueContext[sc] # Load DynamicFrames from Glue Data Catalog persons = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="persons_json" ] memberships = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="memberships_json" ] orgs = glueContext.create_dynamic_frame.from_catalog[ database="legislators", table_name="organizations_json" ] print["Schema for the persons DynamicFrame:"] persons.printSchema[] print["Schema for the memberships DynamicFrame:"] memberships.printSchema[] print["Schema for the orgs DynamicFrame:"] orgs.printSchema[] # Join persons and memberships by ID persons_memberships = persons.join[ paths1=["id"], paths2=["person_id"], frame2=memberships ] # Rename and drop fields from orgs # to prevent field name collisions with persons_memberships orgs = [ orgs.drop_fields[["other_names", "identifiers"]] .rename_field["id", "org_id"] .rename_field["name", "org_name"] ] # Create final join of all three DynamicFrames legislators_combined = orgs.join[ paths1=["org_id"], paths2=["organization_id"], frame2=persons_memberships ].drop_fields[["person_id", "org_id"]] # Inspect the schema for the joined data print["Schema for the new legislators_combined DynamicFrame:"] legislators_combined.printSchema[]
25 – Một chuỗi chứa ký tự ngăn cách{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
26 – Một chuỗi chứa ký tự thoát{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
27 – Một giá trị Boolean cho biết có nên bỏ qua phiên bản đầu tiên hay không{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
28 – Một chuỗi chứa lược đồ. Bạn phải gọi nó bằng cách sử dụng{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
29{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
30 – Giá trị Boolean cho biết liệu tiêu đề có được bao gồm hay không{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
Ví dụ. Sử dụng unbox để bỏ hộp một trường chuỗi thành một cấu trúc
Ví dụ mã này sử dụng phương pháp
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
31 để mở hộp hoặc định dạng lại trường chuỗi trong # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 thành trường kiểu cấu trúctập dữ liệu mẫu
Ví dụ sử dụng một
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 được gọi là {"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
34 với lược đồ và các mục nhập sauLưu ý trường có tên
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
35. Đây là trường mà ví dụ mở hộp thành một cấu trúcSchema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
7mã ví dụ
Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
8Schema for the persons DynamicFrame:
root
|-- family_name: string
|-- name: string
|-- links: array
| |-- element: struct
| | |-- note: string
| | |-- url: string
|-- gender: string
|-- image: string
|-- identifiers: array
| |-- element: struct
| | |-- scheme: string
| | |-- identifier: string
|-- other_names: array
| |-- element: struct
| | |-- lang: string
| | |-- note: string
| | |-- name: string
|-- sort_name: string
|-- images: array
| |-- element: struct
| | |-- url: string
|-- given_name: string
|-- birth_date: string
|-- id: string
|-- contact_details: array
| |-- element: struct
| | |-- type: string
| | |-- value: string
|-- death_date: string
Schema for the persons_mapped DynamicFrame, created with apply_mapping:
root
|-- last_name: string
|-- first_name: string
|-- date_of_birth: date
9không hợp nhau
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
36Bỏ lồng các đối tượng lồng nhau trong một
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6, làm cho chúng trở thành các đối tượng cấp cao nhất và trả về một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 mới chưa được lồng
18 – Một chuỗi duy nhất được sử dụng để xác định thông tin trạng thái [tùy chọn]["`this.old.name`", "string", "thisNewName", "string"]
19 – Một chuỗi được liên kết với báo cáo lỗi cho chuyển đổi này [tùy chọn]["`this.old.name`", "string", "thisNewName", "string"]
20 – Số lượng lỗi gặp phải trong quá trình chuyển đổi này mà quy trình sẽ loại bỏ lỗi [tùy chọn]. Giá trị mặc định là 0, điều này cho biết rằng quá trình sẽ không xảy ra lỗi["`this.old.name`", "string", "thisNewName", "string"]
21 – Số lượng lỗi gặp phải cho đến và bao gồm cả quá trình chuyển đổi này mà tại đó quy trình sẽ xảy ra lỗi [tùy chọn]. Giá trị mặc định là 0, điều này cho biết rằng quá trình sẽ không xảy ra lỗi["`this.old.name`", "string", "thisNewName", "string"]
Ví dụ. Sử dụng unnest để biến các trường lồng nhau thành các trường cấp cao nhất
Ví dụ mã này sử dụng phương pháp
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
43 để làm phẳng tất cả các trường lồng nhau trong một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 thành các trường cấp cao nhấttập dữ liệu mẫu
Ví dụ sử dụng một
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 được gọi là {"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
46 với lược đồ sau. Lưu ý rằng trường {"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
47 là trường duy nhất chứa dữ liệu lồng nhau{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
0mã ví dụ
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
1{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
2unnest_ddb_json
Bỏ lồng các cột lồng nhau trong một
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 cụ thể trong cấu trúc JSON của DynamoDB và trả về một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 mới chưa lồng. Các cột thuộc một mảng các kiểu cấu trúc sẽ không được bỏ lồng. Lưu ý rằng đây là một loại biến đổi hủy lồng cụ thể hoạt động khác với biến đổi {"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
43 thông thường và yêu cầu dữ liệu phải có sẵn trong cấu trúc JSON của DynamoDB. Để biết thêm thông tin, xem{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
51
18 – Một chuỗi duy nhất được sử dụng để xác định thông tin trạng thái [tùy chọn]["`this.old.name`", "string", "thisNewName", "string"]
19 – Một chuỗi được liên kết với báo cáo lỗi cho chuyển đổi này [tùy chọn]["`this.old.name`", "string", "thisNewName", "string"]
20 – Số lượng lỗi gặp phải trong quá trình chuyển đổi này mà tại đó quy trình sẽ loại bỏ lỗi [tùy chọn. 0 theo mặc định, cho biết rằng quy trình sẽ không bị lỗi]["`this.old.name`", "string", "thisNewName", "string"]
21 – Số lượng lỗi gặp phải cho đến và bao gồm cả quá trình chuyển đổi này mà tại đó quy trình sẽ loại bỏ lỗi [tùy chọn. 0 theo mặc định, cho biết rằng quy trình sẽ không bị lỗi]["`this.old.name`", "string", "thisNewName", "string"]
Ví dụ: lược đồ đọc một bản xuất có cấu trúc JSON của DynamoDB có thể giống như sau
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
3Biến đổi
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
56 sẽ chuyển đổi cái này thành{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
4Ví dụ mã sau đây cho biết cách sử dụng trình kết nối xuất AWS Glue DynamoDB, gọi một JSON DynamoDB không hợp lệ và in số lượng phân vùng
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
5viết
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
57Nhận một loại kết nối được chỉ định từ lớp GlueContext của
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 này và sử dụng nó để định dạng và viết nội dung của # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 này. Trả về # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 mới được định dạng và viết như đã chỉ định
61 – Loại kết nối sẽ sử dụng. Các giá trị hợp lệ bao gồm{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
62,{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
63,{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
64,{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
65,{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
66 và{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
67{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
68 – Tùy chọn kết nối để sử dụng [tùy chọn]. Đối với một{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
61 của{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
62, một đường dẫn Amazon S3 được xác định{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
6{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
Đối với các kết nối JDBC, một số thuộc tính phải được xác định. Lưu ý rằng tên cơ sở dữ liệu phải là một phần của URL. Ngoài ra, tên cơ sở dữ liệu cũng có thể được đưa vào các tùy chọn kết nối (không bắt buộc).
7{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
19 – Đặc tả định dạng [tùy chọn]. Điều này được sử dụng cho Dịch vụ lưu trữ đơn giản của Amazon [Amazon S3] hoặc kết nối AWS Glue hỗ trợ nhiều định dạng. Xem Tùy chọn định dạng dữ liệu cho đầu vào và đầu ra trong AWS Glue để biết các định dạng được hỗ trợ{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
72 – Tùy chọn định dạng cho định dạng được chỉ định. Xem Tùy chọn định dạng dữ liệu cho đầu vào và đầu ra trong AWS Glue để biết các định dạng được hỗ trợ{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
73 – Kích thước có thể tích lũy để sử dụng [tùy chọn]{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []} {"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]} {"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]} {"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}} {"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
— lỗi —
assertErrorThreshold
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
74 – Khẳng định về lỗi trong các phép biến đổi đã tạo ra điều này # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6. Trả về một {"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
76 từ cơ sở # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
errorsAsDynamicFrame
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
78 – Trả về một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 có các bản ghi lỗi được lồng bên trong.
Ví dụ: Sử dụng errorsAsDynamicFrame để xem các bản ghi lỗi
Ví dụ mã sau đây cho thấy cách sử dụng phương thức
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
80 để xem bản ghi lỗi cho một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6tập dữ liệu mẫu
Ví dụ sử dụng tập dữ liệu sau mà bạn có thể tải lên Amazon S3 dưới dạng JSON. Lưu ý rằng bản ghi thứ hai không đúng định dạng. Dữ liệu không đúng định dạng thường phá vỡ quá trình phân tích cú pháp tệp khi bạn sử dụng SparkSQL. Tuy nhiên,
# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 nhận ra các vấn đề về định dạng sai và chuyển các dòng không đúng định dạng thành các bản ghi lỗi mà bạn có thể xử lý riêng lẻ. {"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
8mã ví dụ
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
9# Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
errorsCount
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
83 – Trả về tổng số lỗi trong một # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
stageErrorsCount
{"name": "Sally", "age": 23, "location": {"state": "WY", "county": "Fremont"}, "friends": []}
{"name": "Varun", "age": 34, "location": {"state": "NE", "county": "Douglas"}, "friends": [{"name": "Arjun", "age": 3}]}
{"name": "George", "age": 52, "location": {"state": "NY"}, "friends": [{"name": "Fred"}, {"name": "Amy", "age": 15}]}
{"name": "Haruki", "age": 21, "location": {"state": "AK", "county": "Denali"}}
{"name": "Sheila", "age": 63, "friends": [{"name": "Nancy", "age": 22}]}
85 – Trả về số lượng lỗi đã xảy ra trong quá trình tạo # Example: Use drop_fields to remove top-level and nested fields from a DynamicFrame.
# Replace MY-EXAMPLE-DATABASE with your Glue Data Catalog database name.
# Replace EXAMPLE-FRIENDS-DATA with your table name.
from pyspark.context import SparkContext
from awsglue.context import GlueContext
# Create GlueContext
sc = SparkContext.getOrCreate[]
glueContext = GlueContext[sc]
# Create a DynamicFrame from Glue Data Catalog
glue_source_database = "MY-EXAMPLE-DATABASE
"
glue_source_table = "EXAMPLE-FRIENDS-DATA
"
friends = glueContext.create_dynamic_frame.from_catalog[
database=glue_source_database, table_name=glue_source_table
]
print["Schema for friends DynamicFrame before calling drop_fields:"]
friends.printSchema[]
# Remove location.county, remove friends.age, remove age
friends = friends.drop_fields[paths=["age", "location.county", "friends.age"]]
print["Schema for friends DynamicFrame after removing age, county, and friend age:"]
friends.printSchema[]
6 này